Hello.
I would like the features in the code below to be packaged as a LibreOffice extension. I have spent a lot of time trying to do this with AI tools and testing the results over and over, but I am exhausted by the repeated errors. I hope that you, as experienced programmers, can help me.
import re
import xml.etree.ElementTree as ET
import uno
from com.sun.star.awt import MessageBoxButtons as MBButtons
from com.sun.star.awt.MessageBoxType import MESSAGEBOX
# Translation table mapping ASCII digits 0-9 to the Persian digits; built once
# at import time instead of on every call.
_EN_TO_FA_DIGIT_TABLE = str.maketrans("0123456789", "۰۱۲۳۴۵۶۷۸۹")


def en_to_fa_numbers(text: str) -> str:
    """Return *text* with every ASCII digit replaced by its Persian digit.

    All other characters are returned unchanged.
    """
    return text.translate(_EN_TO_FA_DIGIT_TABLE)
# XML namespace of the block-list file; used for the element name and for
# both attribute names, so it is defined exactly once.
_BLOCK_LIST_NS = "http://openoffice.org/2001/block-list"


def load_replacements(path):
    """Load the incorrect -> correct word bank from a block-list XML file.

    Args:
        path: File name (or open file object) handed to ``ET.parse``.

    Returns:
        A dict mapping each ``abbreviated-name`` attribute (the wrong
        spelling) to its ``name`` attribute (the correct spelling). Only
        ``block`` elements that are direct children of the root are read,
        and entries where either attribute is missing or empty are skipped.

    Raises:
        OSError: if the file cannot be opened.
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    root = ET.parse(path).getroot()

    # ElementTree spells namespaced names as "{uri}local".
    block_tag = f"{{{_BLOCK_LIST_NS}}}block"
    wrong_attr = f"{{{_BLOCK_LIST_NS}}}abbreviated-name"
    correct_attr = f"{{{_BLOCK_LIST_NS}}}name"

    replacements = {}
    for block in root.findall(block_tag):
        wrong = block.get(wrong_attr)
        correct = block.get(correct_attr)
        if wrong and correct:
            replacements[wrong] = correct
    return replacements
def _user_replacements_path():
    """Return the system path of ``Scripts/python/DocumentList.xml`` in the
    current LibreOffice user profile.

    The profile directory is resolved through the office's PathSubstitution
    service (``$(user)``) so the macro is not tied to one user's home
    directory or to one operating system.
    """
    ctx = uno.getComponentContext()
    substitution = ctx.ServiceManager.createInstanceWithContext(
        "com.sun.star.util.PathSubstitution", ctx
    )
    profile_url = substitution.substituteVariables("$(user)", True)
    return uno.fileUrlToSystemPath(
        profile_url + "/Scripts/python/DocumentList.xml"
    )


def _load_default_replacements():
    """Load the spelling bank, or return an empty bank if it is unavailable.

    A missing or malformed bank must not prevent the module from loading:
    the other corrections do not depend on it. With an empty bank the
    "Spelling corrections (bank)" counter simply stays at zero.
    """
    try:
        return load_replacements(_user_replacements_path())
    except (OSError, ET.ParseError):
        return {}


# Wrong -> correct word bank, loaded once when the module is imported.
REPLACEMENTS = _load_default_replacements()
# Report categories, in the order they are listed in the final message box.
_REPORT_KEYS = (
    "می",
    "ك→ک",
    "ي→ی",
    "،؛؟",
    "Quotes",
    "EN digits→FA",
    "Arabic digits→FA",
    "ه ی → هٔ",
    "Multiple question marks",
    "Extra space before punctuation",
    "Spelling corrections (bank)",
)

# (report key, Arabic letter, Persian letter)
_LETTER_FIXES = (
    ("ك→ک", "ك", "ک"),
    ("ي→ی", "ي", "ی"),
)

# Latin punctuation -> Persian punctuation.
_PUNCTUATION = {",": "،", ";": "؛", "?": "؟"}

_EN_DIGITS = "0123456789"
_FA_DIGITS = "۰۱۲۳۴۵۶۷۸۹"
_AR_DIGITS = "٠١٢٣٤٥٦٧٨٩"
_EN_TO_FA = str.maketrans(_EN_DIGITS, _FA_DIGITS)
_AR_TO_FA = str.maketrans(_AR_DIGITS, _FA_DIGITS)

# The verb prefix "می" / "نمی" written as a separate word. The leading \b
# keeps the rule from firing on words that merely END in "می".
_MI_PREFIX_RE = re.compile(r"\b(ن?می)\s+([\u0600-\u06FF]+)")
# "<word ending in ه>" + space or ZWNJ + standalone "ی".
_HE_YE_RE = re.compile(r"(\S*ه)[\s\u200c]ی\b")
# Runs of two or more question marks, Latin or Persian.
_REPEATED_QUESTION_RE = re.compile(r"[?؟]{2,}")
# Whitespace directly in front of a punctuation mark.
_SPACE_BEFORE_PUNCT_RE = re.compile(r"\s+([،؛؟.])")


def _fix_paragraph_text(text, counts):
    """Apply the character-level corrections to one paragraph's text.

    Args:
        text: The paragraph text.
        counts: Dict keyed by the report categories; incremented in place
            with the number of corrections made by each rule.

    Returns:
        The corrected text.
    """
    # Arabic kaf / yeh -> Persian forms. Done first so that the later rules,
    # which are written with Persian yeh, also see text typed with Arabic yeh.
    for key, arabic, persian in _LETTER_FIXES:
        counts[key] += text.count(arabic)
        text = text.replace(arabic, persian)

    # "می روم" -> "می<ZWNJ>روم" (and the negative "نمی").
    text, hits = _MI_PREFIX_RE.subn(
        lambda m: m.group(1) + "\u200c" + m.group(2), text
    )
    counts["می"] += hits

    # Latin punctuation -> Persian punctuation.
    for latin, persian in _PUNCTUATION.items():
        counts["،؛؟"] += text.count(latin)
        text = text.replace(latin, persian)

    # Straight double quotes -> alternating « and », starting with «.
    quote_total = text.count('"')
    if quote_total:
        pieces = text.split('"')
        rebuilt = [pieces[0]]
        for index, piece in enumerate(pieces[1:]):
            rebuilt.append("«" if index % 2 == 0 else "»")
            rebuilt.append(piece)
        text = "".join(rebuilt)
        counts["Quotes"] += quote_total

    # ASCII digits, then Arabic-Indic digits -> Persian digits.
    counts["EN digits→FA"] += sum(text.count(d) for d in _EN_DIGITS)
    text = text.translate(_EN_TO_FA)
    counts["Arabic digits→FA"] += sum(text.count(d) for d in _AR_DIGITS)
    text = text.translate(_AR_TO_FA)

    # "خانه ی" -> "خانهٔ".
    text, hits = _HE_YE_RE.subn(lambda m: m.group(1) + "ٔ", text)
    counts["ه ی → هٔ"] += hits

    # Collapse repeated question marks. By this point every "?" has already
    # become "؟", so the pattern must accept the Persian mark as well.
    text, hits = _REPEATED_QUESTION_RE.subn("؟", text)
    counts["Multiple question marks"] += hits

    # Drop whitespace in front of punctuation.
    text, hits = _SPACE_BEFORE_PUNCT_RE.subn(r"\1", text)
    counts["Extra space before punctuation"] += hits

    return text


def _compile_replacements(replacements):
    """Compile the spelling bank into ``(pattern, correct)`` pairs.

    Each wrong form is matched only when it is not glued to a word character
    on either side. For entries that start and end with a word character this
    is the same as wrapping them in ``\\b``; unlike ``\\b`` it also works for
    entries that start or end with punctuation.
    """
    return [
        (re.compile(r"(?<!\w)" + re.escape(wrong) + r"(?!\w)"), correct)
        for wrong, correct in replacements.items()
    ]


def _apply_replacements(text, compiled, counts):
    """Apply the compiled spelling bank to *text*, one entry after another.

    Args:
        text: The text to correct.
        compiled: Pairs produced by ``_compile_replacements``.
        counts: Report counters; "Spelling corrections (bank)" is
            incremented by the number of substitutions made.

    Returns:
        The corrected text.
    """
    for pattern, correct in compiled:
        # A function replacement inserts `correct` literally; a plain string
        # would be parsed as a template (backslashes, group references).
        text, hits = pattern.subn(lambda _m, _fixed=correct: _fixed, text)
        counts["Spelling corrections (bank)"] += hits
    return text


def _create_message_box(doc, title, message):
    """Create an OK-only message box parented to *doc*'s frame window."""
    parent_win = doc.CurrentController.Frame.ContainerWindow
    return parent_win.getToolkit().createMessageBox(
        parent_win, MESSAGEBOX, MBButtons.BUTTONS_OK, title, message
    )


def fix_text_full(event=None):
    """Correct Persian typography in the current Writer document.

    Every paragraph of the document's main text is run through the
    character-level rules and then through the spelling bank in
    ``REPLACEMENTS``. If anything was corrected, a message box lists the
    totals per category using Persian digits.

    Only paragraphs returned by the main text's enumeration are processed;
    other enumerated elements (such as tables) are skipped. A paragraph whose
    text changes is rewritten with a single ``setString`` call; paragraphs
    that need no correction are left untouched.

    Args:
        event: Unused; accepted so the function can be bound to a toolbar
            button or event.
    """
    ctx = uno.getComponentContext()
    smgr = ctx.ServiceManager
    desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
    doc = desktop.getCurrentComponent()
    if doc is None:
        return

    if not doc.supportsService("com.sun.star.text.TextDocument"):
        # Best effort only: the current component may not have a usable
        # frame window to parent the message box to.
        try:
            _create_message_box(
                doc, "Report", "This macro can only run on a Writer document."
            ).execute()
        except Exception:
            pass
        return

    text = doc.getText()

    # Collect the paragraphs before editing anything, so the enumeration is
    # never walked while the document is being modified.
    paragraphs = []
    para_enum = text.createEnumeration()
    while para_enum.hasMoreElements():
        element = para_enum.nextElement()
        # The enumeration also yields non-paragraph content such as tables,
        # which have no getString().
        if element.supportsService("com.sun.star.text.Paragraph"):
            paragraphs.append(element)

    report_counts = dict.fromkeys(_REPORT_KEYS, 0)
    # Compile the bank once per run rather than once per paragraph.
    compiled_bank = _compile_replacements(REPLACEMENTS)

    cursor = text.createTextCursor()
    for para in paragraphs:
        original_text = para.getString()
        fixed_text = _fix_paragraph_text(original_text, report_counts)
        fixed_text = _apply_replacements(fixed_text, compiled_bank, report_counts)
        if fixed_text == original_text:
            continue
        # Select from the paragraph's start to its end by range. Counting
        # characters with goRight() is limited to a UNO `short` and breaks
        # on long paragraphs.
        cursor.gotoRange(para.getStart(), False)
        cursor.gotoRange(para.getEnd(), True)
        cursor.setString(fixed_text)

    # Final report with Persian digits.
    total = sum(report_counts.values())
    if total > 0:
        lines = [
            f"{k}: {en_to_fa_numbers(str(v))}"
            for k, v in report_counts.items()
            if v > 0
        ]
        report = (
            f"Total corrections: {en_to_fa_numbers(str(total))}\n"
            + "\n".join(lines)
        )
        mb = _create_message_box(doc, "Text Correction Report", report)
        # Try to enlarge and fix the window size.
        mb.setPosSize(100, 100, 500, 400, 15)  # X, Y, Width, Height, Flags
        mb.execute()