Convert Python macro to an extension

Hello.
I want the features in this code to be turned into an extension. I have spent a lot of time struggling with AI and testing it over and over, but I’m exhausted from the repeated errors. I thought maybe you, as experienced programmers, could help me.

import re
import xml.etree.ElementTree as ET
import uno
from com.sun.star.awt import MessageBoxButtons as MBButtons
from com.sun.star.awt.MessageBoxType import MESSAGEBOX

# ← Convert English digits to Persian
def en_to_fa_numbers(text):
    """Return *text* with each ASCII digit 0-9 replaced by its Persian digit.

    Characters other than the ten ASCII digits are returned unchanged.
    """
    persian = "۰۱۲۳۴۵۶۷۸۹"
    # Map the code point of each ASCII digit to the Persian digit of equal value.
    digit_table = {ord(str(value)): persian[value] for value in range(10)}
    return text.translate(digit_table)

# ← Load incorrect/correct words from XML file
def load_replacements(path):
    """Read a block-list XML file and return a ``{wrong: correct}`` dict.

    Only ``block`` elements that are direct children of the root element and
    live in the ``http://openoffice.org/2001/block-list`` namespace are read.
    The ``abbreviated-name`` attribute is the key and the ``name`` attribute is
    the value; entries where either attribute is missing or empty are skipped.
    If a key occurs more than once, the last value wins.
    """
    namespace = "http://openoffice.org/2001/block-list"
    block_tag = f"{{{namespace}}}block"
    wrong_attr = f"{{{namespace}}}abbreviated-name"
    correct_attr = f"{{{namespace}}}name"

    document_root = ET.parse(path).getroot()
    pairs = (
        (entry.get(wrong_attr), entry.get(correct_attr))
        for entry in document_root.findall(block_tag)
    )
    return {wrong: correct for wrong, correct in pairs if wrong and correct}

# ← Correction bank, loaded once when this module is imported.
#   NOTE(review): the path is hard-coded to one user's profile directory; if the
#   file is missing, ET.parse raises and the whole module fails to load.
REPLACEMENTS = load_replacements("/home/afshin/.config/libreoffice/4/user/Scripts/python/DocumentList.xml")

def fix_text_full(event=None):
    """Apply Persian typographic fixes to the current Writer document.

    Every paragraph of the document's main text is passed through a fixed
    sequence of corrections (half-space after "می", Arabic Kaf/Yeh, Latin
    punctuation, straight quotes, ASCII and Arabic-Indic digits, "ه ی",
    repeated question marks, space before punctuation) and then through the
    word bank in ``REPLACEMENTS``. When at least one correction was counted,
    a message box lists the per-category totals using Persian digits.

    If the current component is not a Writer text document, a message box
    says so and nothing is modified. If there is no current component at
    all, the function returns silently.

    Args:
        event: Unused; accepted so the function can be called with or
            without an argument.
    """
    ctx = uno.getComponentContext()
    smgr = ctx.ServiceManager
    desktop = smgr.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
    doc = desktop.getCurrentComponent()

    # getCurrentComponent() may hand back nothing; there is no window to
    # report into in that case, so just stop.
    if doc is None:
        return

    if not doc.supportsService("com.sun.star.text.TextDocument"):
        try:
            parent_win = doc.CurrentController.Frame.ContainerWindow
            mb = parent_win.getToolkit().createMessageBox(
                parent_win, MESSAGEBOX, MBButtons.BUTTONS_OK,
                "Report", "This macro can only run on a Writer document."
            )
            mb.execute()
        except Exception:
            # Best effort: the notice is purely informational, so a component
            # without a usable window must not turn into a macro error.
            # (Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
            # propagate.)
            pass
        return

    doc_text = doc.getText()

    # The main text enumerates paragraphs *and* text tables; only paragraphs
    # expose getString()/setString(), so anything else is skipped. The list is
    # collected up front so the document is not edited while being enumerated.
    paragraphs = []
    para_enum = doc_text.createEnumeration()
    while para_enum.hasMoreElements():
        element = para_enum.nextElement()
        if element.supportsService("com.sun.star.text.Paragraph"):
            paragraphs.append(element)

    report_counts = {
        "می‌": 0,
        "ك→ک": 0,
        "ي→ی": 0,
        "،؛؟": 0,
        "Quotes": 0,
        "EN digits→FA": 0,
        "Arabic digits→FA": 0,
        "ه ی → هٔ": 0,
        "Multiple question marks": 0,
        "Extra space before punctuation": 0,
        "Spelling corrections (bank)": 0,
    }

    # Patterns and translation tables do not depend on the paragraph, so they
    # are built once here rather than once per paragraph.
    re_mi = re.compile(r"می(?!\u200c)\s+([\u0600-\u06FF]+)")
    re_he_ye = re.compile(r"(\S*ه)[\s\u200c]ی\b")
    # By the time this pattern runs every "?" has already become "؟", so the
    # Persian mark must be matched as well or the rule can never fire.
    re_multi_question = re.compile(r"[?؟]{2,}")
    re_space_before_punct = re.compile(r"\s+([،؛؟.])")

    punct_map = {",": "،", ";": "؛", "?": "؟"}
    en_digits = "0123456789"
    arabic_digits = "٠١٢٣٤٥٦٧٨٩"
    fa_digits = "۰۱۲۳۴۵۶۷۸۹"
    trans_digits = str.maketrans(en_digits, fa_digits)
    trans_ar_digits = str.maketrans(arabic_digits, fa_digits)

    # Word bank as (compiled whole-word pattern, correction) pairs.
    bank = [
        (re.compile(r"\b" + re.escape(wrong) + r"\b"), correct)
        for wrong, correct in REPLACEMENTS.items()
    ]

    def fix_paragraph_text(full_text):
        """Return *full_text* with rules 1-10 applied, updating report_counts."""
        new_text = full_text

        # 1. "می " → "می‌"
        new_text, hits = re_mi.subn(lambda m: "می\u200c" + m.group(1), new_text)
        report_counts["می‌"] += hits

        # 2. Arabic Kaf → Persian Kaf
        report_counts["ك→ک"] += new_text.count("ك")
        new_text = new_text.replace("ك", "ک")

        # 3. Arabic Yeh → Persian Yeh
        report_counts["ي→ی"] += new_text.count("ي")
        new_text = new_text.replace("ي", "ی")

        # 4. Punctuation marks
        for en_punct, fa_punct in punct_map.items():
            report_counts["،؛؟"] += new_text.count(en_punct)
            new_text = new_text.replace(en_punct, fa_punct)

        # 5. English quotes → Persian quotes (alternating open/close,
        #    restarting with an opening quote in every paragraph)
        count_quotes = new_text.count('"')
        if count_quotes:
            result = []
            open_quote = True
            for ch in new_text:
                if ch == '"':
                    result.append('«' if open_quote else '»')
                    open_quote = not open_quote
                else:
                    result.append(ch)
            new_text = ''.join(result)
            report_counts["Quotes"] += count_quotes

        # 6. English digits → Persian digits
        report_counts["EN digits→FA"] += sum(new_text.count(d) for d in en_digits)
        new_text = new_text.translate(trans_digits)

        # 7. Arabic digits → Persian digits
        report_counts["Arabic digits→FA"] += sum(
            new_text.count(d) for d in arabic_digits
        )
        new_text = new_text.translate(trans_ar_digits)

        # 8. "ه ی" → "هٔ"
        new_text, hits = re_he_ye.subn(lambda m: m.group(1) + "ٔ", new_text)
        report_counts["ه ی → هٔ"] += hits

        # 9. Multiple question marks → one
        new_text, hits = re_multi_question.subn("؟", new_text)
        report_counts["Multiple question marks"] += hits

        # 10. Extra space before punctuation
        new_text, hits = re_space_before_punct.subn(r"\1", new_text)
        report_counts["Extra space before punctuation"] += hits

        return new_text

    def apply_replacements(para_text, compiled_bank):
        """Return *para_text* with every whole-word bank entry corrected."""
        for pattern, correct in compiled_bank:
            # A callable replacement inserts the correction literally; passing
            # the string itself would make re interpret backslashes in it.
            para_text, hits = pattern.subn(
                lambda _m, repl=correct: repl, para_text
            )
            report_counts["Spelling corrections (bank)"] += hits
        return para_text

    cursor = doc_text.createTextCursor()
    for para in paragraphs:
        original_text = para.getString()
        fixed_text = apply_replacements(fix_paragraph_text(original_text), bank)

        # Leave untouched paragraphs alone instead of rewriting identical text.
        if fixed_text == original_text:
            continue

        # Select start→end by range rather than goRight(len(...)): goRight
        # takes a UNO short, which cannot express very long paragraphs.
        cursor.gotoRange(para.getStart(), False)
        cursor.gotoRange(para.getEnd(), True)
        cursor.setString(fixed_text)

    # ← Final report with Persian digits
    total = sum(report_counts.values())
    if total > 0:
        lines = [f"{k}: {en_to_fa_numbers(str(v))}" for k, v in report_counts.items() if v > 0]
        report = f"Total corrections: {en_to_fa_numbers(str(total))}\n" + "\n".join(lines)

        parent_win = doc.CurrentController.Frame.ContainerWindow
        toolkit = parent_win.getToolkit()
        mb = toolkit.createMessageBox(
            parent_win, MESSAGEBOX, MBButtons.BUTTONS_OK,
            "Text Correction Report", report
        )

        # ← Try to enlarge and fix the window size
        mb.setPosSize(100, 100, 500, 400, 15)  # X, Y, Width, Height, Flags

        mb.execute()

paragraphs = [ para for para in text ]
1 Like

broken syntax!!

1 Like

This is a bit half-baked but I’ll post it as a solution anyway.

LibreOffice files are zip files so you can load the files to a repository then run a workflow to assemble them and zip them up. It’s a fair while since I looked at it but
GitHub - flywire/libreoffice-code-highlighter at BuildDevVer demonstrated the process.

IIRC GitHub zips a zip (i.e. there is no need to put the .oxt file in the repository), but the release’s final unzipped file needs to be an .oxt. The original repository owner wasn’t receptive to the solution of doing it all within workflows instead of a desktop process, so I didn’t pursue it.

1 Like