Extracting EMF images with API

I am trying to automatically extract EMF images from documents using the LibreOffice API from Python. I am using LibreOffice 7.3.2.2.

When I save the images using the GUI, the resulting files have the correct contents. When I save them using the API, the results seem to be heavily modified.

I’ve attached a minimal example ODT file containing one EMF image with one curve in it. I’ve also put the source for my image extracting script at the bottom of this post.

In the GUI, when I right-click the image and open the properties, I see that the image type is reported as WMF, rather than EMF.

When I right-click the image and click “Save…” I am prompted to choose a .wmf filename. However, the resulting file is actually EMF format, and is identical to the file that was put into the document. This is fine for my purposes, I can easily rename the file.

When I run my image extracting script, it again reports the image type as WMF (image/x-wmf). However, the WMF file it saves is an actual WMF file. It appears similar to the input EMF file, but the curve has been replaced with a series of polygons. The EMF file saved by the script also appears similar to the input EMF file but is much more complex.

Is there any way to use the API to get the same result as the “Save…” context menu entry?


Image extracting script:

import argparse
import pathlib
import shutil
import subprocess
import sys
import tempfile
import time
import uno

from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException

class soffice_connection:
  """Own a private headless soffice process and a UNO connection to it.

  Creating an instance makes a temporary user-profile directory, starts
  soffice listening on a named pipe called after that directory, and then
  retries the connection once per second, for at most 30 attempts.

  Attributes:
    profile_dir: pathlib.Path of the temporary profile; None after close().
    process: subprocess.Popen handle of the launched soffice; None after close().
    component_loader: remote 'com.sun.star.frame.Desktop' instance.
    graphic_provider: remote 'com.sun.star.graphic.GraphicProvider' instance.

  Raises:
    NoConnectException: if soffice could not be reached within the retry
      window. The process and the profile directory are cleaned up first.
  """

  def __init__(self):
    # Define everything close() looks at before anything can fail, so that
    # cleaning up a half-constructed instance never hits a missing attribute.
    self.process = None
    self.component_loader = None
    self.graphic_provider = None
    self.profile_dir = pathlib.Path(tempfile.mkdtemp(prefix='MYUNO_'))
    # mkdtemp makes the directory name unique, so it doubles as the pipe name.
    pipename = self.profile_dir.name

    # soffice.exe is looked up as a sibling of the interpreter's grandparent
    # directory -- presumably the Python bundled with LibreOffice; TODO confirm.
    soffice_path = pathlib.Path(sys.executable).parent.parent.with_name('soffice.exe')
    # An argument list needs no manual quoting, and keeping the handle lets
    # close() wait for the process instead of sleeping for a fixed time.
    self.process = subprocess.Popen([
      str(soffice_path),
      f'--accept=pipe,name={pipename};urp;',
      f'-env:UserInstallation={self.profile_dir.as_uri()}',
      '--headless',
    ])

    local_context = uno.getComponentContext()
    resolver = local_context.ServiceManager.createInstanceWithContext(
        "com.sun.star.bridge.UnoUrlResolver", local_context
    )

    # soffice will take a few seconds to start
    context = None
    for _ in range(30):
      time.sleep(1)
      try:
        context = resolver.resolve(
          f"uno:pipe,name={pipename};urp;StarOffice.ComponentContext"
        )
      except NoConnectException:
        pass  # not listening yet; try again on the next iteration
      else:
        break

    if context is None:
      # Do not leave an orphaned soffice and profile directory behind.
      self.close()
      raise NoConnectException

    naming_service = resolver.resolve(
        f"uno:pipe,name={pipename};urp;StarOffice.NamingService"
    )

    service_manager = naming_service.getRegisteredObject(
        'StarOffice.ServiceManager'
    )

    self.component_loader = service_manager.createInstance('com.sun.star.frame.Desktop')
    self.graphic_provider = service_manager.createInstance('com.sun.star.graphic.GraphicProvider')

  def close(self):
    """Terminate soffice and delete the temporary profile; safe to call twice."""
    loader = getattr(self, 'component_loader', None)
    process = getattr(self, 'process', None)
    profile_dir = getattr(self, 'profile_dir', None)
    # Forget the resources first so that a repeated call is a no-op.
    self.component_loader = self.graphic_provider = None
    self.process = None
    self.profile_dir = None

    try:
      if loader is not None:
        loader.terminate()
    finally:
      if process is not None:
        try:
          # A clean exit can only be expected if terminate() was delivered.
          process.wait(timeout=10 if loader is not None else 0)
        except subprocess.TimeoutExpired:
          process.kill()
          process.wait()
      if profile_dir is not None:
        shutil.rmtree(profile_dir)

  def __del__(self):
    self.close()

def process_file(input_file_path, soffice):
  """Save every graphic object of a document as both WMF and EMF.

  The document is opened read-only through ``soffice.component_loader``.
  For each graphic object its reported MIME type is printed, and the graphic
  is written with ``soffice.graphic_provider.storeGraphic`` to
  ``out/<name>.wmf`` and ``out/<name>.emf``. The ``out`` directory is
  relative to the current working directory and is created if missing.
  The document is closed again before returning, even on error.

  Args:
    input_file_path: pathlib.Path of the document; ``as_uri()`` is called on
      it, so it has to be absolute.
    soffice: object providing ``component_loader`` and ``graphic_provider``.

  Raises:
    RuntimeError: if the loader returned no document.
  """
  doc = soffice.component_loader.loadComponentFromURL(
    input_file_path.as_uri(), "_blank", 0,
    (PropertyValue(Name='ReadOnly',Value=True),)
  )
  if doc is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError(f'could not load document: {input_file_path}')

  # (file suffix, MIME type handed to storeGraphic), in output order
  export_formats = (('.wmf', 'image/x-wmf'), ('.emf', 'image/x-emf'))

  try:
    output_dir = pathlib.Path('out')
    output_dir.mkdir(exist_ok=True)

    graphic_objects = doc.getGraphicObjects()
    for go_name in graphic_objects.getElementNames():
      graphic = graphic_objects.getByName(go_name).Graphic
      print(graphic.MimeType)
      for suffix, mime_type in export_formats:
        target_path = (output_dir / (go_name + suffix)).resolve()
        soffice.graphic_provider.storeGraphic(graphic,
          (
            PropertyValue(Name='URL',Value=target_path.as_uri()),
            PropertyValue(Name='MimeType',Value=mime_type)
          )
        )
  finally:
    # Release the document so it does not stay open inside soffice.
    doc.close(True)

def main():
  """Command-line entry point: extract the graphics of one document.

  Reads the single positional ``input`` argument, starts a soffice
  connection and passes the resolved path to process_file.

  Returns:
    0 once process_file has returned.
  """
  cli = argparse.ArgumentParser(description='Minimal graphic extraction test')
  cli.add_argument('input', type=pathlib.Path, help='File to extract graphics from')
  options = cli.parse_args()

  office = soffice_connection()

  # process_file turns the path into a file URI, which needs an absolute path.
  document_path = options.input.resolve()
  process_file(document_path, office)

  return 0

# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == '__main__':
  raise SystemExit(main())

test.odt (19.1 KB)

I am now working around this problem by opening the ODT file as a ZIP archive and extracting the image files from it directly.

Try the com.sun.star.drawing.GraphicExportFilter service:

import uno
from com.sun.star.beans import PropertyValue

# Component context of the process this script runs in.
CTX = uno.getComponentContext()
# Service manager of that context; _export_image creates its exporter from it.
SM = CTX.ServiceManager


def _export_image(image, target_dir_url='file:///home/mau'):
    """Export one graphic object to ``<target_dir_url>/<image.Name>.emf``.

    Args:
        image: graphic object to export; its ``Name`` becomes the file name.
        target_dir_url: ``file://`` URL of the destination directory. The
            default keeps the location this script originally hard-coded.
    """
    from urllib.parse import quote

    # Percent-encode the name so that spaces, '#', '/' and similar characters
    # still produce a valid file URL.
    file_name = quote(image.Name, safe='')
    path = f"{target_dir_url.rstrip('/')}/{file_name}.emf"

    exporter = SM.createInstance('com.sun.star.drawing.GraphicExportFilter')
    # NOTE(review): the GraphicExportFilter service description appears to
    # name this media-descriptor key 'MediaType' rather than 'MimeType' --
    # confirm whether the second entry has any effect.
    args = (
        PropertyValue(Name='URL', Value=path),
        PropertyValue(Name='MimeType', Value='image/x-emf'),
    )
    exporter.setSourceDocument(image)
    exporter.filter(args)


def main():
    """Pass every graphic object of the current document to _export_image.

    After each export the object's name is reported through ``app.debug``.
    """
    # XSCRIPTCONTEXT and app are not defined in this snippet -- presumably
    # supplied by the macro environment / a helper import; TODO confirm.
    document = XSCRIPTCONTEXT.getDocument()

    for graphic in document.GraphicObjects:
        _export_image(graphic)
        app.debug(graphic.Name)

Unfortunately, this gives exactly the same result.