I am trying to automatically extract EMF images from documents using the LibreOffice API from Python. I am using LibreOffice 7.3.2.2.
When I save the images using the GUI, the resulting files have the correct contents. When I save them using the API, the results seem to be heavily modified.
I’ve attached a minimal example ODT file containing one EMF image with one curve in it. I’ve also put the source for my image extracting script at the bottom of this post.
In the GUI, when I right-click the image and open the properties, I see that the image type is reported as WMF, rather than EMF.
When I right-click the image and click “Save…”, I am prompted to choose a .wmf filename. However, the resulting file is actually EMF format, and is identical to the file that was put into the document. This is fine for my purposes, since I can easily rename the file.
When I run my image extracting script, it again reports the image type as WMF (image/x-wmf). However, the WMF file it saves is an actual WMF file. It appears similar to the input EMF file, but the curve has been replaced with a series of polygons. The EMF file saved by the script also appears similar to the input EMF file but is much more complex.
Is there any way to use the API to get the same result as the “Save…” context menu entry?
Image extracting script:
import argparse
import pathlib
import shutil
import subprocess
import sys
import tempfile
import time
import uno
from com.sun.star.beans import PropertyValue
from com.sun.star.connection import NoConnectException
class soffice_connection:
    """Private headless LibreOffice instance reached over a UNO named pipe.

    Creating an instance makes a throw-away user profile directory, starts
    ``soffice.exe`` against that profile and connects to it over a pipe
    named after the directory.

    Attributes:
        profile_dir: ``pathlib.Path`` of the temporary profile directory.
        process: ``subprocess.Popen`` handle of the launched soffice.
        component_loader: a ``com.sun.star.frame.Desktop`` instance.
        graphic_provider: a ``com.sun.star.graphic.GraphicProvider``
            instance.
    """

    def __init__(self):
        """Start soffice and connect to it.

        Raises:
            NoConnectException: if no connection could be made after
                30 attempts spaced one second apart.
        """
        # Defined before anything can fail, so that __del__ can always
        # tell how far start-up got.
        self.profile_dir = None
        self.process = None
        self.component_loader = None
        self.graphic_provider = None

        self.profile_dir = pathlib.Path(tempfile.mkdtemp(prefix='MYUNO_'))
        # The unique temp directory name doubles as a unique pipe name.
        pipename = self.profile_dir.name
        # soffice.exe is looked up as a sibling of the directory two levels
        # above the running Python interpreter.
        soffice_path = pathlib.Path(sys.executable).parent.parent.with_name('soffice.exe')
        # Passed as an argument list so that no manual quoting is needed.
        command = [
            str(soffice_path),
            f'--accept=pipe,name={pipename};urp;',
            f'-env:UserInstallation={self.profile_dir.as_uri()}',
            '--headless',
        ]
        self.process = subprocess.Popen(command)

        local_context = uno.getComponentContext()
        resolver = local_context.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", local_context
        )

        # soffice will take a few seconds to start
        context = None
        for _ in range(30):
            time.sleep(1)
            try:
                context = resolver.resolve(
                    f"uno:pipe,name={pipename};urp;StarOffice.ComponentContext"
                )
            except NoConnectException:
                pass
            else:
                break
        if context is None:
            raise NoConnectException

        naming_service = resolver.resolve(
            f"uno:pipe,name={pipename};urp;StarOffice.NamingService"
        )
        service_manager = naming_service.getRegisteredObject(
            'StarOffice.ServiceManager')
        self.component_loader = service_manager.createInstance('com.sun.star.frame.Desktop')
        self.graphic_provider = service_manager.createInstance('com.sun.star.graphic.GraphicProvider')

    def __del__(self):
        """Shut soffice down and remove the temporary profile directory.

        Each step is skipped when the corresponding part of start-up never
        happened, so a failed ``__init__`` still cleans up after itself.
        """
        if self.component_loader is not None:
            self.component_loader.terminate()
            time.sleep(3)  # should be plenty of time for soffice to shut down
        if self.process is not None and self.process.poll() is None:
            # Still running: either the connection was never established
            # (so nobody could ask it to terminate) or it did not exit in
            # time. Kill it rather than leave it behind.
            self.process.kill()
            self.process.wait()
        if self.profile_dir is not None:
            shutil.rmtree(self.profile_dir)
def process_file(input_file_path, soffice):
    """Save every graphic object of a document as WMF and as EMF.

    The document is loaded read-only, each graphic's reported MIME type is
    printed, and the graphic is stored twice in the ``out`` directory
    (relative to the current working directory, created if missing): once
    as ``<name>.wmf`` with MIME type ``image/x-wmf`` and once as
    ``<name>.emf`` with MIME type ``image/x-emf``.

    Args:
        input_file_path: ``pathlib.Path`` of the document; it is converted
            with ``as_uri()``, so it must be absolute.
        soffice: object providing ``component_loader`` and
            ``graphic_provider`` (see ``soffice_connection``).
    """
    doc = soffice.component_loader.loadComponentFromURL(
        input_file_path.as_uri(), "_blank", 0,
        (PropertyValue(Name='ReadOnly', Value=True),)
    )
    try:
        output_dir = pathlib.Path('out')
        output_dir.mkdir(exist_ok=True)

        # (file suffix, MIME type handed to the graphic provider), in the
        # order the files are written.
        export_formats = (
            ('.wmf', 'image/x-wmf'),
            ('.emf', 'image/x-emf'),
        )

        graphic_objects = doc.getGraphicObjects()
        for go_name in graphic_objects.getElementNames():
            graphic = graphic_objects.getByName(go_name).Graphic
            print(graphic.MimeType)
            for suffix, mime_type in export_formats:
                # resolve() makes the path absolute, which as_uri() requires.
                target_path = (output_dir / (go_name + suffix)).resolve()
                soffice.graphic_provider.storeGraphic(
                    graphic,
                    (
                        PropertyValue(Name='URL', Value=target_path.as_uri()),
                        PropertyValue(Name='MimeType', Value=mime_type),
                    )
                )
    finally:
        # Release the document so that repeated calls do not accumulate
        # open documents in the office instance.
        if doc is not None:
            doc.close(True)
def main():
    """Command-line entry point: extract the graphics of one document.

    Takes a single positional argument, the path of the file to extract
    graphics from, starts an office connection and processes that file.

    Returns:
        0, for use as the process exit status.
    """
    arg_parser = argparse.ArgumentParser(description='Minimal graphic extraction test')
    arg_parser.add_argument('input', type=pathlib.Path, help='File to extract graphics from')
    parsed = arg_parser.parse_args()

    office = soffice_connection()
    # Resolved to an absolute path before being handed on.
    process_file(parsed.input.resolve(), office)
    return 0
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
test.odt (19.1 KB)