From 867b8baa40e74a2e86d753df427ef2fcc93a70ed Mon Sep 17 00:00:00 2001 From: jbaranec Date: Wed, 6 Nov 2024 23:11:09 +0100 Subject: [PATCH] tag names support and language fix --- config.json | 31 ++++++++++++++----------------- src/main.py | 2 ++ src/process_pdf.py | 14 +++++++++----- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/config.json b/config.json index c692a14..14a60d6 100644 --- a/config.json +++ b/config.json @@ -11,24 +11,14 @@ "category": "Tags", "configurations": [ { - "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --lang \"${lang}\"", - "name": "Alt Text with OpenAI", - "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" + "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --tags \"${tag_name}\" --openai \"${openai_key}\" --lang \"${lang}\"", + "name": "Generate Alt Text for all specified Tags with missing alternate description", + "desc": "Generate and set the alternate descriptions to listed tags in PDF files using PDFix SDK and OpenAI" }, { - "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --lang \"${lang}\" --overwrite", - "name": "Alt Text with OpenAI", - "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" - }, - { - "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" 
--key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\"", - "name": "Alt Text with OpenAI", - "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" - }, - { - "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --openai \"${openai_key}\" --overwrite", - "name": "Alt Text with OpenAI", - "desc": "Generate and applies alternative image descriptions to PDF files using PDFix SDK and OpenAI" + "program": "docker run --platform linux/amd64 -v \"${working_directory}:/data\" -w /data --rm pdfix/alt-text-openai:latest --name \"${license_name}\" --key \"${license_key}\" detect -i \"${input_pdf}\" -o \"${output_pdf}\" --tags \"${tag_name}\" --openai \"${openai_key}\" --lang \"${lang}\" --overwrite", + "name": "Generate Alt Text for all specified Tags", + "desc": "Generate and set the alternate descriptions to listed tags in PDF files using PDFix SDK and OpenAI" } ], "args": [ @@ -56,7 +46,14 @@ "value": "" }, { - "title": "Alternate description language", + "title": "Tag Name", + "name": "tag_name", + "desc": "Tag name defined by a regular expression", + "type": "string", + "value": "Figure|Formula" + }, + { + "title": "Language", "name": "lang", "desc": "Alternate description language", "type": "string", diff --git a/src/main.py b/src/main.py index 4b211e9..0ba67b9 100644 --- a/src/main.py +++ b/src/main.py @@ -57,6 +57,7 @@ def main(): ) pars_detect.add_argument("--openai", type=str, required=True, help="OpenAI API key") + pars_detect.add_argument("--tags", type=str, required=True, help="Regular expression defining the tag name") pars_detect.add_argument( "--overwrite", action="store_true", @@ -103,6 +104,7 @@ def main(): alt_text( input_file, output_file, + args.tags, args.name, args.key, args.openai, diff --git 
a/src/process_pdf.py b/src/process_pdf.py index 35a52df..bedf478 100644 --- a/src/process_pdf.py +++ b/src/process_pdf.py @@ -1,4 +1,5 @@ import ctypes +import re from pdfixsdk.Pdfix import ( GetPdfix, @@ -59,6 +60,7 @@ def update_image_alt( doc: PdfDoc, api_key: str, overwrite: bool, + lang: str ) -> None: img = "image_" + str(elem.GetObject().GetId()) + ".jpg" @@ -95,7 +97,7 @@ def update_image_alt( with open(img, "wb") as bf: bf.write(data) - response = alt_description(img, api_key) + response = alt_description(img, api_key, lang) # print(response.message.content) alt = response.message.content @@ -109,6 +111,7 @@ def update_image_alt( def browse_figure_tags( parent: PdsStructElement, doc: PdfDoc, + tags: str, api_key: str, overwrite: bool, lang: str, @@ -119,19 +122,20 @@ def browse_figure_tags( if parent.GetChildType(i) != kPdsStructChildElement: continue child_elem = struct_tree.GetStructElementFromObject(parent.GetChildObject(i)) - if child_elem.GetType(True) == "Figure": + if re.match(tags, child_elem.GetType(True)): # process figure element update_image_alt(child_elem, doc, api_key, overwrite, lang) else: - browse_figure_tags(child_elem, doc, api_key, overwrite, lang) + browse_figure_tags(child_elem, doc, tags, api_key, overwrite, lang) def alt_text( input_path: str, output_path: str, + tags: str, license_name: str, license_key: str, - api_key: str, + api_key: str, overwrite: bool, lang: str, ) -> None: @@ -176,7 +180,7 @@ def alt_text( child_elem = struct_tree.GetStructElementFromObject(struct_tree.GetChildObject(0)) try: - browse_figure_tags(child_elem, doc, api_key, overwrite, lang) + browse_figure_tags(child_elem, doc, tags, api_key, overwrite, lang) except Exception as e: raise e