From 83371cfb1f71a09218aabc1adaf0df51521d8169 Mon Sep 17 00:00:00 2001 From: Varsha U N <varshamaddur2006@gmail.com> Date: Tue, 14 Jan 2025 18:49:17 +0530 Subject: [PATCH 1/2] Support extracting Libre office documents Signed-off-by: Varsha U N <varshamaddur2006@gmail.com> --- src/extractcode/extract.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/extractcode/extract.py b/src/extractcode/extract.py index 8c547f2..1a30ab3 100644 --- a/src/extractcode/extract.py +++ b/src/extractcode/extract.py @@ -230,6 +230,14 @@ def extract_files( logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals()) yield xevent +def extract_libre_office_document(location, target): + """ Extract Libre Office documents (e.g., .ods files) as ZIP archives. """ + if not zipfile.is_zipfile(location): + return + with zipfile.ZipFile(location, 'r') as zip_ref: + zip_ref.extractall(target) + print(f"Extracted Libre Office document from {location} to {target}") + def extract_file( location, From fd532ff2130fd5d047b9e37dcf0467bcb733b9aa Mon Sep 17 00:00:00 2001 From: Varsha U N <varshamaddur2006@gmail.com> Date: Sun, 19 Jan 2025 08:39:11 +0530 Subject: [PATCH 2/2] Allow extraction of libre-office-docs Signed-off-by: Varsha U N <varshamaddur2006@gmail.com> --- src/extractcode/extract.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/extractcode/extract.py b/src/extractcode/extract.py index 1a30ab3..5dfcf6a 100644 --- a/src/extractcode/extract.py +++ b/src/extractcode/extract.py @@ -9,6 +9,7 @@ import logging import traceback +import zipfile from collections import namedtuple from functools import partial @@ -229,14 +230,15 @@ def extract_files( if TRACE: logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals()) yield xevent - + def extract_libre_office_document(location, target): - """ Extract Libre Office documents (e.g., .ods files) as ZIP archives. """ - if not zipfile.is_zipfile(location): - return - with zipfile.ZipFile(location, 'r') as zip_ref: - zip_ref.extractall(target) - print(f"Extracted Libre Office document from {location} to {target}") + """Extract Libre Office documents (e.g., .ods files) as ZIP archives.""" + try: + with zipfile.ZipFile(location, 'r') as zip_ref: + zip_ref.extractall(target) + logger.info(f"Extracted Libre Office document from {location} to {target}") + except zipfile.BadZipFile: + raise ValueError(f"File at {location} is not a valid ZIP archive.") def extract_file(