From 97df15d35a08f09662c7e6e42652db1261f196ee Mon Sep 17 00:00:00 2001
From: Jordan Reiter
Date: Fri, 14 Feb 2014 13:31:20 -0500
Subject: [PATCH] Throw an exception if the docx file is invalid.

Right now if the file is a doc file (i.e. not a zip file) it throws a
zipfile.BadZipfile exception, and if it's a zip file missing the right
xml files it throws a KeyError. This catches both exceptions and throws
a more useful exception that can be caught by a function/tool using
python-docx.
---
 docx.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/docx.py b/docx.py
index 9d3f6e6..1217142 100755
--- a/docx.py
+++ b/docx.py
@@ -77,11 +77,19 @@
     'dcmitype': 'http://purl.org/dc/dcmitype/',
     'dcterms': 'http://purl.org/dc/terms/'}
 
+class BaseDocxError(Exception):
+    pass
+
+class BadDocxFile(BaseDocxError):
+    pass
 
 def opendocx(file):
     '''Open a docx file, return a document XML tree'''
-    mydoc = zipfile.ZipFile(file)
-    xmlcontent = mydoc.read('word/document.xml')
+    try:
+        mydoc = zipfile.ZipFile(file)
+        xmlcontent = mydoc.read('word/document.xml')
+    except (zipfile.BadZipfile, KeyError):
+        raise BadDocxFile("File is not a docx file")
     document = etree.fromstring(xmlcontent)
     return document
 