diff --git a/.gitattributes b/.gitattributes index 4bb443c..e8270cc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ -*.pdf binary \ No newline at end of file +*.pdf binary +*.pdf diff diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f0f211..bddec07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Fixed +- Fixed `Name must start with a leading slash` when adding an external document to a `pdfjs` document, and adding that result to another `pdfjs` document again (caused by a `null` object reference due to object streams). Also fixes duplicate objects when adding external documents with +pages in object streams. ## [2.5.2] ### Fixed diff --git a/lib/external.js b/lib/external.js index 34cb3b6..b1abfe3 100644 --- a/lib/external.js +++ b/lib/external.js @@ -48,6 +48,12 @@ module.exports = class ExternalDocument { const kids = this.pages.get("Kids"); const filter = page ? (i) => i === page - 1 : undefined; + // As a first iteration, simply register all page objects to ensure they exist for inter-page + // references (as they are not part of the `addObjectsRecursive` in the second iteration) + for (const page of this._iterPagesRecursively(doc, kids, filter)) { + doc._registerObject(page, true); + } + for (const page of this._iterPagesRecursively(doc, kids, filter)) { // if the page object does not define its MediaBox, explicitly set its MediaBox to the // value defined by its parent Pages object @@ -55,9 +61,6 @@ module.exports = class ExternalDocument { page.properties.set("MediaBox", this.mediaBox); } - // add single page - doc._registerObject(page, true); - // first, register objects to assign IDs (for references) const objects = []; Parser.addObjectsRecursive(objects, page, 0); diff --git a/lib/object/reference.js b/lib/object/reference.js index 76eef72..77618e0 100644 --- a/lib/object/reference.js +++ b/lib/object/reference.js @@ -21,6 +21,9 @@ class PDFReference { } toString() { + if (this.object.id === null) { + throw new TypeError("Tried to write reference with `null` object id"); + } return this.object.id + " " + this.object.rev + " R"; } diff --git a/lib/parser/parser.js b/lib/parser/parser.js index 55d6816..c6629b8 100644 --- a/lib/parser/parser.js +++ b/lib/parser/parser.js @@ -68,6 +68,13 @@ class Parser { if (objects.indexOf(value.object) > -1) { break; } + + // skip references to other pages + const type = value.object.properties.get("Type"); + if (type && type.toString() === "/Page") { + break; + } + objects.push(value.object); Parser.addObjectsRecursive(objects, value.object); break; diff --git a/test/pdfs/external/addallpages.pdf b/test/pdfs/external/addallpages.pdf index d79eaa4..4ea8454 100644 Binary files a/test/pdfs/external/addallpages.pdf and b/test/pdfs/external/addallpages.pdf differ diff --git a/test/pdfs/external/addinbetween.pdf b/test/pdfs/external/addinbetween.pdf index af84d59..3feb893 100644 Binary files a/test/pdfs/external/addinbetween.pdf and b/test/pdfs/external/addinbetween.pdf differ diff --git a/test/pdfs/external/self.pdf b/test/pdfs/external/self.pdf index 1e1ae6b..d7ef3f8 100644 Binary files a/test/pdfs/external/self.pdf and b/test/pdfs/external/self.pdf differ diff --git a/test/pdfs/issue/issue-112.pdf b/test/pdfs/issue/issue-112.pdf index 6d97786..59600dd 100644 Binary files a/test/pdfs/issue/issue-112.pdf and b/test/pdfs/issue/issue-112.pdf differ diff --git a/test/pdfs/issue/issue-117-nested-pages.pdf b/test/pdfs/issue/issue-117-nested-pages.pdf index 58d422d..d4be53a 100644 Binary files a/test/pdfs/issue/issue-117-nested-pages.pdf and b/test/pdfs/issue/issue-117-nested-pages.pdf differ