diff --git a/dspace-api/src/main/java/org/dspace/app/mediafilter/ImageMagickThumbnailFilter.java b/dspace-api/src/main/java/org/dspace/app/mediafilter/ImageMagickThumbnailFilter.java index 408982d157e5..7543410a7968 100644 --- a/dspace-api/src/main/java/org/dspace/app/mediafilter/ImageMagickThumbnailFilter.java +++ b/dspace-api/src/main/java/org/dspace/app/mediafilter/ImageMagickThumbnailFilter.java @@ -14,7 +14,7 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; -import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.dspace.content.Bitstream; @@ -153,8 +153,8 @@ public File getImageFile(File f, boolean verbose) // the CropBox is missing or empty because pdfbox will set it to the // same size as the MediaBox if it doesn't exist. Also note that we // only need to check the first page, since that's what we use for - // generating the thumbnail (PDDocument uses a zero-based index). - PDPage pdfPage = PDDocument.load(f).getPage(0); + // generating the thumbnail (PDPage uses a zero-based index). + PDPage pdfPage = Loader.loadPDF(f).getPage(0); PDRectangle pdfPageMediaBox = pdfPage.getMediaBox(); PDRectangle pdfPageCropBox = pdfPage.getCropBox(); diff --git a/dspace-api/src/main/java/org/dspace/app/mediafilter/PDFBoxThumbnail.java b/dspace-api/src/main/java/org/dspace/app/mediafilter/PDFBoxThumbnail.java index 3acb6900dbda..94c463b2808f 100644 --- a/dspace-api/src/main/java/org/dspace/app/mediafilter/PDFBoxThumbnail.java +++ b/dspace-api/src/main/java/org/dspace/app/mediafilter/PDFBoxThumbnail.java @@ -11,6 +11,8 @@ import java.io.InputStream; import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.rendering.PDFRenderer; @@ -71,7 +73,7 @@ public InputStream getDestinationStream(Item currentItem, InputStream source, bo BufferedImage buf; // Render the page image. - try ( PDDocument doc = PDDocument.load(source); ) { + try ( PDDocument doc = Loader.loadPDF(new RandomAccessReadBuffer(source)); ) { PDFRenderer renderer = new PDFRenderer(doc); buf = renderer.renderImage(0); } catch (InvalidPasswordException ex) { diff --git a/dspace-api/src/main/java/org/dspace/content/packager/PDFPackager.java b/dspace-api/src/main/java/org/dspace/content/packager/PDFPackager.java index 6c7baad45497..f63585f3c498 100644 --- a/dspace-api/src/main/java/org/dspace/content/packager/PDFPackager.java +++ b/dspace-api/src/main/java/org/dspace/content/packager/PDFPackager.java @@ -18,11 +18,11 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.Loader; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.io.MemoryUsageSetting; -import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.io.ScratchFile; -import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.dspace.authorize.AuthorizeException; @@ -330,19 +330,24 @@ private void crosswalkPDF(Context context, Item item, InputStream metadata) COSDocument cos = null; try { - ScratchFile scratchFile = null; + PDDocument document = null; + try { - long useRAM = Runtime.getRuntime().freeMemory() * 80 / 100; // use up to 80% of JVM free memory - scratchFile = new ScratchFile( - MemoryUsageSetting.setupMixed(useRAM)); // then fallback to temp file (unlimited size) + // Use up to 80% of JVM free memory and fall back to a temp file (unlimited size) + long useRAM = Runtime.getRuntime().freeMemory() * 80 / 100; + document = Loader.loadPDF( + new RandomAccessReadBuffer(metadata), + () -> new ScratchFile(MemoryUsageSetting.setupMixed(useRAM))); } catch (IOException ioe) { log.warn("Error initializing scratch file: " + ioe.getMessage()); } - PDFParser parser = new PDFParser(new RandomAccessBufferedFileInputStream(metadata), scratchFile); - parser.parse(); - cos = parser.getDocument(); + // sanity check: loaded PDF document must not be null. + if (document == null) { + throw new MetadataValidationException("The provided stream could not be parsed into a PDF document."); + } + cos = document.getDocument(); // sanity check: PDFBox breaks on encrypted documents, so give up. if (cos.getEncryptionDictionary() != null) { throw new MetadataValidationException("This packager cannot accept an encrypted PDF document."); diff --git a/dspace-api/src/main/java/org/dspace/disseminate/CitationDocumentServiceImpl.java b/dspace-api/src/main/java/org/dspace/disseminate/CitationDocumentServiceImpl.java index c20961db7544..1aa31d4db9e5 100644 --- a/dspace-api/src/main/java/org/dspace/disseminate/CitationDocumentServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/disseminate/CitationDocumentServiceImpl.java @@ -23,6 +23,8 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; @@ -30,6 +32,7 @@ import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.service.AuthorizeService; import org.dspace.content.Bitstream; @@ -304,7 +307,7 @@ public Pair makeCitedDocument(Context context, Bitstream bitstream Item item = (Item) bitstreamService.getParentObject(context, bitstream); final InputStream inputStream = bitstreamService.retrieve(context, bitstream); try { - sourceDocument = sourceDocument.load(inputStream); + sourceDocument = Loader.loadPDF(new RandomAccessReadBuffer(inputStream)); } finally { inputStream.close(); } @@ -335,9 +338,10 @@ protected void generateCoverPage(Context context, PDDocument document, PDPage co int xwidth = 550; int ygap = 20; - PDFont fontHelvetica = PDType1Font.HELVETICA; - PDFont fontHelveticaBold = PDType1Font.HELVETICA_BOLD; - PDFont fontHelveticaOblique = PDType1Font.HELVETICA_OBLIQUE; + PDFont fontHelvetica = new PDType1Font(Standard14Fonts.FontName.HELVETICA); + PDFont fontHelveticaBold = new PDType1Font(Standard14Fonts.FontName.HELVETICA_BOLD); + PDFont fontHelveticaOblique = new PDType1Font(Standard14Fonts.FontName.HELVETICA_OBLIQUE); + contentStream.setNonStrokingColor(Color.BLACK); String[][] content = {header1}; diff --git a/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java b/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java index 552f607827a8..4ec1f4db39e7 100644 --- a/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/importer/external/epo/service/EpoImportMetadataSourceServiceImpl.java @@ -29,9 +29,9 @@ import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpException; import org.apache.http.client.utils.URIBuilder; +import org.apache.jena.ext.xerces.impl.dv.util.Base64; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.xerces.impl.dv.util.Base64; import org.dspace.app.util.XMLUtils; import org.dspace.content.Item; import org.dspace.importer.external.datamodel.ImportRecord; diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java index 691927c6e457..1e7d6440ff24 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java @@ -58,6 +58,8 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.CharEncoding; import org.apache.commons.lang3.StringUtils; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.io.RandomAccessReadBuffer; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.solr.client.solrj.SolrServerException; @@ -989,7 +991,7 @@ private String extractPDFText(byte[] content) throws IOException { try (ByteArrayInputStream source = new ByteArrayInputStream(content); Writer writer = new StringWriter(); - PDDocument pdfDoc = PDDocument.load(source)) { + PDDocument pdfDoc = Loader.loadPDF(new RandomAccessReadBuffer(source))) { pts.writeText(pdfDoc, writer); return writer.toString(); @@ -998,7 +1000,7 @@ private String extractPDFText(byte[] content) throws IOException { private int getNumberOfPdfPages(byte[] content) throws IOException { try (ByteArrayInputStream source = new ByteArrayInputStream(content); - PDDocument pdfDoc = PDDocument.load(source)) { + PDDocument pdfDoc = Loader.loadPDF(new RandomAccessReadBuffer(source))) { return pdfDoc.getNumberOfPages(); } } diff --git a/pom.xml b/pom.xml index 956bf13df4cf..bdd38dd8cf84 100644 --- a/pom.xml +++ b/pom.xml @@ -38,12 +38,12 @@ 4.0.5 1.1.1 - 9.4.57.v20241219 - 2.24.3 - 2.0.34 + 9.4.58.v20250814 + 2.25.2 + 3.0.5 1.19.0 2.0.17 - 2.9.4 + 3.2.3 1.81 8.0.1