From 5aa17622b176b98543ad8bcfedc9336d8c7e883c Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 24 Mar 2025 14:51:30 +0100 Subject: [PATCH 01/14] Add SruOpener (#510) Every single output is a valid XML by itself. --- metafacture-io/build.gradle | 2 + .../java/org/metafacture/io/SruOpener.java | 234 ++++++++++++++++++ .../main/resources/flux-commands.properties | 1 + 3 files changed, 237 insertions(+) create mode 100644 metafacture-io/src/main/java/org/metafacture/io/SruOpener.java diff --git a/metafacture-io/build.gradle b/metafacture-io/build.gradle index 4f1363d6c..a5c054f29 100644 --- a/metafacture-io/build.gradle +++ b/metafacture-io/build.gradle @@ -19,6 +19,8 @@ description = 'Modules for reading and writing data streams' dependencies { api project(':metafacture-framework') + api project(':metafacture-formatting') + api project(':metafacture-xml') implementation project(':metafacture-commons') implementation "commons-io:commons-io:${versions.commons_io}" implementation "org.apache.commons:commons-compress:${versions.commons_compress}" diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java new file mode 100644 index 000000000..3380fc8e8 --- /dev/null +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -0,0 +1,234 @@ +/* Copyright 2013 Pascal Christoph. 
+ * Licensed under the Eclipse Public License 1.0 */ + +package org.metafacture.io; + +import org.metafacture.framework.FluxCommand; +import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.ObjectReceiver; +import org.metafacture.framework.annotations.Description; +import org.metafacture.framework.annotations.In; +import org.metafacture.framework.annotations.Out; +import org.metafacture.framework.helpers.DefaultObjectPipe; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.xml.sax.SAXException; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.io.StringWriter; +import java.net.HttpURLConnection; +import java.net.URL; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +/** + * Opens an SRU (Search Retrieval by URL) stream and passes a reader to the receiver. Pages through the SRU. + * + * @author Pascal Christoph (dr0i) + */ +@Description( + "Opens a SRU stream and passes a reader to the receiver. The input is the base URL of the SRU service " + + "to be retrieved from. Mandatory argument is: QUERY.\n" + + "The output is an XML document holding the user defined \"maximumRecords\" as documents. 
If there are" + + "more documents than defined by MAXIMUM_RECORDS and there are more documents wanted (defined by " + + "\"totalRecords\") there will be consecutive XML documents output as it pages through the SRU.") +@In(String.class) +@Out(java.io.Reader.class) +@FluxCommand("open-sru") +public final class SruOpener extends DefaultObjectPipe> { + + private static final String OPERATION = "searchRetrieve"; + private static final String RECORD_SCHEMA = "MARC21-xml"; + private static final String USER_AGENT = "metafacture-core"; + private static final String VERSION = "2.0"; + + private static final int CONNECTION_TIMEOUT = 11000; + private static final int MAXIMUM_RECORDS = 10; + private static final int START_RECORD = 1; + private String operation = OPERATION; + private String query; + private String recordSchema = RECORD_SCHEMA; + private String userAgent = USER_AGENT; + private String version = VERSION; + + private int maximumRecords = MAXIMUM_RECORDS; + private int startRecord = START_RECORD; + private int totalRecords = Integer.MAX_VALUE; + int numberOfRecords = Integer.MAX_VALUE; + + private boolean stopRetrieving; + private int recordsRetrieved; + + private String xmlDeclarationTemplate = ""; + private String xmlDeclaration; + + /** + * Default constructor + */ + public SruOpener() { + } + + /** + * Sets the User Agent to use. Default value: {@value USER_AGENT}. + * + * @param userAgent a user agent to be used when opening a URL + */ + public void setUserAgent(final String userAgent) { + this.userAgent = userAgent; + } + + /** + * Sets the query of the search. + * Setting a query is mandatory. + * + * @param query the query + */ + + public void setQuery(final String query) { + this.query = query; + } + + /** + * Sets total number of records to be retrieved. Default value: indefinite (as in "all") + * . 
+ * + * @param totalRecords total number of records to be retrieved + */ + public void setTotal(final String totalRecords) { + this.totalRecords = Integer.parseInt(totalRecords); + } + + /** + * Sets the maximum of records returned in one lookup. Default value: {@value MAXIMUM_RECORDS}. + * The lookup is repeated as long as {@link #maximumRecords} is lesser than {@link #totalRecords}. + * + * @param maximumRecords maximum of records returned in one lookup + */ + public void setMaximumRecords(final String maximumRecords) { + this.maximumRecords = Integer.parseInt(maximumRecords); + } + + /** + * Sets where to start when retrieving records. Default value: {@value START_RECORD}. + * + * @param startRecord where to start when retrieving records + */ + public void setStartRecord(final String startRecord) { + this.startRecord = Integer.parseInt(startRecord); + } + + /** + * Sets the format of the retrieved record data. Default value: {@value RECORD_SCHEMA}. + * + * @param recordSchema the format of the data of the records + */ + public void setRecordSchema(final String recordSchema) { + this.recordSchema = recordSchema; + } + + /** + * Sets the kind of operation of the lookup. Default value: {@value OPERATION}. + * + * @param operation the kind of operation of the lookup + */ + public void setOperation(final String operation) { + this.operation = operation; + } + + /** + * Sets the version of the lookup. Default value: {@value VERSION}. 
+ * + * @param version the version of the lookup + */ + public void setVersion(final String version) { + this.version = version; + } + + @Override + public void process(final String baseUrl) { + + StringBuilder srUrl = new StringBuilder(baseUrl); + if (query != null) { + srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=") + .append(recordSchema).append("&version=").append(version); + } + else { + throw new IllegalArgumentException("Missing mandatory parameter 'query'"); + } + + while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) { + InputStream inputStream = getXmlDocsViaSru(srUrl); + getReceiver().process(new InputStreamReader(inputStream)); + } + + } + + private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { + try { + InputStream inputStreamOfURl = retrieveUrl(srUrl, startRecord, maximumRecords); + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder docBuilder = factory.newDocumentBuilder(); + Document xmldoc = docBuilder.parse(inputStreamOfURl); + + Transformer t = TransformerFactory.newInstance().newTransformer(); + StringWriter stringWriter = new StringWriter(); + t.transform(new DOMSource(xmldoc), new StreamResult(stringWriter)); + + numberOfRecords = getIntegerValueFromElement(xmldoc,"numberOfRecords", 0); + int recordPosition = getIntegerValueFromElement(xmldoc,"recordPosition", 0); + int nextRecordPosition = getIntegerValueFromElement(xmldoc,"nextRecordPosition", totalRecords); + + recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; + startRecord = nextRecordPosition; // grenzwert : wenn maximumRcords > als in echt + + return new ByteArrayInputStream(stringWriter.toString().getBytes()); + + } + catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + throw new MetafactureException(e); + } + } + + private int getIntegerValueFromElement(final Document 
xmlDoc, final String tagName, final int fallback) { + Node node = xmlDoc.getElementsByTagName(tagName).item(0); + if (node != null) { + return Integer.parseInt(node.getTextContent()); + } + return fallback; + } + + private InputStream retrieveUrl(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { + final URL urlToOpen = + new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord); + final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); + + connection.setConnectTimeout(CONNECTION_TIMEOUT); + if (!userAgent.isEmpty()) { + connection.setRequestProperty("User-Agent", userAgent); + } + InputStream inputStream = getInputStream(connection); + + return inputStream; + } + + private InputStream getInputStream(final HttpURLConnection connection) { + try { + return connection.getInputStream(); + } + catch (final IOException e) { + stopRetrieving = true; + return connection.getErrorStream(); + } + } + +} diff --git a/metafacture-io/src/main/resources/flux-commands.properties b/metafacture-io/src/main/resources/flux-commands.properties index 39540d47e..e69c03c40 100644 --- a/metafacture-io/src/main/resources/flux-commands.properties +++ b/metafacture-io/src/main/resources/flux-commands.properties @@ -22,3 +22,4 @@ write org.metafacture.io.ObjectWriter as-records org.metafacture.io.RecordReader open-resource org.metafacture.io.ResourceOpener open-tar org.metafacture.io.TarReader +open-sru org.metafacture.io.SruOpener From 65e7592dc693e81c1c711632142cd0fbe36b3bed Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 10 Jul 2025 13:28:00 +0200 Subject: [PATCH 02/14] WIP add test (#510) --- .../org/metafacture/io/SruOpenerTest.java | 196 ++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java 
b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java new file mode 100644 index 000000000..35045801e --- /dev/null +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -0,0 +1,196 @@ +package org.metafacture.io; + +import org.junit.*; +import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.ObjectReceiver; + +import com.github.tomakehurst.wiremock.client.MappingBuilder; +import com.github.tomakehurst.wiremock.client.ResponseDefinitionBuilder; +import com.github.tomakehurst.wiremock.client.WireMock; +import com.github.tomakehurst.wiremock.core.WireMockConfiguration; +import com.github.tomakehurst.wiremock.http.RequestMethod; +import com.github.tomakehurst.wiremock.junit.WireMockRule; +import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; +import com.github.tomakehurst.wiremock.matching.StringValuePattern; +import com.github.tomakehurst.wiremock.matching.UrlPattern; +import org.metafacture.framework.helpers.DefaultObjectPipe; +import org.metafacture.framework.helpers.DefaultObjectReceiver; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; + + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; +import java.util.Arrays; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + + +public final class SruOpenerTest { + + private static StringBuilder resultCollector = new StringBuilder(); + private static final String RESPONSE_BODY = "response bödy"; // UTF-8 + private static final String TEST_URL = "/test/path"; + private static SruOpener sruOpener = new SruOpener(); + + + @Rule + public MockitoRule mockitoRule = MockitoJUnit.rule(); + + @Rule + public WireMockRule wireMockRule = new WireMockRule(WireMockConfiguration.wireMockConfig(). 
+ jettyAcceptors(Runtime.getRuntime() + .availableProcessors()) + .dynamicPort()); + + @Mock + private ObjectReceiver receiver; + + public SruOpenerTest() { + } + + @Before + public void setUp() { + sruOpener = new SruOpener(); + final char[] buffer = new char[ 1024 * 1024 * 16]; + sruOpener.setReceiver(new DefaultObjectPipe>() { + @Override + public void process(final Reader reader) { + int size; + try { + while ((size = reader.read(buffer)) != -1) { + int offset = 0; + for (int i = 0; i < size; ++i) { + resultCollector.append(buffer, offset, size - offset); + offset = i + 1; + } + } + } + catch (final IOException e) { + throw new MetafactureException(e); + } + } + }); + } + + + @Test + public void test_(){ + + // sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.setMaximumRecords("1"); + sruOpener.setTotal("3"); + sruOpener.process("https://services.dnb.de/sru/dnb"); + System.out.println(resultCollector.toString()); + } + +/* @Test + public void shouldPerformGetRequestWithInputAsUrlByDefault() throws IOException { + SruOpener sruOpener = new SruOpener(); + sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("1890"); + sruOpener.setTotal("32"); + shouldPerformRequest(TEST_URL,sruOpener); + }*/ + + + //mach lieber wie in metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java wiremock + /* private void shouldPerformRequest(String input, SruOpener sruOpener) throws IOException { // checkstyle-disable-line ParameterNumber + + final BiConsumer consumer = null; + final Consumer stubConsumer; + final Consumer requestConsumer; + final Consumer responseConsumer = null; + final String responseBody; + final ResponseDefinitionBuilder response = WireMock.ok().withBody(RESPONSE_BODY); + if (responseConsumer != null) { + 
responseConsumer.accept(response); + } + + final String baseUrl = wireMockRule.baseUrl(); + final String url = String.format(TEST_URL, baseUrl); + + final UrlPattern urlPattern = WireMock.urlPathEqualTo(TEST_URL); + + final SruOpener opener = new SruOpener(); + opener.setReceiver(receiver); + final MappingBuilder stub = WireMock.request("GET", urlPattern).willReturn(response); + if (stubConsumer != null) { + stubConsumer.accept(stub); + } + + final RequestPatternBuilder request = new RequestPatternBuilder(RequestMethod.fromString("GET"), urlPattern) + .withRequestBody(method.getRequestHasBody() ? WireMock.equalTo(REQUEST_BODY) : WireMock.absent()); + if (requestConsumer != null) { + requestConsumer.accept(request); + } + + WireMock.stubFor(stub); + + opener.process(String.format(input, baseUrl)); + + // use the opener a second time in a workflow: + opener.process(String.format(input, baseUrl)); + + opener.closeStream(); + + + WireMock.verify(request); + } +*/ + + @Test + public void test(){ + SruOpener sruOpener = new SruOpener(); + RecordReader recordReader = new RecordReader(); + recordReader.setReceiver(new ObjectStdoutWriter()); + sruOpener.setReceiver(recordReader);// { + + + /* @Override + public void process(final XmlReceiver obj) { + BufferedReader in = new BufferedReader(obj); + String line = null; + StringBuilder rslt = new StringBuilder(); + while (true) { + try { + if (!((line = in.readLine()) != null)) break; + } + catch (IOException e) { + throw new RuntimeException(e); + }final InOrder ordered = Mockito.inOrder(receiver); + + rslt.append(line); + }*/ + /* StreamLiteralFormatter streamLiteralFormatter = new StreamLiteralFormatter(); + ObjectStdoutWriter objectStdoutWriter = new ObjectStdoutWriter(); + XmlElementSplitter xmlElementSplitter = new XmlElementSplitter(); + streamLiteralFormatter.setReceiver(objectStdoutWriter); + xmlElementSplitter.setReceiver(streamLiteralFormatter); + xmlDecoder.setReceiver(xmlElementSplitter);*/ + // 
System.out.println(rslt.toString()); + // resultCollector.append(obj); + //} + + sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + // sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setRecordSchema("MARC21plus-xml"); + sruOpener.setVersion("1.1"); + sruOpener.setStartRecord("3029"); + sruOpener.setMaximumRecords("1"); + sruOpener.setTotal("1"); + // sruOpener.process("https://services.dnb.de/sru/dnb"); + sruOpener.process("https://services.dnb.de/sru/zdb"); + // sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); + + // System.out.println(resultCollector.toString()); + } +} From ecd05fe2db48776dc881ce4991ce420cee8aaef0 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 11 Sep 2025 16:02:03 +0200 Subject: [PATCH 03/14] Fix loop when getting data --- .../org/metafacture/io/SruOpenerTest.java | 31 ++++++------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index 35045801e..3478f0907 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -1,31 +1,20 @@ package org.metafacture.io; -import org.junit.*; -import org.metafacture.framework.MetafactureException; -import org.metafacture.framework.ObjectReceiver; - -import com.github.tomakehurst.wiremock.client.MappingBuilder; -import com.github.tomakehurst.wiremock.client.ResponseDefinitionBuilder; -import com.github.tomakehurst.wiremock.client.WireMock; import com.github.tomakehurst.wiremock.core.WireMockConfiguration; -import com.github.tomakehurst.wiremock.http.RequestMethod; import com.github.tomakehurst.wiremock.junit.WireMockRule; -import com.github.tomakehurst.wiremock.matching.RequestPatternBuilder; -import com.github.tomakehurst.wiremock.matching.StringValuePattern; -import com.github.tomakehurst.wiremock.matching.UrlPattern; +import org.junit.Before; 
+import org.junit.Rule; +import org.junit.Test; +import org.metafacture.framework.MetafactureException; +import org.metafacture.framework.ObjectReceiver; import org.metafacture.framework.helpers.DefaultObjectPipe; -import org.metafacture.framework.helpers.DefaultObjectReceiver; import org.mockito.Mock; import org.mockito.junit.MockitoJUnit; import org.mockito.junit.MockitoRule; - import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; -import java.util.Arrays; -import java.util.function.BiConsumer; -import java.util.function.Consumer; public final class SruOpenerTest { @@ -60,13 +49,11 @@ public void setUp() { public void process(final Reader reader) { int size; try { - while ((size = reader.read(buffer)) != -1) { - int offset = 0; - for (int i = 0; i < size; ++i) { - resultCollector.append(buffer, offset, size - offset); - offset = i + 1; + BufferedReader bufferedReader = new BufferedReader(reader); + String line; + while ((line = bufferedReader.readLine()) != null) { + resultCollector.append(line+"\n"); } - } } catch (final IOException e) { throw new MetafactureException(e); From 406c5ce3a8219fc40e69cc3c3dbe69137a9d9be7 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 10:26:56 +0200 Subject: [PATCH 04/14] Remove unused dependencies See https://github.com/metafacture/metafacture-core/pull/682#discussion_r2221792686. 
--- metafacture-io/build.gradle | 2 -- 1 file changed, 2 deletions(-) diff --git a/metafacture-io/build.gradle b/metafacture-io/build.gradle index a5c054f29..4f1363d6c 100644 --- a/metafacture-io/build.gradle +++ b/metafacture-io/build.gradle @@ -19,8 +19,6 @@ description = 'Modules for reading and writing data streams' dependencies { api project(':metafacture-framework') - api project(':metafacture-formatting') - api project(':metafacture-xml') implementation project(':metafacture-commons') implementation "commons-io:commons-io:${versions.commons_io}" implementation "org.apache.commons:commons-compress:${versions.commons_compress}" From 415deceba34d69a3bb417bd6531a825be4f9bc22 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 10:46:30 +0200 Subject: [PATCH 05/14] Adjust to fulfill checkstyle conditions --- .../java/org/metafacture/io/SruOpener.java | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 3380fc8e8..14ab60bf6 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -10,6 +10,7 @@ import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; + import org.w3c.dom.Document; import org.w3c.dom.Node; import org.xml.sax.SAXException; @@ -55,20 +56,21 @@ public final class SruOpener extends DefaultObjectPipe"; private String xmlDeclaration; @@ -102,10 +104,10 @@ public void setQuery(final String query) { * Sets total number of records to be retrieved. Default value: indefinite (as in "all") * . 
* - * @param totalRecords total number of records to be retrieved + * @param totalrecords total number of records to be retrieved */ - public void setTotal(final String totalRecords) { - this.totalRecords = Integer.parseInt(totalRecords); + public void setTotal(final String totalrecords) { + this.totalRecords = Integer.parseInt(totalrecords); } /** @@ -157,7 +159,7 @@ public void setVersion(final String version) { @Override public void process(final String baseUrl) { - StringBuilder srUrl = new StringBuilder(baseUrl); + final StringBuilder srUrl = new StringBuilder(baseUrl); if (query != null) { srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=") .append(recordSchema).append("&version=").append(version); @@ -166,8 +168,8 @@ public void process(final String baseUrl) { throw new IllegalArgumentException("Missing mandatory parameter 'query'"); } - while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) { - InputStream inputStream = getXmlDocsViaSru(srUrl); + while (!stopRetrieving && recordsRetrieved < totalRecords && startRecord < numberOfRecords) { + final InputStream inputStream = getXmlDocsViaSru(srUrl); getReceiver().process(new InputStreamReader(inputStream)); } @@ -175,18 +177,18 @@ public void process(final String baseUrl) { private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { try { - InputStream inputStreamOfURl = retrieveUrl(srUrl, startRecord, maximumRecords); - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder docBuilder = factory.newDocumentBuilder(); - Document xmldoc = docBuilder.parse(inputStreamOfURl); + final InputStream inputStreamOfURl = retrieveUrl(srUrl, startRecord, maximumRecords); + final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + final DocumentBuilder docBuilder = factory.newDocumentBuilder(); + final Document xmldoc = docBuilder.parse(inputStreamOfURl); - Transformer t = 
TransformerFactory.newInstance().newTransformer(); - StringWriter stringWriter = new StringWriter(); + final Transformer t = TransformerFactory.newInstance().newTransformer(); + final StringWriter stringWriter = new StringWriter(); t.transform(new DOMSource(xmldoc), new StreamResult(stringWriter)); - numberOfRecords = getIntegerValueFromElement(xmldoc,"numberOfRecords", 0); - int recordPosition = getIntegerValueFromElement(xmldoc,"recordPosition", 0); - int nextRecordPosition = getIntegerValueFromElement(xmldoc,"nextRecordPosition", totalRecords); + numberOfRecords = getIntegerValueFromElement(xmldoc, "numberOfRecords", 0); + final int recordPosition = getIntegerValueFromElement(xmldoc, "recordPosition", 0); + final int nextRecordPosition = getIntegerValueFromElement(xmldoc, "nextRecordPosition", totalRecords); recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; startRecord = nextRecordPosition; // grenzwert : wenn maximumRcords > als in echt @@ -200,23 +202,23 @@ private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { } private int getIntegerValueFromElement(final Document xmlDoc, final String tagName, final int fallback) { - Node node = xmlDoc.getElementsByTagName(tagName).item(0); + final Node node = xmlDoc.getElementsByTagName(tagName).item(0); if (node != null) { return Integer.parseInt(node.getTextContent()); } return fallback; } - private InputStream retrieveUrl(StringBuilder srUrl, int startRecord, int maximumRecords) throws IOException { + private InputStream retrieveUrl(final StringBuilder srUrl, final int startrecord, final int maximumrecords) throws IOException { final URL urlToOpen = - new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord); + new URL(srUrl.toString() + "&maximumRecords=" + maximumrecords + "&startRecord=" + startrecord); final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); connection.setConnectTimeout(CONNECTION_TIMEOUT); if 
(!userAgent.isEmpty()) { connection.setRequestProperty("User-Agent", userAgent); } - InputStream inputStream = getInputStream(connection); + final InputStream inputStream = getInputStream(connection); return inputStream; } From a111affdd163608707fae67117b39258457a59e6 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 10:51:13 +0200 Subject: [PATCH 06/14] Fix license --- .../main/java/org/metafacture/io/SruOpener.java | 16 ++++++++++++++-- .../java/org/metafacture/io/SruOpenerTest.java | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 14ab60bf6..300cca7b8 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -1,5 +1,17 @@ -/* Copyright 2013 Pascal Christoph. - * Licensed under the Eclipse Public License 1.0 */ +/* Copyright 2025 Pascal Christoph + + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.metafacture.io; diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index 3478f0907..108112d8e 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -1,3 +1,19 @@ +/* + * Copyright 2025 hbz + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.metafacture.io; import com.github.tomakehurst.wiremock.core.WireMockConfiguration; From afc756c87eb5e1a7a6f28d74b6f143d448657604 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 11:07:06 +0200 Subject: [PATCH 07/14] Use int in setters --- .../src/main/java/org/metafacture/io/SruOpener.java | 12 ++++++------ .../test/java/org/metafacture/io/SruOpenerTest.java | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 300cca7b8..beb12655c 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -118,8 +118,8 @@ public void setQuery(final String query) { * * @param totalrecords total number of records to be retrieved */ - public void setTotal(final String totalrecords) { - this.totalRecords = Integer.parseInt(totalrecords); + public 
void setTotal(final int totalrecords) { + this.totalRecords = totalrecords; } /** @@ -128,8 +128,8 @@ public void setTotal(final String totalrecords) { * * @param maximumRecords maximum of records returned in one lookup */ - public void setMaximumRecords(final String maximumRecords) { - this.maximumRecords = Integer.parseInt(maximumRecords); + public void setMaximumRecords(final int maximumRecords) { + this.maximumRecords = maximumRecords; } /** @@ -137,8 +137,8 @@ public void setMaximumRecords(final String maximumRecords) { * * @param startRecord where to start when retrieving records */ - public void setStartRecord(final String startRecord) { - this.startRecord = Integer.parseInt(startRecord); + public void setStartRecord(final int startRecord) { + this.startRecord = startRecord; } /** diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index 108112d8e..2f7075892 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -86,9 +86,9 @@ public void test_(){ sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("1890"); - sruOpener.setMaximumRecords("1"); - sruOpener.setTotal("3"); + sruOpener.setStartRecord(1890); + sruOpener.setMaximumRecords(1); + sruOpener.setTotal(3); sruOpener.process("https://services.dnb.de/sru/dnb"); System.out.println(resultCollector.toString()); } @@ -187,9 +187,9 @@ public void process(final XmlReceiver obj) { // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); - sruOpener.setStartRecord("3029"); - sruOpener.setMaximumRecords("1"); - sruOpener.setTotal("1"); + sruOpener.setStartRecord(3029); + sruOpener.setMaximumRecords(1); + sruOpener.setTotal(1); // sruOpener.process("https://services.dnb.de/sru/dnb"); 
sruOpener.process("https://services.dnb.de/sru/zdb"); // sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/"); From 94da562b3787f260e0d98454a40fb1c6f48bd342 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 11:08:01 +0200 Subject: [PATCH 08/14] Update metafacture-io/src/main/java/org/metafacture/io/SruOpener.java Co-authored-by: Jens Wille --- metafacture-io/src/main/java/org/metafacture/io/SruOpener.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index beb12655c..61257e215 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -124,7 +124,7 @@ public void setTotal(final int totalrecords) { /** * Sets the maximum of records returned in one lookup. Default value: {@value MAXIMUM_RECORDS}. - * The lookup is repeated as long as {@link #maximumRecords} is lesser than {@link #totalRecords}. + * The lookup is repeated as long as {@link #maximumRecords} is less than {@link #totalRecords}. 
* * @param maximumRecords maximum of records returned in one lookup */ From 2fd8da6d299ddbd8e93e1d4d7d2ac4ee1eee9e51 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 11:39:27 +0200 Subject: [PATCH 09/14] URLencode query parameter --- .../src/main/java/org/metafacture/io/SruOpener.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index beb12655c..df8b0b2e3 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -33,8 +33,11 @@ import java.io.InputStreamReader; import java.io.Reader; import java.io.StringWriter; +import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -109,7 +112,12 @@ public void setUserAgent(final String userAgent) { */ public void setQuery(final String query) { - this.query = query; + try { + this.query = URLEncoder.encode(query, StandardCharsets.UTF_8.toString()); + } + catch (final UnsupportedEncodingException e) { + throw new MetafactureException(e); + } } /** From 7f7beab813c2e2a0992d18f9294796411229c463 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 11:49:26 +0200 Subject: [PATCH 10/14] Use try to invoke auto closeable --- .../src/main/java/org/metafacture/io/SruOpener.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 120c21c72..9c85b9ce2 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ 
b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -189,8 +189,11 @@ public void process(final String baseUrl) { } while (!stopRetrieving && recordsRetrieved < totalRecords && startRecord < numberOfRecords) { - final InputStream inputStream = getXmlDocsViaSru(srUrl); - getReceiver().process(new InputStreamReader(inputStream)); + try (final InputStream inputStream = getXmlDocsViaSru(srUrl)) { + getReceiver().process(new InputStreamReader(inputStream)); + } catch (Exception e) { + throw new MetafactureException(e); + } } } From 1291754c7caa6e6c16be10fe2eecb1afc4aa51b8 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 14:17:46 +0200 Subject: [PATCH 11/14] Inline method thus getting rid of two member variables --- .../java/org/metafacture/io/SruOpener.java | 92 ++++++++----------- 1 file changed, 37 insertions(+), 55 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 9c85b9ce2..78fdecb33 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -33,7 +33,6 @@ import java.io.InputStreamReader; import java.io.Reader; import java.io.StringWriter; -import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; @@ -77,8 +76,6 @@ public final class SruOpener extends DefaultObjectPipe"; - private String xmlDeclaration; - /** * Default constructor */ @@ -98,10 +92,10 @@ public SruOpener() { /** * Sets the User Agent to use. Default value: {@value USER_AGENT}. 
* - * @param userAgent a user agent to be used when opening a URL + * @param useragent a user agent to be used when opening a URL */ - public void setUserAgent(final String userAgent) { - this.userAgent = userAgent; + public void setUserAgent(final String useragent) { + userAgent = useragent; } /** @@ -112,12 +106,7 @@ public void setUserAgent(final String userAgent) { */ public void setQuery(final String query) { - try { - this.query = URLEncoder.encode(query, StandardCharsets.UTF_8.toString()); - } - catch (final UnsupportedEncodingException e) { - throw new MetafactureException(e); - } + this.query = URLEncoder.encode(query, StandardCharsets.UTF_8); } /** @@ -127,26 +116,26 @@ public void setQuery(final String query) { * @param totalrecords total number of records to be retrieved */ public void setTotal(final int totalrecords) { - this.totalRecords = totalrecords; + totalRecords = totalrecords; } /** * Sets the maximum of records returned in one lookup. Default value: {@value MAXIMUM_RECORDS}. * The lookup is repeated as long as {@link #maximumRecords} is less than {@link #totalRecords}. * - * @param maximumRecords maximum of records returned in one lookup + * @param maximumrecords maximum of records returned in one lookup */ - public void setMaximumRecords(final int maximumRecords) { - this.maximumRecords = maximumRecords; + public void setMaximumRecords(final int maximumrecords) { + maximumRecords = maximumrecords; } /** * Sets where to start when retrieving records. Default value: {@value START_RECORD}. 
* - * @param startRecord where to start when retrieving records + * @param startrecord where to start when retrieving records */ - public void setStartRecord(final int startRecord) { - this.startRecord = startRecord; + public void setStartRecord(final int startrecord) { + startRecord = startrecord; } /** @@ -187,41 +176,36 @@ public void process(final String baseUrl) { else { throw new IllegalArgumentException("Missing mandatory parameter 'query'"); } - + int recordsRetrieved = 0; + int numberOfRecords = Integer.MAX_VALUE; while (!stopRetrieving && recordsRetrieved < totalRecords && startRecord < numberOfRecords) { - try (final InputStream inputStream = getXmlDocsViaSru(srUrl)) { - getReceiver().process(new InputStreamReader(inputStream)); - } catch (Exception e) { - throw new MetafactureException(e); - } - } - - } - - private InputStream getXmlDocsViaSru(final StringBuilder srUrl) { - try { - final InputStream inputStreamOfURl = retrieveUrl(srUrl, startRecord, maximumRecords); - final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - final DocumentBuilder docBuilder = factory.newDocumentBuilder(); - final Document xmldoc = docBuilder.parse(inputStreamOfURl); - final Transformer t = TransformerFactory.newInstance().newTransformer(); - final StringWriter stringWriter = new StringWriter(); - t.transform(new DOMSource(xmldoc), new StreamResult(stringWriter)); + try { + final InputStream inputStreamOfURl = retrieveUrl(srUrl); + final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + final DocumentBuilder docBuilder = factory.newDocumentBuilder(); + final Document xmldoc = docBuilder.parse(inputStreamOfURl); - numberOfRecords = getIntegerValueFromElement(xmldoc, "numberOfRecords", 0); - final int recordPosition = getIntegerValueFromElement(xmldoc, "recordPosition", 0); - final int nextRecordPosition = getIntegerValueFromElement(xmldoc, "nextRecordPosition", totalRecords); + final Transformer t = 
TransformerFactory.newInstance().newTransformer(); + final StringWriter stringWriter = new StringWriter(); + t.transform(new DOMSource(xmldoc), new StreamResult(stringWriter)); - recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; - startRecord = nextRecordPosition; // grenzwert : wenn maximumRcords > als in echt + numberOfRecords = getIntegerValueFromElement(xmldoc, "numberOfRecords", 0); + final int recordPosition = getIntegerValueFromElement(xmldoc, "recordPosition", 0); + final int nextRecordPosition = getIntegerValueFromElement(xmldoc, "nextRecordPosition", totalRecords); - return new ByteArrayInputStream(stringWriter.toString().getBytes()); + recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition; + startRecord = nextRecordPosition; + try (InputStream inputStream = new ByteArrayInputStream(stringWriter.toString().getBytes())) { + getReceiver().process(new InputStreamReader(inputStream)); + } + } + catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { + throw new MetafactureException(e); + } } - catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) { - throw new MetafactureException(e); - } + } private int getIntegerValueFromElement(final Document xmlDoc, final String tagName, final int fallback) { @@ -232,18 +216,16 @@ private int getIntegerValueFromElement(final Document xmlDoc, final String tagNa return fallback; } - private InputStream retrieveUrl(final StringBuilder srUrl, final int startrecord, final int maximumrecords) throws IOException { + private InputStream retrieveUrl(final StringBuilder srUrl) throws IOException { final URL urlToOpen = - new URL(srUrl.toString() + "&maximumRecords=" + maximumrecords + "&startRecord=" + startrecord); + new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + "&startRecord=" + startRecord); final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); 
connection.setConnectTimeout(CONNECTION_TIMEOUT); if (!userAgent.isEmpty()) { connection.setRequestProperty("User-Agent", userAgent); } - final InputStream inputStream = getInputStream(connection); - - return inputStream; + return getInputStream(connection); } private InputStream getInputStream(final HttpURLConnection connection) { From 907fdfc77667845b54a0ea88444cd9ef2c920027 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 14:38:17 +0200 Subject: [PATCH 12/14] Fix test endless loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - adjust test query to work with URLencoding Complements 2fd8da6d299ddbd8e93e1d4d7d2ac4ee1eee9e51. --- .../org/metafacture/io/SruOpenerTest.java | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java index 2f7075892..8e48857fd 100644 --- a/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java +++ b/metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java @@ -58,21 +58,18 @@ public SruOpenerTest() { @Before public void setUp() { - sruOpener = new SruOpener(); - final char[] buffer = new char[ 1024 * 1024 * 16]; + sruOpener = new SruOpener(); + final char[] buffer = new char[1024 * 1024 * 16]; sruOpener.setReceiver(new DefaultObjectPipe>() { @Override public void process(final Reader reader) { int size; try { - BufferedReader bufferedReader = new BufferedReader(reader); - String line; - while ((line = bufferedReader.readLine()) != null) { - resultCollector.append(line+"\n"); - } - } - catch (final IOException e) { - throw new MetafactureException(e); + while ((size = reader.read(buffer)) != -1) { + resultCollector.append(buffer, 0, size); + } + } catch (IOException e) { + throw new RuntimeException(e); } } }); @@ 
-83,7 +80,7 @@ public void process(final Reader reader) { public void test_(){ // sruOpener.setQuery("dnb.isil%3DDE-Sol1"); - sruOpener.setQuery("WVN%3D24A05"); + sruOpener.setQuery("WVN=24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); sruOpener.setStartRecord(1890); @@ -183,7 +180,7 @@ public void process(final XmlReceiver obj) { // resultCollector.append(obj); //} - sruOpener.setQuery("dnb.isil%3DDE-Sol1"); + sruOpener.setQuery("dnb.isil=DE-Sol1"); // sruOpener.setQuery("WVN%3D24A05"); sruOpener.setRecordSchema("MARC21plus-xml"); sruOpener.setVersion("1.1"); From aa161ce53c9df107e8d0a20ee97b59660e9f6136 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 20 Oct 2025 14:51:04 +0200 Subject: [PATCH 13/14] Set maximumRecords earlier as it is invariant --- .../src/main/java/org/metafacture/io/SruOpener.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index 78fdecb33..f93627633 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -171,7 +171,7 @@ public void process(final String baseUrl) { final StringBuilder srUrl = new StringBuilder(baseUrl); if (query != null) { srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=") - .append(recordSchema).append("&version=").append(version); + .append(recordSchema).append("&version=").append(version).append("&maximumRecords=" + maximumRecords); } else { throw new IllegalArgumentException("Missing mandatory parameter 'query'"); @@ -218,7 +218,7 @@ private int getIntegerValueFromElement(final Document xmlDoc, final String tagNa private InputStream retrieveUrl(final StringBuilder srUrl) throws IOException { final URL urlToOpen = - new URL(srUrl.toString() + "&maximumRecords=" + maximumRecords + 
"&startRecord=" + startRecord); + new URL(srUrl.toString() + "&startRecord=" + startRecord); final HttpURLConnection connection = (HttpURLConnection) urlToOpen.openConnection(); connection.setConnectTimeout(CONNECTION_TIMEOUT); From ad7ae5f77eaf13700d8becb2dd5c4cf247fb4e76 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 24 Oct 2025 15:13:05 +0200 Subject: [PATCH 14/14] Update metafacture-io/src/main/java/org/metafacture/io/SruOpener.java Co-authored-by: Jens Wille --- metafacture-io/src/main/java/org/metafacture/io/SruOpener.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java index f93627633..f6e6165ba 100644 --- a/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java +++ b/metafacture-io/src/main/java/org/metafacture/io/SruOpener.java @@ -115,7 +115,7 @@ public void setQuery(final String query) { * * @param totalrecords total number of records to be retrieved */ - public void setTotal(final int totalrecords) { + public void setTotalRecords(final int totalRecords) { totalRecords = totalrecords; }