From cfc5b7cf585b26b0df789736a48aabe332a79664 Mon Sep 17 00:00:00 2001 From: datalogics-jacksonm Date: Fri, 26 Jul 2024 12:19:39 -0500 Subject: [PATCH 1/4] Create new examples for OCR -> Extract Text workflow Adds new examples to show how to pair the new OCR tool with the Extract Text tool. --- .../ocr-with-extract-text.cs | 62 ++++++++++ .../OcrWithExtractText.java | 106 ++++++++++++++++++ .../ocr-with-extract-text.js | 74 ++++++++++++ .../ocr-with-extract-text.php | 72 ++++++++++++ .../ocr-with-extract-text.py | 67 +++++++++++ .../ocr-with-extract-text.sh | 32 ++++++ 6 files changed, 413 insertions(+) create mode 100644 DotNET/Complex Flow Examples/ocr-with-extract-text.cs create mode 100644 Java/Complex Flow Examples/OcrWithExtractText.java create mode 100644 JavaScript/Complex Flow Examples/ocr-with-extract-text.js create mode 100644 PHP/Complex Flow Examples/ocr-with-extract-text.php create mode 100644 Python/Complex Flow Examples/ocr-with-extract-text.py create mode 100644 cURL/Complex Flow Examples/ocr-with-extract-text.sh diff --git a/DotNET/Complex Flow Examples/ocr-with-extract-text.cs b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs new file mode 100644 index 0000000..e9daf52 --- /dev/null +++ b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs @@ -0,0 +1,62 @@ +using Newtonsoft.Json.Linq; +using System; +using System.IO; +using System.Net.Http; +using System.Text; +using System.Threading.Tasks; + +class Program +{ + private static readonly string apiKey = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // Your API key here + + static async Task Main(string[] args) + { + using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") }) + { + // Upload PDF for OCR + using var ocrRequest = new HttpRequestMessage(HttpMethod.Post, "pdf-with-ocr-text"); + + ocrRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey); + ocrRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json")); + var ocrMultipartContent = new MultipartFormDataContent(); + + var pdfByteArray = File.ReadAllBytes("/path/to/file.pdf"); + var pdfByteArrayContent = new ByteArrayContent(pdfByteArray); + ocrMultipartContent.Add(pdfByteArrayContent, "file", "file.pdf"); + pdfByteArrayContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf"); + ocrMultipartContent.Add(new StringContent("example_pdf-with-ocr-text_out"), "output"); + + ocrRequest.Content = ocrMultipartContent; + var ocrResponse = await httpClient.SendAsync(ocrRequest); + + var ocrResult = await ocrResponse.Content.ReadAsStringAsync(); + Console.WriteLine("OCR response received."); + Console.WriteLine(ocrResult); + + dynamic ocrResponseData = JObject.Parse(ocrResult); + string ocrPDFID = ocrResponseData.outputId; + + // Extract text from OCR'd PDF + using var extractTextRequest = new HttpRequestMessage(HttpMethod.Post, "extracted-text"); + + extractTextRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey); + extractTextRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json")); + var extractTextMultipartContent = new MultipartFormDataContent(); + + extractTextMultipartContent.Add(new StringContent(ocrPDFID), "id"); + + extractTextRequest.Content = extractTextMultipartContent; + var extractTextResponse = await httpClient.SendAsync(extractTextRequest); + + var extractTextResult = await extractTextResponse.Content.ReadAsStringAsync(); + Console.WriteLine("Extract text response received."); + Console.WriteLine(extractTextResult); + + dynamic extractTextResponseData = JObject.Parse(extractTextResult); + string fullText = extractTextResponseData.fullText; + + Console.WriteLine("Extracted text:"); + Console.WriteLine(fullText); + } + } +} \ No newline at end of file diff --git a/Java/Complex Flow Examples/OcrWithExtractText.java b/Java/Complex Flow Examples/OcrWithExtractText.java new file mode 100644 index 0000000..e2558b8 --- /dev/null +++ b/Java/Complex Flow Examples/OcrWithExtractText.java @@ -0,0 +1,106 @@ +import io.github.cdimascio.dotenv.Dotenv; +import java.io.File; +import java.io.IOException; +import java.util.concurrent.TimeUnit; +import okhttp3.*; +import org.json.JSONObject; + +/* In this sample, we will show how to convert a scanned document into a PDF with + * searchable and extractable text using Optical Character Recognition (OCR), and then + * extract that text from the newly created document. + * + * First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the + * output ID. Then, we will send the output ID to the /extracted-text route, which will + * return the newly added text. + */ + +public class OcrWithExtractText { + + // Specify the path to your PDF file here, or as the first argument when running the program. + private static final String DEFAULT_PDF_FILE_PATH = "/path/to/file.pdf"; + + // Specify your API key here, or in the environment variable PDFREST_API_KEY. + // You can also put the environment variable in a .env file. + private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; + + public static void main(String[] args) { + File pdfFile; + if (args.length > 0) { + pdfFile = new File(args[0]); + } else { + pdfFile = new File(DEFAULT_PDF_FILE_PATH); + } + + final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); + + final RequestBody pdfFileRequestBody = + RequestBody.create(pdfFile, MediaType.parse("application/pdf")); + RequestBody ocrRequestBody = + new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("file", pdfFile.getName(), pdfFileRequestBody) + .addFormDataPart("output", "example_pdf-with-ocr-text_out") + .build(); + Request ocrRequest = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .url("https://api.pdfrest.com/pdf-with-ocr-text") + .post(ocrRequestBody) + .build(); + try { + OkHttpClient ocrClient = + new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); + + Response ocrResponse = ocrClient.newCall(ocrRequest).execute(); + + System.out.println("Response status code: " + ocrResponse.code()); + if (ocrResponse.body() != null) { + String ocrResponseString = ocrResponse.body().string(); + + JSONObject ocrJSON = new JSONObject(ocrResponseString); + if (ocrJSON.has("error")) { + System.out.println("Error during OCR call: " + ocrResponseString); + return; + } + + String ocrPDFID = ocrJSON.get("outputId").toString(); + System.out.println("Got the output ID: " + ocrPDFID); + + RequestBody extractRequestBody = + new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("id", ocrPDFID) + .build(); + Request extractRequest = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .url("https://api.pdfrest.com/extracted-text") + .post(extractRequestBody) + .build(); + try { + OkHttpClient extractClient = + new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); + + Response extractResponse = extractClient.newCall(extractRequest).execute(); + + System.out.println("Response status code: " + extractResponse.code()); + if (extractResponse.body() != null) { + String extractResponseString = extractResponse.body().string(); + + JSONObject extractJSON = new JSONObject(extractResponseString); + if (extractJSON.has("error")) { + System.out.println("Error during text extraction call: " + extractResponseString); + return; + } + + System.out.println(extractJSON.getString("fullText")); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file diff --git a/JavaScript/Complex Flow Examples/ocr-with-extract-text.js b/JavaScript/Complex Flow Examples/ocr-with-extract-text.js new file mode 100644 index 0000000..17b053b --- /dev/null +++ b/JavaScript/Complex Flow Examples/ocr-with-extract-text.js @@ -0,0 +1,74 @@ +var axios = require("axios"); +var FormData = require("form-data"); +var fs = require("fs"); + +/* In this sample, we will show how to convert a scanned document into a PDF with +* searchable and extractable text using Optical Character Recognition (OCR), and then +* extract that text from the newly created document. +* +* First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the +* output ID. Then, we will send the output ID to the /extracted-text route, which will +* return the newly added text. +*/ + +var apiKey = "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // Replace with your API key + +var ocrData = new FormData(); +ocrData.append("file", fs.createReadStream("/path/to/file.pdf"), "file_name.pdf"); +ocrData.append("output", "example_pdf-with-ocr-text_out"); + +var ocrConfig = { + method: "post", + maxBodyLength: Infinity, + url: "https://api.pdfrest.com/pdf-with-ocr-text", + headers: { + "Api-Key": apiKey, + ...ocrData.getHeaders(), + }, + data: ocrData, +}; + +console.log("Sending POST request to OCR endpoint..."); +axios(ocrConfig) + .then(function (response) { + console.log("Response status code: " + response.status); + + if (response.status === 200) { + var ocrPDFID = response.data.outputId; + console.log("Got the output ID: " + ocrPDFID); + + var extractData = new FormData(); + extractData.append("id", ocrPDFID); + + var extractConfig = { + method: "post", + maxBodyLength: Infinity, + url: "https://api.pdfrest.com/extracted-text", + headers: { + "Api-Key": apiKey, + ...extractData.getHeaders(), + }, + data: extractData, + }; + + console.log("Sending POST request to extract text endpoint..."); + axios(extractConfig) + .then(function (extractResponse) { + console.log("Response status code: " + extractResponse.status); + + if (extractResponse.status === 200) { + console.log(extractResponse.data.fullText); + } else { + console.log(extractResponse.data); + } + }) + .catch(function (error) { + console.log(error.response ? error.response.data : error.message); + }); + } else { + console.log(response.data); + } + }) + .catch(function (error) { + console.log(error.response ? error.response.data : error.message); + }); \ No newline at end of file diff --git a/PHP/Complex Flow Examples/ocr-with-extract-text.php b/PHP/Complex Flow Examples/ocr-with-extract-text.php new file mode 100644 index 0000000..4eb612b --- /dev/null +++ b/PHP/Complex Flow Examples/ocr-with-extract-text.php @@ -0,0 +1,72 @@ + 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' // Replace with your API key +]; + +// Upload PDF for OCR +$pdfToOCROptions = [ + 'multipart' => [ + [ + 'name' => 'file', + 'contents' => Utils::tryFopen('/path/to/file.pdf', 'r'), + 'filename' => 'file.pdf', + 'headers' => [ + 'Content-Type' => 'application/pdf' + ] + ], + [ + 'name' => 'output', + 'contents' => 'example_pdf-with-ocr-text_out' + ] + ] +]; + +$pdfToOCRRequest = new Request('POST', 'https://api.pdfrest.com/pdf-with-ocr-text', $headers); + +echo "Sending POST request to OCR endpoint...\n"; +$pdfToOCRResponse = $client->sendAsync($pdfToOCRRequest, $pdfToOCROptions)->wait(); + +echo "Response status code: " . $pdfToOCRResponse->getStatusCode() . "\n"; + +$ocrPDFID = json_decode($pdfToOCRResponse->getBody())->outputId; +echo "Got the output ID: " . $ocrPDFID . "\n"; + +// Extract text from OCR'd PDF +$extractTextOptions = [ + 'multipart' => [ + [ + 'name' => 'id', + 'contents' => $ocrPDFID + ] + ] +]; + +$extractTextRequest = new Request('POST', 'https://api.pdfrest.com/extracted-text', $headers); + +echo "Sending POST request to extract text endpoint...\n"; +$extractTextResponse = $client->sendAsync($extractTextRequest, $extractTextOptions)->wait(); + +echo "Response status code: " . $extractTextResponse->getStatusCode() . "\n"; + +$fullText = json_decode($extractTextResponse->getBody())->fullText; +echo $fullText . "\n"; + +?> \ No newline at end of file diff --git a/Python/Complex Flow Examples/ocr-with-extract-text.py b/Python/Complex Flow Examples/ocr-with-extract-text.py new file mode 100644 index 0000000..3f80b82 --- /dev/null +++ b/Python/Complex Flow Examples/ocr-with-extract-text.py @@ -0,0 +1,67 @@ +from requests_toolbelt import MultipartEncoder +import requests + + +# In this sample, we will show how to convert a scanned document into a PDF with +# searchable and extractable text using Optical Character Recognition (OCR), and then +# extract that text from the newly created document. +# +# First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the +# output ID. Then, we will send the output ID to the /extracted-text route, which will +# return the newly added text. + +api_key = 'xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' # place your api key here + +ocr_endpoint_url = 'https://api.pdfrest.com/pdf-with-ocr-text' +mp_encoder_pdf = MultipartEncoder( + fields={ + 'file': ('file_name.pdf', open('/path/to/file.pdf', 'rb'), 'application/pdf'), + 'output': 'example_pdf-with-ocr-text_out', + } +) + +image_headers = { + 'Accept': 'application/json', + 'Content-Type': mp_encoder_pdf.content_type, + 'Api-Key': api_key +} + +print("Sending POST request to OCR endpoint...") +response = requests.post(ocr_endpoint_url, data=mp_encoder_pdf, headers=image_headers) + +print("Response status code: " + str(response.status_code)) + +if response.ok: + response_json = response.json() + ocr_pdf_id = response_json["outputId"] + print("Got the output ID: " + ocr_pdf_id) + + extract_endpoint_url = 'https://api.pdfrest.com/extracted-text' + + mp_encoder_extract_text = MultipartEncoder( + fields={ + 'id': ocr_pdf_id + } + ) + + extract_text_headers = { + 'Accept': 'application/json', + 'Content-Type': mp_encoder_extract_text.content_type, + 'Api-Key': api_key + } + + print("Sending POST request to extract text endpoint...") + extract_response = requests.post(extract_endpoint_url, data=mp_encoder_extract_text, headers=extract_text_headers) + + print("Response status code: " + str(extract_response.status_code)) + + if extract_response.ok: + extract_json = extract_response.json() + print(extract_json["fullText"]) + + else: + print(extract_response.text) + + +else: + print(response.text) \ No newline at end of file diff --git a/cURL/Complex Flow Examples/ocr-with-extract-text.sh b/cURL/Complex Flow Examples/ocr-with-extract-text.sh new file mode 100644 index 0000000..ccee7c0 --- /dev/null +++ b/cURL/Complex Flow Examples/ocr-with-extract-text.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +# In this sample, we will show how to convert a scanned document into a PDF with +# searchable and extractable text using Optical Character Recognition (OCR), and then +# extract that text from the newly created document. +# +# First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the +# output ID. Then, we will send the output ID to the /extracted-text route, which will +# return the newly added text. + +API_KEY="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # Replace with your API key + +# Upload PDF for OCR +OCR_PDF_ID=$(curl -s -X POST "https://api.pdfrest.com/pdf-with-ocr-text" \ + -H "Accept: application/json" \ + -H "Content-Type: multipart/form-data" \ + -H "Api-Key: $API_KEY" \ + -F "file=@/path/to/file.pdf" \ + -F "output=example_pdf-with-ocr-text_out"\ + | jq -r '.outputId') + + +# Extract text from OCR'd PDF +EXTRACT_TEXT_RESPONSE=$(curl -s -X POST "https://api.pdfrest.com/extracted-text" \ + -H "Accept: application/json" \ + -H "Content-Type: multipart/form-data" \ + -H "Api-Key: $API_KEY" \ + -F "id=$OCR_PDF_ID") + + +FULL_TEXT=$(echo $EXTRACT_TEXT_RESPONSE | jq -r '.fullText') +echo "Extracted text: $FULL_TEXT" \ No newline at end of file From 93aa3ae453abdaeda33b93040fd2d4acd552cc84 Mon Sep 17 00:00:00 2001 From: datalogics-jacksonm Date: Mon, 29 Jul 2024 10:29:36 -0500 Subject: [PATCH 2/4] Various fixes to C# example --- DotNET/Complex Flow Examples/ocr-with-extract-text.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DotNET/Complex Flow Examples/ocr-with-extract-text.cs b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs index e9daf52..efb60d5 100644 --- a/DotNET/Complex Flow Examples/ocr-with-extract-text.cs +++ b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs @@ -5,7 +5,7 @@ using System.Text; using System.Threading.Tasks; -class Program +class OcrWithExtractText { private static readonly string apiKey = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // Your API key here @@ -24,7 +24,6 @@ static async Task Main(string[] args) var pdfByteArrayContent = new ByteArrayContent(pdfByteArray); ocrMultipartContent.Add(pdfByteArrayContent, "file", "file.pdf"); pdfByteArrayContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf"); - ocrMultipartContent.Add(new StringContent("example_pdf-with-ocr-text_out"), "output"); ocrRequest.Content = ocrMultipartContent; var ocrResponse = await httpClient.SendAsync(ocrRequest); @@ -43,7 +42,8 @@ static async Task Main(string[] args) extractTextRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json")); var extractTextMultipartContent = new MultipartFormDataContent(); - extractTextMultipartContent.Add(new StringContent(ocrPDFID), "id"); + var byteArrayOption = new ByteArrayContent(Encoding.UTF8.GetBytes(ocrPDFID)); + extractTextMultipartContent.Add(byteArrayOption, "id"); extractTextRequest.Content = extractTextMultipartContent; var extractTextResponse = await httpClient.SendAsync(extractTextRequest); From 7a7cacc3c36866b536ad0c61c96140c8509e7488 Mon Sep 17 00:00:00 2001 From: datalogics-jacksonm Date: Mon, 29 Jul 2024 10:30:22 -0500 Subject: [PATCH 3/4] Make script executable --- cURL/Complex Flow Examples/ocr-with-extract-text.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 cURL/Complex Flow Examples/ocr-with-extract-text.sh diff --git a/cURL/Complex Flow Examples/ocr-with-extract-text.sh b/cURL/Complex Flow Examples/ocr-with-extract-text.sh old mode 100644 new mode 100755 From 27a85eadacf25c870814d326ff6e3893fa4ac12e Mon Sep 17 00:00:00 2001 From: datalogics-jacksonm Date: Mon, 29 Jul 2024 10:31:29 -0500 Subject: [PATCH 4/4] Run `mvn spotless:apply` --- .../OcrWithExtractText.java | 162 +++++++++--------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/Java/Complex Flow Examples/OcrWithExtractText.java b/Java/Complex Flow Examples/OcrWithExtractText.java index e2558b8..dbacd17 100644 --- a/Java/Complex Flow Examples/OcrWithExtractText.java +++ b/Java/Complex Flow Examples/OcrWithExtractText.java @@ -16,91 +16,91 @@ public class OcrWithExtractText { - // Specify the path to your PDF file here, or as the first argument when running the program. - private static final String DEFAULT_PDF_FILE_PATH = "/path/to/file.pdf"; - - // Specify your API key here, or in the environment variable PDFREST_API_KEY. - // You can also put the environment variable in a .env file. - private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; - - public static void main(String[] args) { - File pdfFile; - if (args.length > 0) { - pdfFile = new File(args[0]); - } else { - pdfFile = new File(DEFAULT_PDF_FILE_PATH); + // Specify the path to your PDF file here, or as the first argument when running the program. + private static final String DEFAULT_PDF_FILE_PATH = "/path/to/file.pdf"; + + // Specify your API key here, or in the environment variable PDFREST_API_KEY. + // You can also put the environment variable in a .env file. + private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; + + public static void main(String[] args) { + File pdfFile; + if (args.length > 0) { + pdfFile = new File(args[0]); + } else { + pdfFile = new File(DEFAULT_PDF_FILE_PATH); + } + + final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); + + final RequestBody pdfFileRequestBody = + RequestBody.create(pdfFile, MediaType.parse("application/pdf")); + RequestBody ocrRequestBody = + new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("file", pdfFile.getName(), pdfFileRequestBody) + .addFormDataPart("output", "example_pdf-with-ocr-text_out") + .build(); + Request ocrRequest = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .url("https://api.pdfrest.com/pdf-with-ocr-text") + .post(ocrRequestBody) + .build(); + try { + OkHttpClient ocrClient = + new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); + + Response ocrResponse = ocrClient.newCall(ocrRequest).execute(); + + System.out.println("Response status code: " + ocrResponse.code()); + if (ocrResponse.body() != null) { + String ocrResponseString = ocrResponse.body().string(); + + JSONObject ocrJSON = new JSONObject(ocrResponseString); + if (ocrJSON.has("error")) { + System.out.println("Error during OCR call: " + ocrResponseString); + return; } - final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); - - final RequestBody pdfFileRequestBody = - RequestBody.create(pdfFile, MediaType.parse("application/pdf")); - RequestBody ocrRequestBody = - new MultipartBody.Builder() - .setType(MultipartBody.FORM) - .addFormDataPart("file", pdfFile.getName(), pdfFileRequestBody) - .addFormDataPart("output", "example_pdf-with-ocr-text_out") - .build(); - Request ocrRequest = - new Request.Builder() - .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) - .url("https://api.pdfrest.com/pdf-with-ocr-text") - .post(ocrRequestBody) - .build(); + String ocrPDFID = ocrJSON.get("outputId").toString(); + System.out.println("Got the output ID: " + ocrPDFID); + + RequestBody extractRequestBody = + new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("id", ocrPDFID) + .build(); + Request extractRequest = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .url("https://api.pdfrest.com/extracted-text") + .post(extractRequestBody) + .build(); try { - OkHttpClient ocrClient = - new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); - - Response ocrResponse = ocrClient.newCall(ocrRequest).execute(); - - System.out.println("Response status code: " + ocrResponse.code()); - if (ocrResponse.body() != null) { - String ocrResponseString = ocrResponse.body().string(); - - JSONObject ocrJSON = new JSONObject(ocrResponseString); - if (ocrJSON.has("error")) { - System.out.println("Error during OCR call: " + ocrResponseString); - return; - } - - String ocrPDFID = ocrJSON.get("outputId").toString(); - System.out.println("Got the output ID: " + ocrPDFID); - - RequestBody extractRequestBody = - new MultipartBody.Builder() - .setType(MultipartBody.FORM) - .addFormDataPart("id", ocrPDFID) - .build(); - Request extractRequest = - new Request.Builder() - .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) - .url("https://api.pdfrest.com/extracted-text") - .post(extractRequestBody) - .build(); - try { - OkHttpClient extractClient = - new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); - - Response extractResponse = extractClient.newCall(extractRequest).execute(); - - System.out.println("Response status code: " + extractResponse.code()); - if (extractResponse.body() != null) { - String extractResponseString = extractResponse.body().string(); - - JSONObject extractJSON = new JSONObject(extractResponseString); - if (extractJSON.has("error")) { - System.out.println("Error during text extraction call: " + extractResponseString); - return; - } - - System.out.println(extractJSON.getString("fullText")); - } - } catch (IOException e) { - throw new RuntimeException(e); - } + OkHttpClient extractClient = + new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); + + Response extractResponse = extractClient.newCall(extractRequest).execute(); + + System.out.println("Response status code: " + extractResponse.code()); + if (extractResponse.body() != null) { + String extractResponseString = extractResponse.body().string(); + + JSONObject extractJSON = new JSONObject(extractResponseString); + if (extractJSON.has("error")) { + System.out.println("Error during text extraction call: " + extractResponseString); + return; } + + System.out.println(extractJSON.getString("fullText")); + } } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException(e); } + } + } catch (IOException e) { + throw new RuntimeException(e); } -} \ No newline at end of file + } +}