From cfc5b7cf585b26b0df789736a48aabe332a79664 Mon Sep 17 00:00:00 2001
From: datalogics-jacksonm <jacksonm@datalogics.com>
Date: Fri, 26 Jul 2024 12:19:39 -0500
Subject: [PATCH 1/4] Create new examples for OCR -> Extract Text workflow

Adds new examples to show how to pair the new OCR tool with the Extract
Text tool.
---
 .../ocr-with-extract-text.cs                  |  62 ++++++++++
 .../OcrWithExtractText.java                   | 106 ++++++++++++++++++
 .../ocr-with-extract-text.js                  |  74 ++++++++++++
 .../ocr-with-extract-text.php                 |  72 ++++++++++++
 .../ocr-with-extract-text.py                  |  67 +++++++++++
 .../ocr-with-extract-text.sh                  |  32 ++++++
 6 files changed, 413 insertions(+)
 create mode 100644 DotNET/Complex Flow Examples/ocr-with-extract-text.cs
 create mode 100644 Java/Complex Flow Examples/OcrWithExtractText.java
 create mode 100644 JavaScript/Complex Flow Examples/ocr-with-extract-text.js
 create mode 100644 PHP/Complex Flow Examples/ocr-with-extract-text.php
 create mode 100644 Python/Complex Flow Examples/ocr-with-extract-text.py
 create mode 100644 cURL/Complex Flow Examples/ocr-with-extract-text.sh

diff --git a/DotNET/Complex Flow Examples/ocr-with-extract-text.cs b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs
new file mode 100644
index 0000000..e9daf52
--- /dev/null
+++ b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs	
@@ -0,0 +1,62 @@
+using Newtonsoft.Json.Linq;
+using System;
+using System.IO;
+using System.Net.Http;
+using System.Text;
+using System.Threading.Tasks;
+
+class Program
+{
+    private static readonly string apiKey = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // Your API key here
+
+    static async Task Main(string[] args)
+    {
+        using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") })
+        {
+            // Upload PDF for OCR
+            using var ocrRequest = new HttpRequestMessage(HttpMethod.Post, "pdf-with-ocr-text");
+
+            ocrRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey);
+            ocrRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));
+            var ocrMultipartContent = new MultipartFormDataContent();
+
+            var pdfByteArray = File.ReadAllBytes("/path/to/file.pdf");
+            var pdfByteArrayContent = new ByteArrayContent(pdfByteArray);
+            ocrMultipartContent.Add(pdfByteArrayContent, "file", "file.pdf");
+            pdfByteArrayContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf");
+            ocrMultipartContent.Add(new StringContent("example_pdf-with-ocr-text_out"), "output");
+
+            ocrRequest.Content = ocrMultipartContent;
+            var ocrResponse = await httpClient.SendAsync(ocrRequest);
+
+            var ocrResult = await ocrResponse.Content.ReadAsStringAsync();
+            Console.WriteLine("OCR response received.");
+            Console.WriteLine(ocrResult);
+
+            dynamic ocrResponseData = JObject.Parse(ocrResult);
+            string ocrPDFID = ocrResponseData.outputId;
+
+            // Extract text from OCR'd PDF
+            using var extractTextRequest = new HttpRequestMessage(HttpMethod.Post, "extracted-text");
+
+            extractTextRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey);
+            extractTextRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));
+            var extractTextMultipartContent = new MultipartFormDataContent();
+
+            extractTextMultipartContent.Add(new StringContent(ocrPDFID), "id");
+
+            extractTextRequest.Content = extractTextMultipartContent;
+            var extractTextResponse = await httpClient.SendAsync(extractTextRequest);
+
+            var extractTextResult = await extractTextResponse.Content.ReadAsStringAsync();
+            Console.WriteLine("Extract text response received.");
+            Console.WriteLine(extractTextResult);
+
+            dynamic extractTextResponseData = JObject.Parse(extractTextResult);
+            string fullText = extractTextResponseData.fullText;
+
+            Console.WriteLine("Extracted text:");
+            Console.WriteLine(fullText);
+        }
+    }
+}
\ No newline at end of file
diff --git a/Java/Complex Flow Examples/OcrWithExtractText.java b/Java/Complex Flow Examples/OcrWithExtractText.java
new file mode 100644
index 0000000..e2558b8
--- /dev/null
+++ b/Java/Complex Flow Examples/OcrWithExtractText.java	
@@ -0,0 +1,106 @@
+import io.github.cdimascio.dotenv.Dotenv;
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import okhttp3.*;
+import org.json.JSONObject;
+
+/* In this sample, we will show how to convert a scanned document into a PDF with
+ * searchable and extractable text using Optical Character Recognition (OCR), and then
+ * extract that text from the newly created document.
+ *
+ * First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
+ * output ID. Then, we will send the output ID to the /extracted-text route, which will
+ * return the newly added text.
+ */
+
+public class OcrWithExtractText {
+
+    // Specify the path to your PDF file here, or as the first argument when running the program.
+    private static final String DEFAULT_PDF_FILE_PATH = "/path/to/file.pdf";
+
+    // Specify your API key here, or in the environment variable PDFREST_API_KEY.
+    // You can also put the environment variable in a .env file.
+    private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+
+    public static void main(String[] args) {
+        File pdfFile;
+        if (args.length > 0) {
+            pdfFile = new File(args[0]);
+        } else {
+            pdfFile = new File(DEFAULT_PDF_FILE_PATH);
+        }
+
+        final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+        final RequestBody pdfFileRequestBody =
+                RequestBody.create(pdfFile, MediaType.parse("application/pdf"));
+        RequestBody ocrRequestBody =
+                new MultipartBody.Builder()
+                        .setType(MultipartBody.FORM)
+                        .addFormDataPart("file", pdfFile.getName(), pdfFileRequestBody)
+                        .addFormDataPart("output", "example_pdf-with-ocr-text_out")
+                        .build();
+        Request ocrRequest =
+                new Request.Builder()
+                        .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+                        .url("https://api.pdfrest.com/pdf-with-ocr-text")
+                        .post(ocrRequestBody)
+                        .build();
+        try {
+            OkHttpClient ocrClient =
+                    new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+
+            Response ocrResponse = ocrClient.newCall(ocrRequest).execute();
+
+            System.out.println("Response status code: " + ocrResponse.code());
+            if (ocrResponse.body() != null) {
+                String ocrResponseString = ocrResponse.body().string();
+
+                JSONObject ocrJSON = new JSONObject(ocrResponseString);
+                if (ocrJSON.has("error")) {
+                    System.out.println("Error during OCR call: " + ocrResponseString);
+                    return;
+                }
+
+                String ocrPDFID = ocrJSON.get("outputId").toString();
+                System.out.println("Got the output ID: " + ocrPDFID);
+
+                RequestBody extractRequestBody =
+                        new MultipartBody.Builder()
+                                .setType(MultipartBody.FORM)
+                                .addFormDataPart("id", ocrPDFID)
+                                .build();
+                Request extractRequest =
+                        new Request.Builder()
+                                .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+                                .url("https://api.pdfrest.com/extracted-text")
+                                .post(extractRequestBody)
+                                .build();
+                try {
+                    OkHttpClient extractClient =
+                            new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+
+                    Response extractResponse = extractClient.newCall(extractRequest).execute();
+
+                    System.out.println("Response status code: " + extractResponse.code());
+                    if (extractResponse.body() != null) {
+                        String extractResponseString = extractResponse.body().string();
+
+                        JSONObject extractJSON = new JSONObject(extractResponseString);
+                        if (extractJSON.has("error")) {
+                            System.out.println("Error during text extraction call: " + extractResponseString);
+                            return;
+                        }
+
+                        System.out.println(extractJSON.getString("fullText"));
+                    }
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+}
\ No newline at end of file
diff --git a/JavaScript/Complex Flow Examples/ocr-with-extract-text.js b/JavaScript/Complex Flow Examples/ocr-with-extract-text.js
new file mode 100644
index 0000000..17b053b
--- /dev/null
+++ b/JavaScript/Complex Flow Examples/ocr-with-extract-text.js	
@@ -0,0 +1,74 @@
+var axios = require("axios");
+var FormData = require("form-data");
+var fs = require("fs");
+
+/* In this sample, we will show how to convert a scanned document into a PDF with
+* searchable and extractable text using Optical Character Recognition (OCR), and then
+* extract that text from the newly created document.
+*
+* First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
+* output ID. Then, we will send the output ID to the /extracted-text route, which will
+* return the newly added text.
+*/
+
+var apiKey = "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // Replace with your API key
+
+var ocrData = new FormData();
+ocrData.append("file", fs.createReadStream("/path/to/file.pdf"), "file_name.pdf");
+ocrData.append("output", "example_pdf-with-ocr-text_out");
+
+var ocrConfig = {
+  method: "post",
+  maxBodyLength: Infinity,
+  url: "https://api.pdfrest.com/pdf-with-ocr-text",
+  headers: {
+    "Api-Key": apiKey,
+    ...ocrData.getHeaders(),
+  },
+  data: ocrData,
+};
+
+console.log("Sending POST request to OCR endpoint...");
+axios(ocrConfig)
+  .then(function (response) {
+    console.log("Response status code: " + response.status);
+
+    if (response.status === 200) {
+      var ocrPDFID = response.data.outputId;
+      console.log("Got the output ID: " + ocrPDFID);
+
+      var extractData = new FormData();
+      extractData.append("id", ocrPDFID);
+
+      var extractConfig = {
+        method: "post",
+        maxBodyLength: Infinity,
+        url: "https://api.pdfrest.com/extracted-text",
+        headers: {
+          "Api-Key": apiKey,
+          ...extractData.getHeaders(),
+        },
+        data: extractData,
+      };
+
+      console.log("Sending POST request to extract text endpoint...");
+      axios(extractConfig)
+        .then(function (extractResponse) {
+          console.log("Response status code: " + extractResponse.status);
+
+          if (extractResponse.status === 200) {
+            console.log(extractResponse.data.fullText);
+          } else {
+            console.log(extractResponse.data);
+          }
+        })
+        .catch(function (error) {
+          console.log(error.response ? error.response.data : error.message);
+        });
+    } else {
+      console.log(response.data);
+    }
+  })
+  .catch(function (error) {
+    console.log(error.response ? error.response.data : error.message);
+  });
\ No newline at end of file
diff --git a/PHP/Complex Flow Examples/ocr-with-extract-text.php b/PHP/Complex Flow Examples/ocr-with-extract-text.php
new file mode 100644
index 0000000..4eb612b
--- /dev/null
+++ b/PHP/Complex Flow Examples/ocr-with-extract-text.php	
@@ -0,0 +1,72 @@
+<?php
+
+require 'vendor/autoload.php';
+
+use GuzzleHttp\Client;
+use GuzzleHttp\Psr7\Request;
+use GuzzleHttp\Psr7\Utils;
+
+/* In this sample, we will show how to convert a scanned document into a PDF with
+ * searchable and extractable text using Optical Character Recognition (OCR), and then
+ * extract that text from the newly created document.
+ *
+ * First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
+ * output ID. Then, we will send the output ID to the /extracted-text route, which will
+ * return the newly added text.
+ */
+
+$client = new Client();
+
+$headers = [
+  'Api-Key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' // Replace with your API key
+];
+
+// Upload PDF for OCR
+$pdfToOCROptions = [
+  'multipart' => [
+    [
+      'name' => 'file',
+      'contents' => Utils::tryFopen('/path/to/file.pdf', 'r'),
+      'filename' => 'file.pdf',
+      'headers' => [
+        'Content-Type' => 'application/pdf'
+      ]
+    ],
+    [
+      'name' => 'output',
+      'contents' => 'example_pdf-with-ocr-text_out'
+    ]
+  ]
+];
+
+$pdfToOCRRequest = new Request('POST', 'https://api.pdfrest.com/pdf-with-ocr-text', $headers);
+
+echo "Sending POST request to OCR endpoint...\n";
+$pdfToOCRResponse = $client->sendAsync($pdfToOCRRequest, $pdfToOCROptions)->wait();
+
+echo "Response status code: " . $pdfToOCRResponse->getStatusCode() . "\n";
+
+$ocrPDFID = json_decode($pdfToOCRResponse->getBody())->outputId;
+echo "Got the output ID: " . $ocrPDFID . "\n";
+
+// Extract text from OCR'd PDF
+$extractTextOptions = [
+  'multipart' => [
+    [
+      'name' => 'id',
+      'contents' => $ocrPDFID
+    ]
+  ]
+];
+
+$extractTextRequest = new Request('POST', 'https://api.pdfrest.com/extracted-text', $headers);
+
+echo "Sending POST request to extract text endpoint...\n";
+$extractTextResponse = $client->sendAsync($extractTextRequest, $extractTextOptions)->wait();
+
+echo "Response status code: " . $extractTextResponse->getStatusCode() . "\n";
+
+$fullText = json_decode($extractTextResponse->getBody())->fullText;
+echo $fullText . "\n";
+
+?>
\ No newline at end of file
diff --git a/Python/Complex Flow Examples/ocr-with-extract-text.py b/Python/Complex Flow Examples/ocr-with-extract-text.py
new file mode 100644
index 0000000..3f80b82
--- /dev/null
+++ b/Python/Complex Flow Examples/ocr-with-extract-text.py	
@@ -0,0 +1,67 @@
+from requests_toolbelt import MultipartEncoder
+import requests
+
+
+# In this sample, we will show how to convert a scanned document into a PDF with
+# searchable and extractable text using Optical Character Recognition (OCR), and then
+# extract that text from the newly created document.
+#
+# First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
+# output ID. Then, we will send the output ID to the /extracted-text route, which will
+# return the newly added text.
+
+api_key = 'xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' # place your api key here
+
+ocr_endpoint_url = 'https://api.pdfrest.com/pdf-with-ocr-text'
+# Open the PDF in a context manager so the handle is closed once the upload is done.
+with open('/path/to/file.pdf', 'rb') as pdf_file:
+    mp_encoder_pdf = MultipartEncoder(
+        fields={
+            'file': ('file_name.pdf', pdf_file, 'application/pdf'),
+            'output': 'example_pdf-with-ocr-text_out',
+        }
+    )
+    ocr_headers = {
+        'Accept': 'application/json',
+        'Content-Type': mp_encoder_pdf.content_type,
+        'Api-Key': api_key
+    }
+    print("Sending POST request to OCR endpoint...")
+    response = requests.post(ocr_endpoint_url, data=mp_encoder_pdf, headers=ocr_headers)
+
+print("Response status code: " + str(response.status_code))
+
+if response.ok:
+    response_json = response.json()
+    ocr_pdf_id = response_json["outputId"]
+    print("Got the output ID: " + ocr_pdf_id)
+
+    extract_endpoint_url = 'https://api.pdfrest.com/extracted-text'
+
+    mp_encoder_extract_text = MultipartEncoder(
+        fields={
+            'id': ocr_pdf_id
+        }
+    )
+
+    extract_text_headers = {
+        'Accept': 'application/json',
+        'Content-Type': mp_encoder_extract_text.content_type,
+        'Api-Key': api_key
+    }
+
+    print("Sending POST request to extract text endpoint...")
+    extract_response = requests.post(extract_endpoint_url, data=mp_encoder_extract_text, headers=extract_text_headers)
+
+    print("Response status code: " + str(extract_response.status_code))
+
+    if extract_response.ok:
+        extract_json = extract_response.json()
+        print(extract_json["fullText"])
+
+    else:
+        print(extract_response.text)
+
+
+else:
+    print(response.text)
\ No newline at end of file
diff --git a/cURL/Complex Flow Examples/ocr-with-extract-text.sh b/cURL/Complex Flow Examples/ocr-with-extract-text.sh
new file mode 100644
index 0000000..ccee7c0
--- /dev/null
+++ b/cURL/Complex Flow Examples/ocr-with-extract-text.sh	
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# In this sample, we will show how to convert a scanned document into a PDF with
+# searchable and extractable text using Optical Character Recognition (OCR), and then
+# extract that text from the newly created document.
+#
+# First, we will upload a scanned PDF to the /pdf-with-ocr-text route and capture the
+# output ID. Then, we will send the output ID to the /extracted-text route, which will
+# return the newly added text.
+
+API_KEY="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # Replace with your API key
+
+# Upload PDF for OCR; jq -r yields "null" (or nothing) when no outputId comes back, so stop then
+OCR_PDF_ID=$(curl -s -X POST "https://api.pdfrest.com/pdf-with-ocr-text" \
+  -H "Accept: application/json" \
+  -H "Content-Type: multipart/form-data" \
+  -H "Api-Key: $API_KEY" \
+  -F "file=@/path/to/file.pdf" \
+  -F "output=example_pdf-with-ocr-text_out" \
+  | jq -r '.outputId')
+[ -n "$OCR_PDF_ID" ] && [ "$OCR_PDF_ID" != "null" ] || { echo "OCR request failed: no output ID returned" >&2; exit 1; }
+
+# Extract text from OCR'd PDF
+EXTRACT_TEXT_RESPONSE=$(curl -s -X POST "https://api.pdfrest.com/extracted-text" \
+  -H "Accept: application/json" \
+  -H "Content-Type: multipart/form-data" \
+  -H "Api-Key: $API_KEY" \
+  -F "id=$OCR_PDF_ID")
+
+# Use printf with quoting: echo may expand backslash escapes in the JSON and break jq's parsing
+FULL_TEXT=$(printf '%s' "$EXTRACT_TEXT_RESPONSE" | jq -r '.fullText')
+printf 'Extracted text: %s\n' "$FULL_TEXT"
\ No newline at end of file

From 93aa3ae453abdaeda33b93040fd2d4acd552cc84 Mon Sep 17 00:00:00 2001
From: datalogics-jacksonm <jacksonm@datalogics.com>
Date: Mon, 29 Jul 2024 10:29:36 -0500
Subject: [PATCH 2/4] Various fixes to C# example

---
 DotNET/Complex Flow Examples/ocr-with-extract-text.cs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/DotNET/Complex Flow Examples/ocr-with-extract-text.cs b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs
index e9daf52..efb60d5 100644
--- a/DotNET/Complex Flow Examples/ocr-with-extract-text.cs	
+++ b/DotNET/Complex Flow Examples/ocr-with-extract-text.cs	
@@ -5,7 +5,7 @@
 using System.Text;
 using System.Threading.Tasks;
 
-class Program
+class OcrWithExtractText
 {
     private static readonly string apiKey = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // Your API key here
 
@@ -24,7 +24,6 @@ static async Task Main(string[] args)
             var pdfByteArrayContent = new ByteArrayContent(pdfByteArray);
             ocrMultipartContent.Add(pdfByteArrayContent, "file", "file.pdf");
             pdfByteArrayContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf");
-            ocrMultipartContent.Add(new StringContent("example_pdf-with-ocr-text_out"), "output");
 
             ocrRequest.Content = ocrMultipartContent;
             var ocrResponse = await httpClient.SendAsync(ocrRequest);
@@ -43,7 +42,8 @@ static async Task Main(string[] args)
             extractTextRequest.Headers.Accept.Add(new System.Net.Http.Headers.MediaTypeWithQualityHeaderValue("application/json"));
             var extractTextMultipartContent = new MultipartFormDataContent();
 
-            extractTextMultipartContent.Add(new StringContent(ocrPDFID), "id");
+            var byteArrayOption = new ByteArrayContent(Encoding.UTF8.GetBytes(ocrPDFID));
+            extractTextMultipartContent.Add(byteArrayOption, "id");
 
             extractTextRequest.Content = extractTextMultipartContent;
             var extractTextResponse = await httpClient.SendAsync(extractTextRequest);

From 7a7cacc3c36866b536ad0c61c96140c8509e7488 Mon Sep 17 00:00:00 2001
From: datalogics-jacksonm <jacksonm@datalogics.com>
Date: Mon, 29 Jul 2024 10:30:22 -0500
Subject: [PATCH 3/4] Make script executable

---
 cURL/Complex Flow Examples/ocr-with-extract-text.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 cURL/Complex Flow Examples/ocr-with-extract-text.sh

diff --git a/cURL/Complex Flow Examples/ocr-with-extract-text.sh b/cURL/Complex Flow Examples/ocr-with-extract-text.sh
old mode 100644
new mode 100755

From 27a85eadacf25c870814d326ff6e3893fa4ac12e Mon Sep 17 00:00:00 2001
From: datalogics-jacksonm <jacksonm@datalogics.com>
Date: Mon, 29 Jul 2024 10:31:29 -0500
Subject: [PATCH 4/4] Run `mvn spotless:apply`

---
 .../OcrWithExtractText.java                   | 162 +++++++++---------
 1 file changed, 81 insertions(+), 81 deletions(-)

diff --git a/Java/Complex Flow Examples/OcrWithExtractText.java b/Java/Complex Flow Examples/OcrWithExtractText.java
index e2558b8..dbacd17 100644
--- a/Java/Complex Flow Examples/OcrWithExtractText.java	
+++ b/Java/Complex Flow Examples/OcrWithExtractText.java	
@@ -16,91 +16,91 @@
 
 public class OcrWithExtractText {
 
-    // Specify the path to your PDF file here, or as the first argument when running the program.
-    private static final String DEFAULT_PDF_FILE_PATH = "/path/to/file.pdf";
-
-    // Specify your API key here, or in the environment variable PDFREST_API_KEY.
-    // You can also put the environment variable in a .env file.
-    private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
-
-    public static void main(String[] args) {
-        File pdfFile;
-        if (args.length > 0) {
-            pdfFile = new File(args[0]);
-        } else {
-            pdfFile = new File(DEFAULT_PDF_FILE_PATH);
+  // Specify the path to your PDF file here, or as the first argument when running the program.
+  private static final String DEFAULT_PDF_FILE_PATH = "/path/to/file.pdf";
+
+  // Specify your API key here, or in the environment variable PDFREST_API_KEY.
+  // You can also put the environment variable in a .env file.
+  private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+
+  public static void main(String[] args) {
+    File pdfFile;
+    if (args.length > 0) {
+      pdfFile = new File(args[0]);
+    } else {
+      pdfFile = new File(DEFAULT_PDF_FILE_PATH);
+    }
+
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    final RequestBody pdfFileRequestBody =
+        RequestBody.create(pdfFile, MediaType.parse("application/pdf"));
+    RequestBody ocrRequestBody =
+        new MultipartBody.Builder()
+            .setType(MultipartBody.FORM)
+            .addFormDataPart("file", pdfFile.getName(), pdfFileRequestBody)
+            .addFormDataPart("output", "example_pdf-with-ocr-text_out")
+            .build();
+    Request ocrRequest =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .url("https://api.pdfrest.com/pdf-with-ocr-text")
+            .post(ocrRequestBody)
+            .build();
+    try {
+      OkHttpClient ocrClient =
+          new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+
+      Response ocrResponse = ocrClient.newCall(ocrRequest).execute();
+
+      System.out.println("Response status code: " + ocrResponse.code());
+      if (ocrResponse.body() != null) {
+        String ocrResponseString = ocrResponse.body().string();
+
+        JSONObject ocrJSON = new JSONObject(ocrResponseString);
+        if (ocrJSON.has("error")) {
+          System.out.println("Error during OCR call: " + ocrResponseString);
+          return;
         }
 
-        final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
-
-        final RequestBody pdfFileRequestBody =
-                RequestBody.create(pdfFile, MediaType.parse("application/pdf"));
-        RequestBody ocrRequestBody =
-                new MultipartBody.Builder()
-                        .setType(MultipartBody.FORM)
-                        .addFormDataPart("file", pdfFile.getName(), pdfFileRequestBody)
-                        .addFormDataPart("output", "example_pdf-with-ocr-text_out")
-                        .build();
-        Request ocrRequest =
-                new Request.Builder()
-                        .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
-                        .url("https://api.pdfrest.com/pdf-with-ocr-text")
-                        .post(ocrRequestBody)
-                        .build();
+        String ocrPDFID = ocrJSON.get("outputId").toString();
+        System.out.println("Got the output ID: " + ocrPDFID);
+
+        RequestBody extractRequestBody =
+            new MultipartBody.Builder()
+                .setType(MultipartBody.FORM)
+                .addFormDataPart("id", ocrPDFID)
+                .build();
+        Request extractRequest =
+            new Request.Builder()
+                .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+                .url("https://api.pdfrest.com/extracted-text")
+                .post(extractRequestBody)
+                .build();
         try {
-            OkHttpClient ocrClient =
-                    new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
-
-            Response ocrResponse = ocrClient.newCall(ocrRequest).execute();
-
-            System.out.println("Response status code: " + ocrResponse.code());
-            if (ocrResponse.body() != null) {
-                String ocrResponseString = ocrResponse.body().string();
-
-                JSONObject ocrJSON = new JSONObject(ocrResponseString);
-                if (ocrJSON.has("error")) {
-                    System.out.println("Error during OCR call: " + ocrResponseString);
-                    return;
-                }
-
-                String ocrPDFID = ocrJSON.get("outputId").toString();
-                System.out.println("Got the output ID: " + ocrPDFID);
-
-                RequestBody extractRequestBody =
-                        new MultipartBody.Builder()
-                                .setType(MultipartBody.FORM)
-                                .addFormDataPart("id", ocrPDFID)
-                                .build();
-                Request extractRequest =
-                        new Request.Builder()
-                                .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
-                                .url("https://api.pdfrest.com/extracted-text")
-                                .post(extractRequestBody)
-                                .build();
-                try {
-                    OkHttpClient extractClient =
-                            new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
-
-                    Response extractResponse = extractClient.newCall(extractRequest).execute();
-
-                    System.out.println("Response status code: " + extractResponse.code());
-                    if (extractResponse.body() != null) {
-                        String extractResponseString = extractResponse.body().string();
-
-                        JSONObject extractJSON = new JSONObject(extractResponseString);
-                        if (extractJSON.has("error")) {
-                            System.out.println("Error during text extraction call: " + extractResponseString);
-                            return;
-                        }
-
-                        System.out.println(extractJSON.getString("fullText"));
-                    }
-                } catch (IOException e) {
-                    throw new RuntimeException(e);
-                }
+          OkHttpClient extractClient =
+              new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+
+          Response extractResponse = extractClient.newCall(extractRequest).execute();
+
+          System.out.println("Response status code: " + extractResponse.code());
+          if (extractResponse.body() != null) {
+            String extractResponseString = extractResponse.body().string();
+
+            JSONObject extractJSON = new JSONObject(extractResponseString);
+            if (extractJSON.has("error")) {
+              System.out.println("Error during text extraction call: " + extractResponseString);
+              return;
             }
+
+            System.out.println(extractJSON.getString("fullText"));
+          }
         } catch (IOException e) {
-            throw new RuntimeException(e);
+          throw new RuntimeException(e);
         }
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
-}
\ No newline at end of file
+  }
+}