Yayka · ThomasStuart · Jul 3, 2025 · Jul 4, 2025 · Jul 4, 2025
diff --git a/.gitignore b/.gitignore
@@ -1 +1,5 @@
-.env*
+.env*
+
+# Ignore virtual environment
+venv/
+env/
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,20 @@
+# 1. Use an official, lightweight Python base image
+FROM python:3.9-slim
+
+# 2. Set the working directory for all following instructions and for the running container
+WORKDIR /app
+
+# 3. Copy only the requirements file first so the dependency layer below can be cached
+COPY requirements.txt .
+
+# 4. Install the pinned Python dependencies (--no-cache-dir keeps pip's download cache out of the image)
+RUN pip install --no-cache-dir -r requirements.txt
+
+# 5. Copy the rest of the application code into the container
+COPY . .
+
+# 6. Document the port the app listens on (matches --port in CMD below)
+EXPOSE 8080
+
+# 7. Start the Uvicorn server, serving the `app` object from scan.py on all interfaces
+CMD ["uvicorn", "scan:app", "--host", "0.0.0.0", "--port", "8080"]
diff --git a/PiedraSantaSteakDinner.jpg b/PiedraSantaSteakDinner.jpg
diff --git a/README.md b/README.md
@@ -0,0 +1,45 @@
+# Receipt Reader
+This project is designed to take a receipt image and convert it into text tokens.
+
+## Getting Started
+Follow these instructions to set up the project on your local machine for development and testing.
+
+## Prerequisites
+- Python 3.9 or higher
+- pip (Python package installer)
+
+## Installation
+
+Create and Activate a Virtual Environment. It is highly recommended to use a virtual environment to manage project-specific dependencies.
+
+On macOS and Linux:
+
+### Create the environment
+```
+python3 -m venv venv
+```
+
+### Activate the environment
+```
+source venv/bin/activate
+```
+
+Your terminal prompt should now be prefixed with (venv), indicating that the environment is active.
+
+### Install Dependencies
+
+With your virtual environment active, install the required packages using the requirements.txt file:
+
+```
+pip install -r requirements.txt
+```
+
+This command will download and install all the necessary libraries for the project to run correctly.
+
+## Usage
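+To start the API server, execute the following command from the root directory of the project:
+```
+uvicorn scan:app --host 0.0.0.0 --port 8080
+```
+Make sure your virtual environment is active when you run the server. Receipt images can then be uploaded as the `file` form field of a `POST` request to `http://localhost:8080/process-receipt/`.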
+
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,61 @@
+annotated-types==0.7.0
+anyio==4.9.0
+certifi==2025.6.15
+charset-normalizer==3.4.2
+click==8.1.8
+dnspython==2.7.0
+email_validator==2.2.0
+exceptiongroup==1.3.0
+fastapi==0.115.14
+fastapi-cli==0.0.7
+filelock==3.18.0
+fsspec==2025.5.1
+h11==0.16.0
+hf-xet==1.1.5
+httpcore==1.0.9
+httptools==0.6.4
+httpx==0.28.1
+huggingface-hub==0.33.2
+idna==3.10
+itsdangerous==2.2.0
+Jinja2==3.1.6
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.2.1
+numpy==2.0.2
+orjson==3.10.18
+packaging==25.0
+pillow==11.3.0
+pydantic==2.11.7
+pydantic-extra-types==2.10.5
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+Pygments==2.19.2
+python-dotenv==1.1.1
+python-multipart==0.0.20
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.4
+rich==14.0.0
+rich-toolkit==0.14.8
+safetensors==0.5.3
+sentencepiece==0.2.0
+shellingham==1.5.4
+sniffio==1.3.1
+starlette==0.46.2
+sympy==1.14.0
+tokenizers==0.21.2
+torch==2.7.1
+tqdm==4.67.1
+transformers==4.53.0
+typer==0.16.0
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+ujson==5.10.0
+urllib3==2.5.0
+uvicorn==0.35.0
+uvloop==0.21.0
+watchfiles==1.1.0
+websockets==15.0.1
diff --git a/scan.py b/scan.py
@@ -1,41 +1,87 @@
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.concurrency import run_in_threadpool
+from fastapi.middleware.cors import CORSMiddleware
 from transformers import DonutProcessor, VisionEncoderDecoderModel
 import torch
 import re
 from PIL import Image
 import json
+import io
 
+# Initialize the app
+app = FastAPI()
 
+# Allow browser clients on any origin to call the API. Credentials stay disabled:
+# the CORS spec forbids combining a wildcard origin with credentialed requests,
+# and the endpoint below reads no cookies or auth headers.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
+    allow_credentials=False,
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+
+# Task start token for the CORD-v2 fine-tuned Donut checkpoint loaded below
+TASK_PROMPT = "<s_cord-v2>"
+
+# Load the model and processor (this happens only once when the server starts)
 processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2", use_fast=False)
 model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2")
 
-# define image
-
-image = Image.open("DOLLARAMA.png").convert("RGB")
-pixel_values = processor(image, return_tensors="pt").pixel_values
-print(pixel_values.shape)
-
-
-task_prompt = "<s_cord-v2>"
-decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt")["input_ids"]
-
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 
-outputs = model.generate(pixel_values.to(device),
-                               decoder_input_ids=decoder_input_ids.to(device),
-                               max_length=model.decoder.config.max_position_embeddings,
-                               early_stopping=True,
-                               pad_token_id=processor.tokenizer.pad_token_id,
-                               eos_token_id=processor.tokenizer.eos_token_id,
-                               use_cache=True,
-                               num_beams=1,
-                               bad_words_ids=[[processor.tokenizer.unk_token_id]],
-                               return_dict_in_generate=True,
-                               output_scores=True,)
-
-sequence = processor.batch_decode(outputs.sequences)[0]
-sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
-sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
-print(sequence)
-
-print(json.dumps(processor.token2json(sequence), indent=4))
+
+def _extract_receipt_fields(image):
+    """Run the Donut model on an RGB image and return the parsed receipt fields.
+
+    This is blocking, compute-heavy work; call it from a worker thread rather than
+    directly on the event loop.
+    """
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+    decoder_input_ids = processor.tokenizer(
+        TASK_PROMPT, add_special_tokens=False, return_tensors="pt"
+    )["input_ids"]
+
+    # Greedy decoding; only the generated token ids are needed, not per-step scores.
+    outputs = model.generate(
+        pixel_values.to(device),
+        decoder_input_ids=decoder_input_ids.to(device),
+        max_length=model.decoder.config.max_position_embeddings,
+        pad_token_id=processor.tokenizer.pad_token_id,
+        eos_token_id=processor.tokenizer.eos_token_id,
+        use_cache=True,
+        num_beams=1,
+        bad_words_ids=[[processor.tokenizer.unk_token_id]],
+        return_dict_in_generate=True,
+    )
+
+    sequence = processor.batch_decode(outputs.sequences)[0]
+    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
+    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
+    return processor.token2json(sequence)
+
+
+@app.post("/process-receipt/")
+async def process_receipt(file: UploadFile = File(...)):
+    """Parse an uploaded receipt image into structured JSON.
+
+    Args:
+        file: The receipt image, sent as the ``file`` multipart form field.
+
+    Returns:
+        The result of ``processor.token2json`` for the decoded model output.
+
+    Raises:
+        HTTPException: 400 if the upload cannot be decoded as an image.
+    """
+    image_data = await file.read()
+    try:
+        image = Image.open(io.BytesIO(image_data)).convert("RGB")
+    except (OSError, Image.DecompressionBombError) as exc:
+        # Pillow raises OSError subclasses for unreadable or truncated images.
+        raise HTTPException(status_code=400, detail="Uploaded file is not a valid image.") from exc
+
+    # Inference is blocking; run it off the event loop so other requests are still served.
+    return await run_in_threadpool(_extract_receipt_fields, image)