Skip to content

Commit ea5d547

Browse files
committed
feat: add text mode option for run.py and update metadata retrieval logic
1 parent c4c53c7 commit ea5d547

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ apply_split: split.py clean
77 77

78 78
run: first_pages run.py
79 79
@echo "Starting Flask development server..."
80 -
$(PYTHON) run.py
80 +
$(PYTHON) run.py --kwargs text_mode=plain
81 81

82 82
# Clean up pyc files and __pycache__ directories
83 83
clean:

app/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -140,13 +140,15 @@ def save_colors():
140 140
@app.route("/metadata-for-page/<int:page_num>")
141 141
def metadata_for_page(page_num: int):
142 142
view_selection = 0
143 +
text_mode = flor.arg("text_mode", default="plain").strip().lower()
144 +
assert text_mode in ("ocr", "plain")
143 145
with warnings.catch_warnings():
144 146
warnings.simplefilter("ignore", UserWarning)
145 147
record = memoized_pdfs[memoized_pdfs["document_value"] == pdf_names[-1]][
146 148
memoized_pdfs["page"] == page_num + 1
147 149
].to_dict(orient="records")[0]
148 150
if view_selection == 0:
149 -
if "page_ocr" in record:
151 +
if text_mode == "ocr":
150 152
return jsonify([{f"ocr-page-{page_num+1}": record["page_ocr"]}])
151 153
else:
152 154
return jsonify([{f"txt-page-{page_num+1}": record["page_text"]}])

0 commit comments

Comments
 (0)