Skip to content

Commit cd4c59b

Browse files
committed
Update model training data
1 parent 8195978 commit cd4c59b

25 files changed

+29586
-8469
lines changed

Sample.ipynb

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@
5656
"source": [
5757
"import os\n",
5858
"import datetime\n",
59-
"from azure.ai.formrecognizer import (DocumentModelAdministrationClient, ModelBuildMode, DocumentAnalysisClient)\n",
59+
"from azure.ai.formrecognizer import (DocumentModelAdministrationClient, ModelBuildMode, DocumentAnalysisClient, AnalyzeResult)\n",
6060
"from azure.core.credentials import AzureKeyCredential\n",
6161
"from azure.storage.blob import BlobServiceClient, ContainerSasPermissions, generate_container_sas\n",
6262
"from dotenv import dotenv_values\n",
@@ -92,7 +92,7 @@
9292
" with open(f\"{root}/{file}\", \"rb\") as data:\n",
9393
" blob_client.upload_blob(data, overwrite=True)\n",
9494
"\n",
95-
" start_time = datetime.datetime.now(datetime.timezone.utc)\n",
95+
" start_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=5)\n",
9696
" expiry_time = start_time + datetime.timedelta(days=1)\n",
9797
"\n",
9898
" sas_token = generate_container_sas(\n",
@@ -123,8 +123,25 @@
123123
" def run_layout_analysis(self, file_path):\n",
124124
" with open(file_path, \"rb\") as f:\n",
125125
" poller = self.document_analysis_client.begin_analyze_document(model_id=self.model.model_id, document=f)\n",
126-
" result = poller.result()\n",
127-
" return result"
126+
" self.analysis_result = poller.result()\n",
127+
" return self.analysis_to_json(self.analysis_result)\n",
128+
"\n",
129+
" def analysis_to_json(self, analysis_result: AnalyzeResult):\n",
130+
" return {\n",
131+
" \"status\": \"succeeded\",\n",
132+
" \"createdDateTime\": datetime.datetime.now().isoformat(),\n",
133+
" \"lastUpdatedDateTime\": datetime.datetime.now().isoformat(),\n",
134+
" \"analyzeResult\": analysis_result.to_dict()\n",
135+
" }"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"metadata": {},
142+
"outputs": [],
143+
"source": [
144+
"model_training_client = ModelTrainingClient(config)"
128145
]
129146
},
130147
{
@@ -133,7 +150,6 @@
133150
"metadata": {},
134151
"outputs": [],
135152
"source": [
136-
"model_training_client = ModelTrainingClient(config)\n",
137153
"model_training_client.upload_training_data(f\"{working_dir}/model_training\")\n",
138154
"invoice_model = model_training_client.create_model(\"invoice_model\")"
139155
]
@@ -288,7 +304,7 @@
288304
" layout_analysis = model_training_client.run_layout_analysis(image_path_ref)\n",
289305
" layout_analysis_path_ref = os.path.join(pdf_dir, f'{pdf_file_name}.ocr.json')\n",
290306
" with open(layout_analysis_path_ref, 'w') as f:\n",
291-
" json.dump(layout_analysis.to_dict(), f)\n",
307+
" json.dump(layout_analysis, f)\n",
292308
" \n",
293309
" canvases[i].image_path_ref = image_path_ref\n",
294310
" canvases[i].page_ref = page_ref\n",

images/Invoice_6.pdf.page_1.jpg

44.3 KB
Loading

model_training/Invoice_1.pdf

56.3 KB
Binary file not shown.

0 commit comments

Comments
 (0)