|
56 | 56 | "source": [
|
57 | 57 | "import os\n",
|
58 | 58 | "import datetime\n",
|
59 |
| - "from azure.ai.formrecognizer import (DocumentModelAdministrationClient, ModelBuildMode, DocumentAnalysisClient)\n", |
| 59 | + "from azure.ai.formrecognizer import (DocumentModelAdministrationClient, ModelBuildMode, DocumentAnalysisClient, AnalyzeResult)\n", |
60 | 60 | "from azure.core.credentials import AzureKeyCredential\n",
|
61 | 61 | "from azure.storage.blob import BlobServiceClient, ContainerSasPermissions, generate_container_sas\n",
|
62 | 62 | "from dotenv import dotenv_values\n",
|
|
92 | 92 | " with open(f\"{root}/{file}\", \"rb\") as data:\n",
|
93 | 93 | " blob_client.upload_blob(data, overwrite=True)\n",
|
94 | 94 | "\n",
|
95 |
| - " start_time = datetime.datetime.now(datetime.timezone.utc)\n", |
| 95 | + " start_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=5)\n", |
96 | 96 | " expiry_time = start_time + datetime.timedelta(days=1)\n",
|
97 | 97 | "\n",
|
98 | 98 | " sas_token = generate_container_sas(\n",
|
|
123 | 123 | " def run_layout_analysis(self, file_path):\n",
|
124 | 124 | " with open(file_path, \"rb\") as f:\n",
|
125 | 125 | " poller = self.document_analysis_client.begin_analyze_document(model_id=self.model.model_id, document=f)\n",
|
126 |
| - " result = poller.result()\n", |
127 |
| - " return result" |
| 126 | + " self.analysis_result = poller.result()\n", |
| 127 | + " return self.analysis_to_json(self.analysis_result)\n", |
| 128 | + "\n", |
| 129 | + " def analysis_to_json(self, analysis_result: AnalyzeResult):\n", |
| 130 | + " return {\n", |
| 131 | + " \"status\": \"succeeded\",\n", |
| 132 | + " \"createdDateTime\": datetime.datetime.now().isoformat(),\n", |
| 133 | + " \"lastUpdatedDateTime\": datetime.datetime.now().isoformat(),\n", |
| 134 | + " \"analyzeResult\": analysis_result.to_dict()\n", |
| 135 | + " }" |
| 136 | + ] |
| 137 | + }, |
| 138 | + { |
| 139 | + "cell_type": "code", |
| 140 | + "execution_count": null, |
| 141 | + "metadata": {}, |
| 142 | + "outputs": [], |
| 143 | + "source": [ |
| 144 | + "model_training_client = ModelTrainingClient(config)" |
128 | 145 | ]
|
129 | 146 | },
|
130 | 147 | {
|
|
133 | 150 | "metadata": {},
|
134 | 151 | "outputs": [],
|
135 | 152 | "source": [
|
136 |
| - "model_training_client = ModelTrainingClient(config)\n", |
137 | 153 | "model_training_client.upload_training_data(f\"{working_dir}/model_training\")\n",
|
138 | 154 | "invoice_model = model_training_client.create_model(\"invoice_model\")"
|
139 | 155 | ]
|
|
288 | 304 | " layout_analysis = model_training_client.run_layout_analysis(image_path_ref)\n",
|
289 | 305 | " layout_analysis_path_ref = os.path.join(pdf_dir, f'{pdf_file_name}.ocr.json')\n",
|
290 | 306 | " with open(layout_analysis_path_ref, 'w') as f:\n",
|
291 |
| - " json.dump(layout_analysis.to_dict(), f)\n", |
| 307 | + " json.dump(layout_analysis, f)\n", |
292 | 308 | " \n",
|
293 | 309 | " canvases[i].image_path_ref = image_path_ref\n",
|
294 | 310 | " canvases[i].page_ref = page_ref\n",
|
|
0 commit comments