From ac9b95471a8f0ca1f02e8e062e33b544cc17d39c Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 11 Dec 2024 07:16:23 +0100 Subject: [PATCH] added hap extra to pip install Signed-off-by: Maroun Touma --- transforms/universal/hap/hap_python.ipynb | 52 ++++++++--------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/transforms/universal/hap/hap_python.ipynb b/transforms/universal/hap/hap_python.ipynb index 4fb1ad4c3d..1b7b954175 100644 --- a/transforms/universal/hap/hap_python.ipynb +++ b/transforms/universal/hap/hap_python.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "4a84e965-feeb-424d-9263-9f127e53a1aa", "metadata": {}, "outputs": [], @@ -24,7 +24,7 @@ "## This is here as a reference only\n", "# Users and application developers must use the right tag for the latest from pypi\n", "%pip install data-prep-toolkit\n", - "%pip install data-prep-toolkit-transforms==0.2.2.dev3" + "%pip install data-prep-toolkit-transforms[hap]" ] }, { @@ -48,22 +48,6 @@ "***** Import required classes and modules" ] }, - { - "cell_type": "code", - "execution_count": 1, - "id": "38aebf49-9460-4951-bb04-7045dec28690", - "metadata": {}, - "outputs": [], - "source": [ - "#import ast\n", - "#import os\n", - "#import sys\n", - "\n", - "#from data_processing.runtime.pure_python import PythonTransformLauncher\n", - "#from data_processing.utils import ParamsUtils\n", - "#from dpk_hap.transform_python import HAPPythonTransformConfiguration" - ] - }, { "cell_type": "code", "execution_count": 1, @@ -93,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "6a8ec5e4-1f52-4c61-9c9e-4618f9034b80", "metadata": {}, "outputs": [ @@ -101,14 +85,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "12:08:21 INFO - hap params are {'model_name_or_path': 'ibm-granite/granite-guardian-hap-38m', 'annotation_column': 'hap_score', 'doc_text_column': 'contents', 'inference_engine': 'CPU', 'max_length': 512, 'batch_size': 128} \n", - "12:08:21 INFO - pipeline id pipeline_id\n", - "12:08:21 INFO - code location None\n", - "12:08:21 INFO - data factory data_ is using local data access: input_folder - test-data/input output_folder - output\n", - "12:08:21 INFO - data factory data_ max_files -1, n_sample -1\n", - "12:08:21 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n", - "12:08:21 INFO - orchestrator hap started at 2024-12-10 12:08:21\n", - "12:08:21 INFO - Number of files is 1, source profile {'max_file_size': 0.10423946380615234, 'min_file_size': 0.10423946380615234, 'total_file_size': 0.10423946380615234}\n" + "07:12:05 INFO - hap params are {'model_name_or_path': 'ibm-granite/granite-guardian-hap-38m', 'annotation_column': 'hap_score', 'doc_text_column': 'contents', 'inference_engine': 'CPU', 'max_length': 512, 'batch_size': 128} \n", + "07:12:05 INFO - pipeline id pipeline_id\n", + "07:12:05 INFO - code location None\n", + "07:12:05 INFO - data factory data_ is using local data access: input_folder - test-data/input output_folder - output\n", + "07:12:05 INFO - data factory data_ max_files -1, n_sample -1\n", + "07:12:05 INFO - data factory data_ Not using data sets, checkpointing False, max files -1, random samples -1, files to use ['.parquet'], files to checkpoint ['.parquet']\n", + "07:12:05 INFO - orchestrator hap started at 2024-12-11 07:12:05\n", + "07:12:05 INFO - Number of files is 1, source profile {'max_file_size': 0.10423946380615234, 'min_file_size': 0.10423946380615234, 'total_file_size': 0.10423946380615234}\n" ] }, { @@ -133,10 +117,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "12:08:55 INFO - Completed 1 files (100.0%) in 0.467 min\n", - "12:08:55 INFO - Done processing 1 files, waiting for flush() completion.\n", - "12:08:55 INFO - done flushing in 0.0 sec\n", - "12:08:55 INFO - Completed execution in 0.568 min, execution result 0\n" + "07:12:38 INFO - Completed 1 files (100.0%) in 0.458 min\n", + "07:12:38 INFO - Done processing 1 files, waiting for flush() completion.\n", + "07:12:38 INFO - done flushing in 0.0 sec\n", + "07:12:38 INFO - Completed execution in 0.543 min, execution result 0\n" ] }, { @@ -202,7 +186,7 @@ "0" ] }, - "execution_count": 4, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "f21d5d9b-562d-4530-8cea-2de5b63eb1dc", "metadata": {}, "outputs": [ @@ -240,7 +224,7 @@ "['output/metadata.json', 'output/test1.parquet']" ] }, - "execution_count": 6, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" }