diff --git a/examples/notebooks/hap/generate_hap_score_csv.ipynb b/examples/notebooks/hap/generate_hap_score_csv.ipynb index 58c44b94dc..09621666db 100644 --- a/examples/notebooks/hap/generate_hap_score_csv.ipynb +++ b/examples/notebooks/hap/generate_hap_score_csv.ipynb @@ -6,8 +6,10 @@ "source": [ "HAP Transform Example Notebook\n", "=====================================\n", - "This notebook picks a CSV file from the `input` folder, converts it to Parquet format,\n", - "runs the `hap_local_python.py` transform, and displays the results." + "\n", + "This notebook processes a CSV file containing text data to analyze for Hate, Abuse, and Profanity (HAP) scores.\n", + "It converts the CSV file into Parquet format, uses the `hap_local_python.py` script to calculate HAP scores, \n", + "and generates outputs for further analysis." ] }, { @@ -63,9 +65,364 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: data-prep-connector in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (0.2.3)\n", + "Requirement already satisfied: scrapy>=2.11.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-connector) (2.12.0)\n", + "Requirement already satisfied: pydantic>=2.8.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-connector) (2.10.2)\n", + "Requirement already satisfied: tldextract>=5.1.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-connector) (5.1.3)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic>=2.8.1->data-prep-connector) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic>=2.8.1->data-prep-connector) 
(2.27.1)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic>=2.8.1->data-prep-connector) (4.12.2)\n", + "Requirement already satisfied: Twisted>=21.7.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (24.11.0)\n", + "Requirement already satisfied: cryptography>=37.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (44.0.0)\n", + "Requirement already satisfied: cssselect>=0.9.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (1.2.0)\n", + "Requirement already satisfied: itemloaders>=1.0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (1.3.2)\n", + "Requirement already satisfied: parsel>=1.5.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (1.9.1)\n", + "Requirement already satisfied: pyOpenSSL>=22.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (24.3.0)\n", + "Requirement already satisfied: queuelib>=1.4.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (1.7.0)\n", + "Requirement already satisfied: service-identity>=18.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (24.2.0)\n", + "Requirement already satisfied: w3lib>=1.17.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (2.2.1)\n", + "Requirement already satisfied: zope.interface>=5.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (7.2)\n", + "Requirement already satisfied: protego>=0.1.15 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (0.3.1)\n", + "Requirement already satisfied: itemadapter>=0.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (0.10.0)\n", + "Requirement already satisfied: packaging in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (24.2)\n", + "Requirement already satisfied: lxml>=4.6.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (4.9.4)\n", + "Requirement already satisfied: defusedxml>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (0.7.1)\n", + "Requirement already satisfied: PyDispatcher>=2.0.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scrapy>=2.11.2->data-prep-connector) (2.0.7)\n", + "Requirement already satisfied: idna in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from tldextract>=5.1.2->data-prep-connector) (3.10)\n", + "Requirement already satisfied: requests>=2.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from tldextract>=5.1.2->data-prep-connector) (2.32.3)\n", + "Requirement already satisfied: requests-file>=1.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from tldextract>=5.1.2->data-prep-connector) (2.1.0)\n", + "Requirement already satisfied: filelock>=3.0.8 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from tldextract>=5.1.2->data-prep-connector) (3.16.1)\n", + "Requirement already satisfied: cffi>=1.12 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from cryptography>=37.0.0->scrapy>=2.11.2->data-prep-connector) (1.17.1)\n", + "Requirement already satisfied: jmespath>=0.9.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
itemloaders>=1.0.1->scrapy>=2.11.2->data-prep-connector) (1.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests>=2.1.0->tldextract>=5.1.2->data-prep-connector) (3.4.0)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests>=2.1.0->tldextract>=5.1.2->data-prep-connector) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests>=2.1.0->tldextract>=5.1.2->data-prep-connector) (2024.8.30)\n", + "Requirement already satisfied: attrs>=19.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from service-identity>=18.1.0->scrapy>=2.11.2->data-prep-connector) (24.2.0)\n", + "Requirement already satisfied: pyasn1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from service-identity>=18.1.0->scrapy>=2.11.2->data-prep-connector) (0.6.1)\n", + "Requirement already satisfied: pyasn1-modules in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from service-identity>=18.1.0->scrapy>=2.11.2->data-prep-connector) (0.4.1)\n", + "Requirement already satisfied: automat>=24.8.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from Twisted>=21.7.0->scrapy>=2.11.2->data-prep-connector) (24.8.1)\n", + "Requirement already satisfied: constantly>=15.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from Twisted>=21.7.0->scrapy>=2.11.2->data-prep-connector) (23.10.4)\n", + "Requirement already satisfied: hyperlink>=17.1.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from Twisted>=21.7.0->scrapy>=2.11.2->data-prep-connector) (21.0.0)\n", + "Requirement already satisfied: incremental>=24.7.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from Twisted>=21.7.0->scrapy>=2.11.2->data-prep-connector) 
(24.7.2)\n", + "Requirement already satisfied: setuptools in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from zope.interface>=5.1.0->scrapy>=2.11.2->data-prep-connector) (75.1.0)\n", + "Requirement already satisfied: pycparser in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from cffi>=1.12->cryptography>=37.0.0->scrapy>=2.11.2->data-prep-connector) (2.22)\n", + "Requirement already satisfied: data-prep-toolkit==0.2.2.dev1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit[ray]==0.2.2.dev1) (0.2.2.dev1)\n", + "Requirement already satisfied: numpy<1.29.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (1.26.4)\n", + "Requirement already satisfied: pyarrow==16.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (16.1.0)\n", + "Requirement already satisfied: boto3==1.34.69 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (1.34.69)\n", + "Collecting argparse (from data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1)\n", + " Using cached argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)\n", + "Requirement already satisfied: mmh3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (4.1.0)\n", + "Requirement already satisfied: psutil in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (5.9.0)\n", + "Requirement already satisfied: ray==2.36.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.36.1)\n", + "Requirement already satisfied: 
fastapi>=0.110.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit[ray]==0.2.2.dev1) (0.115.5)\n", + "Requirement already satisfied: pillow>=10.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit[ray]==0.2.2.dev1) (10.4.0)\n", + "Requirement already satisfied: botocore<1.35.0,>=1.34.69 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from boto3==1.34.69->data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (1.34.162)\n", + "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from boto3==1.34.69->data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (1.0.1)\n", + "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from boto3==1.34.69->data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (0.10.4)\n", + "Requirement already satisfied: click>=7.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (8.1.7)\n", + "Requirement already satisfied: filelock in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (3.16.1)\n", + "Requirement already satisfied: jsonschema in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (4.23.0)\n", + "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.1.0)\n", + "Requirement already satisfied: packaging in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (24.2)\n", + "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (5.29.0)\n", + "Requirement already satisfied: pyyaml in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (6.0.2)\n", + "Requirement already satisfied: aiosignal in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.5.0)\n", + "Requirement already satisfied: requests in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.32.3)\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (3.11.8)\n", + "Requirement already satisfied: aiohttp-cors in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.7.0)\n", + "Requirement already satisfied: colorful in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.5.6)\n", + "Requirement already satisfied: py-spy>=0.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == 
\"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.4.0)\n", + "Requirement already satisfied: opencensus in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.11.4)\n", + "Requirement already satisfied: pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.10.2)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.21.0)\n", + "Requirement already satisfied: smart-open in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (7.0.5)\n", + "Requirement already satisfied: virtualenv!=20.21.1,>=20.0.24 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (20.28.0)\n", + "Requirement already satisfied: grpcio>=1.42.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.68.0)\n", + "Requirement already satisfied: memray in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.14.0)\n", + "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from fastapi>=0.110.2->data-prep-toolkit[ray]==0.2.2.dev1) (0.41.3)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from fastapi>=0.110.2->data-prep-toolkit[ray]==0.2.2.dev1) (4.12.2)\n", + "Requirement already 
satisfied: aiohappyeyeballs>=2.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.4.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (24.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.2.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.18.0)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from botocore<1.35.0,>=1.34.69->boto3==1.34.69->data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (2.9.0.post0)\n", + "Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from botocore<1.35.0,>=1.34.69->boto3==1.34.69->data-prep-toolkit==0.2.2.dev1->data-prep-toolkit[ray]==0.2.2.dev1) (2.2.3)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.27.1)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from starlette<0.42.0,>=0.40.0->fastapi>=0.110.2->data-prep-toolkit[ray]==0.2.2.dev1) (4.6.2.post1)\n", + "Requirement already satisfied: distlib<1,>=0.3.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.3.9)\n", + "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (4.3.6)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jsonschema->ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jsonschema->ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jsonschema->ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.21.0)\n", + "Requirement already satisfied: jinja2>=2.9 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (3.1.4)\n", + "Requirement already satisfied: rich>=11.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (13.9.4)\n", + "Requirement already satisfied: textual>=0.41.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.87.1)\n", + "Requirement already satisfied: opencensus-context>=0.1.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.1.3)\n", + "Requirement already satisfied: six~=1.16 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.16.0)\n", + "Requirement already satisfied: google-api-core<3.0.0,>=1.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.23.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (3.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2024.8.30)\n", + "Requirement already satisfied: wrapt in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from smart-open->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.17.0)\n", + "Requirement already satisfied: sniffio>=1.1 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi>=0.110.2->data-prep-toolkit[ray]==0.2.2.dev1) (1.3.1)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.66.0)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.25.0)\n", + "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.36.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jinja2>=2.9->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.1.5)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from rich>=11.2.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from rich>=11.2.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.18.0)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (5.5.0)\n", + 
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.4.1)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (4.9)\n", + "Requirement already satisfied: mdurl~=0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich>=11.2.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.1.2)\n", + "Requirement already satisfied: linkify-it-py<3,>=1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from markdown-it-py[linkify,plugins]>=2.1.0->textual>=0.41.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (2.0.3)\n", + "Requirement already satisfied: mdit-py-plugins in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from markdown-it-py[linkify,plugins]>=2.1.0->textual>=0.41.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.4.2)\n", + "Requirement already satisfied: uc-micro-py in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from linkify-it-py<3,>=1->markdown-it-py[linkify,plugins]>=2.1.0->textual>=0.41.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (1.0.3)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]==0.2.2.dev1) (0.6.1)\n", + "Using cached 
argparse-1.4.0-py2.py3-none-any.whl (23 kB)\n", + "Installing collected packages: argparse\n", + "Successfully installed argparse-1.4.0\n", + "Requirement already satisfied: data-prep-toolkit-transforms==0.2.2.dev1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.2.2.dev1)\n", + "Requirement already satisfied: data-prep-toolkit>=0.2.2.dev1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.2.2.dev1)\n", + "Requirement already satisfied: bs4==0.0.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.0.2)\n", + "Requirement already satisfied: transformers==4.38.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.38.2)\n", + "Requirement already satisfied: parameterized in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.9.0)\n", + "Requirement already satisfied: pandas in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.2.2)\n", + "Requirement already satisfied: docling-core==1.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.0)\n", + "Requirement already satisfied: llama-index-core<0.12.0,>=0.11.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.11.23)\n", + "Requirement already satisfied: fasttext==0.9.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.9.2)\n", + "Requirement already satisfied: langcodes==3.3.0 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.3.0)\n", + "Requirement already satisfied: huggingface-hub<1.0.0,>=0.21.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.26.3)\n", + "Requirement already satisfied: numpy==1.26.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.26.4)\n", + "Requirement already satisfied: sentence-transformers==3.0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.0.1)\n", + "Requirement already satisfied: docling-ibm-models==1.1.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.1.7)\n", + "Requirement already satisfied: deepsearch-glm==0.21.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.21.0)\n", + "Requirement already satisfied: docling==1.11.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.11.0)\n", + "Requirement already satisfied: filetype<2.0.0,>=1.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.2.0)\n", + "Requirement already satisfied: nltk==3.9.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.9.1)\n", + "Requirement already satisfied: torch==2.4.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.4.1)\n", + "Requirement already satisfied: mmh3>=4.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.1.0)\n", + "Requirement already satisfied: xxhash==3.4.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.4.1)\n", + "Requirement already satisfied: duckdb>=0.10.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.1.3)\n", + "Requirement already satisfied: networkx==3.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.3)\n", + "Requirement already satisfied: colorlog==6.8.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (6.8.2)\n", + "Requirement already satisfied: func-timeout==4.3.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.3.5)\n", + "Requirement already satisfied: emerge-viz==2.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.0.0)\n", + "Requirement already satisfied: beautifulsoup4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from bs4==0.0.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.12.3)\n", + "Requirement already satisfied: pyarrow==16.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (16.1.0)\n", + "Requirement already satisfied: boto3==1.34.69 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.34.69)\n", + "Collecting argparse (from 
data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1)\n", + " Using cached argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)\n", + "Requirement already satisfied: psutil in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (5.9.0)\n", + "Requirement already satisfied: docutils!=0.21 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.21.2)\n", + "Requirement already satisfied: matplotlib<4.0.0,>=3.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.9.2)\n", + "Requirement already satisfied: netwulf<0.2.0,>=0.1.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.1.5)\n", + "Requirement already satisfied: numerize<0.13,>=0.12 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.12)\n", + "Requirement already satisfied: python-dotenv<2.0.0,>=1.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.1)\n", + "Requirement already satisfied: rich<14.0.0,>=13.7.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (13.9.4)\n", + "Requirement already satisfied: tabulate>=0.8.9 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.9.0)\n", + "Requirement already satisfied: 
tqdm<5.0.0,>=4.64.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.67.1)\n", + "Requirement already satisfied: certifi>=2024.7.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.8.30)\n", + "Requirement already satisfied: docling-parse<2.0.0,>=1.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.6.2)\n", + "Requirement already satisfied: easyocr<2.0,>=1.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.7.2)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.10.2)\n", + "Requirement already satisfied: pydantic-settings<3.0.0,>=2.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.6.1)\n", + "Requirement already satisfied: pypdfium2<5.0.0,>=4.30.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.30.0)\n", + "Requirement already satisfied: requests<3.0.0,>=2.32.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.32.3)\n", + "Requirement already satisfied: rtree<2.0.0,>=1.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.0)\n", + "Requirement already satisfied: scipy<2.0.0,>=1.14.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.14.1)\n", + "Requirement already satisfied: json-schema-for-humans<2.0.0,>=1.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.4)\n", + "Requirement already satisfied: jsonref<2.0.0,>=1.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.1.0)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.16.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.23.0)\n", + "Requirement already satisfied: pyproject-toml<0.0.11,>=0.0.10 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.0.10)\n", + "Requirement already satisfied: Pillow<11.0.0,>=10.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (10.4.0)\n", + "Requirement already satisfied: jsonlines<4.0.0,>=3.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.1.0)\n", + "Requirement already satisfied: lxml<5.0.0,>=4.9.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.9.4)\n", + "Requirement already satisfied: mean_average_precision<2022.0.0.0,>=2021.4.26.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2021.4.26.0)\n", + "Requirement already satisfied: onnxruntime<2.0.0,>=1.16.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.20.1)\n", + "Requirement already satisfied: opencv-python-headless<5.0.0.0,>=4.9.0.80 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.10.0.84)\n", + "Requirement already satisfied: torchvision<1,>=0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.19.1)\n", + "Requirement already satisfied: wheel in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.44.0)\n", + "Requirement already satisfied: autopep8 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.3.1)\n", + "Requirement already satisfied: coloredlogs in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (15.0.1)\n", + "Requirement already satisfied: interrogate in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.7.0)\n", + "Requirement already satisfied: scikit-learn in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.5.2)\n", + "Requirement already satisfied: prettytable in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.12.0)\n", + "Requirement already satisfied: py in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.11.0)\n", + "Requirement already satisfied: pycodestyle in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.12.1)\n", + "Requirement already satisfied: pylint in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.3.1)\n", + "Requirement already satisfied: pyparsing in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.2.0)\n", + "Requirement already satisfied: python-louvain in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.16)\n", + "Requirement already satisfied: PyYAML in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (6.0.2)\n", + "Requirement already satisfied: PyDriller in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.7)\n", + "Requirement already satisfied: pyperclip in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.9.0)\n", + "Requirement already satisfied: pybind11>=2.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from fasttext==0.9.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.13.6)\n", + "Requirement already satisfied: setuptools>=0.7.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from fasttext==0.9.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (75.1.0)\n", + "Requirement already satisfied: click in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (8.1.7)\n", + "Requirement already satisfied: joblib in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.11.6)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pandas->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pandas->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pandas->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.2)\n", + "Requirement already satisfied: filelock in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch==2.4.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.16.1)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch==2.4.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.12.2)\n", + "Requirement already satisfied: sympy in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch==2.4.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.13.3)\n", + "Requirement already satisfied: jinja2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch==2.4.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.1.4)\n", + "Requirement already satisfied: fsspec in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch==2.4.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.0 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (24.2)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.4.5)\n", + "Requirement already satisfied: botocore<1.35.0,>=1.34.69 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from boto3==1.34.69->data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.34.162)\n", + "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from boto3==1.34.69->data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.1)\n", + "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from boto3==1.34.69->data-prep-toolkit>=0.2.2.dev1->data-prep-toolkit-transforms==0.2.2.dev1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.10.4)\n", + "Requirement already satisfied: ray==2.36.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.36.1)\n", + "Requirement already satisfied: fastapi>=0.110.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.115.5)\n", + "Requirement already satisfied: 
msgpack<2.0.0,>=1.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.1.0)\n", + "Requirement already satisfied: protobuf!=3.19.5,>=3.15.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (5.29.0)\n", + "Requirement already satisfied: aiosignal in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray==2.36.1->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.5.0)\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.11.8)\n", + "Requirement already satisfied: aiohttp-cors in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.7.0)\n", + "Requirement already satisfied: colorful in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.5.6)\n", + "Requirement already satisfied: 
py-spy>=0.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.4.0)\n", + "Requirement already satisfied: opencensus in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.11.4)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.21.0)\n", + "Requirement already satisfied: smart-open in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (7.0.5)\n", + "Requirement already satisfied: virtualenv!=20.21.1,>=20.0.24 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (20.28.0)\n", + "Requirement already satisfied: grpcio>=1.42.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.68.0)\n", + "Requirement already satisfied: memray in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.14.0)\n", + "Requirement already satisfied: SQLAlchemy>=1.4.49 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.0.36)\n", + "Requirement already satisfied: dataclasses-json in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.6.7)\n", + "Requirement already satisfied: deprecated>=1.2.9.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.2.15)\n", + "Requirement already satisfied: dirtyjson<2.0.0,>=1.0.8 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.8)\n", + "Requirement already satisfied: httpx in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.28.0)\n", + "Requirement already satisfied: nest-asyncio<2.0.0,>=1.5.8 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.6.0)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (8.5.0)\n", + "Requirement already satisfied: tiktoken>=0.3.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.8.0)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.9.0)\n", + "Requirement 
already satisfied: wrapt in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.17.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.4.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (24.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.2.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from aiohttp>=3.7->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.18.0)\n", + "Requirement already satisfied: scikit-image in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.24.0)\n", + "Requirement already satisfied: python-bidi in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.6.3)\n", + "Requirement already satisfied: Shapely in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.0.6)\n", + "Requirement already satisfied: pyclipper in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.0.post6)\n", + "Requirement already satisfied: ninja in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.11.1.2)\n", + "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from fastapi>=0.110.2->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.41.3)\n", + "Requirement already satisfied: MarkupSafe<3.0.0,>=2.1.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from json-schema-for-humans<2.0.0,>=1.0.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.1.5)\n", + "Requirement already satisfied: Pygments<3.0.0,>=2.18.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from json-schema-for-humans<2.0.0,>=1.0.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.18.0)\n", + "Requirement already satisfied: htmlmin2<0.2.0,>=0.1.13 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from json-schema-for-humans<2.0.0,>=1.0.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.1.13)\n", + "Requirement already satisfied: markdown2<3.0.0,>=2.5.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
json-schema-for-humans<2.0.0,>=1.0.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.5.1)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from dataclasses-json->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.23.1)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jsonschema<5.0.0,>=4.16.0->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.21.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from matplotlib<4.0.0,>=3.7.1->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from matplotlib<4.0.0,>=3.7.1->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from matplotlib<4.0.0,>=3.7.1->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.55.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
matplotlib<4.0.0,>=3.7.1->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.4.7)\n", + "Requirement already satisfied: simplejson>=3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from netwulf<0.2.0,>=0.1.5->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.19.3)\n", + "Requirement already satisfied: flatbuffers in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from onnxruntime<2.0.0,>=1.16.2->docling-ibm-models==1.1.7->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (24.3.25)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.0.0->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.0.0->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.27.1)\n", + "Requirement already satisfied: toml in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pyproject-toml<0.0.11,>=0.0.10->docling-core==1.3.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.10.2)\n", + "Requirement already satisfied: six>=1.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests<3.0.0,>=2.32.3->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests<3.0.0,>=2.32.3->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.10)\n", + "Requirement already satisfied: 
urllib3<3,>=1.21.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests<3.0.0,>=2.32.3->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.2.3)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from rich<14.0.0,>=13.7.0->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.0.0)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from SQLAlchemy[asyncio]>=1.4.49->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.1.1)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from typing-inspect>=0.8.0->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from beautifulsoup4->bs4==0.0.2->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.6)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from coloredlogs->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (10.0)\n", + "Requirement already satisfied: anyio in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.6.2.post1)\n", + "Requirement already satisfied: httpcore==1.* in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from httpx->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
httpcore==1.*->httpx->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.14.0)\n", + "Requirement already satisfied: colorama in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from interrogate->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.4.6)\n", + "Requirement already satisfied: wcwidth in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from prettytable->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.2.13)\n", + "Requirement already satisfied: gitpython in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from PyDriller->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.1.43)\n", + "Requirement already satisfied: types-pytz in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from PyDriller->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.2.0.20241003)\n", + "Requirement already satisfied: lizard in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from PyDriller->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.17.13)\n", + "Requirement already satisfied: platformdirs>=2.2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pylint->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.3.6)\n", + "Requirement already satisfied: astroid<=3.4.0-dev0,>=3.3.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pylint->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.3.5)\n", + "Requirement already satisfied: isort!=5.13.0,<6,>=4.2.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pylint->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (5.13.2)\n", + "Requirement already satisfied: mccabe<0.8,>=0.6 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
pylint->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.7.0)\n", + "Requirement already satisfied: tomlkit>=0.10.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pylint->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.13.2)\n", + "Requirement already satisfied: dill>=0.3.6 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pylint->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.3.9)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scikit-learn->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (3.5.0)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from sympy->torch==2.4.1->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich<14.0.0,>=13.7.0->deepsearch-glm==0.21.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.1.2)\n", + "Requirement already satisfied: sniffio>=1.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from anyio->httpx->llama-index-core<0.12.0,>=0.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.3.1)\n", + "Requirement already satisfied: distlib<1,>=0.3.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.3.9)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from gitpython->PyDriller->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.0.11)\n", + "Requirement already satisfied: 
textual>=0.41.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.87.1)\n", + "Requirement already satisfied: opencensus-context>=0.1.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.1.3)\n", + "Requirement already satisfied: google-api-core<3.0.0,>=1.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.23.0)\n", + "Requirement already satisfied: imageio>=2.33 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scikit-image->easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.36.1)\n", + "Requirement already satisfied: tifffile>=2022.8.12 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scikit-image->easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2024.9.20)\n", + "Requirement already satisfied: lazy-loader>=0.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from scikit-image->easyocr<2.0,>=1.7->docling==1.11.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.4)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from gitdb<5,>=4.0.1->gitpython->PyDriller->emerge-viz==2.0.0->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (5.0.1)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.dev0,>=1.56.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from 
google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.66.0)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.25.0)\n", + "Requirement already satisfied: google-auth<3.0.dev0,>=2.14.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.36.0)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (5.5.0)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.4.1)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (4.9)\n", + "Requirement already satisfied: linkify-it-py<3,>=1 in 
/opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from markdown-it-py[linkify,plugins]>=2.1.0->textual>=0.41.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (2.0.3)\n", + "Requirement already satisfied: mdit-py-plugins in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from markdown-it-py[linkify,plugins]>=2.1.0->textual>=0.41.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.4.2)\n", + "Requirement already satisfied: uc-micro-py in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from linkify-it-py<3,>=1->markdown-it-py[linkify,plugins]>=2.1.0->textual>=0.41.0->memray->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (1.0.3)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0.dev0,>=2.14.1->google-api-core<3.0.0,>=1.0.0->opencensus->ray[default]==2.36.1; extra == \"ray\"->data-prep-toolkit[ray]>=0.2.2.dev1; extra == \"ray\"->data-prep-toolkit-transforms[all,ray]==0.2.2.dev1) (0.6.1)\n", + "Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)\n", + "Installing collected packages: argparse\n", + "Successfully installed argparse-1.4.0\n", + "Requirement already satisfied: nltk==3.9.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from -r requirements.txt (line 1)) (3.9.1)\n", + "Requirement already satisfied: transformers==4.38.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from -r requirements.txt (line 2)) (4.38.2)\n", + "Requirement already satisfied: torch<=2.4.1,>=2.2.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from -r 
requirements.txt (line 3)) (2.4.1)\n", + "Requirement already satisfied: pandas==2.2.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from -r requirements.txt (line 4)) (2.2.2)\n", + "Requirement already satisfied: click in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->-r requirements.txt (line 1)) (8.1.7)\n", + "Requirement already satisfied: joblib in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->-r requirements.txt (line 1)) (1.4.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->-r requirements.txt (line 1)) (2024.11.6)\n", + "Requirement already satisfied: tqdm in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from nltk==3.9.1->-r requirements.txt (line 1)) (4.67.1)\n", + "Requirement already satisfied: filelock in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (3.16.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (0.26.3)\n", + "Requirement already satisfied: numpy>=1.17 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (1.26.4)\n", + "Requirement already satisfied: packaging>=20.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (6.0.2)\n", + "Requirement already satisfied: requests in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (2.32.3)\n", + 
"Requirement already satisfied: tokenizers<0.19,>=0.14 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from transformers==4.38.2->-r requirements.txt (line 2)) (0.4.5)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pandas==2.2.2->-r requirements.txt (line 4)) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pandas==2.2.2->-r requirements.txt (line 4)) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from pandas==2.2.2->-r requirements.txt (line 4)) (2024.2)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (4.12.2)\n", + "Requirement already satisfied: sympy in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (1.13.3)\n", + "Requirement already satisfied: networkx in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (3.3)\n", + "Requirement already satisfied: jinja2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (3.1.4)\n", + "Requirement already satisfied: fsspec in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (2024.10.0)\n", + "Requirement already satisfied: six>=1.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas==2.2.2->-r 
requirements.txt (line 4)) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from jinja2->torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (2.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->transformers==4.38.2->-r requirements.txt (line 2)) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->transformers==4.38.2->-r requirements.txt (line 2)) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->transformers==4.38.2->-r requirements.txt (line 2)) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from requests->transformers==4.38.2->-r requirements.txt (line 2)) (2024.8.30)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/anaconda3/envs/data-prep-kit/lib/python3.11/site-packages (from sympy->torch<=2.4.1,>=2.2.2->-r requirements.txt (line 3)) (1.3.0)\n" + ] + } + ], "source": [ "! pip install data-prep-connector\n", "! 
pip install 'data-prep-toolkit[ray]==0.2.2.dev1'\n", @@ -82,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -103,20 +460,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "hap_script_path = \"./transforms/universal/hap/python/src/hap_local_python.py\"\n", + "from pathlib import Path\n", + "\n", + "notebook_dir = Path().resolve()\n", + "relative_script_dir = '../../../transforms/universal/hap/python/src/hap_local_python.py'\n", + "hap_script_path = (notebook_dir / relative_script_dir).resolve()\n", + "\n", "input_folder = \"./input\"\n", "output_folder = \"./output\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Script Path: /Users/aisha/Documents/GitHub/Personal/DPK/transforms/universal/hap/python/src/hap_local_python.py\n", + "Input Folder: ./input\n", + "Output Folder: ./output\n" + ] + } + ], "source": [ "# Ensure the necessary folders exist.\n", "os.makedirs(input_folder, exist_ok=True)\n", @@ -141,9 +513,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found CSV file(s): ['customer_feedback_file.csv']\n", + "Using CSV file: ./input/customer_feedback_file.csv\n" + ] + } + ], "source": [ "csv_files = [f for f in os.listdir(input_folder) if f.endswith(\".csv\")]\n", "\n", @@ -168,9 +549,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CSV file converted to Parquet format at: ./input/data.parquet\n" + ] + } + ], "source": [ "parquet_file_path = 
os.path.join(input_folder, \"data.parquet\")\n", "df = pd.read_csv(csv_file_path)\n", @@ -182,37 +571,30 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 4: Simulate Command-Line Arguments for HAP Parameters" + "### Step 4: Define HAP Parameters" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "\n", - "# Clear sys.argv first to avoid conflicts (reset arguments list)\n", - "sys.argv = [\n", - " \"hap_local_python.py\", # Script name\n", - " \"--input_folder\", input_folder, # Correct input folder\n", - " \"--output_folder\", output_folder, # Correct output folder\n", - " \"--model_name_or_path\", hap_params[\"model_name_or_path\"],\n", - " \"--annotation_column\", hap_params[\"annotation_column\"],\n", - " \"--doc_text_column\", hap_params[\"doc_text_column\"],\n", - " \"--inference_engine\", hap_params[\"inference_engine\"],\n", - " \"--max_length\", str(hap_params[\"max_length\"]),\n", - " \"--batch_size\", str(hap_params[\"batch_size\"]),\n", - "]\n", - "\n", - "print(f\"Command-line arguments: {sys.argv}\")" + "hap_params = {\n", + " \"model_name_or_path\": \"ibm-granite/granite-guardian-hap-38m\", # Default model name\n", + " \"annotation_column\": \"hap_score\", # Output column for HAP scores\n", + " \"doc_text_column\": \"Customer Feedback\", # Input column containing document text\n", + " \"inference_engine\": \"CPU\", # Inference engine (CPU or GPU)\n", + " \"max_length\": 512, # Maximum token length\n", + " \"batch_size\": 128, # Batch size\n", + "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 5: Run the Transform with Simulated Arguments\n", + "### Step 5: Run the Transform with defined HAP Parameters\n", "\n", "This cell executes the HAP transformation script:\n", "- `--input_file`: Path to your input CSV/Parquet file.\n", @@ -227,9 +609,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, 
- "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transform completed successfully.\n", + "Processing batch: 0/1\n", + "Processing batch: 1/1\n", + " Customer Name Date ... Unnamed: 13 hap_score\n", + "0 Ethan Johnson 11/7/24 ... NaN 0.000195\n", + "1 Sophia Wilson 11/8/24 ... NaN 0.000153\n", + "2 Liam Smith 11/6/24 ... NaN 0.000169\n", + "3 Isabella Martinez 11/9/24 ... NaN 0.000158\n", + "4 Noah White 11/10/24 ... NaN 0.000875\n", + ".. ... ... ... ... ...\n", + "60 Karen Brooks 1/2/25 ... NaN 0.000150\n", + "61 David King 1/3/25 ... NaN 0.579251\n", + "62 Angela Hill 1/4/25 ... NaN 0.000384\n", + "63 Richard Young 1/5/25 ... NaN 0.000285\n", + "64 Lisa Green 1/6/25 ... NaN 0.052517\n", + "\n", + "[65 rows x 15 columns]\n", + "\n" + ] + } + ], "source": [ "# Copy the current environment variables\n", "env = os.environ.copy()\n", @@ -283,9 +690,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Complete Output Parquet File Path: ./output/data.parquet\n", + "Filtered Output (only HAP score and document text):\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Customer Feedbackhap_score
0Rating: 4 Comments: \"Service was prompt, but ...0.000195
1Rating: 5 Comments: \"Great help from Peter! H...0.000153
2Rating: 3 Comments: \"The service was quick, b...0.000169
3Rating: 5 Comments: \"Excellent service and ad...0.000158
4Rating: 2 Comments: \"I’m really frustrated. T...0.000875
.........
60Rating: 3 Comments: \"This is not what I expec...0.000150
61Rating: 1 Comments: \"This is insane. I’ve onl...0.579251
62Rating: 4 Comments: \"I need this fixed. I can’...0.000384
63Rating: 2 Comments: \"I’m so done with this ma...0.000285
64Rating: 3 Comments: \"I’ve leveled the machine...0.052517
\n", + "

65 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Customer Feedback hap_score\n", + "0 Rating: 4 Comments: \"Service was prompt, but ... 0.000195\n", + "1 Rating: 5 Comments: \"Great help from Peter! H... 0.000153\n", + "2 Rating: 3 Comments: \"The service was quick, b... 0.000169\n", + "3 Rating: 5 Comments: \"Excellent service and ad... 0.000158\n", + "4 Rating: 2 Comments: \"I’m really frustrated. T... 0.000875\n", + ".. ... ...\n", + "60 Rating: 3 Comments: \"This is not what I expec... 0.000150\n", + "61 Rating: 1 Comments: \"This is insane. I’ve onl... 0.579251\n", + "62 Rating: 4 Comments: \"I need this fixed. I can’... 0.000384\n", + "63 Rating: 2 Comments: \"I’m so done with this ma... 0.000285\n", + "64 Rating: 3 Comments: \"I’ve leveled the machine... 0.052517\n", + "\n", + "[65 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Complete output saved to: ./output/hap_complete_output.csv\n", + "Filtered output saved to: ./output/hap_filtered_output.csv\n" + ] + } + ], "source": [ "import os\n", "import pandas as pd\n", diff --git a/examples/notebooks/hap/output/metadata.json b/examples/notebooks/hap/output/metadata.json index b9f7c11a3f..f5df683159 100644 --- a/examples/notebooks/hap/output/metadata.json +++ b/examples/notebooks/hap/output/metadata.json @@ -5,8 +5,8 @@ "job name": "hap", "job type": "pure python", "job id": "job_id", - "start_time": "2024-11-29 13:41:19", - "end_time": "2024-11-29 13:41:21", + "start_time": "2024-12-05 09:25:45", + "end_time": "2024-12-05 09:25:48", "status": "success" }, "code": { @@ -30,18 +30,18 @@ "num_processors": 0 }, "execution_stats": { - "cpus": 35.7, + "cpus": 190.1, "gpus": 0, - "memory": 9.43, + "memory": 10.46, "object_store": 0, - "execution time, min": 0.037 + "execution time, min": 0.035 }, "job_output_stats": { "source_files": 1, "source_size": 47309, "result_files": 1, "result_size": 40282, - "processing_time": 0.419, + 
"processing_time": 0.439, "source_doc_count": 65, "result_doc_count": 65 },