ZixuanKe · ClaudiaQueipo · Dec 28, 2021 · Dec 28, 2021 · Mar 16, 2023 · Mar 16, 2023
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/SUCESSFUL_LL_APPROACH.ipynb b/SUCESSFUL_LL_APPROACH.ipynb
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,7 @@
+et-xmlfile==1.1.0
+numpy==1.24.2
+openpyxl==3.1.2
+pandas==1.5.3
+python-dateutil==2.8.2
+pytz==2022.7.1
+six==1.16.0
diff --git a/src/_onlyrequirements.txt b/src/_onlyrequirements.txt
@@ -0,0 +1,107 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+#_libgcc_mutex=0.1=main
+#_openmp_mutex=4.5=1_gnu
+blas
+boto3
+botocore
+brotlipy
+bzip2
+ca-certificates
+certifi
+cffi
+charset-normalizer
+click
+cryptography
+cudatoolkit
+ffmpeg
+filelock
+freetype
+giflib
+gmp
+gnutls
+idna
+importlib-metadata
+intel-openmp
+jmespath
+joblib
+jpeg
+lame
+lcms2
+ld_impl_linux-64
+libffi
+libgcc-ng
+libgfortran-ng
+libgfortran4
+libgomp
+libiconv
+libidn2
+libpng
+libstdcxx-ng
+libtasn1
+libtiff
+libunistring
+libuv
+libwebp
+libwebp-base
+lz4-c
+mkl
+mkl-service
+mkl_fft
+mkl_random
+ncurses
+nettle
+ninja
+numpy
+numpy-base
+olefile
+openh264
+openssl
+packaging
+pillow
+pip
+pycparser
+pyopenssl
+pyparsing
+pysocks
+python==3.7.11
+python-dateutil==2.8.2
+pytorch==1.7.0
+pytorch-mutex==1.0
+pyyaml
+quadprog
+readline
+regex
+requests
+s3transfer
+sacremoses
+scikit-learn
+scipy
+setuptools
+six
+sqlite
+threadpoolctl
+tk
+tokenizers
+torchaudio
+torchvision
+tqdm
+transformers
+typing_extensions
+urllib3
+wheel
+xz
+zipp
+zlib
+zstd
+transformers==4.10.2
+zipp==3.6.0
+tokenizers==0.10.3
+pyparsing==2.4.7
+packaging==21.2
+quadprog==0.1.10
+pyyaml==6.0
+filelock==3.3.2
+importlib-metadata==4.8.1
+huggingface-hub==0.1.1
diff --git a/src/_requirements.txt b/src/_requirements.txt
@@ -0,0 +1,99 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+#_libgcc_mutex=0.1=main
+#_openmp_mutex=4.5=1_gnu
+blas=1.0=mkl
+boto3=1.18.21=pyhd3eb1b0_0
+
+botocore=1.21.41=pyhd3eb1b0_1
+brotlipy=0.7.0=py37h27cfd23_1003
+bzip2=1.0.8=h7b6447c_0
+ca-certificates=2021.10.26=h06a4308_2
+certifi=2021.10.8=py37h06a4308_0
+cffi=1.14.6=py37h400218f_0
+charset-normalizer=2.0.4=pyhd3eb1b0_0
+click=8.0.3=pyhd3eb1b0_0
+cryptography=35.0.0=py37hd23ed53_0
+cudatoolkit=11.0.221=h6bb024c_0
+ffmpeg=4.3=hf484d3e_0
+filelock=3.3.2=pypi_0
+freetype=2.11.0=h70c0345_0
+giflib=5.2.1=h7b6447c_0
+gmp=6.2.1=h2531618_2
+gnutls=3.6.15=he1e5248_0
+huggingface-hub=0.1.1=pypi_0
+idna=3.2=pyhd3eb1b0_0
+importlib-metadata=4.8.1=pypi_0
+intel-openmp=2021.4.0=h06a4308_3561
+jmespath=0.10.0=pyhd3eb1b0_0
+joblib=1.1.0=pyhd3eb1b0_0
+jpeg=9d=h7f8727e_0
+lame=3.100=h7b6447c_0
+lcms2=2.12=h3be6417_0
+ld_impl_linux-64=2.35.1=h7274673_9
+libffi=3.3=he6710b0_2
+libgcc-ng=9.3.0=h5101ec6_17
+libgfortran-ng=7.5.0=ha8ba4b0_17
+libgfortran4=7.5.0=ha8ba4b0_17
+libgomp=9.3.0=h5101ec6_17
+libiconv=1.15=h63c8f33_5
+libidn2=2.3.2=h7f8727e_0
+libpng=1.6.37=hbc83047_0
+libstdcxx-ng=9.3.0=hd4cf53a_17
+libtasn1=4.16.0=h27cfd23_0
+libtiff=4.2.0=h85742a9_0
+libunistring=0.9.10=h27cfd23_0
+libuv=1.40.0=h7b6447c_0
+libwebp=1.2.0=h89dd481_0
+libwebp-base=1.2.0=h27cfd23_0
+lz4-c=1.9.3=h295c915_1
+mkl=2021.4.0=h06a4308_640
+mkl-service=2.4.0=py37h7f8727e_0
+mkl_fft=1.3.1=py37hd3c417c_0
+mkl_random=1.2.2=py37h51133e4_0
+ncurses=6.3=heee7806_1
+nettle=3.7.3=hbbd107a_1
+ninja=1.10.2=hff7bd54_1
+numpy=1.21.2=py37h20f2e39_0
+numpy-base=1.21.2=py37h79a1101_0
+olefile=0.46=py37_0
+openh264=2.1.0=hd408876_0
+openssl=1.1.1l=h7f8727e_0
+packaging=21.2=pypi_0
+pillow=8.4.0=py37h5aabda8_0
+pip=21.0.1=py37h06a4308_0
+pycparser=2.20=py_2
+pyopenssl=21.0.0=pyhd3eb1b0_1
+pyparsing=2.4.7=pypi_0
+pysocks=1.7.1=py37_1
+python=3.7.11=h12debd9_0
+python-dateutil=2.8.2=pyhd3eb1b0_0
+pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0
+pytorch-mutex=1.0=cuda
+pyyaml=6.0=pypi_0
+quadprog=0.1.10=pypi_0
+readline=8.1=h27cfd23_0
+regex=2021.8.3=py37h7f8727e_0
+requests=2.26.0=pyhd3eb1b0_0
+s3transfer=0.5.0=pyhd3eb1b0_0
+sacremoses=0.0.43=pyhd3eb1b0_0
+scikit-learn=1.0.1=py37h51133e4_0
+scipy=1.7.1=py37h292c36d_2
+setuptools=58.0.4=py37h06a4308_0
+six=1.16.0=pyhd3eb1b0_0
+sqlite=3.36.0=hc218d9a_0
+threadpoolctl=2.2.0=pyh0d69192_0
+tk=8.6.11=h1ccaba5_0
+tokenizers=0.10.3=pypi_0
+torchaudio=0.7.0=py37
+torchvision=0.8.1=py37_cu110
+tqdm=4.62.3=pyhd3eb1b0_1
+transformers=4.10.2=pypi_0
+typing_extensions=3.10.0.2=pyh06a4308_0
+urllib3=1.26.7=pyhd3eb1b0_0
+wheel=0.37.0=pyhd3eb1b0_1
+xz=5.2.5=h7b6447c_0
+zipp=3.6.0=pypi_0
+zlib=1.2.11=h7b6447c_3
+zstd=1.4.9=haebb681_0
diff --git a/src/_testrequirements.txt b/src/_testrequirements.txt
@@ -0,0 +1,97 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+#_libgcc_mutex==0.1
+#_openmp_mutex==4.5
+blas
+boto3
+botocore
+brotlipy
+bzip2
+ca-certificates
+certifi
+cffi
+charset-normalizer
+click
+cryptography
+cudatoolkit
+ffmpeg
+filelock
+freetype
+giflib
+gmp
+gnutls
+idna
+importlib-metadata
+intel-openmp
+jmespath
+joblib
+jpeg
+lame
+lcms2
+ld_impl_linux-64
+libffi
+libgcc-ng
+libgfortran-ng
+libgfortran4
+libgomp
+libiconv
+libidn2
+libpng
+libstdcxx-ng
+libtasn1
+libtiff
+libunistring
+libuv
+libwebp
+libwebp-base
+lz4-c
+mkl
+mkl-service
+mkl_fft
+mkl_random
+ncurses
+nettle
+ninja
+numpy
+numpy-base
+olefile
+openh264
+openssl
+packaging
+pillow
+pip
+pycparser
+pyopenssl
+pyparsing
+pysocks
+python==3.7.11
+python-dateutil==2.8.2
+pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0
+pytorch-mutex=1.0=cuda
+pyyaml
+quadprog
+readline
+regex
+requests
+s3transfer
+sacremoses
+scikit-learn
+scipy
+setuptools
+six
+sqlite
+threadpoolctl
+tk
+tokenizers
+torchaudio
+torchvision
+tqdm
+transformers=4.10.2
+typing_extensions
+urllib3
+wheel
+xz
+zipp
+zlib
+zstd
diff --git a/src/piprequirements.txt b/src/piprequirements.txt
@@ -0,0 +1,13 @@
+# This file lists the PyPI-only packages of the environment; install them with:
+# $ pip install -r <this file>
+# platform: linux-64
+transformers==4.10.2
+zipp==3.6.0
+tokenizers==0.10.3
+pyparsing==2.4.7
+packaging==21.2
+quadprog==0.1.10
+pyyaml==6.0
+filelock==3.3.2
+importlib-metadata==4.8.1
+huggingface-hub==0.1.1
diff --git a/src/serialization/README.MD b/src/serialization/README.MD
@@ -0,0 +1,5 @@
+# INSTRUCCIONES
+1. Copiar los 2 archivos Excel (Track_Train.xlsx y Rest_Mex_Sentiment_Analysis_2023_Train.xlsx) en esta carpeta
+2. Tener instalado Python 3.11
+3. Instalar las dependencias necesarias, que están en el archivo requirements.txt en la raíz del repositorio
+4. Ejecutar los scripts
diff --git a/src/serialization/classify_to_json.py b/src/serialization/classify_to_json.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+from pathlib import Path
+
+# Splits the 2023 training set by "Type" into one JSON file per category.
+# Written for Python 3.11 (chosen by the author for its interpreter speed-ups).
+df = pd.read_excel("Rest_Mex_Sentiment_Analysis_2023_Train.xlsx")
+
+# Path joins portably ("classified\\x.json" only worked on Windows); the
+# folder is created up front so open() cannot fail on a missing directory.
+out_dir = Path("classified")
+out_dir.mkdir(exist_ok=True)
+for kind in ("Hotel", "Restaurant", "Attractive"):
+    with open(out_dir / f"{kind.lower()}.json", "w", encoding="utf-8") as file:
+        file.write(df[df["Type"] == kind].to_json(force_ascii=False, orient="index"))
diff --git a/src/serialization/find_diferents.py b/src/serialization/find_diferents.py
@@ -0,0 +1,10 @@
+import pandas as pd
+
+# Keep only rows that have no exact duplicate anywhere in the two datasets.
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+df = pd.concat([df2023, df2022]).drop_duplicates(keep=False)
+# Then drop every row where any cell, rendered as text, contains 'Attractive'.
+has_attractive = df.astype(str).apply(lambda col: col.str.contains('Attractive')).any(axis=1)
+df = df[~has_attractive]
+df.to_excel('dataset_filtrado.xlsx', index=False)
diff --git a/src/serialization/find_equals.py b/src/serialization/find_equals.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+# Load both yearly training sets
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+
+# Distinct review texts of each year
+reviews_ds22 = df2022['Review'].unique().tolist()
+reviews_ds23 = df2023['Review'].unique().tolist()
+
+# Keep the rows whose review text is absent from the other year's dataset.
+# Each result is filtered from its own year so the name matches the content.
+rows_only_in_ds22_df = df2022[~df2022['Review'].isin(reviews_ds23)]
+rows_only_in_ds23_df = df2023[~df2023['Review'].isin(reviews_ds22)]
+
+# Export the resulting datasets
+rows_only_in_ds22_df.to_excel('only_2022.xlsx', index=False)
+rows_only_in_ds23_df.to_excel('only_2023.xlsx', index=False)