Skip to content

Commit

Permalink
Initial upload
Browse files Browse the repository at this point in the history
  • Loading branch information
PatrickSVM committed Jul 5, 2023
1 parent 6e9428c commit b7442a9
Show file tree
Hide file tree
Showing 68 changed files with 9,935 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# pre-commit configuration: registers the black formatter hook,
# pinned to rev 23.1.0 of https://github.com/psf/black.
repos:
- repo: https://github.com/psf/black
rev: 23.1.0
hooks:
- id: black
# It is recommended to specify the latest version of Python
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.9
Binary file added Master_Thesis_IKEA_final_handin.pdf
Binary file not shown.
284 changes: 284 additions & 0 deletions gpu-env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
name: Recommender-Models
channels:
- pytorch
- nvidia
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- absl-py=1.3.0=py39h06a4308_0
- aiohttp=3.8.3=py39h5eee18b_0
- aiosignal=1.2.0=pyhd3eb1b0_0
- anyio=3.5.0=py39h06a4308_0
- appdirs=1.4.4=pyh9f0ad1d_0
- argon2-cffi=21.3.0=pyhd3eb1b0_0
- argon2-cffi-bindings=21.2.0=py39h7f8727e_0
- asttokens=2.0.5=pyhd3eb1b0_0
- async-timeout=4.0.2=py39h06a4308_0
- attrs=22.1.0=py39h06a4308_0
- babel=2.11.0=py39h06a4308_0
- backcall=0.2.0=pyhd3eb1b0_0
- beautifulsoup4=4.11.1=py39h06a4308_0
- black=22.6.0=py39h06a4308_0
- blas=1.0=mkl
- bleach=4.1.0=pyhd3eb1b0_0
- blinker=1.4=py39h06a4308_0
- bottleneck=1.3.5=py39h7deecbd_0
- brotlipy=0.7.0=py39h27cfd23_1003
- bzip2=1.0.8=h7b6447c_0
- c-ares=1.18.1=h7f8727e_0
- ca-certificates=2023.01.10=h06a4308_0
- cachetools=4.2.2=pyhd3eb1b0_0
- certifi=2022.12.7=py39h06a4308_0
- cffi=1.15.1=py39h5eee18b_3
- charset-normalizer=2.0.4=pyhd3eb1b0_0
- click=8.1.3=unix_pyhd8ed1ab_2
- comm=0.1.2=py39h06a4308_0
- cryptography=38.0.4=py39h9ce1e76_0
- cuda=11.7.1=0
- cuda-cccl=11.7.91=0
- cuda-command-line-tools=11.7.1=0
- cuda-compiler=11.7.1=0
- cuda-cudart=11.7.99=0
- cuda-cudart-dev=11.7.99=0
- cuda-cuobjdump=11.7.91=0
- cuda-cupti=11.7.101=0
- cuda-cuxxfilt=11.7.91=0
- cuda-demo-suite=12.0.140=0
- cuda-documentation=12.0.140=0
- cuda-driver-dev=11.7.99=0
- cuda-gdb=12.0.140=0
- cuda-libraries=11.7.1=0
- cuda-libraries-dev=11.7.1=0
- cuda-memcheck=11.8.86=0
- cuda-nsight=12.0.140=0
- cuda-nsight-compute=12.0.1=0
- cuda-nvcc=11.7.99=0
- cuda-nvdisasm=12.0.140=0
- cuda-nvml-dev=11.7.91=0
- cuda-nvprof=12.0.146=0
- cuda-nvprune=11.7.91=0
- cuda-nvrtc=11.7.99=0
- cuda-nvrtc-dev=11.7.99=0
- cuda-nvtx=11.7.91=0
- cuda-nvvp=12.0.146=0
- cuda-runtime=11.7.1=0
- cuda-sanitizer-api=12.0.140=0
- cuda-toolkit=11.7.1=0
- cuda-tools=11.7.1=0
- cuda-visual-tools=11.7.1=0
- dbus=1.13.18=hb2f20db_0
- debugpy=1.5.1=py39h295c915_0
- decorator=5.1.1=pyhd3eb1b0_0
- defusedxml=0.7.1=pyhd3eb1b0_0
- docker-pycreds=0.4.0=py_0
- entrypoints=0.4=py39h06a4308_0
- executing=0.8.3=pyhd3eb1b0_0
- expat=2.4.9=h6a678d5_0
- ffmpeg=4.3=hf484d3e_0
- flit-core=3.6.0=pyhd3eb1b0_0
- fontconfig=2.14.1=h52c9d5c_1
- freetype=2.12.1=h4a9f257_0
- frozenlist=1.3.3=py39h5eee18b_0
- gds-tools=1.5.1.14=0
- giflib=5.2.1=h5eee18b_1
- gitdb=4.0.10=pyhd8ed1ab_0
- gitpython=3.1.30=pyhd8ed1ab_0
- glib=2.69.1=he621ea3_2
- gmp=6.2.1=h295c915_3
- gnutls=3.6.15=he1e5248_0
- google-auth=2.6.0=pyhd3eb1b0_0
- google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
- grpcio=1.42.0=py39hce63b2e_0
- gst-plugins-base=1.14.0=h8213a91_2
- gstreamer=1.14.0=h28cd5cc_2
- icu=58.2=he6710b0_3
- idna=3.4=py39h06a4308_0
- importlib-metadata=4.11.3=py39h06a4308_0
- intel-openmp=2021.4.0=h06a4308_3561
- ipykernel=6.19.2=py39hb070fc8_0
- ipython=8.8.0=py39h06a4308_0
- ipython_genutils=0.2.0=pyhd3eb1b0_1
- ipywidgets=7.6.5=pyhd3eb1b0_1
- jedi=0.18.1=py39h06a4308_1
- jinja2=3.1.2=py39h06a4308_0
- jpeg=9e=h7f8727e_0
- json5=0.9.6=pyhd3eb1b0_0
- jsonschema=4.16.0=py39h06a4308_0
- jupyter=1.0.0=py39h06a4308_8
- jupyter_client=7.4.9=py39h06a4308_0
- jupyter_console=6.4.4=py39h06a4308_0
- jupyter_core=5.1.1=py39h06a4308_0
- jupyter_server=1.23.4=py39h06a4308_0
- jupyterlab=3.5.3=py39h06a4308_0
- jupyterlab_pygments=0.1.2=py_0
- jupyterlab_server=2.16.5=py39h06a4308_0
- jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
- krb5=1.19.4=h568e23c_0
- lame=3.100=h7b6447c_0
- lcms2=2.12=h3be6417_0
- ld_impl_linux-64=2.38=h1181459_1
- lerc=3.0=h295c915_0
- libclang=10.0.1=default_hb85057a_2
- libcublas=11.10.3.66=0
- libcublas-dev=11.10.3.66=0
- libcufft=10.7.2.124=h4fbf590_0
- libcufft-dev=10.7.2.124=h98a8f43_0
- libcufile=1.5.1.14=0
- libcufile-dev=1.5.1.14=0
- libcurand=10.3.1.124=0
- libcurand-dev=10.3.1.124=0
- libcusolver=11.4.0.1=0
- libcusolver-dev=11.4.0.1=0
- libcusparse=11.7.4.91=0
- libcusparse-dev=11.7.4.91=0
- libdeflate=1.8=h7f8727e_5
- libedit=3.1.20221030=h5eee18b_0
- libevent=2.1.12=h8f2d780_0
- libffi=3.4.2=h6a678d5_6
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libiconv=1.16=h7f8727e_2
- libidn2=2.3.2=h7f8727e_0
- libllvm10=10.0.1=hbcb73fb_5
- libnpp=11.7.4.75=0
- libnpp-dev=11.7.4.75=0
- libnvjpeg=11.8.0.2=0
- libnvjpeg-dev=11.8.0.2=0
- libpng=1.6.37=hbc83047_0
- libpq=12.9=h16c4e8d_3
- libprotobuf=3.20.3=he621ea3_0
- libsodium=1.0.18=h7b6447c_0
- libstdcxx-ng=11.2.0=h1234567_1
- libtasn1=4.16.0=h27cfd23_0
- libtiff=4.5.0=h6a678d5_1
- libunistring=0.9.10=h27cfd23_0
- libuuid=1.41.5=h5eee18b_0
- libwebp=1.2.4=h11a3e52_0
- libwebp-base=1.2.4=h5eee18b_0
- libxcb=1.15=h7f8727e_0
- libxkbcommon=1.0.1=hfa300c1_0
- libxml2=2.9.14=h74e7548_0
- libxslt=1.1.35=h4e12654_0
- lxml=4.9.1=py39h1edc446_0
- lz4-c=1.9.4=h6a678d5_0
- markdown=3.4.1=py39h06a4308_0
- markupsafe=2.1.1=py39h7f8727e_0
- matplotlib-inline=0.1.6=py39h06a4308_0
- mistune=0.8.4=py39h27cfd23_1000
- mkl=2021.4.0=h06a4308_640
- mkl-service=2.4.0=py39h7f8727e_0
- mkl_fft=1.3.1=py39hd3c417c_0
- mkl_random=1.2.2=py39h51133e4_0
- multidict=6.0.2=py39h5eee18b_0
- mypy_extensions=0.4.3=py39h06a4308_1
- nbclassic=0.4.8=py39h06a4308_0
- nbclient=0.5.13=py39h06a4308_0
- nbconvert=6.5.4=py39h06a4308_0
- nbformat=5.7.0=py39h06a4308_0
- ncurses=6.4=h6a678d5_0
- nest-asyncio=1.5.6=py39h06a4308_0
- nettle=3.7.3=hbbd107a_1
- notebook=6.5.2=py39h06a4308_0
- notebook-shim=0.2.2=py39h06a4308_0
- nsight-compute=2022.4.1.6=0
- nspr=4.33=h295c915_0
- nss=3.74=h0370c37_0
- numexpr=2.8.4=py39he184ba9_0
- numpy=1.23.5=py39h14f4228_0
- numpy-base=1.23.5=py39h31eccc5_0
- oauthlib=3.2.1=py39h06a4308_0
- openh264=2.1.1=h4ff587b_0
- openssl=1.1.1t=h7f8727e_0
- packaging=22.0=py39h06a4308_0
- pandas=1.5.2=py39h417a72b_0
- pandocfilters=1.5.0=pyhd3eb1b0_0
- parso=0.8.3=pyhd3eb1b0_0
- pathspec=0.10.3=py39h06a4308_0
- pathtools=0.1.2=py_1
- pcre=8.45=h295c915_0
- pexpect=4.8.0=pyhd3eb1b0_3
- pickleshare=0.7.5=pyhd3eb1b0_1003
- pillow=9.3.0=py39h6a678d5_2
- pip=22.3.1=py39h06a4308_0
- platformdirs=2.5.2=py39h06a4308_0
- ply=3.11=py39h06a4308_0
- prometheus_client=0.14.1=py39h06a4308_0
- prompt-toolkit=3.0.36=py39h06a4308_0
- prompt_toolkit=3.0.36=hd3eb1b0_0
- protobuf=3.20.3=py39h6a678d5_0
- psutil=5.9.0=py39h5eee18b_0
- ptyprocess=0.7.0=pyhd3eb1b0_2
- pure_eval=0.2.2=pyhd3eb1b0_0
- pyasn1=0.4.8=pyhd3eb1b0_0
- pyasn1-modules=0.2.8=py_0
- pycparser=2.21=pyhd3eb1b0_0
- pygments=2.11.2=pyhd3eb1b0_0
- pyjwt=2.4.0=py39h06a4308_0
- pyopenssl=22.0.0=pyhd3eb1b0_0
- pyqt=5.15.7=py39h6a678d5_1
- pyqt5-sip=12.11.0=py39h6a678d5_1
- pyrsistent=0.18.0=py39heee7806_0
- pysocks=1.7.1=py39h06a4308_0
- python=3.9.16=h7a1cb2a_0
- python-dateutil=2.8.2=pyhd3eb1b0_0
- python-fastjsonschema=2.16.2=py39h06a4308_0
- python_abi=3.9=2_cp39
- pytorch=1.13.1=py3.9_cuda11.7_cudnn8.5.0_0
- pytorch-cuda=11.7=h67b0de4_1
- pytorch-mutex=1.0=cuda
- pytz=2022.7=py39h06a4308_0
- pyyaml=6.0=py39hb9d737c_4
- pyzmq=23.2.0=py39h6a678d5_0
- qt-main=5.15.2=h327a75a_7
- qt-webengine=5.15.9=hd2b0992_4
- qtconsole=5.4.0=py39h06a4308_0
- qtpy=2.2.0=py39h06a4308_0
- qtwebkit=5.212=h4eab89a_4
- readline=8.2=h5eee18b_0
- requests=2.28.1=py39h06a4308_0
- requests-oauthlib=1.3.0=py_0
- rsa=4.7.2=pyhd3eb1b0_1
- send2trash=1.8.0=pyhd3eb1b0_1
- sentry-sdk=1.15.0=pyhd8ed1ab_0
- setproctitle=1.2.2=py39hb9d737c_2
- setuptools=65.6.3=py39h06a4308_0
- sip=6.6.2=py39h6a678d5_0
- six=1.16.0=pyhd3eb1b0_1
- smmap=3.0.5=pyh44b312d_0
- sniffio=1.2.0=py39h06a4308_1
- soupsieve=2.3.2.post1=py39h06a4308_0
- sqlite=3.40.1=h5082296_0
- stack_data=0.2.0=pyhd3eb1b0_0
- tensorboard=2.10.0=py39h06a4308_0
- tensorboard-data-server=0.6.1=py39h52d8a92_0
- tensorboard-plugin-wit=1.8.1=py39h06a4308_0
- terminado=0.17.1=py39h06a4308_0
- tinycss2=1.2.1=py39h06a4308_0
- tk=8.6.12=h1ccaba5_0
- toml=0.10.2=pyhd3eb1b0_0
- tomli=2.0.1=py39h06a4308_0
- torchaudio=0.13.1=py39_cu117
- torchvision=0.14.1=py39_cu117
- tornado=6.2=py39h5eee18b_0
- traitlets=5.7.1=py39h06a4308_0
- typing-extensions=4.4.0=py39h06a4308_0
- typing_extensions=4.4.0=py39h06a4308_0
- tzdata=2022g=h04d1e81_0
- urllib3=1.26.14=py39h06a4308_0
- wandb=0.13.10=pyhd8ed1ab_0
- wcwidth=0.2.5=pyhd3eb1b0_0
- webencodings=0.5.1=py39h06a4308_1
- websocket-client=0.58.0=py39h06a4308_4
- werkzeug=2.2.2=py39h06a4308_0
- wheel=0.37.1=pyhd3eb1b0_0
- widgetsnbextension=3.5.2=py39h06a4308_0
- xz=5.2.10=h5eee18b_1
- yaml=0.2.5=h7f98852_2
- yarl=1.8.1=py39h5eee18b_0
- zeromq=4.3.4=h2531618_0
- zipp=3.11.0=py39h06a4308_0
- zlib=1.2.13=h5eee18b_0
- zstd=1.5.2=ha4553b6_0
prefix: /local/data1/pathi619/envs/Recommender-Models
Empty file added recommenders/__init__.py
Empty file.
Empty file.
96 changes: 96 additions & 0 deletions recommenders/data_utils/item_frequency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
import pickle
import json
import numpy as np
import pandas as pd


def get_low_frequency_items(data_dir, quantile, item_col_name):
    """Return the IDs of items that occur less often than a frequency quantile.

    The events file is loaded into a DataFrame, the occurrences of every
    value in ``item_col_name`` are counted, and the ``quantile``-th quantile
    of those counts is used as the threshold.

    Args:
        data_dir: Path (``str`` or path-like) of the events file. The reader
            is picked by substring of the path: ``"csv"`` -> ``pd.read_csv``,
            ``"json"`` -> ``pd.read_json`` (line-delimited records),
            anything else -> ``pd.read_pickle``.
        quantile: Quantile in ``[0, 1]`` of the per-item frequencies that
            serves as the cut-off.
        item_col_name: Name of the column that holds the item IDs.

    Returns:
        ``numpy.ndarray`` with the item IDs whose frequency is strictly
        smaller than the threshold, ordered by descending frequency.
    """
    path_str = str(data_dir)

    # Read data
    if "csv" in path_str:
        # CSV files must be parsed as CSV; the previous read_pickle call
        # made this branch a duplicate of the fallback and broke on real CSVs.
        data = pd.read_csv(data_dir)
    elif "json" in path_str:
        data = pd.read_json(data_dir, orient="records", lines=True)
    else:
        data = pd.read_pickle(data_dir)

    # Get sorted item frequency
    sorted_freq = data[item_col_name].value_counts().sort_values(ascending=False)
    freq_thresh = np.quantile(sorted_freq, q=quantile)

    # Get itemIDs with frequency smaller than thresh
    unpopular_items = sorted_freq[sorted_freq < freq_thresh].index.values

    return unpopular_items


def save_freq_to_file(data_dir, target_dir, quantile=0.9, item_col_name="item_id"):
    """Compute the low-frequency item IDs and dump them to a JSON file.

    Args:
        data_dir: Path of the events file, forwarded to
            ``get_low_frequency_items``.
        target_dir: Directory in which ``unpopular_items.json`` is written.
        quantile: Frequency quantile used as the cut-off.
        item_col_name: Name of the item-ID column in the events file.
    """
    # Convert the array to a plain list so that it is JSON-serialisable.
    low_freq_ids = get_low_frequency_items(
        data_dir, quantile=quantile, item_col_name=item_col_name
    ).tolist()

    out_path = os.path.join(target_dir, "unpopular_items.json")
    with open(out_path, "w") as out_file:
        json.dump(low_freq_ids, out_file)


def load_unpopular_items(data_dir):
    """Load the stored list of unpopular item IDs and return it as a set.

    Args:
        data_dir: Path (``str`` or path-like) of the file to read. If the
            path contains ``"pkl"`` the file is unpickled, otherwise it is
            parsed as JSON.

    Returns:
        ``set`` with the item IDs contained in the file.
    """
    # str() so that pathlib.Path arguments work as well; a substring test on
    # a Path object raises TypeError.
    if "pkl" in str(data_dir):
        # NOTE: pickle can execute arbitrary code while loading - only use
        # this branch with files from a trusted source.
        with open(data_dir, "rb") as f:
            unpopular_items = pickle.load(f)
    else:
        with open(data_dir, "r") as f:
            unpopular_items = json.load(f)
    return set(unpopular_items)


if __name__ == "__main__":
    # Command-line entry point: compute the unpopular items of an events
    # file and write them to <targetdir>/unpopular_items.json.
    import pathlib
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Write unpopular items to file")

    parser.add_argument(
        "-f",
        "--filename",
        help="all events file path",
        metavar="FILE",
        required=True,
    )

    parser.add_argument(
        "-t",
        "--targetdir",
        help="target directory where to save file",
        required=True,
    )

    # Optional arguments need explicit defaults: argparse would otherwise
    # hand over None, which overrides the defaults of save_freq_to_file.
    parser.add_argument(
        "-q",
        "--quantile",
        # argparse %-formats help strings, so a literal percent sign has to
        # be escaped as "%%"; otherwise printing the help raises an error.
        help="quantile to check - e.g. 0.9, 90%% of all items included",
        required=False,
        type=float,
        default=0.9,
    )

    parser.add_argument(
        "-i",
        "--item_col_name",
        help="name of itemID column in data file",
        required=False,
        default="item_id",
    )

    # Get filepath as arg
    args = parser.parse_args()
    events_path = pathlib.Path(args.filename)
    save_freq_to_file(
        events_path.absolute(),
        quantile=args.quantile,
        target_dir=args.targetdir,
        item_col_name=args.item_col_name,
    )
Loading

0 comments on commit b7442a9

Please sign in to comment.