
Commit 87473ed

Initial commit

0 parents, commit 87473ed

11 files changed: +736, -0 lines changed

.github/workflows/main.yml

+17
@@ -0,0 +1,17 @@
name: Main
on: push
jobs:
  jupyter-lite-build-release:
    runs-on: ubuntu-latest
    name: Package and release the Jupyter Lite app
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Jupyter Lite Build
        uses: ./
        id: jupyter-lite-build
      - name: Release
        if: startsWith(github.ref, 'refs/tags/')
        uses: softprops/action-gh-release@v1
        with:
          files: jupyter-lite-build.tgz

.gitignore

+28
@@ -0,0 +1,28 @@
# VIM swp
*.swp

# Project files
.idea/

# Mac
.DS_Store

# Ignore all logfiles and tempfiles.
/log/*
!/log/.keep
/tmp/*
!/tmp/pids
/tmp/pids/*
!/tmp/pids/.keep

# Ignore local env file
/.envrc
.env

# Jupyterlite
public/notebooks
public/notebooks-*
.jupyterlite.doit.db

# Built artifact
jupyter-lite-build.tgz

Dockerfile

+20
@@ -0,0 +1,20 @@
FROM python:3.11
RUN pip install --no-cache-dir --upgrade pip

COPY jupyterlite /build/

RUN pip install --no-cache-dir -r /build/requirements.txt && rm -f /build/requirements.txt

# Use the target dir, i.e. a mounted path from which the built artifact can be retrieved.
# Use the GitHub workspace as the default.
# For a local run use e.g. /dist with `docker run -it --rm -e TARGET_DIR=/dist -v "$(pwd)":/dist $(docker build -q .)`
ENV TARGET_DIR=${GITHUB_WORKSPACE:-/github/workspace}

# Build the JupyterLite web app. The --pyodide option is passed for offline availability
# (see https://jupyterlite.readthedocs.io/en/latest/howto/configure/advanced/offline.html); the
# Pyodide version is taken from https://github.com/jupyterlite/pyodide-kernel/blob/main/packages/pyodide-kernel/package.json
CMD jupyter lite build \
    --lite-dir /build \
    --pyodide https://github.com/pyodide/pyodide/releases/download/0.23.2/pyodide-0.23.2.tar.bz2 \
    --output-dir notebooks \
    && tar -czf ${TARGET_DIR}/jupyter-lite-build.tgz notebooks
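A quick way to inspect the artifact this CMD produces is to unpack it and serve the static site locally. The following is a minimal sketch only, assuming the container was run with TARGET_DIR mounted to the current directory so jupyter-lite-build.tgz is present, and that the "notebooks" folder name matches the --output-dir above:

# sketch: unpack jupyter-lite-build.tgz and serve the built JupyterLite site locally
import tarfile
import http.server
import socketserver
import os

# extract the tarball produced by the Dockerfile's CMD; creates ./notebooks
with tarfile.open("jupyter-lite-build.tgz", "r:gz") as tar:
    tar.extractall(".")

# serve the static site at http://localhost:8000/
os.chdir("notebooks")
with socketserver.TCPServer(("", 8000), http.server.SimpleHTTPRequestHandler) as httpd:
    print("Serving JupyterLite at http://localhost:8000/")
    httpd.serve_forever()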

action.yml

+5
@@ -0,0 +1,5 @@
name: 'Jupyter Lite Docker Action'
description: 'Build Jupyter Lite App'
runs:
  using: 'docker'
  image: 'Dockerfile'

jupyterlite/files/README.md

+5
@@ -0,0 +1,5 @@
# Welcome to the JupyterLite-based notebooks

Learn more at https://jupyterlite.readthedocs.io/en/latest/

* ./examples/Snapshot Compare.ipynb is an example of measuring baseline relevancy over time.

jupyterlite/files/examples/Snapshot Compare With Charts.ipynb

+121
Large diffs are not rendered by default.

jupyterlite/files/examples/Snapshot Compare.ipynb

+142
Large diffs are not rendered by default.
@@ -0,0 +1,199 @@
{
  "metadata": {
    "language_info": {
      "codemirror_mode": {
        "name": "python",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8"
    },
    "kernelspec": {
      "name": "python",
      "display_name": "Python (Pyodide)",
      "language": "python"
    }
  },
  "nbformat_minor": 4,
  "nbformat": 4,
  "cells": [
    {
      "cell_type": "markdown",
      "source": "# Snapshot Jaccard Similarity\n\nTo understand the impact of changes, you can compare the Jaccard Similarity of snapshots.\n\nPlease copy this example and customize it for your own purposes!",
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": "### Imports",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "import pandas as pd\nimport io\nfrom js import fetch",
      "metadata": {
        "trusted": true
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": "## Define the Data You Want",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "CASE_ID = 6 # Your Case\nSNAPSHOT_IDS = [1,2] # Your Snapshots. Use the Compare Snapshot function in Quepid to see what the specific ID's are of your snapshots.",
      "metadata": {
        "trusted": true
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": "### Jaccard Subroutines",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "## Calculation of Jaccard Similarity of List 1 and 2\n\ndef jaccard_similarity(list1, list2):\n    print(list1, list2)\n    if list1 == list2:\n        print('the lists are same')\n        return float(1.0)\n    intersection = len(list(set(list1).intersection(list2)))\n    union = (len(set(list1)) + len(set(list2))) - intersection\n    return float(intersection) / union",
      "metadata": {
        "trusted": true
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": "## Construction of a comparable list from Snapshot blob\n\ndef construct_comparable_list_from_snapshot_blob(snapshot):\n    for data in snapshot:\n        record = data.split(\"\\n\")\n        #print(record)\n        df = pd.DataFrame(record)\n        df[['query','docid','rating']] = df[0].str.split(',',expand=True)\n        ratings_df = df[['query','docid','rating']]\n\n        # Drop first row as it's just column names\n        ratings_mod_df = ratings_df.drop(index=ratings_df.index[0])\n\n        # Remove '?' if using ispy else the next step can be ignored\n        ratings_mod_df['docid'] = ratings_mod_df['docid'].str.split('?').str.get(0)\n        #print(ratings_mod_df.head(10))\n\n    return ratings_mod_df",
      "metadata": {
        "trusted": true
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": "## Subroutine for calculating Jaccard Similarity between 2 Snapshots\n\ndef jaccard_similarity(A, B):\n    # Compute Jaccard Similarity\n    nominator = set(A).intersection(set(B))\n    denominator = set(A).union(set(B))\n    Jacc_similarity = len(nominator)/len(denominator)\n    #print(Jacc_similarity)\n    return (Jacc_similarity)",
      "metadata": {
        "trusted": true
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": "### Pull data directly from Quepid's snapshot repository to calculate Jaccard Similarity",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "# Retrieve from Quepid API from Case id - 6 and Snapshot id - 1\nrating_snapshot_1 = []\nres = await fetch(f'/api/export/ratings/{CASE_ID}.csv?file_format=basic_snapshot&snapshot_id={SNAPSHOT_IDS[0]}')\nrating_snapshot_1.append(await res.text())\n#print(rating_snapshot_1)\n\n# Retrieve from Quepid API from Case id - 6 and Snapshot id - 2\nrating_snapshot_2 = []\nres = await fetch(f'/api/export/ratings/{CASE_ID}.csv?file_format=basic_snapshot&snapshot_id={SNAPSHOT_IDS[1]}')\nrating_snapshot_2.append(await res.text())\n#print(rating_snapshot_2)",
      "metadata": {
        "tags": [],
        "trusted": true
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": "### Read and transform data in a dataframe",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "df1 = construct_comparable_list_from_snapshot_blob(rating_snapshot_1)\ndf2 = construct_comparable_list_from_snapshot_blob(rating_snapshot_2)\ndf1 = df1.groupby('query')['docid'].apply(list).reset_index(name=\"results\")\ndf2 = df2.groupby('query')['docid'].apply(list).reset_index(name=\"results\")\n\ndf_jaccard = df1[['query']].copy()\ndf_jaccard['baseline_results'] = df1['results']\ndf_jaccard['comparison_results'] = df2['results']\ndf_jaccard['baseline_count'] = df_jaccard.apply(lambda row: len(row.baseline_results), axis = 1)\ndf_jaccard['comparison_count'] = df_jaccard.apply(lambda row: len(row.comparison_results), axis = 1)",
      "metadata": {
        "tags": [],
        "trusted": true
      },
      "execution_count": 7,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": "### Add column with jaccard similarity",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "df_jaccard['jaccard_similarity'] = df_jaccard.apply(lambda row: jaccard_similarity(row.baseline_results, row.comparison_results), axis = 1)",
      "metadata": {
        "trusted": true
      },
      "execution_count": 8,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": "df_jaccard.head(10)",
      "metadata": {
        "trusted": true
      },
      "execution_count": 9,
      "outputs": [
        {
          "execution_count": 9,
          "output_type": "execute_result",
          "data": {
            "text/plain": " query \\\n0 \n1 movie about a boxer who climbs \n2 star trek \n3 star wars \n\n baseline_results \\\n0 [None] \n1 [45317, 826, 46838, 683716, 769, 570731, 680, ... \n2 [193, 199, 188927, 200, 13475, 152, 201, 154, ... \n3 [11, 12180, 181808, 330459, 348350, 140607, 18... \n\n comparison_results baseline_count \\\n0 [None] 1 \n1 [45317, 826, 46838, 683716, 769, 570731, 680, ... 10 \n2 [13363, 193, 199, 154, 152, 174, 157, 168, 188... 10 \n3 [12180, 322506, 85, 1895, 18046, 11, 330459, 1... 10 \n\n comparison_count jaccard_similarity \n0 1 1.000000 \n1 10 1.000000 \n2 10 0.666667 \n3 10 0.538462 ",
            "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>query</th>\n <th>baseline_results</th>\n <th>comparison_results</th>\n <th>baseline_count</th>\n <th>comparison_count</th>\n <th>jaccard_similarity</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td></td>\n <td>[None]</td>\n <td>[None]</td>\n <td>1</td>\n <td>1</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>1</th>\n <td>movie about a boxer who climbs</td>\n <td>[45317, 826, 46838, 683716, 769, 570731, 680, ...</td>\n <td>[45317, 826, 46838, 683716, 769, 570731, 680, ...</td>\n <td>10</td>\n <td>10</td>\n <td>1.000000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>star trek</td>\n <td>[193, 199, 188927, 200, 13475, 152, 201, 154, ...</td>\n <td>[13363, 193, 199, 154, 152, 174, 157, 168, 188...</td>\n <td>10</td>\n <td>10</td>\n <td>0.666667</td>\n </tr>\n <tr>\n <th>3</th>\n <td>star wars</td>\n <td>[11, 12180, 181808, 330459, 348350, 140607, 18...</td>\n <td>[12180, 322506, 85, 1895, 18046, 11, 330459, 1...</td>\n <td>10</td>\n <td>10</td>\n <td>0.538462</td>\n </tr>\n </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": "### Export data as CSV for reporting and sharing purpose",
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": "df_jaccard.to_csv('jaccard_similarity_results.csv', encoding='utf-8', index=False)",
      "metadata": {
        "trusted": true
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": "df_jaccard['jaccard_similarity'].mean()",
      "metadata": {
        "trusted": true
      },
      "execution_count": 11,
      "outputs": [
        {
          "execution_count": 11,
          "output_type": "execute_result",
          "data": {
            "text/plain": "0.8012820512820512"
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": "",
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": "",
      "metadata": {},
      "execution_count": null,
      "outputs": []
    }
  ]
}
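For readers skimming the diff, the notebook above fetches two snapshot exports of the same case from Quepid's /api/export/ratings endpoint (which requires Pyodide's js.fetch) and then computes a per-query Jaccard similarity between the baseline and comparison result lists. The following is a minimal standalone sketch of just that per-query calculation; the sample queries and document ids are invented for illustration, not taken from the commit:

# sketch: per-query Jaccard similarity between a baseline and a comparison result set
import pandas as pd

def jaccard_similarity(a, b):
    # |A intersect B| / |A union B| over the sets of document ids returned for one query
    a, b = set(a), set(b)
    return len(a & b) / len(a | b)

# hypothetical result lists standing in for two snapshot exports of the same case
baseline = pd.DataFrame({
    "query": ["star trek", "star wars"],
    "results": [[193, 199, 200], [11, 12180, 181808]],
})
comparison = pd.DataFrame({
    "query": ["star trek", "star wars"],
    "results": [[193, 199, 154], [12180, 11, 330459]],
})

# line the two snapshots up by query, then score each pair of result lists
df = baseline.rename(columns={"results": "baseline_results"}).merge(
    comparison.rename(columns={"results": "comparison_results"}), on="query"
)
df["jaccard_similarity"] = df.apply(
    lambda row: jaccard_similarity(row.baseline_results, row.comparison_results), axis=1
)
print(df[["query", "jaccard_similarity"]])
print("mean:", df["jaccard_similarity"].mean())

Averaging the per-query scores, as the last cell of the notebook does, gives a single number for how much the result sets drifted between the two snapshots.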

0 commit comments
