diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..26d33521 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/SUCESSFUL_LL_APPROACH.ipynb b/SUCESSFUL_LL_APPROACH.ipynb new file mode 100644 index 00000000..9e758436 --- /dev/null +++ b/SUCESSFUL_LL_APPROACH.ipynb @@ -0,0 +1,1641 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "SUCESSFUL_LL_APPROACH.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyNXvNjLnuleHrTWI+/Wy02y", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "qEu-Kgavdp35", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8a4ceca7-d87c-4661-8b91-a446df3224ba" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/gdrive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/gdrive')" + ] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import numpy\n", + "print(torch.cuda.device_count())\n", + "print(torch.cuda.get_device_name(0))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pHjt0iX5bsRk", + "outputId": "37d37cc7-3cf0-44e3-85ed-93f5b93af360" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "Tesla K80\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -q condacolab\n", + "import condacolab\n", + "condacolab.install()" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "NmrKqS-jE7Pa", + "outputId": "1bd9c028-f32a-4211-e02f-4842b6739932" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...\n", + "πŸ“¦ Installing...\n", + "πŸ“Œ Adjusting configuration...\n", + "🩹 Patching environment...\n", + "⏲ Done in 0:00:36\n", + "πŸ” Restarting kernel...\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import condacolab\n", + "condacolab.check()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QPO8EymEFHgS", + "outputId": "32424e44-e22f-4e5c-ef34-c641ae4c414d" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "✨🍰✨ Everything looks OK!\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "cd gdrive/MyDrive/PyContinual-main/PyContinual-main/src" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OiAq5U_zhrA0", + "outputId": "e250ad01-5e22-4560-fa5e-ff893cee7870" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/gdrive/MyDrive/PyContinual-main/PyContinual-main/src\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Using conda in Colab https://inside-machinelearning.com/en/how-to-install-use-conda-on-google-colab/" + ], + "metadata": { + "id": "BIToLrFspncE" + } + }, + { + "cell_type": "markdown", + "source": [ + "Using other https://pypi.org/project/condacolab/" + ], + "metadata": { + "id": "KqSnDJShZ4HN" + } + }, + { + "cell_type": "code", + "source": [ + "!python --version # now returns Python 3.6.5 :: Anaconda, Inc." 
+ ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QzIBvcF9K9eX", + "outputId": "101bac79-d7cc-4205-ce85-d339f66dcb99" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Python 3.7.10\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!conda config --add channels conda-forge\n", + "!conda config --add channels huggingface \n", + "!conda config --add channels pytorch \n" + ], + "metadata": { + "id": "7qFVnqZjHW6v" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "stegCcCiRtY0" + } + }, + { + "cell_type": "markdown", + "source": [ + "https://colab.research.google.com/drive/1c_RGCgQeLHVXlF44LyOFjfUW32CmG6BP" + ], + "metadata": { + "id": "B8Wdtnk3mv-W" + } + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "vKlz6GkHRuYL" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -r piprequirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wO_yjgAeTl6J", + "outputId": "99a66ded-84ca-453c-f10d-1b97a251025a" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting transformers==4.10.2\n", + " Downloading transformers-4.10.2-py3-none-any.whl (2.8 MB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2.8 MB 5.4 MB/s \n", + "\u001b[?25hCollecting zipp==3.6.0\n", + " Downloading zipp-3.6.0-py3-none-any.whl (5.3 kB)\n", + "Collecting tokenizers==0.10.3\n", + " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.3 MB 26.7 MB/s \n", + "\u001b[?25hCollecting pyparsing==2.4.7\n", + 
" Downloading pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 67 kB 5.5 MB/s \n", + "\u001b[?25hCollecting packaging==21.2\n", + " Downloading packaging-21.2-py3-none-any.whl (40 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 40 kB 5.4 MB/s \n", + "\u001b[?25hCollecting quadprog==0.1.10\n", + " Downloading quadprog-0.1.10.tar.gz (121 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 121 kB 47.2 MB/s \n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n", + "Collecting pyyaml==6.0\n", + " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 596 kB 44.2 MB/s \n", + "\u001b[?25hCollecting filelock==3.3.2\n", + " Downloading filelock-3.3.2-py3-none-any.whl (9.7 kB)\n", + "Collecting importlib-metadata==4.8.1\n", + " Downloading importlib_metadata-4.8.1-py3-none-any.whl (17 kB)\n", + "Collecting huggingface-hub==0.1.1\n", + " Downloading huggingface_hub-0.1.1-py3-none-any.whl (59 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 59 kB 6.8 MB/s \n", + "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.7/site-packages (from huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (4.59.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/site-packages (from huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (2.25.1)\n", + "Collecting 
typing-extensions>=3.7.4.3\n", + " Downloading typing_extensions-4.0.1-py3-none-any.whl (22 kB)\n", + "Collecting numpy\n", + " Downloading numpy-1.21.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 15.7 MB 36.2 MB/s \n", + "\u001b[?25hCollecting sacremoses\n", + " Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 895 kB 40.2 MB/s \n", + "\u001b[?25hCollecting regex!=2019.12.17\n", + " Downloading regex-2021.11.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (749 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 749 kB 45.8 MB/s \n", + "\u001b[?25hRequirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (2020.12.5)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (4.0.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (2.10)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (1.26.3)\n", + "Collecting joblib\n", + " Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 306 kB 48.7 MB/s \n", + "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.7/site-packages (from sacremoses->transformers==4.10.2->-r piprequirements.txt (line 4)) 
(1.15.0)\n", + "Collecting click\n", + " Downloading click-8.0.3-py3-none-any.whl (97 kB)\n", + "\u001b[K |β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 97 kB 5.6 MB/s \n", + "\u001b[33mWARNING: The candidate selected for download or install is a yanked version: 'quadprog' candidate (version 0.1.10 at https://files.pythonhosted.org/packages/78/7a/f0355bad3cf730747cd8971dd8b3b826a802a4f8b11648748af964b18d71/quadprog-0.1.10.tar.gz#sha256=f13bf9609593781a5686ccfd8b38188542dc3c6e00129574817d049fb19ce966 (from https://pypi.org/simple/quadprog/))\n", + "Reason for being yanked: Suffers from https://github.com/quadprog/quadprog/issues/32\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: quadprog\n", + " Building wheel for quadprog (PEP 517) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for quadprog: filename=quadprog-0.1.10-cp37-cp37m-linux_x86_64.whl size=313103 sha256=669d3fefeb1d547afff57157cf676c91ffa82ba8cbb2d69aa939b8b896c2733c\n", + " Stored in directory: /root/.cache/pip/wheels/e9/af/76/c5335ed32afc1284e6100b86588d1f75f5c4906fa26df6efda\n", + "Successfully built quadprog\n", + "Installing collected packages: zipp, typing-extensions, pyparsing, importlib-metadata, regex, pyyaml, packaging, joblib, filelock, click, tokenizers, sacremoses, numpy, huggingface-hub, transformers, quadprog\n", + "Successfully installed click-8.0.3 filelock-3.3.2 huggingface-hub-0.1.1 importlib-metadata-4.8.1 joblib-1.1.0 numpy-1.21.5 packaging-21.2 pyparsing-2.4.7 pyyaml-6.0 quadprog-0.1.10 regex-2021.11.10 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.10.2 typing-extensions-4.0.1 zipp-3.6.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!mamba env update -n base -f _testrequirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xfeHZQbUqhWo", + "outputId": "eff92688-e032-4d0b-c994-10cb8a596981" + }, + "execution_count": 7, + "outputs": [ + { + 
"output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K\rconda-forge/linux-64 [] (00m:00s) \n", + "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rhuggingface/linux-64 [] (--:--) Finalizing...\n", + "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rhuggingface/linux-64 [] (--:--) Done\n", + "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rhuggingface/linux-64 [] (00m:00s) Done\n", + "\u001b[2A\u001b[0K\u001b[2K\rhuggingface/linux-64 [] (00m:00s) Done\n", + "\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rhuggingface/noarch [] (--:--) Finalizing...\n", + "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rhuggingface/noarch [] (--:--) Done\n", + "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rhuggingface/noarch [] (00m:00s) Done\n", + "\u001b[2A\u001b[0K\u001b[2K\rhuggingface/noarch [] (00m:00s) Done\n", + "\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rpytorch/noarch [] (--:--) Finalizing...\n", + "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rpytorch/noarch [] (--:--) Done\n", + "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "\u001b[2K\rpytorch/noarch [] (00m:00s) Done\n", + "\u001b[2A\u001b[0K\u001b[2K\rpytorch/noarch [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? 
(1.11 MB/s)\n", + "pytorch/linux-64 [] (--:--) Finalizing...\n", + "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "pytorch/linux-64 [] (--:--) Done\n", + "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "pytorch/linux-64 [] (00m:00s) Done\n", + "pytorch/linux-64 [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) \n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) \n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/noarch [] (00m:00s) \n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/noarch [] (00m:00s) 312 KB / ?? (587.59 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/noarch [] (00m:00s) Finalizing...\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? 
(647.41 KB/s)\n", + "pkgs/r/noarch [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/noarch [] (00m:00s) Done\n", + "pkgs/r/noarch [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) \n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) Finalizing...\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) Done\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? 
(617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/main/noarch [] (00m:00s) Done\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "pkgs/main/noarch [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) Finalizing...\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/r/linux-64 [] (00m:00s) Done\n", + "pkgs/r/linux-64 [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) \n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? 
(1.96 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? 
(3.04 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 4 MB / ?? (3.26 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 4 MB / ?? (3.26 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n", + "conda-forge/linux-64 [] (00m:00s) 4 MB / ?? (3.26 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? 
(2.30 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.26 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.26 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.26 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 5 MB / ?? (3.22 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:00s) 5 MB / ?? (3.22 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) Finalizing...\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n", + "pkgs/main/linux-64 [] (00m:00s) Done\n", + "pkgs/main/linux-64 [] (00m:00s) Done\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? 
(3.18 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 6 MB / ?? (3.45 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 6 MB / ?? (3.45 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n", + "conda-forge/noarch [] (00m:01s) 6 MB / ?? (3.45 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n", + "conda-forge/noarch [] (00m:01s) Finalizing...\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n", + "conda-forge/noarch [] (00m:01s) Done\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n", + "conda-forge/noarch [] (00m:01s) Done\n", + "conda-forge/noarch [] (00m:01s) Done\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n", + "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (2.94 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 6 MB / ?? (2.94 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.71 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.71 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.78 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.78 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 10 MB / ?? 
(3.85 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 10 MB / ?? (3.85 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 11 MB / ?? (3.90 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 11 MB / ?? (3.90 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (3.97 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (3.97 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (4.01 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (4.01 MB/s)\n", + "conda-forge/linux-64 [] (00m:02s) 13 MB / ?? (4.02 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 13 MB / ?? (4.02 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.03 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.06 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.06 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 15 MB / ?? (4.09 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 15 MB / ?? (4.09 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 16 MB / ?? (4.13 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 16 MB / ?? (4.13 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.16 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.16 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.19 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.19 MB/s)\n", + "conda-forge/linux-64 [] (00m:03s) 18 MB / ?? (4.23 MB/s)\n", + "conda-forge/linux-64 [] (00m:04s) 18 MB / ?? (4.23 MB/s)\n", + "conda-forge/linux-64 [] (00m:04s) 19 MB / ?? 
(4.26 MB/s)\n", + "conda-forge/linux-64 [] (00m:04s) Finalizing...\n", + "conda-forge/linux-64 [] (00m:04s) Done\n", + "conda-forge/linux-64 [] (00m:04s) Done\n", + "conda-forge/linux-64 [] (00m:04s) Done\n", + "\n", + "\n", + "Looking for: ['blas', 'boto3', 'botocore', 'brotlipy', 'bzip2', 'ca-certificates', 'certifi', 'cffi', 'charset-normalizer', 'click', 'cryptography', 'cudatoolkit', 'ffmpeg', 'filelock', 'freetype', 'giflib', 'gmp', 'gnutls', 'idna', 'importlib-metadata', 'intel-openmp', 'jmespath', 'joblib', 'jpeg', 'lame', 'lcms2', 'ld_impl_linux-64', 'libffi', 'libgcc-ng', 'libgfortran-ng', 'libgfortran4', 'libgomp', 'libiconv', 'libidn2', 'libpng', 'libstdcxx-ng', 'libtasn1', 'libtiff', 'libunistring', 'libuv', 'libwebp', 'libwebp-base', 'lz4-c', 'mkl', 'mkl-service', 'mkl_fft', 'mkl_random', 'ncurses', 'nettle', 'ninja', 'numpy', 'numpy-base', 'olefile', 'openh264', 'openssl', 'packaging', 'pillow', 'pip', 'pycparser', 'pyopenssl', 'pyparsing', 'pysocks', 'python==3.7.11', 'python-dateutil==2.8.2', 'pytorch==1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0', 'pytorch-mutex==1.0=cuda', 'pyyaml', 'quadprog', 'readline', 'regex', 'requests', 's3transfer', 'sacremoses', 'scikit-learn', 'scipy', 'setuptools', 'six', 'sqlite', 'threadpoolctl', 'tk', 'tokenizers', 'torchaudio', 'torchvision', 'tqdm', 'transformers=4.10.2', 'typing_extensions', 'urllib3', 'wheel', 'xz', 'zipp', 'zlib', 'zstd']\n", + "\n", + "\n", + "Transaction\n", + "\n", + " Prefix: /usr/local\n", + "\n", + " Updating specs:\n", + "\n", + " - blas\n", + " - boto3\n", + " - botocore\n", + " - brotlipy\n", + " - bzip2\n", + " - ca-certificates\n", + " - certifi\n", + " - cffi\n", + " - charset-normalizer\n", + " - click\n", + " - cryptography\n", + " - cudatoolkit\n", + " - ffmpeg\n", + " - filelock\n", + " - freetype\n", + " - giflib\n", + " - gmp\n", + " - gnutls\n", + " - idna\n", + " - importlib-metadata\n", + " - intel-openmp\n", + " - jmespath\n", + " - joblib\n", + " - jpeg\n", + " - lame\n", + " - 
lcms2\n", + " - ld_impl_linux-64\n", + " - libffi\n", + " - libgcc-ng\n", + " - libgfortran-ng\n", + " - libgfortran4\n", + " - libgomp\n", + " - libiconv\n", + " - libidn2\n", + " - libpng\n", + " - libstdcxx-ng\n", + " - libtasn1\n", + " - libtiff\n", + " - libunistring\n", + " - libuv\n", + " - libwebp\n", + " - libwebp-base\n", + " - lz4-c\n", + " - mkl\n", + " - mkl-service\n", + " - mkl_fft\n", + " - mkl_random\n", + " - ncurses\n", + " - nettle\n", + " - ninja\n", + " - numpy\n", + " - numpy-base\n", + " - olefile\n", + " - openh264\n", + " - openssl\n", + " - packaging\n", + " - pillow\n", + " - pip\n", + " - pycparser\n", + " - pyopenssl\n", + " - pyparsing\n", + " - pysocks\n", + " - python==3.7.11\n", + " - python-dateutil==2.8.2\n", + " - pytorch==1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0\n", + " - pytorch-mutex==1.0=cuda\n", + " - pyyaml\n", + " - quadprog\n", + " - readline\n", + " - regex\n", + " - requests\n", + " - s3transfer\n", + " - sacremoses\n", + " - scikit-learn\n", + " - scipy\n", + " - setuptools\n", + " - six\n", + " - sqlite\n", + " - threadpoolctl\n", + " - tk\n", + " - tokenizers\n", + " - torchaudio\n", + " - torchvision\n", + " - tqdm\n", + " - transformers==4.10.2\n", + " - typing_extensions\n", + " - urllib3\n", + " - wheel\n", + " - xz\n", + " - zipp\n", + " - zlib\n", + " - zstd\n", + "\n", + "\n", + " Package Version Build Channel Size\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────\n", + " Install:\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "\n", + "\u001b[32m aom \u001b[00m 3.2.0 h9c3ff4c_2 conda-forge/linux-64 5 MB\n", + "\u001b[32m blas \u001b[00m 1.0 mkl pkgs/main/linux-64 6 KB\n", + "\u001b[32m boto3 \u001b[00m 1.20.26 pyhd8ed1ab_0 conda-forge/noarch 71 KB\n", + "\u001b[32m botocore \u001b[00m 1.23.26 pyhd8ed1ab_0 conda-forge/noarch 5 MB\n", + "\u001b[32m charset-normalizer\u001b[00m 2.0.9 
pyhd8ed1ab_0 conda-forge/noarch 34 KB\n", + "\u001b[32m click \u001b[00m 8.0.3 py37h89c1867_1 conda-forge/linux-64 145 KB\n", + "\u001b[32m cudatoolkit \u001b[00m 11.0.221 h6bb024c_0 pkgs/main/linux-64 623 MB\n", + "\u001b[32m cython \u001b[00m 0.29.26 py37hcd2ae1e_0 conda-forge/linux-64 2 MB\n", + "\u001b[32m dataclasses \u001b[00m 0.8 pyhc8e2a94_3 conda-forge/noarch 10 KB\n", + "\u001b[32m ffmpeg \u001b[00m 4.4.1 h6987444_0 conda-forge/linux-64 10 MB\n", + "\u001b[32m filelock \u001b[00m 3.4.2 pyhd8ed1ab_0 conda-forge/noarch 12 KB\n", + "\u001b[32m freetype \u001b[00m 2.11.0 h70c0345_0 pkgs/main/linux-64 618 KB\n", + "\u001b[32m gettext \u001b[00m 0.21.0 hf68c758_0 pkgs/main/linux-64 3 MB\n", + "\u001b[32m giflib \u001b[00m 5.2.1 h516909a_2 conda-forge/linux-64 80 KB\n", + "\u001b[32m gmp \u001b[00m 6.2.1 h58526e2_0 conda-forge/linux-64 806 KB\n", + "\u001b[32m gnutls \u001b[00m 3.6.15 he1e5248_0 pkgs/main/linux-64 1 MB\n", + "\u001b[32m huggingface_hub \u001b[00m 0.2.1 pyhd8ed1ab_0 conda-forge/noarch 49 KB\n", + "\u001b[32m importlib-metadata\u001b[00m 4.10.0 py37h89c1867_0 conda-forge/linux-64 32 KB\n", + "\u001b[32m importlib_metadata\u001b[00m 4.10.0 hd8ed1ab_0 conda-forge/noarch 4 KB\n", + "\u001b[32m intel-openmp \u001b[00m 2021.4.0 h06a4308_3561 pkgs/main/linux-64 4 MB\n", + "\u001b[32m jbig \u001b[00m 2.1 h7f98852_2003 conda-forge/linux-64 43 KB\n", + "\u001b[32m jmespath \u001b[00m 0.10.0 pyh9f0ad1d_0 conda-forge/noarch 21 KB\n", + "\u001b[32m joblib \u001b[00m 1.1.0 pyhd8ed1ab_0 conda-forge/noarch 210 KB\n", + "\u001b[32m jpeg \u001b[00m 9d h516909a_0 conda-forge/linux-64 266 KB\n", + "\u001b[32m lame \u001b[00m 3.100 h14c3975_1001 conda-forge/linux-64 498 KB\n", + "\u001b[32m lcms2 \u001b[00m 2.12 hddcbb42_0 conda-forge/linux-64 443 KB\n", + "\u001b[32m lerc \u001b[00m 3.0 h9c3ff4c_0 conda-forge/linux-64 216 KB\n", + "\u001b[32m libblas \u001b[00m 3.9.0 12_linux64_mkl conda-forge/linux-64 12 KB\n", + "\u001b[32m libcblas \u001b[00m 3.9.0 12_linux64_mkl 
conda-forge/linux-64 12 KB\n", + "\u001b[32m libdeflate \u001b[00m 1.8 h7f98852_0 conda-forge/linux-64 67 KB\n", + "\u001b[32m libdrm \u001b[00m 2.4.109 h7f98852_0 conda-forge/linux-64 284 KB\n", + "\u001b[32m libgfortran-ng \u001b[00m 7.5.0 h14aa051_19 conda-forge/linux-64 22 KB\n", + "\u001b[32m libgfortran4 \u001b[00m 7.5.0 h14aa051_19 conda-forge/linux-64 1 MB\n", + "\u001b[32m libidn2 \u001b[00m 2.3.2 h7f98852_0 conda-forge/linux-64 98 KB\n", + "\u001b[32m libpciaccess \u001b[00m 0.16 h516909a_0 conda-forge/linux-64 37 KB\n", + "\u001b[32m libpng \u001b[00m 1.6.37 hed695b0_2 conda-forge/linux-64 359 KB\n", + "\u001b[32m libprotobuf \u001b[00m 3.19.1 h780b84a_0 conda-forge/linux-64 3 MB\n", + "\u001b[32m libtasn1 \u001b[00m 4.18.0 h7f98852_0 conda-forge/linux-64 114 KB\n", + "\u001b[32m libtiff \u001b[00m 4.3.0 h6f004c6_2 conda-forge/linux-64 614 KB\n", + "\u001b[32m libunistring \u001b[00m 0.9.10 h14c3975_0 conda-forge/linux-64 1 MB\n", + "\u001b[32m libuv \u001b[00m 1.42.0 h7f98852_0 conda-forge/linux-64 1 MB\n", + "\u001b[32m libva \u001b[00m 2.13.0 h7f98852_1 conda-forge/linux-64 165 KB\n", + "\u001b[32m libvpx \u001b[00m 1.11.0 h9c3ff4c_3 conda-forge/linux-64 1 MB\n", + "\u001b[32m libwebp \u001b[00m 1.2.1 h3452ae3_0 conda-forge/linux-64 84 KB\n", + "\u001b[32m libwebp-base \u001b[00m 1.2.1 h7f98852_0 conda-forge/linux-64 845 KB\n", + "\u001b[32m libzlib \u001b[00m 1.2.11 h36c2ea0_1013 conda-forge/linux-64 59 KB\n", + "\u001b[32m llvm-openmp \u001b[00m 12.0.1 h4bd325d_1 conda-forge/linux-64 3 MB\n", + "\u001b[32m mkl \u001b[00m 2021.4.0 h8d4b97c_729 conda-forge/linux-64 219 MB\n", + "\u001b[32m mkl-service \u001b[00m 2.4.0 py37h402132d_0 conda-forge/linux-64 60 KB\n", + "\u001b[32m mkl_fft \u001b[00m 1.3.1 py37h3e078e5_1 conda-forge/linux-64 206 KB\n", + "\u001b[32m mkl_random \u001b[00m 1.2.2 py37h219a48f_0 conda-forge/linux-64 361 KB\n", + "\u001b[32m nettle \u001b[00m 3.7.3 hbbd107a_1 pkgs/main/linux-64 809 KB\n", + "\u001b[32m ninja \u001b[00m 1.10.2 
h4bd325d_1 conda-forge/linux-64 2 MB\n", + "\u001b[32m numpy \u001b[00m 1.21.2 py37h20f2e39_0 pkgs/main/linux-64 23 KB\n", + "\u001b[32m numpy-base \u001b[00m 1.21.2 py37h79a1101_0 pkgs/main/linux-64 5 MB\n", + "\u001b[32m olefile \u001b[00m 0.46 pyh9f0ad1d_1 conda-forge/noarch 32 KB\n", + "\u001b[32m openh264 \u001b[00m 2.1.1 h780b84a_0 conda-forge/linux-64 2 MB\n", + "\u001b[32m openjpeg \u001b[00m 2.4.0 hb52868f_1 conda-forge/linux-64 444 KB\n", + "\u001b[32m packaging \u001b[00m 21.3 pyhd8ed1ab_0 conda-forge/noarch 36 KB\n", + "\u001b[32m pillow \u001b[00m 8.4.0 py37h0f21c89_0 conda-forge/linux-64 706 KB\n", + "\u001b[32m protobuf \u001b[00m 3.19.1 py37hcd2ae1e_1 conda-forge/linux-64 326 KB\n", + "\u001b[32m pyparsing \u001b[00m 3.0.6 pyhd8ed1ab_0 conda-forge/noarch 79 KB\n", + "\u001b[32m python-dateutil \u001b[00m 2.8.2 pyhd8ed1ab_0 conda-forge/noarch 240 KB\n", + "\u001b[32m pytorch \u001b[00m 1.7.0 py3.7_cuda11.0.221_cudnn8.0.3_0 pytorch/linux-64 661 MB\n", + "\u001b[32m pytorch-cpu \u001b[00m 1.1.0 py3.7_cpu_0 pytorch/linux-64 54 MB\n", + "\u001b[32m pytorch-mutex \u001b[00m 1.0 cuda pytorch/noarch 3 KB\n", + "\u001b[32m pyyaml \u001b[00m 6.0 py37h5e8e339_3 conda-forge/linux-64 187 KB\n", + "\u001b[32m quadprog \u001b[00m 0.1.11 py37h2527ec5_0 conda-forge/linux-64 106 KB\n", + "\u001b[32m regex \u001b[00m 2021.11.10 py37h5e8e339_0 conda-forge/linux-64 380 KB\n", + "\u001b[32m s3transfer \u001b[00m 0.5.0 pyhd8ed1ab_0 conda-forge/noarch 55 KB\n", + "\u001b[32m sacremoses \u001b[00m 0.0.46 pyhd8ed1ab_0 conda-forge/noarch 466 KB\n", + "\u001b[32m scikit-learn \u001b[00m 1.0.2 py37hf9e9bfc_0 conda-forge/linux-64 8 MB\n", + "\u001b[32m scipy \u001b[00m 1.7.1 py37h292c36d_2 pkgs/main/linux-64 16 MB\n", + "\u001b[32m svt-av1 \u001b[00m 0.8.7 h9c3ff4c_1 conda-forge/linux-64 3 MB\n", + "\u001b[32m tbb \u001b[00m 2021.5.0 h4bd325d_0 conda-forge/linux-64 2 MB\n", + "\u001b[32m threadpoolctl \u001b[00m 3.0.0 pyh8a188c0_0 conda-forge/noarch 17 KB\n", + "\u001b[32m 
tokenizers \u001b[00m 0.10.3 py37_0 huggingface/linux-64 3 MB\n", + "\u001b[32m torchaudio \u001b[00m 0.7.0 py37 pytorch/linux-64 10 MB\n", + "\u001b[32m torchvision \u001b[00m 0.10.1 py37h9e046cd_0_cpu conda-forge/linux-64 7 MB\n", + "\u001b[32m transformers \u001b[00m 4.10.2 py_0 huggingface/noarch 1 MB\n", + "\u001b[32m typing-extensions \u001b[00m 4.0.1 hd8ed1ab_0 conda-forge/noarch 8 KB\n", + "\u001b[32m typing_extensions \u001b[00m 4.0.1 pyha770c72_0 conda-forge/noarch 26 KB\n", + "\u001b[32m x264 \u001b[00m 1!161.3030 h7f98852_1 conda-forge/linux-64 2 MB\n", + "\u001b[32m x265 \u001b[00m 3.5 h4bd325d_1 conda-forge/linux-64 7 MB\n", + "\u001b[32m zipp \u001b[00m 3.6.0 pyhd8ed1ab_0 conda-forge/noarch 12 KB\n", + "\n", + " Change:\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "\n", + "\u001b[31m _openmp_mutex \u001b[00m 4.5 1_gnu installed \n", + "\u001b[32m _openmp_mutex \u001b[00m 4.5 1_llvm conda-forge/linux-64 5 KB\n", + "\u001b[31m zlib \u001b[00m 1.2.11 h516909a_1010 installed \n", + "\u001b[32m zlib \u001b[00m 1.2.11 h36c2ea0_1013 conda-forge/linux-64 86 KB\n", + "\n", + " Upgrade:\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "\n", + "\u001b[31m libarchive \u001b[00m 3.5.1 h3f442fb_1 installed \n", + "\u001b[32m libarchive \u001b[00m 3.5.2 hccf745f_1 conda-forge/linux-64 2 MB\n", + "\u001b[31m libgcc-ng \u001b[00m 9.3.0 h2828fa1_18 installed \n", + "\u001b[32m libgcc-ng \u001b[00m 11.2.0 h1d223b6_11 conda-forge/linux-64 887 KB\n", + "\u001b[31m libgomp \u001b[00m 9.3.0 h2828fa1_18 installed \n", + "\u001b[32m libgomp \u001b[00m 11.2.0 h1d223b6_11 conda-forge/linux-64 427 KB\n", + "\u001b[31m libstdcxx-ng \u001b[00m 9.3.0 h6de172a_18 installed \n", + "\u001b[32m libstdcxx-ng \u001b[00m 11.2.0 he4da1e4_11 conda-forge/linux-64 4 MB\n", + "\u001b[31m libxml2 \u001b[00m 2.9.10 h72842e0_3 installed \n", + "\u001b[32m libxml2 
\u001b[00m 2.9.12 h72842e0_0 conda-forge/linux-64 772 KB\n", + "\u001b[31m openssl \u001b[00m 1.1.1j h7f98852_0 installed \n", + "\u001b[32m openssl \u001b[00m 1.1.1l h7f98852_0 conda-forge/linux-64 2 MB\n", + "\u001b[31m python \u001b[00m 3.7.10 hffdb5ce_100_cpython installed \n", + "\u001b[32m python \u001b[00m 3.7.11 h12debd9_0 pkgs/main/linux-64 45 MB\n", + "\u001b[31m sqlite \u001b[00m 3.34.0 h74cdb3f_0 installed \n", + "\u001b[32m sqlite \u001b[00m 3.37.0 hc218d9a_0 pkgs/main/linux-64 999 KB\n", + "\u001b[31m tk \u001b[00m 8.6.10 h21135ba_1 installed \n", + "\u001b[32m tk \u001b[00m 8.6.11 h27826a3_1 conda-forge/linux-64 3 MB\n", + "\u001b[31m zstd \u001b[00m 1.4.9 ha95c52a_0 installed \n", + "\u001b[32m zstd \u001b[00m 1.5.1 ha95c52a_0 conda-forge/linux-64 463 KB\n", + "\n", + " Summary:\n", + "\n", + " Install: 85 packages\n", + " Change: 2 packages\n", + " Upgrade: 10 packages\n", + "\n", + " Total download: 2 GB\n", + "\n", + "────────────────────────────────────────────────────────────────────────────────────────────────────\n", + "\n", + "\n", + "Downloading and Extracting Packages\n", + "libwebp-base-1.2.1 | 845 KB | : 100% 1.0/1 [00:00<00:00, 3.10it/s]\n", + "cython-0.29.26 | 2.2 MB | : 100% 1.0/1 [00:00<00:00, 2.00it/s]\n", + "jmespath-0.10.0 | 21 KB | : 100% 1.0/1 [00:00<00:00, 20.82it/s]\n", + "libblas-3.9.0 | 12 KB | : 100% 1.0/1 [00:00<00:00, 23.20it/s]\n", + "libdrm-2.4.109 | 284 KB | : 100% 1.0/1 [00:00<00:00, 10.61it/s]\n", + "huggingface_hub-0.2. 
| 49 KB | : 100% 1.0/1 [00:00<00:00, 15.69it/s]\n", + "libpng-1.6.37 | 359 KB | : 100% 1.0/1 [00:00<00:00, 9.20it/s]\n", + "libgfortran-ng-7.5.0 | 22 KB | : 100% 1.0/1 [00:00<00:00, 22.13it/s]\n", + "regex-2021.11.10 | 380 KB | : 100% 1.0/1 [00:00<00:00, 8.06it/s]\n", + "protobuf-3.19.1 | 326 KB | : 100% 1.0/1 [00:00<00:00, 6.84it/s]\n", + "pyparsing-3.0.6 | 79 KB | : 100% 1.0/1 [00:00<00:00, 15.31it/s]\n", + "libva-2.13.0 | 165 KB | : 100% 1.0/1 [00:00<00:00, 12.89it/s]\n", + "pytorch-cpu-1.1.0 | 53.6 MB | : 100% 1.0/1 [00:11<00:00, 11.45s/it] \n", + "lerc-3.0 | 216 KB | : 100% 1.0/1 [00:00<00:00, 10.80it/s]\n", + "botocore-1.23.26 | 5.2 MB | : 100% 1.0/1 [00:02<00:00, 2.18s/it]\n", + "tokenizers-0.10.3 | 2.8 MB | : 100% 1.0/1 [00:00<00:00, 1.45it/s]\n", + "gettext-0.21.0 | 2.6 MB | : 100% 1.0/1 [00:00<00:00, 3.19it/s]\n", + "torchaudio-0.7.0 | 9.8 MB | : 100% 1.0/1 [00:02<00:00, 2.21s/it] \n", + "jpeg-9d | 266 KB | : 100% 1.0/1 [00:00<00:00, 11.11it/s]\n", + "libtasn1-4.18.0 | 114 KB | : 100% 1.0/1 [00:00<00:00, 15.16it/s]\n", + "python-dateutil-2.8. 
| 240 KB | : 100% 1.0/1 [00:00<00:00, 13.49it/s]\n", + "openjpeg-2.4.0 | 444 KB | : 100% 1.0/1 [00:00<00:00, 7.84it/s]\n", + "libstdcxx-ng-11.2.0 | 4.2 MB | : 100% 1.0/1 [00:00<00:00, 1.47it/s]\n", + "llvm-openmp-12.0.1 | 2.8 MB | : 100% 1.0/1 [00:00<00:00, 1.89it/s]\n", + "packaging-21.3 | 36 KB | : 100% 1.0/1 [00:00<00:00, 17.86it/s]\n", + "click-8.0.3 | 145 KB | : 100% 1.0/1 [00:00<00:00, 13.01it/s]\n", + "pillow-8.4.0 | 706 KB | : 100% 1.0/1 [00:00<00:00, 5.16it/s]\n", + "sqlite-3.37.0 | 999 KB | : 100% 1.0/1 [00:00<00:00, 1.88it/s]\n", + "tk-8.6.11 | 3.3 MB | : 100% 1.0/1 [00:00<00:00, 1.59it/s]\n", + "aom-3.2.0 | 5.4 MB | : 100% 1.0/1 [00:00<00:00, 1.09it/s] \n", + "giflib-5.2.1 | 80 KB | : 100% 1.0/1 [00:00<00:00, 17.39it/s]\n", + "torchvision-0.10.1 | 6.7 MB | : 100% 1.0/1 [00:00<00:00, 1.16it/s]\n", + "libidn2-2.3.2 | 98 KB | : 100% 1.0/1 [00:00<00:00, 19.48it/s]\n", + "mkl_fft-1.3.1 | 206 KB | : 100% 1.0/1 [00:00<00:00, 12.22it/s]\n", + "tbb-2021.5.0 | 2.0 MB | : 100% 1.0/1 [00:00<00:00, 2.92it/s]\n", + "libuv-1.42.0 | 1.0 MB | : 100% 1.0/1 [00:00<00:00, 5.01it/s]\n", + "mkl-service-2.4.0 | 60 KB | : 100% 1.0/1 [00:00<00:00, 20.79it/s]\n", + "threadpoolctl-3.0.0 | 17 KB | : 100% 1.0/1 [00:00<00:00, 22.00it/s]\n", + "numpy-base-1.21.2 | 4.8 MB | : 100% 1.0/1 [00:00<00:00, 2.52it/s]\n", + "svt-av1-0.8.7 | 2.8 MB | : 100% 1.0/1 [00:00<00:00, 2.17it/s]\n", + "scipy-1.7.1 | 16.4 MB | : 100% 1.0/1 [00:00<00:00, 1.28it/s]\n", + "libzlib-1.2.11 | 59 KB | : 100% 1.0/1 [00:00<00:00, 24.81it/s]\n", + "cudatoolkit-11.0.221 | 622.9 MB | : 100% 1.0/1 [00:45<00:00, 45.59s/it] \n", + "libxml2-2.9.12 | 772 KB | : 100% 1.0/1 [00:00<00:00, 4.82it/s]\n", + "gmp-6.2.1 | 806 KB | : 100% 1.0/1 [00:00<00:00, 5.85it/s]\n", + "charset-normalizer-2 | 34 KB | : 100% 1.0/1 [00:00<00:00, 20.05it/s]\n", + "libtiff-4.3.0 | 614 KB | : 100% 1.0/1 [00:00<00:00, 5.97it/s]\n", + "libvpx-1.11.0 | 1.1 MB | : 100% 1.0/1 [00:00<00:00, 5.03it/s]\n", + "filelock-3.4.2 | 12 KB | : 100% 1.0/1 
[00:00<00:00, 17.87it/s]\n", + "libunistring-0.9.10 | 1.4 MB | : 100% 1.0/1 [00:00<00:00, 3.76it/s]\n", + "jbig-2.1 | 43 KB | : 100% 1.0/1 [00:00<00:00, 20.01it/s]\n", + "zlib-1.2.11 | 86 KB | : 100% 1.0/1 [00:00<00:00, 19.96it/s]\n", + "x264-1!161.3030 | 2.5 MB | : 100% 1.0/1 [00:00<00:00, 2.35it/s]\n", + "importlib_metadata-4 | 4 KB | : 100% 1.0/1 [00:00<00:00, 19.23it/s]\n", + "scikit-learn-1.0.2 | 7.8 MB | : 100% 1.0/1 [00:01<00:00, 1.61s/it] \n", + "libwebp-1.2.1 | 84 KB | : 100% 1.0/1 [00:00<00:00, 17.17it/s]\n", + "s3transfer-0.5.0 | 55 KB | : 100% 1.0/1 [00:00<00:00, 14.56it/s]\n", + "gnutls-3.6.15 | 1.0 MB | : 100% 1.0/1 [00:00<00:00, 7.94it/s]\n", + "lame-3.100 | 498 KB | : 100% 1.0/1 [00:00<00:00, 8.04it/s]\n", + "libpciaccess-0.16 | 37 KB | : 100% 1.0/1 [00:00<00:00, 23.57it/s]\n", + "_openmp_mutex-4.5 | 5 KB | : 100% 1.0/1 [00:00<00:00, 26.90it/s]\n", + "zstd-1.5.1 | 463 KB | : 100% 1.0/1 [00:00<00:00, 8.12it/s]\n", + "quadprog-0.1.11 | 106 KB | : 100% 1.0/1 [00:00<00:00, 10.03it/s]\n", + "libgcc-ng-11.2.0 | 887 KB | : 100% 1.0/1 [00:00<00:00, 5.27it/s]\n", + "python-3.7.11 | 45.3 MB | : 100% 1.0/1 [00:02<00:00, 2.89s/it]\n", + "transformers-4.10.2 | 1.3 MB | : 100% 1.0/1 [00:00<00:00, 1.52it/s]\n", + "libgomp-11.2.0 | 427 KB | : 100% 1.0/1 [00:00<00:00, 10.06it/s]\n", + "zipp-3.6.0 | 12 KB | : 100% 1.0/1 [00:00<00:00, 23.79it/s]\n", + "blas-1.0 | 6 KB | : 100% 1.0/1 [00:00<00:00, 12.83it/s]\n", + "ninja-1.10.2 | 2.4 MB | : 100% 1.0/1 [00:00<00:00, 2.30it/s]\n", + "mkl-2021.4.0 | 219.1 MB | : 100% 1.0/1 [00:40<00:00, 40.70s/it] \n", + "openssl-1.1.1l | 2.1 MB | : 100% 1.0/1 [00:00<00:00, 2.21it/s]\n", + "lcms2-2.12 | 443 KB | : 100% 1.0/1 [00:00<00:00, 8.03it/s]\n", + "dataclasses-0.8 | 10 KB | : 100% 1.0/1 [00:00<00:00, 16.97it/s]\n", + "typing_extensions-4. 
| 26 KB | : 100% 1.0/1 [00:00<00:00, 22.57it/s]\n", + "libcblas-3.9.0 | 12 KB | : 100% 1.0/1 [00:00<00:00, 23.54it/s]\n", + "openh264-2.1.1 | 1.5 MB | : 100% 1.0/1 [00:00<00:00, 3.35it/s]\n", + "pytorch-1.7.0 | 661.4 MB | : 100% 1.0/1 [01:41<00:00, 101.98s/it] \n", + "libarchive-3.5.2 | 1.6 MB | : 100% 1.0/1 [00:00<00:00, 1.83it/s]\n", + "pyyaml-6.0 | 187 KB | : 100% 1.0/1 [00:00<00:00, 12.94it/s]\n", + "intel-openmp-2021.4. | 4.2 MB | : 100% 1.0/1 [00:00<00:00, 3.81it/s]\n", + "importlib-metadata-4 | 32 KB | : 100% 1.0/1 [00:00<00:00, 19.21it/s]\n", + "nettle-3.7.3 | 809 KB | : 100% 1.0/1 [00:00<00:00, 8.74it/s]\n", + "libprotobuf-3.19.1 | 2.6 MB | : 100% 1.0/1 [00:00<00:00, 1.78it/s]\n", + "libgfortran4-7.5.0 | 1.3 MB | : 100% 1.0/1 [00:00<00:00, 3.43it/s]\n", + "mkl_random-1.2.2 | 361 KB | : 100% 1.0/1 [00:00<00:00, 10.56it/s]\n", + "joblib-1.1.0 | 210 KB | : 100% 1.0/1 [00:00<00:00, 9.03it/s]\n", + "freetype-2.11.0 | 618 KB | : 100% 1.0/1 [00:00<00:00, 4.87it/s]\n", + "sacremoses-0.0.46 | 466 KB | : 100% 1.0/1 [00:00<00:00, 7.59it/s]\n", + "x265-3.5 | 6.7 MB | : 100% 1.0/1 [00:01<00:00, 1.56s/it]\n", + "typing-extensions-4. 
| 8 KB | : 100% 1.0/1 [00:00<00:00, 27.50it/s]\n", + "boto3-1.20.26 | 71 KB | : 100% 1.0/1 [00:00<00:00, 14.23it/s]\n", + "pytorch-mutex-1.0 | 3 KB | : 100% 1.0/1 [00:00<00:00, 2.80it/s]\n", + "libdeflate-1.8 | 67 KB | : 100% 1.0/1 [00:00<00:00, 20.23it/s]\n", + "ffmpeg-4.4.1 | 10.0 MB | : 100% 1.0/1 [00:01<00:00, 1.51s/it]\n", + "numpy-1.21.2 | 23 KB | : 100% 1.0/1 [00:00<00:00, 12.12it/s]\n", + "olefile-0.46 | 32 KB | : 100% 1.0/1 [00:00<00:00, 19.75it/s]\n", + "Preparing transaction: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\bdone\n", + "Verifying transaction: / \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\bdone\n", + "Executing transaction: \\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ By downloading and using the CUDA Toolkit conda packages, you accept the terms and conditions of the CUDA End User License Agreement (EULA): https://docs.nvidia.com/cuda/eula/index.html\n", + "\n", + "\b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\bdone\n", + "#\n", + "# To activate this environment, use\n", + "#\n", + "# $ conda activate base\n", + "#\n", + "# To deactivate an active environment, use\n", + "#\n", + "# $ conda deactivate\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%shell\n", + "eval \"$(conda shell.bash hook)\" # copy conda command to shell\n", + "conda 
activate base\n", + "python --version\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QZ8cYxLKkKKp", + "outputId": "d76858a9-3315-43d4-eb19-80a7a3ad6da1" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Python 3.7.11\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "code", + "source": [ + "cp data_prep/asc_random ../" + ], + "metadata": { + "id": "9q4EnaNqrN3W" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!conda install -c pytorch torchvision cudatoolkit=10.1 pytorch" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "g5OM5VemZZrO", + "outputId": "6fa7566f-8b26-4f85-8a35-751712af5414" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting package metadata (current_repodata.json): - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\bdone\n", + "Solving environment: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ WARNING conda.core.solve:_add_specs(611): pinned spec cudatoolkit=11.1 conflicts with explicit specs. 
Overriding pinned spec.\n", + "\b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\bdone\n", + "\n", + "# All requested packages already installed.\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "" + ], + "metadata": { + "id": "xq5XI1NGZZcA" + } + }, + { + "cell_type": "code", + "source": [ + "!conda install -c huggingface tokenizers=0.10.1 transformers=4.6.1" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cH27SpviddOf", + "outputId": "7f50f0d4-d5e7-4943-f1e5-63a576d2c230" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting package metadata (current_repodata.json): - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\bdone\n", + "Solving environment: | \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\bdone\n", + "\n", + "## Package Plan ##\n", + "\n", + " environment location: 
/usr/local\n", + "\n", + " added / updated specs:\n", + " - tokenizers=0.10.1\n", + " - transformers=4.6.1\n", + "\n", + "\n", + "The following packages will be downloaded:\n", + "\n", + " package | build\n", + " ---------------------------|-----------------\n", + " tokenizers-0.10.1 | py37_0 2.8 MB huggingface\n", + " transformers-4.6.1 | py_0 2.1 MB huggingface\n", + " ------------------------------------------------------------\n", + " Total: 4.9 MB\n", + "\n", + "The following packages will be DOWNGRADED:\n", + "\n", + " tokenizers 0.10.3-py37_0 --> 0.10.1-py37_0\n", + " transformers 4.10.2-py_0 --> 4.6.1-py_0\n", + "\n", + "\n", + "\n", + "Downloading and Extracting Packages\n", + "transformers-4.6.1 | 2.1 MB | : 100% 1.0/1 [00:00<00:00, 1.20it/s]\n", + "tokenizers-0.10.1 | 2.8 MB | : 100% 1.0/1 [00:01<00:00, 1.06s/it]\n", + "Preparing transaction: \\ \b\bdone\n", + "Verifying transaction: / \b\bdone\n", + "Executing transaction: \\ \b\bdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!python cuda_python_test.py" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sSbTkL3zci9f", + "outputId": "96e3eb9c-c768-4c87-e193-6da06f1b1794" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1\n", + "Tesla K80\n", + "[CUDA available]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + " !python run.py --bert_model 'bert-base-uncased' --backbone bert_adapter --baseline ctr \t--task asc --eval_batch_size 128 \t--train_batch_size 32 --scenario til_classification --idrandom 0 --use_predefine_args" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6THjeD2XuJwf", + "outputId": "5d8c26e1-073e-4457-e084-cb8b17ed996d" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Load data...\n", + "random_sep: ['XuSemEval14_laptop', 'XuSemEval14_rest', 
'Bing9domains_HitachiRouter', 'Bing9domains_CanonS100', 'Bing9domains_ipod', 'Bing5domains_ApexAD2600Progressive', 'Bing9domains_Nokia6600', 'Bing9domains_DiaperChamp', 'Bing9domains_CanonPowerShotSD500', 'Bing5domains_CreativeLabsNomadJukeboxZenXtra40GB', 'Bing9domains_Norton', 'Bing9domains_MicroMP3', 'Bing3domains_Speaker', 'Bing9domains_LinksysRouter', 'Bing5domains_Nokia6610', 'Bing5domains_NikonCoolpix4300', 'Bing5domains_CanonG3', 'Bing3domains_Computer', 'Bing3domains_Router']\n", + "domains: ['XuSemEval14_rest', 'XuSemEval14_laptop', 'Bing3domains_Speaker', 'Bing3domains_Router', 'Bing3domains_Computer', 'Bing5domains_Nokia6610', 'Bing5domains_NikonCoolpix4300', 'Bing5domains_CreativeLabsNomadJukeboxZenXtra40GB', 'Bing5domains_CanonG3', 'Bing5domains_ApexAD2600Progressive', 'Bing9domains_CanonPowerShotSD500', 'Bing9domains_CanonS100', 'Bing9domains_DiaperChamp', 'Bing9domains_HitachiRouter', 'Bing9domains_ipod', 'Bing9domains_LinksysRouter', 'Bing9domains_MicroMP3', 'Bing9domains_Nokia6600', 'Bing9domains_Norton']\n", + "random_sep: 19\n", + "domains: 19\n", + "dataset: ./dat/absa/XuSemEval/asc/14/laptop\n", + "Downloading: 100% 232k/232k [00:00<00:00, 2.01MB/s]\n", + "Downloading: 100% 28.0/28.0 [00:00<00:00, 22.5kB/s]\n", + "Downloading: 100% 466k/466k [00:00<00:00, 3.21MB/s]\n", + "12/28/2021 23:28:19 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:19 - INFO - preparation - Num examples = 2163\n", + "12/28/2021 23:28:19 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:19 - INFO - preparation - Num steps = 680\n", + "12/28/2021 23:28:19 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:19 - INFO - preparation - Num orig examples = 150\n", + "12/28/2021 23:28:19 - INFO - preparation - Num split examples = 150\n", + "12/28/2021 23:28:19 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:21 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:21 - 
INFO - preparation - Num examples = 638\n", + "12/28/2021 23:28:21 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/XuSemEval/asc/14/rest\n", + "12/28/2021 23:28:24 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:24 - INFO - preparation - Num examples = 3452\n", + "12/28/2021 23:28:24 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:24 - INFO - preparation - Num steps = 1080\n", + "12/28/2021 23:28:24 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:24 - INFO - preparation - Num orig examples = 150\n", + "12/28/2021 23:28:24 - INFO - preparation - Num split examples = 150\n", + "12/28/2021 23:28:24 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:26 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:26 - INFO - preparation - Num examples = 1120\n", + "12/28/2021 23:28:26 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/HitachiRouter\n", + "12/28/2021 23:28:27 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:27 - INFO - preparation - Num examples = 212\n", + "12/28/2021 23:28:27 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:27 - INFO - preparation - Num steps = 70\n", + "12/28/2021 23:28:27 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:27 - INFO - preparation - Num orig examples = 26\n", + "12/28/2021 23:28:27 - INFO - preparation - Num split examples = 26\n", + "12/28/2021 23:28:27 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:28 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:28 - INFO - preparation - Num examples = 27\n", + "12/28/2021 23:28:28 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/CanonS100\n", + "12/28/2021 23:28:29 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:29 - INFO - preparation - Num examples = 
175\n", + "12/28/2021 23:28:29 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:29 - INFO - preparation - Num steps = 60\n", + "12/28/2021 23:28:29 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:29 - INFO - preparation - Num orig examples = 22\n", + "12/28/2021 23:28:29 - INFO - preparation - Num split examples = 22\n", + "12/28/2021 23:28:29 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:30 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:30 - INFO - preparation - Num examples = 22\n", + "12/28/2021 23:28:30 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/ipod\n", + "12/28/2021 23:28:31 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:31 - INFO - preparation - Num examples = 153\n", + "12/28/2021 23:28:31 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:31 - INFO - preparation - Num steps = 50\n", + "12/28/2021 23:28:31 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:31 - INFO - preparation - Num orig examples = 19\n", + "12/28/2021 23:28:31 - INFO - preparation - Num split examples = 19\n", + "12/28/2021 23:28:31 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:32 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:32 - INFO - preparation - Num examples = 20\n", + "12/28/2021 23:28:32 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing5Domains/asc/ApexAD2600Progressive\n", + "12/28/2021 23:28:33 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:33 - INFO - preparation - Num examples = 343\n", + "12/28/2021 23:28:33 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:33 - INFO - preparation - Num steps = 110\n", + "12/28/2021 23:28:33 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:33 - INFO - preparation - Num orig examples = 43\n", + 
"12/28/2021 23:28:33 - INFO - preparation - Num split examples = 43\n", + "12/28/2021 23:28:33 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:34 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:34 - INFO - preparation - Num examples = 43\n", + "12/28/2021 23:28:34 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/Nokia6600\n", + "12/28/2021 23:28:36 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:36 - INFO - preparation - Num examples = 362\n", + "12/28/2021 23:28:36 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:36 - INFO - preparation - Num steps = 120\n", + "12/28/2021 23:28:36 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:36 - INFO - preparation - Num orig examples = 45\n", + "12/28/2021 23:28:36 - INFO - preparation - Num split examples = 45\n", + "12/28/2021 23:28:36 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:37 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:37 - INFO - preparation - Num examples = 46\n", + "12/28/2021 23:28:37 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/DiaperChamp\n", + "12/28/2021 23:28:38 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:38 - INFO - preparation - Num examples = 191\n", + "12/28/2021 23:28:38 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:38 - INFO - preparation - Num steps = 60\n", + "12/28/2021 23:28:38 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:38 - INFO - preparation - Num orig examples = 24\n", + "12/28/2021 23:28:38 - INFO - preparation - Num split examples = 24\n", + "12/28/2021 23:28:38 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:39 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:39 - INFO - preparation - Num examples = 24\n", + "12/28/2021 
23:28:39 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/CanonPowerShotSD500\n", + "12/28/2021 23:28:40 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:40 - INFO - preparation - Num examples = 118\n", + "12/28/2021 23:28:40 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:40 - INFO - preparation - Num steps = 40\n", + "12/28/2021 23:28:40 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:40 - INFO - preparation - Num orig examples = 15\n", + "12/28/2021 23:28:40 - INFO - preparation - Num split examples = 15\n", + "12/28/2021 23:28:40 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:41 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:41 - INFO - preparation - Num examples = 15\n", + "12/28/2021 23:28:41 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing5Domains/asc/CreativeLabsNomadJukeboxZenXtra40GB\n", + "12/28/2021 23:28:42 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:42 - INFO - preparation - Num examples = 677\n", + "12/28/2021 23:28:42 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:42 - INFO - preparation - Num steps = 220\n", + "12/28/2021 23:28:43 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:43 - INFO - preparation - Num orig examples = 85\n", + "12/28/2021 23:28:43 - INFO - preparation - Num split examples = 85\n", + "12/28/2021 23:28:43 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:43 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:43 - INFO - preparation - Num examples = 85\n", + "12/28/2021 23:28:43 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/Norton\n", + "12/28/2021 23:28:44 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:44 - INFO - preparation - Num examples = 194\n", + "12/28/2021 23:28:44 
- INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:44 - INFO - preparation - Num steps = 70\n", + "12/28/2021 23:28:45 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:45 - INFO - preparation - Num orig examples = 24\n", + "12/28/2021 23:28:45 - INFO - preparation - Num split examples = 24\n", + "12/28/2021 23:28:45 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:46 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:46 - INFO - preparation - Num examples = 25\n", + "12/28/2021 23:28:46 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/MicroMP3\n", + "12/28/2021 23:28:47 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:47 - INFO - preparation - Num examples = 484\n", + "12/28/2021 23:28:47 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:47 - INFO - preparation - Num steps = 160\n", + "12/28/2021 23:28:47 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:47 - INFO - preparation - Num orig examples = 61\n", + "12/28/2021 23:28:47 - INFO - preparation - Num split examples = 61\n", + "12/28/2021 23:28:47 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:48 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:48 - INFO - preparation - Num examples = 61\n", + "12/28/2021 23:28:48 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing3Domains/asc/Speaker\n", + "12/28/2021 23:28:49 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:49 - INFO - preparation - Num examples = 352\n", + "12/28/2021 23:28:49 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:49 - INFO - preparation - Num steps = 110\n", + "12/28/2021 23:28:49 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:49 - INFO - preparation - Num orig examples = 44\n", + "12/28/2021 23:28:49 - INFO - preparation - Num 
split examples = 44\n", + "12/28/2021 23:28:49 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:50 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:50 - INFO - preparation - Num examples = 44\n", + "12/28/2021 23:28:50 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing9Domains/asc/LinksysRouter\n", + "12/28/2021 23:28:51 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:51 - INFO - preparation - Num examples = 176\n", + "12/28/2021 23:28:51 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:51 - INFO - preparation - Num steps = 60\n", + "12/28/2021 23:28:51 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:51 - INFO - preparation - Num orig examples = 22\n", + "12/28/2021 23:28:51 - INFO - preparation - Num split examples = 22\n", + "12/28/2021 23:28:51 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:52 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:52 - INFO - preparation - Num examples = 23\n", + "12/28/2021 23:28:52 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing5Domains/asc/Nokia6610\n", + "12/28/2021 23:28:53 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:53 - INFO - preparation - Num examples = 271\n", + "12/28/2021 23:28:53 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:53 - INFO - preparation - Num steps = 90\n", + "12/28/2021 23:28:54 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:54 - INFO - preparation - Num orig examples = 34\n", + "12/28/2021 23:28:54 - INFO - preparation - Num split examples = 34\n", + "12/28/2021 23:28:54 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:55 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:55 - INFO - preparation - Num examples = 34\n", + "12/28/2021 23:28:55 - INFO - preparation - Batch size = 128\n", + 
"dataset: ./dat/absa/Bing5Domains/asc/NikonCoolpix4300\n", + "12/28/2021 23:28:56 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:56 - INFO - preparation - Num examples = 162\n", + "12/28/2021 23:28:56 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:56 - INFO - preparation - Num steps = 60\n", + "12/28/2021 23:28:56 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:56 - INFO - preparation - Num orig examples = 20\n", + "12/28/2021 23:28:56 - INFO - preparation - Num split examples = 20\n", + "12/28/2021 23:28:56 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:57 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:57 - INFO - preparation - Num examples = 21\n", + "12/28/2021 23:28:57 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing5Domains/asc/CanonG3\n", + "12/28/2021 23:28:58 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:28:58 - INFO - preparation - Num examples = 228\n", + "12/28/2021 23:28:58 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:58 - INFO - preparation - Num steps = 80\n", + "12/28/2021 23:28:58 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:28:58 - INFO - preparation - Num orig examples = 29\n", + "12/28/2021 23:28:58 - INFO - preparation - Num split examples = 29\n", + "12/28/2021 23:28:58 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:28:59 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:28:59 - INFO - preparation - Num examples = 29\n", + "12/28/2021 23:28:59 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing3Domains/asc/Computer\n", + "12/28/2021 23:29:00 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:29:00 - INFO - preparation - Num examples = 283\n", + "12/28/2021 23:29:00 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:29:00 - INFO - 
preparation - Num steps = 90\n", + "12/28/2021 23:29:00 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:29:00 - INFO - preparation - Num orig examples = 35\n", + "12/28/2021 23:29:00 - INFO - preparation - Num split examples = 35\n", + "12/28/2021 23:29:00 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:29:01 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:29:01 - INFO - preparation - Num examples = 36\n", + "12/28/2021 23:29:01 - INFO - preparation - Batch size = 128\n", + "dataset: ./dat/absa/Bing3Domains/asc/Router\n", + "12/28/2021 23:29:02 - INFO - preparation - ***** Running training *****\n", + "12/28/2021 23:29:02 - INFO - preparation - Num examples = 245\n", + "12/28/2021 23:29:02 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:29:02 - INFO - preparation - Num steps = 80\n", + "12/28/2021 23:29:02 - INFO - preparation - ***** Running validations *****\n", + "12/28/2021 23:29:02 - INFO - preparation - Num orig examples = 31\n", + "12/28/2021 23:29:02 - INFO - preparation - Num split examples = 31\n", + "12/28/2021 23:29:02 - INFO - preparation - Batch size = 32\n", + "12/28/2021 23:29:03 - INFO - preparation - ***** Running evaluation *****\n", + "12/28/2021 23:29:03 - INFO - preparation - Num examples = 31\n", + "12/28/2021 23:29:03 - INFO - preparation - Batch size = 128\n", + "\n", + "Task info = [(0, 3), (1, 3), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (7, 2), (8, 2), (9, 2), (10, 2), (11, 2), (12, 2), (13, 2), (14, 2), (15, 2), (16, 2), (17, 2), (18, 2)]\n", + "Inits...\n", + "Downloading: 100% 570/570 [00:00<00:00, 525kB/s]\n", + "Downloading: 100% 440M/440M [00:29<00:00, 14.9MB/s]\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + 
"CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", 
+ "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to attention\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "apply to output\n", + "BertAdapter\n", + "BertAdapterMask\n", + "apply_one_layer_shared \n", + "CapsuleLayer\n", + "CapsNet\n", + "BertAdapterCapsuleMaskImp\n", + "Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 
'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']\n", + "- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 
'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 
'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc1.weight', 
'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 
'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 
'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 
'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 
'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 
'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 
'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 
'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 
'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 
'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 
'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc1.bias', 
'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 
'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 
'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 
'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 
'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 
'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 
'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 
'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 
'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 
'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc1.weight', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 
'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 
'bert.encoder.layer.4.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 
'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 
'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 
'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 
'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 
'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 
'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 
'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc2.weight', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 
'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.7.output.adapter_capsule_mask.efc2.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "BERT ADAPTER CAPSULE MASK\n", + "12/28/2021 23:29:43 - INFO - preparation - device: cuda n_gpu: 1\n", + "DIL BERT ADAPTER MASK BASE\n", + "BERT ADAPTER CAPSULE MASK NCL\n", + "12/28/2021 23:29:43 - INFO - preparation - ****************************************************************************************************\n", + "12/28/2021 23:29:43 - INFO - preparation - Task 0 (./dat/absa/XuSemEval/asc/14/laptop)\n", + "12/28/2021 23:29:43 - INFO - preparation - ****************************************************************************************************\n", + "12/28/2021 23:29:43 - INFO - preparation - Start Training and Set the clock\n", + "train\n", + "Train Iter (loss=X.XXX): 0% 0/68 [00:57\n", + " appr.train(task,train_dataloader,valid_dataloader,num_train_steps,train,valid)\n", + " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/approaches/classification/bert_adapter_capsule_mask.py\", line 60, in train\n", + " global_step=self.train_epoch(t,train,iter_bar, optimizer,t_total,global_step)\n", + " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/approaches/classification/bert_adapter_capsule_mask.py\", line 118, in train_epoch\n", + " output_dict = self.model.forward(t,input_ids, segment_ids, input_mask,targets,s=s)\n", + " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/networks/classification/bert_adapter_capsule_mask.py\", line 68, in forward\n", + " targets=None,t=t,s=s)\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n", + " result = self.forward(*input, 
**kwargs)\n", + " File \"./networks/base/my_transformers.py\", line 796, in forward\n", + " output_hidden_states,return_dict,t=t,s=s,x_list=x_list,h_list=h_list)\n", + " File \"./networks/base/my_transformers.py\", line 853, in compute_encoder_outputs\n", + " return_dict=return_dict, t=t,s=s\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n", + " result = self.forward(*input, **kwargs)\n", + " File \"./networks/base/my_transformers.py\", line 664, in forward\n", + " t=t,s=s,x_list=x_list,h_list=h_list\n", + " File \"./networks/base/my_transformers.py\", line 601, in compute_layer_outputs\n", + " t=t,s=s\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n", + " result = self.forward(*input, **kwargs)\n", + " File \"./networks/base/my_transformers.py\", line 474, in forward\n", + " t=t,s=s,\n", + " File \"./networks/base/my_transformers.py\", line 963, in apply_chunking_to_forward\n", + " t=t,s=s)\n", + " File \"./networks/base/my_transformers.py\", line 521, in feed_forward_chunk\n", + " t=t,s=s,)\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n", + " result = self.forward(*input, **kwargs)\n", + " File \"./networks/base/my_transformers.py\", line 361, in forward\n", + " output_dict = self.adapter_capsule_mask(hidden_states,t,s)\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n", + " result = self.forward(*input, **kwargs)\n", + " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/networks/base/adapters.py\", line 118, in forward\n", + " h=self.activation(self.fc1(h))\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n", + " result = self.forward(*input, **kwargs)\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/linear.py\", line 93, 
in forward\n", + " return F.linear(input, self.weight, self.bias)\n", + " File \"/usr/local/lib/python3.7/site-packages/torch/nn/functional.py\", line 1692, in linear\n", + " output = input.matmul(weight.t())\n", + "RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 11.17 GiB total capacity; 10.45 GiB already allocated; 26.81 MiB free; 10.82 GiB reserved in total by PyTorch)\n" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..c02a1af8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +et-xmlfile==1.1.0 +numpy @ file:///D:/Trabajo/rest-mex_2022_sentiment_data_training/numpy-1.24.2-cp311-cp311-win_amd64.whl +openpyxl==3.1.2 +pandas @ file:///D:/Trabajo/rest-mex_2022_sentiment_data_training/pandas-1.5.3-cp311-cp311-win_amd64.whl +python-dateutil==2.8.2 +pytz==2022.7.1 +six==1.16.0 diff --git a/src/_onlyrequirements.txt b/src/_onlyrequirements.txt new file mode 100644 index 00000000..63108059 --- /dev/null +++ b/src/_onlyrequirements.txt @@ -0,0 +1,107 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +#_libgcc_mutex=0.1=main +#_openmp_mutex=4.5=1_gnu +blas +boto3 +botocore +brotlipy +bzip2 +ca-certificates +certifi +cffi +charset-normalizer +click +cryptography +cudatoolkit +ffmpeg +filelock +freetype +giflib +gmp +gnutls +idna +importlib-metadata +intel-openmp +jmespath +joblib +jpeg +lame +lcms2 +ld_impl_linux-64 +libffi +libgcc-ng +libgfortran-ng +libgfortran4 +libgomp +libiconv +libidn2 +libpng +libstdcxx-ng +libtasn1 +libtiff +libunistring +libuv +libwebp +libwebp-base +lz4-c +mkl +mkl-service +mkl_fft +mkl_random +ncurses +nettle +ninja +numpy +numpy-base +olefile +openh264 +openssl +packaging +pillow +pip +pycparser +pyopenssl +pyparsing +pysocks +python==3.7.11 +python-dateutil==2.8.2 +pytorch==1.7.0 +pytorch-mutex==1.0 +pyyaml +quadprog +readline +regex +requests +s3transfer +sacremoses 
+scikit-learn +scipy +setuptools +six +sqlite +threadpoolctl +tk +tokenizers +torchaudio +torchvision +tqdm +transformers +typing_extensions +urllib3 +wheel +xz +zipp +zlib +zstd +transformers==4.10.2 +zipp==3.6.0 +tokenizers==0.10.3 +pyparsing==2.4.7 +packaging==21.2 +quadprog==0.1.10 +pyyaml==6.0 +filelock==3.3.2 +importlib-metadata==4.8.1 +huggingface-hub==0.1.1 \ No newline at end of file diff --git a/src/_requirements.txt b/src/_requirements.txt new file mode 100644 index 00000000..d90f4e52 --- /dev/null +++ b/src/_requirements.txt @@ -0,0 +1,99 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +#_libgcc_mutex=0.1=main +#_openmp_mutex=4.5=1_gnu +blas=1.0=mkl +boto3=1.18.21=pyhd3eb1b0_0 + +botocore=1.21.41=pyhd3eb1b0_1 +brotlipy=0.7.0=py37h27cfd23_1003 +bzip2=1.0.8=h7b6447c_0 +ca-certificates=2021.10.26=h06a4308_2 +certifi=2021.10.8=py37h06a4308_0 +cffi=1.14.6=py37h400218f_0 +charset-normalizer=2.0.4=pyhd3eb1b0_0 +click=8.0.3=pyhd3eb1b0_0 +cryptography=35.0.0=py37hd23ed53_0 +cudatoolkit=11.0.221=h6bb024c_0 +ffmpeg=4.3=hf484d3e_0 +filelock=3.3.2=pypi_0 +freetype=2.11.0=h70c0345_0 +giflib=5.2.1=h7b6447c_0 +gmp=6.2.1=h2531618_2 +gnutls=3.6.15=he1e5248_0 +huggingface-hub=0.1.1=pypi_0 +idna=3.2=pyhd3eb1b0_0 +importlib-metadata=4.8.1=pypi_0 +intel-openmp=2021.4.0=h06a4308_3561 +jmespath=0.10.0=pyhd3eb1b0_0 +joblib=1.1.0=pyhd3eb1b0_0 +jpeg=9d=h7f8727e_0 +lame=3.100=h7b6447c_0 +lcms2=2.12=h3be6417_0 +ld_impl_linux-64=2.35.1=h7274673_9 +libffi=3.3=he6710b0_2 +libgcc-ng=9.3.0=h5101ec6_17 +libgfortran-ng=7.5.0=ha8ba4b0_17 +libgfortran4=7.5.0=ha8ba4b0_17 +libgomp=9.3.0=h5101ec6_17 +libiconv=1.15=h63c8f33_5 +libidn2=2.3.2=h7f8727e_0 +libpng=1.6.37=hbc83047_0 +libstdcxx-ng=9.3.0=hd4cf53a_17 +libtasn1=4.16.0=h27cfd23_0 +libtiff=4.2.0=h85742a9_0 +libunistring=0.9.10=h27cfd23_0 +libuv=1.40.0=h7b6447c_0 +libwebp=1.2.0=h89dd481_0 +libwebp-base=1.2.0=h27cfd23_0 +lz4-c=1.9.3=h295c915_1 +mkl=2021.4.0=h06a4308_640 
+mkl-service=2.4.0=py37h7f8727e_0 +mkl_fft=1.3.1=py37hd3c417c_0 +mkl_random=1.2.2=py37h51133e4_0 +ncurses=6.3=heee7806_1 +nettle=3.7.3=hbbd107a_1 +ninja=1.10.2=hff7bd54_1 +numpy=1.21.2=py37h20f2e39_0 +numpy-base=1.21.2=py37h79a1101_0 +olefile=0.46=py37_0 +openh264=2.1.0=hd408876_0 +openssl=1.1.1l=h7f8727e_0 +packaging=21.2=pypi_0 +pillow=8.4.0=py37h5aabda8_0 +pip=21.0.1=py37h06a4308_0 +pycparser=2.20=py_2 +pyopenssl=21.0.0=pyhd3eb1b0_1 +pyparsing=2.4.7=pypi_0 +pysocks=1.7.1=py37_1 +python=3.7.11=h12debd9_0 +python-dateutil=2.8.2=pyhd3eb1b0_0 +pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0 +pytorch-mutex=1.0=cuda +pyyaml=6.0=pypi_0 +quadprog=0.1.10=pypi_0 +readline=8.1=h27cfd23_0 +regex=2021.8.3=py37h7f8727e_0 +requests=2.26.0=pyhd3eb1b0_0 +s3transfer=0.5.0=pyhd3eb1b0_0 +sacremoses=0.0.43=pyhd3eb1b0_0 +scikit-learn=1.0.1=py37h51133e4_0 +scipy=1.7.1=py37h292c36d_2 +setuptools=58.0.4=py37h06a4308_0 +six=1.16.0=pyhd3eb1b0_0 +sqlite=3.36.0=hc218d9a_0 +threadpoolctl=2.2.0=pyh0d69192_0 +tk=8.6.11=h1ccaba5_0 +tokenizers=0.10.3=pypi_0 +torchaudio=0.7.0=py37 +torchvision=0.8.1=py37_cu110 +tqdm=4.62.3=pyhd3eb1b0_1 +transformers=4.10.2=pypi_0 +typing_extensions=3.10.0.2=pyh06a4308_0 +urllib3=1.26.7=pyhd3eb1b0_0 +wheel=0.37.0=pyhd3eb1b0_1 +xz=5.2.5=h7b6447c_0 +zipp=3.6.0=pypi_0 +zlib=1.2.11=h7b6447c_3 +zstd=1.4.9=haebb681_0 diff --git a/src/_testrequirements.txt b/src/_testrequirements.txt new file mode 100644 index 00000000..7757e80d --- /dev/null +++ b/src/_testrequirements.txt @@ -0,0 +1,97 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +#_libgcc_mutex==0.1 +#_openmp_mutex==4.5 +blas +boto3 +botocore +brotlipy +bzip2 +ca-certificates +certifi +cffi +charset-normalizer +click +cryptography +cudatoolkit +ffmpeg +filelock +freetype +giflib +gmp +gnutls +idna +importlib-metadata +intel-openmp +jmespath +joblib +jpeg +lame +lcms2 +ld_impl_linux-64 +libffi +libgcc-ng +libgfortran-ng +libgfortran4 +libgomp +libiconv 
+libidn2 +libpng +libstdcxx-ng +libtasn1 +libtiff +libunistring +libuv +libwebp +libwebp-base +lz4-c +mkl +mkl-service +mkl_fft +mkl_random +ncurses +nettle +ninja +numpy +numpy-base +olefile +openh264 +openssl +packaging +pillow +pip +pycparser +pyopenssl +pyparsing +pysocks +python==3.7.11 +python-dateutil==2.8.2 +pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0 +pytorch-mutex=1.0=cuda +pyyaml +quadprog +readline +regex +requests +s3transfer +sacremoses +scikit-learn +scipy +setuptools +six +sqlite +threadpoolctl +tk +tokenizers +torchaudio +torchvision +tqdm +transformers=4.10.2 +typing_extensions +urllib3 +wheel +xz +zipp +zlib +zstd \ No newline at end of file diff --git a/src/piprequirements.txt b/src/piprequirements.txt new file mode 100644 index 00000000..e8897635 --- /dev/null +++ b/src/piprequirements.txt @@ -0,0 +1,13 @@ +# This file may be used to create an environment using: +# $ conda create --name --file +# platform: linux-64 +transformers==4.10.2 +zipp==3.6.0 +tokenizers==0.10.3 +pyparsing==2.4.7 +packaging==21.2 +quadprog==0.1.10 +pyyaml==6.0 +filelock==3.3.2 +importlib-metadata==4.8.1 +huggingface-hub==0.1.1 \ No newline at end of file diff --git a/src/serialization/README.MD b/src/serialization/README.MD new file mode 100644 index 00000000..fe280bf7 --- /dev/null +++ b/src/serialization/README.MD @@ -0,0 +1,5 @@ +# INSTRUCCIONES +1. Copiar los 2 archivos excel en esta carpeta +2. Tener instalado Python 3.11 +3. Instalar las dependencias necesarias que estan al inicio del repositorio en el archivo requirements.txt +4. 
Ejecutar los scripts
diff --git a/src/serialization/classify_to_json.py b/src/serialization/classify_to_json.py
new file mode 100644
index 00000000..fe0c702b
--- /dev/null
+++ b/src/serialization/classify_to_json.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+"""
+This script targets Python 3.11, which is 10-60% faster than 3.10 per the CPython release notes
+"""
+# Read the dataset
+df = pd.read_excel("Rest_Mex_Sentiment_Analysis_2023_Train.xlsx")
+
+# Write one JSON file per value of "Type"; open() does not create the "classified" directory
+with open("classified/hotel.json", "w", encoding='utf-8') as file:
+    file.write(df[df["Type"] == "Hotel"].to_json(force_ascii=False, orient='index'))
+with open("classified/restaurant.json", "w", encoding='utf-8') as file:
+    file.write(df[df["Type"] == "Restaurant"].to_json(force_ascii=False, orient='index'))
+with open("classified/attractive.json", "w", encoding='utf-8') as file:
+    file.write(df[df["Type"] == "Attractive"].to_json(force_ascii=False, orient='index'))
diff --git a/src/serialization/find_diferents.py b/src/serialization/find_diferents.py
new file mode 100644
index 00000000..b8c998a2
--- /dev/null
+++ b/src/serialization/find_diferents.py
@@ -0,0 +1,14 @@
+import pandas as pd
+
+# Load the 2022 and 2023 training sets
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+
+# keep=False drops every row that occurs more than once in the concatenation,
+# so only rows that are not duplicated across (or within) the two sets remain
+df = pd.concat([df2023, df2022]).drop_duplicates(keep=False)
+
+# Discard any row in which some column, rendered as text, contains 'Attractive'
+df = df[~df.astype(str).apply(lambda x: x.str.contains('Attractive')).any(axis=1)]
+
+df.to_excel('dataset_filtrado.xlsx', index=False)
\ No newline at end of file
diff --git a/src/serialization/find_equals.py b/src/serialization/find_equals.py
new file mode 100644
index 00000000..7964fe8a
--- /dev/null
+++ b/src/serialization/find_equals.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+# Load the datasets
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+
+# Collect the distinct review texts of each dataset
+reviews_ds22 = df2022['Review'].unique().tolist()
+reviews_ds23 = df2023['Review'].unique().tolist()
+
+# Keep the rows whose review text does not appear in the other dataset.
+# Each result is taken from the dataset it is named after.
+rows_only_in_ds22_df = df2022[~df2022['Review'].isin(reviews_ds23)]
+rows_only_in_ds23_df = df2023[~df2023['Review'].isin(reviews_ds22)]
+
+# Export the resulting datasets
+rows_only_in_ds22_df.to_excel('only_2022.xlsx', index=False)
+rows_only_in_ds23_df.to_excel('only_2023.xlsx', index=False)
\ No newline at end of file
diff --git a/src/serialization/find_intersection.py b/src/serialization/find_intersection.py
new file mode 100644
index 00000000..625328c0
--- /dev/null
+++ b/src/serialization/find_intersection.py
@@ -0,0 +1,25 @@
+import pandas as pd
+import json
+
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+
+# filas2022, columnas2022 = df2022.shape
+# filas2023, columnas2023 = df2023.shape
+
+# print('Dataset de 2022', filas2022, 'filas.')
+# print('Dataset de 2023', filas2023, 'filas.')
+
+
+# Find the intersection: join the two datasets on the review text, once in each order
+interseccion1 = pd.merge(df2023, df2022, on='Review')
+interseccion2 = pd.merge(df2022, df2023, on='Review')
+
+# Save both results
+interseccion1.to_excel('interseccion2023-2022.xlsx', index=False)
+interseccion2.to_excel('interseccion2022-2023.xlsx', index=False)
+
+
+
+
+
diff --git a/src/serialization/model_export_json.py b/src/serialization/model_export_json.py
new file mode 100644
index 00000000..fc87fca5
--- /dev/null
+++ b/src/serialization/model_export_json.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+"""
+This script targets Python 3.11, which is 10-60% faster than 3.10 per the CPython release notes
+"""
+# Read the dataset
+df = pd.read_excel("Rest_Mex_Sentiment_Analysis_2023_Train.xlsx")
+
+# Serialize the DataFrame to JSON keyed by row index, keeping non-ASCII characters as-is
+payload = df.to_json(force_ascii=False, orient='index')
+
+# Write the payload to a .json file
+with open("Train.json", "w", encoding='utf-8') as file:
+    file.write(payload)
+