diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 00000000..26d33521
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/SUCESSFUL_LL_APPROACH.ipynb b/SUCESSFUL_LL_APPROACH.ipynb
new file mode 100644
index 00000000..9e758436
--- /dev/null
+++ b/SUCESSFUL_LL_APPROACH.ipynb
@@ -0,0 +1,1641 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "SUCESSFUL_LL_APPROACH.ipynb",
+ "provenance": [],
+ "collapsed_sections": [],
+ "authorship_tag": "ABX9TyNXvNjLnuleHrTWI+/Wy02y",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "qEu-Kgavdp35",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "8a4ceca7-d87c-4661-8b91-a446df3224ba"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Mounted at /content/gdrive\n"
+ ]
+ }
+ ],
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/gdrive')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "import numpy\n",
+ "print(torch.cuda.device_count())\n",
+ "print(torch.cuda.get_device_name(0))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "pHjt0iX5bsRk",
+ "outputId": "37d37cc7-3cf0-44e3-85ed-93f5b93af360"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "1\n",
+ "Tesla K80\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install -q condacolab\n",
+ "import condacolab\n",
+ "condacolab.install()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NmrKqS-jE7Pa",
+ "outputId": "1bd9c028-f32a-4211-e02f-4842b6739932"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+            "⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...\n",
+            "📦 Installing...\n",
+            "📌 Adjusting configuration...\n",
+            "🩹 Patching environment...\n",
+            "⏲ Done in 0:00:36\n",
+            "🔁 Restarting kernel...\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import condacolab\n",
+ "condacolab.check()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "QPO8EymEFHgS",
+ "outputId": "32424e44-e22f-4e5c-ef34-c641ae4c414d"
+ },
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+            "✨🍰✨ Everything looks OK!\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "cd gdrive/MyDrive/PyContinual-main/PyContinual-main/src"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "OiAq5U_zhrA0",
+ "outputId": "e250ad01-5e22-4560-fa5e-ff893cee7870"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/gdrive/MyDrive/PyContinual-main/PyContinual-main/src\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Using conda in Colab https://inside-machinelearning.com/en/how-to-install-use-conda-on-google-colab/"
+ ],
+ "metadata": {
+ "id": "BIToLrFspncE"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Using other https://pypi.org/project/condacolab/"
+ ],
+ "metadata": {
+ "id": "KqSnDJShZ4HN"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+        "!python --version # conda base environment's Python (3.7.10 after condacolab install)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "QzIBvcF9K9eX",
+ "outputId": "101bac79-d7cc-4205-ce85-d339f66dcb99"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Python 3.7.10\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!conda config --add channels conda-forge\n",
+ "!conda config --add channels huggingface \n",
+ "!conda config --add channels pytorch \n"
+ ],
+ "metadata": {
+ "id": "7qFVnqZjHW6v"
+ },
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "stegCcCiRtY0"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "https://colab.research.google.com/drive/1c_RGCgQeLHVXlF44LyOFjfUW32CmG6BP"
+ ],
+ "metadata": {
+ "id": "B8Wdtnk3mv-W"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "vKlz6GkHRuYL"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install -r piprequirements.txt"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "wO_yjgAeTl6J",
+ "outputId": "99a66ded-84ca-453c-f10d-1b97a251025a"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting transformers==4.10.2\n",
+ " Downloading transformers-4.10.2-py3-none-any.whl (2.8 MB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 2.8 MB 5.4 MB/s \n",
+ "\u001b[?25hCollecting zipp==3.6.0\n",
+ " Downloading zipp-3.6.0-py3-none-any.whl (5.3 kB)\n",
+ "Collecting tokenizers==0.10.3\n",
+ " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 3.3 MB 26.7 MB/s \n",
+ "\u001b[?25hCollecting pyparsing==2.4.7\n",
+ " Downloading pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 67 kB 5.5 MB/s \n",
+ "\u001b[?25hCollecting packaging==21.2\n",
+ " Downloading packaging-21.2-py3-none-any.whl (40 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 40 kB 5.4 MB/s \n",
+ "\u001b[?25hCollecting quadprog==0.1.10\n",
+ " Downloading quadprog-0.1.10.tar.gz (121 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 121 kB 47.2 MB/s \n",
+ "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+ " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+ " Preparing wheel metadata ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting pyyaml==6.0\n",
+ " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 596 kB 44.2 MB/s \n",
+ "\u001b[?25hCollecting filelock==3.3.2\n",
+ " Downloading filelock-3.3.2-py3-none-any.whl (9.7 kB)\n",
+ "Collecting importlib-metadata==4.8.1\n",
+ " Downloading importlib_metadata-4.8.1-py3-none-any.whl (17 kB)\n",
+ "Collecting huggingface-hub==0.1.1\n",
+ " Downloading huggingface_hub-0.1.1-py3-none-any.whl (59 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 59 kB 6.8 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.7/site-packages (from huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (4.59.0)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/site-packages (from huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (2.25.1)\n",
+ "Collecting typing-extensions>=3.7.4.3\n",
+ " Downloading typing_extensions-4.0.1-py3-none-any.whl (22 kB)\n",
+ "Collecting numpy\n",
+ " Downloading numpy-1.21.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 15.7 MB 36.2 MB/s \n",
+ "\u001b[?25hCollecting sacremoses\n",
+ " Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 895 kB 40.2 MB/s \n",
+ "\u001b[?25hCollecting regex!=2019.12.17\n",
+ " Downloading regex-2021.11.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (749 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 749 kB 45.8 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (2020.12.5)\n",
+ "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (4.0.0)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (2.10)\n",
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/site-packages (from requests->huggingface-hub==0.1.1->-r piprequirements.txt (line 13)) (1.26.3)\n",
+ "Collecting joblib\n",
+ " Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 306 kB 48.7 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: six in /usr/local/lib/python3.7/site-packages (from sacremoses->transformers==4.10.2->-r piprequirements.txt (line 4)) (1.15.0)\n",
+ "Collecting click\n",
+ " Downloading click-8.0.3-py3-none-any.whl (97 kB)\n",
+ "\u001b[K |ββββββββββββββββββββββββββββββββ| 97 kB 5.6 MB/s \n",
+ "\u001b[33mWARNING: The candidate selected for download or install is a yanked version: 'quadprog' candidate (version 0.1.10 at https://files.pythonhosted.org/packages/78/7a/f0355bad3cf730747cd8971dd8b3b826a802a4f8b11648748af964b18d71/quadprog-0.1.10.tar.gz#sha256=f13bf9609593781a5686ccfd8b38188542dc3c6e00129574817d049fb19ce966 (from https://pypi.org/simple/quadprog/))\n",
+ "Reason for being yanked: Suffers from https://github.com/quadprog/quadprog/issues/32\u001b[0m\n",
+ "\u001b[?25hBuilding wheels for collected packages: quadprog\n",
+ " Building wheel for quadprog (PEP 517) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for quadprog: filename=quadprog-0.1.10-cp37-cp37m-linux_x86_64.whl size=313103 sha256=669d3fefeb1d547afff57157cf676c91ffa82ba8cbb2d69aa939b8b896c2733c\n",
+ " Stored in directory: /root/.cache/pip/wheels/e9/af/76/c5335ed32afc1284e6100b86588d1f75f5c4906fa26df6efda\n",
+ "Successfully built quadprog\n",
+ "Installing collected packages: zipp, typing-extensions, pyparsing, importlib-metadata, regex, pyyaml, packaging, joblib, filelock, click, tokenizers, sacremoses, numpy, huggingface-hub, transformers, quadprog\n",
+ "Successfully installed click-8.0.3 filelock-3.3.2 huggingface-hub-0.1.1 importlib-metadata-4.8.1 joblib-1.1.0 numpy-1.21.5 packaging-21.2 pyparsing-2.4.7 pyyaml-6.0 quadprog-0.1.10 regex-2021.11.10 sacremoses-0.0.46 tokenizers-0.10.3 transformers-4.10.2 typing-extensions-4.0.1 zipp-3.6.0\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!mamba env update -n base -f _testrequirements.txt"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "xfeHZQbUqhWo",
+ "outputId": "eff92688-e032-4d0b-c994-10cb8a596981"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[2K\rconda-forge/linux-64 [] (00m:00s) \n",
+ "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rhuggingface/linux-64 [] (--:--) Finalizing...\n",
+ "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rhuggingface/linux-64 [] (--:--) Done\n",
+ "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rhuggingface/linux-64 [] (00m:00s) Done\n",
+ "\u001b[2A\u001b[0K\u001b[2K\rhuggingface/linux-64 [] (00m:00s) Done\n",
+ "\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rhuggingface/noarch [] (--:--) Finalizing...\n",
+ "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rhuggingface/noarch [] (--:--) Done\n",
+ "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rhuggingface/noarch [] (00m:00s) Done\n",
+ "\u001b[2A\u001b[0K\u001b[2K\rhuggingface/noarch [] (00m:00s) Done\n",
+ "\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[1A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rpytorch/noarch [] (--:--) Finalizing...\n",
+ "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rpytorch/noarch [] (--:--) Done\n",
+ "\u001b[2A\u001b[2K\rconda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "\u001b[2K\rpytorch/noarch [] (00m:00s) Done\n",
+ "\u001b[2A\u001b[0K\u001b[2K\rpytorch/noarch [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "pytorch/linux-64 [] (--:--) Finalizing...\n",
+ "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "pytorch/linux-64 [] (--:--) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "pytorch/linux-64 [] (00m:00s) Done\n",
+ "pytorch/linux-64 [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 341 KB / ?? (1.11 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) \n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) \n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/noarch [] (00m:00s) \n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/noarch [] (00m:00s) 312 KB / ?? (587.59 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/noarch [] (00m:00s) Finalizing...\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/noarch [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/noarch [] (00m:00s) Done\n",
+ "pkgs/r/noarch [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) \n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 818 KB / ?? (1.76 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) 336 KB / ?? (647.41 KB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) Finalizing...\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) Done\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) Done\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "pkgs/main/noarch [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 580 KB / ?? (1.12 MB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) 360 KB / ?? (617.51 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) Finalizing...\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/r/linux-64 [] (00m:00s) Done\n",
+ "pkgs/r/linux-64 [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) \n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 1 MB / ?? (2.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 1 MB / ?? (1.96 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 500 KB / ?? (694.46 KB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 2 MB / ?? (2.81 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 2 MB / ?? (2.41 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 1 MB / ?? (1.52 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.04 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.68 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 2 MB / ?? (2.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 3 MB / ?? (3.20 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 4 MB / ?? (3.26 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 4 MB / ?? (3.26 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 3 MB / ?? (2.93 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:00s) 4 MB / ?? (3.26 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.26 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.30 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.26 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.26 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 4 MB / ?? (3.08 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 5 MB / ?? (3.22 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) 3 MB / ?? (2.58 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:00s) 5 MB / ?? (3.22 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) Finalizing...\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n",
+ "pkgs/main/linux-64 [] (00m:00s) Done\n",
+ "pkgs/main/linux-64 [] (00m:00s) Done\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 4 MB / ?? (3.30 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.22 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 5 MB / ?? (3.18 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 6 MB / ?? (3.45 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.47 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 6 MB / ?? (3.45 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) 6 MB / ?? (3.45 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) Finalizing...\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) Done\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n",
+ "conda-forge/noarch [] (00m:01s) Done\n",
+ "conda-forge/noarch [] (00m:01s) Done\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (3.39 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:01s) 6 MB / ?? (2.94 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 6 MB / ?? (2.94 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.71 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.71 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.78 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 9 MB / ?? (3.78 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 10 MB / ?? (3.85 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 10 MB / ?? (3.85 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 11 MB / ?? (3.90 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 11 MB / ?? (3.90 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (3.97 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (3.97 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (4.01 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 12 MB / ?? (4.01 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:02s) 13 MB / ?? (4.02 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 13 MB / ?? (4.02 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.03 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.06 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 14 MB / ?? (4.06 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 15 MB / ?? (4.09 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 15 MB / ?? (4.09 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 16 MB / ?? (4.13 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 16 MB / ?? (4.13 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.16 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.16 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.19 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 17 MB / ?? (4.19 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:03s) 18 MB / ?? (4.23 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:04s) 18 MB / ?? (4.23 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:04s) 19 MB / ?? (4.26 MB/s)\n",
+ "conda-forge/linux-64 [] (00m:04s) Finalizing...\n",
+ "conda-forge/linux-64 [] (00m:04s) Done\n",
+ "conda-forge/linux-64 [] (00m:04s) Done\n",
+ "conda-forge/linux-64 [] (00m:04s) Done\n",
+ "\n",
+ "\n",
+ "Looking for: ['blas', 'boto3', 'botocore', 'brotlipy', 'bzip2', 'ca-certificates', 'certifi', 'cffi', 'charset-normalizer', 'click', 'cryptography', 'cudatoolkit', 'ffmpeg', 'filelock', 'freetype', 'giflib', 'gmp', 'gnutls', 'idna', 'importlib-metadata', 'intel-openmp', 'jmespath', 'joblib', 'jpeg', 'lame', 'lcms2', 'ld_impl_linux-64', 'libffi', 'libgcc-ng', 'libgfortran-ng', 'libgfortran4', 'libgomp', 'libiconv', 'libidn2', 'libpng', 'libstdcxx-ng', 'libtasn1', 'libtiff', 'libunistring', 'libuv', 'libwebp', 'libwebp-base', 'lz4-c', 'mkl', 'mkl-service', 'mkl_fft', 'mkl_random', 'ncurses', 'nettle', 'ninja', 'numpy', 'numpy-base', 'olefile', 'openh264', 'openssl', 'packaging', 'pillow', 'pip', 'pycparser', 'pyopenssl', 'pyparsing', 'pysocks', 'python==3.7.11', 'python-dateutil==2.8.2', 'pytorch==1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0', 'pytorch-mutex==1.0=cuda', 'pyyaml', 'quadprog', 'readline', 'regex', 'requests', 's3transfer', 'sacremoses', 'scikit-learn', 'scipy', 'setuptools', 'six', 'sqlite', 'threadpoolctl', 'tk', 'tokenizers', 'torchaudio', 'torchvision', 'tqdm', 'transformers=4.10.2', 'typing_extensions', 'urllib3', 'wheel', 'xz', 'zipp', 'zlib', 'zstd']\n",
+ "\n",
+ "\n",
+ "Transaction\n",
+ "\n",
+ " Prefix: /usr/local\n",
+ "\n",
+ " Updating specs:\n",
+ "\n",
+ " - blas\n",
+ " - boto3\n",
+ " - botocore\n",
+ " - brotlipy\n",
+ " - bzip2\n",
+ " - ca-certificates\n",
+ " - certifi\n",
+ " - cffi\n",
+ " - charset-normalizer\n",
+ " - click\n",
+ " - cryptography\n",
+ " - cudatoolkit\n",
+ " - ffmpeg\n",
+ " - filelock\n",
+ " - freetype\n",
+ " - giflib\n",
+ " - gmp\n",
+ " - gnutls\n",
+ " - idna\n",
+ " - importlib-metadata\n",
+ " - intel-openmp\n",
+ " - jmespath\n",
+ " - joblib\n",
+ " - jpeg\n",
+ " - lame\n",
+ " - lcms2\n",
+ " - ld_impl_linux-64\n",
+ " - libffi\n",
+ " - libgcc-ng\n",
+ " - libgfortran-ng\n",
+ " - libgfortran4\n",
+ " - libgomp\n",
+ " - libiconv\n",
+ " - libidn2\n",
+ " - libpng\n",
+ " - libstdcxx-ng\n",
+ " - libtasn1\n",
+ " - libtiff\n",
+ " - libunistring\n",
+ " - libuv\n",
+ " - libwebp\n",
+ " - libwebp-base\n",
+ " - lz4-c\n",
+ " - mkl\n",
+ " - mkl-service\n",
+ " - mkl_fft\n",
+ " - mkl_random\n",
+ " - ncurses\n",
+ " - nettle\n",
+ " - ninja\n",
+ " - numpy\n",
+ " - numpy-base\n",
+ " - olefile\n",
+ " - openh264\n",
+ " - openssl\n",
+ " - packaging\n",
+ " - pillow\n",
+ " - pip\n",
+ " - pycparser\n",
+ " - pyopenssl\n",
+ " - pyparsing\n",
+ " - pysocks\n",
+ " - python==3.7.11\n",
+ " - python-dateutil==2.8.2\n",
+ " - pytorch==1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0\n",
+ " - pytorch-mutex==1.0=cuda\n",
+ " - pyyaml\n",
+ " - quadprog\n",
+ " - readline\n",
+ " - regex\n",
+ " - requests\n",
+ " - s3transfer\n",
+ " - sacremoses\n",
+ " - scikit-learn\n",
+ " - scipy\n",
+ " - setuptools\n",
+ " - six\n",
+ " - sqlite\n",
+ " - threadpoolctl\n",
+ " - tk\n",
+ " - tokenizers\n",
+ " - torchaudio\n",
+ " - torchvision\n",
+ " - tqdm\n",
+ " - transformers==4.10.2\n",
+ " - typing_extensions\n",
+ " - urllib3\n",
+ " - wheel\n",
+ " - xz\n",
+ " - zipp\n",
+ " - zlib\n",
+ " - zstd\n",
+ "\n",
+ "\n",
+ " Package Version Build Channel Size\n",
+ "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
+ " Install:\n",
+ "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
+ "\n",
+ "\u001b[32m aom \u001b[00m 3.2.0 h9c3ff4c_2 conda-forge/linux-64 5 MB\n",
+ "\u001b[32m blas \u001b[00m 1.0 mkl pkgs/main/linux-64 6 KB\n",
+ "\u001b[32m boto3 \u001b[00m 1.20.26 pyhd8ed1ab_0 conda-forge/noarch 71 KB\n",
+ "\u001b[32m botocore \u001b[00m 1.23.26 pyhd8ed1ab_0 conda-forge/noarch 5 MB\n",
+ "\u001b[32m charset-normalizer\u001b[00m 2.0.9 pyhd8ed1ab_0 conda-forge/noarch 34 KB\n",
+ "\u001b[32m click \u001b[00m 8.0.3 py37h89c1867_1 conda-forge/linux-64 145 KB\n",
+ "\u001b[32m cudatoolkit \u001b[00m 11.0.221 h6bb024c_0 pkgs/main/linux-64 623 MB\n",
+ "\u001b[32m cython \u001b[00m 0.29.26 py37hcd2ae1e_0 conda-forge/linux-64 2 MB\n",
+ "\u001b[32m dataclasses \u001b[00m 0.8 pyhc8e2a94_3 conda-forge/noarch 10 KB\n",
+ "\u001b[32m ffmpeg \u001b[00m 4.4.1 h6987444_0 conda-forge/linux-64 10 MB\n",
+ "\u001b[32m filelock \u001b[00m 3.4.2 pyhd8ed1ab_0 conda-forge/noarch 12 KB\n",
+ "\u001b[32m freetype \u001b[00m 2.11.0 h70c0345_0 pkgs/main/linux-64 618 KB\n",
+ "\u001b[32m gettext \u001b[00m 0.21.0 hf68c758_0 pkgs/main/linux-64 3 MB\n",
+ "\u001b[32m giflib \u001b[00m 5.2.1 h516909a_2 conda-forge/linux-64 80 KB\n",
+ "\u001b[32m gmp \u001b[00m 6.2.1 h58526e2_0 conda-forge/linux-64 806 KB\n",
+ "\u001b[32m gnutls \u001b[00m 3.6.15 he1e5248_0 pkgs/main/linux-64 1 MB\n",
+ "\u001b[32m huggingface_hub \u001b[00m 0.2.1 pyhd8ed1ab_0 conda-forge/noarch 49 KB\n",
+ "\u001b[32m importlib-metadata\u001b[00m 4.10.0 py37h89c1867_0 conda-forge/linux-64 32 KB\n",
+ "\u001b[32m importlib_metadata\u001b[00m 4.10.0 hd8ed1ab_0 conda-forge/noarch 4 KB\n",
+ "\u001b[32m intel-openmp \u001b[00m 2021.4.0 h06a4308_3561 pkgs/main/linux-64 4 MB\n",
+ "\u001b[32m jbig \u001b[00m 2.1 h7f98852_2003 conda-forge/linux-64 43 KB\n",
+ "\u001b[32m jmespath \u001b[00m 0.10.0 pyh9f0ad1d_0 conda-forge/noarch 21 KB\n",
+ "\u001b[32m joblib \u001b[00m 1.1.0 pyhd8ed1ab_0 conda-forge/noarch 210 KB\n",
+ "\u001b[32m jpeg \u001b[00m 9d h516909a_0 conda-forge/linux-64 266 KB\n",
+ "\u001b[32m lame \u001b[00m 3.100 h14c3975_1001 conda-forge/linux-64 498 KB\n",
+ "\u001b[32m lcms2 \u001b[00m 2.12 hddcbb42_0 conda-forge/linux-64 443 KB\n",
+ "\u001b[32m lerc \u001b[00m 3.0 h9c3ff4c_0 conda-forge/linux-64 216 KB\n",
+ "\u001b[32m libblas \u001b[00m 3.9.0 12_linux64_mkl conda-forge/linux-64 12 KB\n",
+ "\u001b[32m libcblas \u001b[00m 3.9.0 12_linux64_mkl conda-forge/linux-64 12 KB\n",
+ "\u001b[32m libdeflate \u001b[00m 1.8 h7f98852_0 conda-forge/linux-64 67 KB\n",
+ "\u001b[32m libdrm \u001b[00m 2.4.109 h7f98852_0 conda-forge/linux-64 284 KB\n",
+ "\u001b[32m libgfortran-ng \u001b[00m 7.5.0 h14aa051_19 conda-forge/linux-64 22 KB\n",
+ "\u001b[32m libgfortran4 \u001b[00m 7.5.0 h14aa051_19 conda-forge/linux-64 1 MB\n",
+ "\u001b[32m libidn2 \u001b[00m 2.3.2 h7f98852_0 conda-forge/linux-64 98 KB\n",
+ "\u001b[32m libpciaccess \u001b[00m 0.16 h516909a_0 conda-forge/linux-64 37 KB\n",
+ "\u001b[32m libpng \u001b[00m 1.6.37 hed695b0_2 conda-forge/linux-64 359 KB\n",
+ "\u001b[32m libprotobuf \u001b[00m 3.19.1 h780b84a_0 conda-forge/linux-64 3 MB\n",
+ "\u001b[32m libtasn1 \u001b[00m 4.18.0 h7f98852_0 conda-forge/linux-64 114 KB\n",
+ "\u001b[32m libtiff \u001b[00m 4.3.0 h6f004c6_2 conda-forge/linux-64 614 KB\n",
+ "\u001b[32m libunistring \u001b[00m 0.9.10 h14c3975_0 conda-forge/linux-64 1 MB\n",
+ "\u001b[32m libuv \u001b[00m 1.42.0 h7f98852_0 conda-forge/linux-64 1 MB\n",
+ "\u001b[32m libva \u001b[00m 2.13.0 h7f98852_1 conda-forge/linux-64 165 KB\n",
+ "\u001b[32m libvpx \u001b[00m 1.11.0 h9c3ff4c_3 conda-forge/linux-64 1 MB\n",
+ "\u001b[32m libwebp \u001b[00m 1.2.1 h3452ae3_0 conda-forge/linux-64 84 KB\n",
+ "\u001b[32m libwebp-base \u001b[00m 1.2.1 h7f98852_0 conda-forge/linux-64 845 KB\n",
+ "\u001b[32m libzlib \u001b[00m 1.2.11 h36c2ea0_1013 conda-forge/linux-64 59 KB\n",
+ "\u001b[32m llvm-openmp \u001b[00m 12.0.1 h4bd325d_1 conda-forge/linux-64 3 MB\n",
+ "\u001b[32m mkl \u001b[00m 2021.4.0 h8d4b97c_729 conda-forge/linux-64 219 MB\n",
+ "\u001b[32m mkl-service \u001b[00m 2.4.0 py37h402132d_0 conda-forge/linux-64 60 KB\n",
+ "\u001b[32m mkl_fft \u001b[00m 1.3.1 py37h3e078e5_1 conda-forge/linux-64 206 KB\n",
+ "\u001b[32m mkl_random \u001b[00m 1.2.2 py37h219a48f_0 conda-forge/linux-64 361 KB\n",
+ "\u001b[32m nettle \u001b[00m 3.7.3 hbbd107a_1 pkgs/main/linux-64 809 KB\n",
+ "\u001b[32m ninja \u001b[00m 1.10.2 h4bd325d_1 conda-forge/linux-64 2 MB\n",
+ "\u001b[32m numpy \u001b[00m 1.21.2 py37h20f2e39_0 pkgs/main/linux-64 23 KB\n",
+ "\u001b[32m numpy-base \u001b[00m 1.21.2 py37h79a1101_0 pkgs/main/linux-64 5 MB\n",
+ "\u001b[32m olefile \u001b[00m 0.46 pyh9f0ad1d_1 conda-forge/noarch 32 KB\n",
+ "\u001b[32m openh264 \u001b[00m 2.1.1 h780b84a_0 conda-forge/linux-64 2 MB\n",
+ "\u001b[32m openjpeg \u001b[00m 2.4.0 hb52868f_1 conda-forge/linux-64 444 KB\n",
+ "\u001b[32m packaging \u001b[00m 21.3 pyhd8ed1ab_0 conda-forge/noarch 36 KB\n",
+ "\u001b[32m pillow \u001b[00m 8.4.0 py37h0f21c89_0 conda-forge/linux-64 706 KB\n",
+ "\u001b[32m protobuf \u001b[00m 3.19.1 py37hcd2ae1e_1 conda-forge/linux-64 326 KB\n",
+ "\u001b[32m pyparsing \u001b[00m 3.0.6 pyhd8ed1ab_0 conda-forge/noarch 79 KB\n",
+ "\u001b[32m python-dateutil \u001b[00m 2.8.2 pyhd8ed1ab_0 conda-forge/noarch 240 KB\n",
+ "\u001b[32m pytorch \u001b[00m 1.7.0 py3.7_cuda11.0.221_cudnn8.0.3_0 pytorch/linux-64 661 MB\n",
+ "\u001b[32m pytorch-cpu \u001b[00m 1.1.0 py3.7_cpu_0 pytorch/linux-64 54 MB\n",
+ "\u001b[32m pytorch-mutex \u001b[00m 1.0 cuda pytorch/noarch 3 KB\n",
+ "\u001b[32m pyyaml \u001b[00m 6.0 py37h5e8e339_3 conda-forge/linux-64 187 KB\n",
+ "\u001b[32m quadprog \u001b[00m 0.1.11 py37h2527ec5_0 conda-forge/linux-64 106 KB\n",
+ "\u001b[32m regex \u001b[00m 2021.11.10 py37h5e8e339_0 conda-forge/linux-64 380 KB\n",
+ "\u001b[32m s3transfer \u001b[00m 0.5.0 pyhd8ed1ab_0 conda-forge/noarch 55 KB\n",
+ "\u001b[32m sacremoses \u001b[00m 0.0.46 pyhd8ed1ab_0 conda-forge/noarch 466 KB\n",
+ "\u001b[32m scikit-learn \u001b[00m 1.0.2 py37hf9e9bfc_0 conda-forge/linux-64 8 MB\n",
+ "\u001b[32m scipy \u001b[00m 1.7.1 py37h292c36d_2 pkgs/main/linux-64 16 MB\n",
+ "\u001b[32m svt-av1 \u001b[00m 0.8.7 h9c3ff4c_1 conda-forge/linux-64 3 MB\n",
+ "\u001b[32m tbb \u001b[00m 2021.5.0 h4bd325d_0 conda-forge/linux-64 2 MB\n",
+ "\u001b[32m threadpoolctl \u001b[00m 3.0.0 pyh8a188c0_0 conda-forge/noarch 17 KB\n",
+ "\u001b[32m tokenizers \u001b[00m 0.10.3 py37_0 huggingface/linux-64 3 MB\n",
+ "\u001b[32m torchaudio \u001b[00m 0.7.0 py37 pytorch/linux-64 10 MB\n",
+ "\u001b[32m torchvision \u001b[00m 0.10.1 py37h9e046cd_0_cpu conda-forge/linux-64 7 MB\n",
+ "\u001b[32m transformers \u001b[00m 4.10.2 py_0 huggingface/noarch 1 MB\n",
+ "\u001b[32m typing-extensions \u001b[00m 4.0.1 hd8ed1ab_0 conda-forge/noarch 8 KB\n",
+ "\u001b[32m typing_extensions \u001b[00m 4.0.1 pyha770c72_0 conda-forge/noarch 26 KB\n",
+ "\u001b[32m x264 \u001b[00m 1!161.3030 h7f98852_1 conda-forge/linux-64 2 MB\n",
+ "\u001b[32m x265 \u001b[00m 3.5 h4bd325d_1 conda-forge/linux-64 7 MB\n",
+ "\u001b[32m zipp \u001b[00m 3.6.0 pyhd8ed1ab_0 conda-forge/noarch 12 KB\n",
+ "\n",
+ " Change:\n",
+ "────────────────────────────────────────────────────────────────────────────────────────────────────\n",
+ "\n",
+ "\u001b[31m _openmp_mutex \u001b[00m 4.5 1_gnu installed \n",
+ "\u001b[32m _openmp_mutex \u001b[00m 4.5 1_llvm conda-forge/linux-64 5 KB\n",
+ "\u001b[31m zlib \u001b[00m 1.2.11 h516909a_1010 installed \n",
+ "\u001b[32m zlib \u001b[00m 1.2.11 h36c2ea0_1013 conda-forge/linux-64 86 KB\n",
+ "\n",
+ " Upgrade:\n",
+ "────────────────────────────────────────────────────────────────────────────────────────────────────\n",
+ "\n",
+ "\u001b[31m libarchive \u001b[00m 3.5.1 h3f442fb_1 installed \n",
+ "\u001b[32m libarchive \u001b[00m 3.5.2 hccf745f_1 conda-forge/linux-64 2 MB\n",
+ "\u001b[31m libgcc-ng \u001b[00m 9.3.0 h2828fa1_18 installed \n",
+ "\u001b[32m libgcc-ng \u001b[00m 11.2.0 h1d223b6_11 conda-forge/linux-64 887 KB\n",
+ "\u001b[31m libgomp \u001b[00m 9.3.0 h2828fa1_18 installed \n",
+ "\u001b[32m libgomp \u001b[00m 11.2.0 h1d223b6_11 conda-forge/linux-64 427 KB\n",
+ "\u001b[31m libstdcxx-ng \u001b[00m 9.3.0 h6de172a_18 installed \n",
+ "\u001b[32m libstdcxx-ng \u001b[00m 11.2.0 he4da1e4_11 conda-forge/linux-64 4 MB\n",
+ "\u001b[31m libxml2 \u001b[00m 2.9.10 h72842e0_3 installed \n",
+ "\u001b[32m libxml2 \u001b[00m 2.9.12 h72842e0_0 conda-forge/linux-64 772 KB\n",
+ "\u001b[31m openssl \u001b[00m 1.1.1j h7f98852_0 installed \n",
+ "\u001b[32m openssl \u001b[00m 1.1.1l h7f98852_0 conda-forge/linux-64 2 MB\n",
+ "\u001b[31m python \u001b[00m 3.7.10 hffdb5ce_100_cpython installed \n",
+ "\u001b[32m python \u001b[00m 3.7.11 h12debd9_0 pkgs/main/linux-64 45 MB\n",
+ "\u001b[31m sqlite \u001b[00m 3.34.0 h74cdb3f_0 installed \n",
+ "\u001b[32m sqlite \u001b[00m 3.37.0 hc218d9a_0 pkgs/main/linux-64 999 KB\n",
+ "\u001b[31m tk \u001b[00m 8.6.10 h21135ba_1 installed \n",
+ "\u001b[32m tk \u001b[00m 8.6.11 h27826a3_1 conda-forge/linux-64 3 MB\n",
+ "\u001b[31m zstd \u001b[00m 1.4.9 ha95c52a_0 installed \n",
+ "\u001b[32m zstd \u001b[00m 1.5.1 ha95c52a_0 conda-forge/linux-64 463 KB\n",
+ "\n",
+ " Summary:\n",
+ "\n",
+ " Install: 85 packages\n",
+ " Change: 2 packages\n",
+ " Upgrade: 10 packages\n",
+ "\n",
+ " Total download: 2 GB\n",
+ "\n",
+ "────────────────────────────────────────────────────────────────────────────────────────────────────\n",
+ "\n",
+ "\n",
+ "Downloading and Extracting Packages\n",
+ "libwebp-base-1.2.1 | 845 KB | : 100% 1.0/1 [00:00<00:00, 3.10it/s]\n",
+ "cython-0.29.26 | 2.2 MB | : 100% 1.0/1 [00:00<00:00, 2.00it/s]\n",
+ "jmespath-0.10.0 | 21 KB | : 100% 1.0/1 [00:00<00:00, 20.82it/s]\n",
+ "libblas-3.9.0 | 12 KB | : 100% 1.0/1 [00:00<00:00, 23.20it/s]\n",
+ "libdrm-2.4.109 | 284 KB | : 100% 1.0/1 [00:00<00:00, 10.61it/s]\n",
+ "huggingface_hub-0.2. | 49 KB | : 100% 1.0/1 [00:00<00:00, 15.69it/s]\n",
+ "libpng-1.6.37 | 359 KB | : 100% 1.0/1 [00:00<00:00, 9.20it/s]\n",
+ "libgfortran-ng-7.5.0 | 22 KB | : 100% 1.0/1 [00:00<00:00, 22.13it/s]\n",
+ "regex-2021.11.10 | 380 KB | : 100% 1.0/1 [00:00<00:00, 8.06it/s]\n",
+ "protobuf-3.19.1 | 326 KB | : 100% 1.0/1 [00:00<00:00, 6.84it/s]\n",
+ "pyparsing-3.0.6 | 79 KB | : 100% 1.0/1 [00:00<00:00, 15.31it/s]\n",
+ "libva-2.13.0 | 165 KB | : 100% 1.0/1 [00:00<00:00, 12.89it/s]\n",
+ "pytorch-cpu-1.1.0 | 53.6 MB | : 100% 1.0/1 [00:11<00:00, 11.45s/it] \n",
+ "lerc-3.0 | 216 KB | : 100% 1.0/1 [00:00<00:00, 10.80it/s]\n",
+ "botocore-1.23.26 | 5.2 MB | : 100% 1.0/1 [00:02<00:00, 2.18s/it]\n",
+ "tokenizers-0.10.3 | 2.8 MB | : 100% 1.0/1 [00:00<00:00, 1.45it/s]\n",
+ "gettext-0.21.0 | 2.6 MB | : 100% 1.0/1 [00:00<00:00, 3.19it/s]\n",
+ "torchaudio-0.7.0 | 9.8 MB | : 100% 1.0/1 [00:02<00:00, 2.21s/it] \n",
+ "jpeg-9d | 266 KB | : 100% 1.0/1 [00:00<00:00, 11.11it/s]\n",
+ "libtasn1-4.18.0 | 114 KB | : 100% 1.0/1 [00:00<00:00, 15.16it/s]\n",
+ "python-dateutil-2.8. | 240 KB | : 100% 1.0/1 [00:00<00:00, 13.49it/s]\n",
+ "openjpeg-2.4.0 | 444 KB | : 100% 1.0/1 [00:00<00:00, 7.84it/s]\n",
+ "libstdcxx-ng-11.2.0 | 4.2 MB | : 100% 1.0/1 [00:00<00:00, 1.47it/s]\n",
+ "llvm-openmp-12.0.1 | 2.8 MB | : 100% 1.0/1 [00:00<00:00, 1.89it/s]\n",
+ "packaging-21.3 | 36 KB | : 100% 1.0/1 [00:00<00:00, 17.86it/s]\n",
+ "click-8.0.3 | 145 KB | : 100% 1.0/1 [00:00<00:00, 13.01it/s]\n",
+ "pillow-8.4.0 | 706 KB | : 100% 1.0/1 [00:00<00:00, 5.16it/s]\n",
+ "sqlite-3.37.0 | 999 KB | : 100% 1.0/1 [00:00<00:00, 1.88it/s]\n",
+ "tk-8.6.11 | 3.3 MB | : 100% 1.0/1 [00:00<00:00, 1.59it/s]\n",
+ "aom-3.2.0 | 5.4 MB | : 100% 1.0/1 [00:00<00:00, 1.09it/s] \n",
+ "giflib-5.2.1 | 80 KB | : 100% 1.0/1 [00:00<00:00, 17.39it/s]\n",
+ "torchvision-0.10.1 | 6.7 MB | : 100% 1.0/1 [00:00<00:00, 1.16it/s]\n",
+ "libidn2-2.3.2 | 98 KB | : 100% 1.0/1 [00:00<00:00, 19.48it/s]\n",
+ "mkl_fft-1.3.1 | 206 KB | : 100% 1.0/1 [00:00<00:00, 12.22it/s]\n",
+ "tbb-2021.5.0 | 2.0 MB | : 100% 1.0/1 [00:00<00:00, 2.92it/s]\n",
+ "libuv-1.42.0 | 1.0 MB | : 100% 1.0/1 [00:00<00:00, 5.01it/s]\n",
+ "mkl-service-2.4.0 | 60 KB | : 100% 1.0/1 [00:00<00:00, 20.79it/s]\n",
+ "threadpoolctl-3.0.0 | 17 KB | : 100% 1.0/1 [00:00<00:00, 22.00it/s]\n",
+ "numpy-base-1.21.2 | 4.8 MB | : 100% 1.0/1 [00:00<00:00, 2.52it/s]\n",
+ "svt-av1-0.8.7 | 2.8 MB | : 100% 1.0/1 [00:00<00:00, 2.17it/s]\n",
+ "scipy-1.7.1 | 16.4 MB | : 100% 1.0/1 [00:00<00:00, 1.28it/s]\n",
+ "libzlib-1.2.11 | 59 KB | : 100% 1.0/1 [00:00<00:00, 24.81it/s]\n",
+ "cudatoolkit-11.0.221 | 622.9 MB | : 100% 1.0/1 [00:45<00:00, 45.59s/it] \n",
+ "libxml2-2.9.12 | 772 KB | : 100% 1.0/1 [00:00<00:00, 4.82it/s]\n",
+ "gmp-6.2.1 | 806 KB | : 100% 1.0/1 [00:00<00:00, 5.85it/s]\n",
+ "charset-normalizer-2 | 34 KB | : 100% 1.0/1 [00:00<00:00, 20.05it/s]\n",
+ "libtiff-4.3.0 | 614 KB | : 100% 1.0/1 [00:00<00:00, 5.97it/s]\n",
+ "libvpx-1.11.0 | 1.1 MB | : 100% 1.0/1 [00:00<00:00, 5.03it/s]\n",
+ "filelock-3.4.2 | 12 KB | : 100% 1.0/1 [00:00<00:00, 17.87it/s]\n",
+ "libunistring-0.9.10 | 1.4 MB | : 100% 1.0/1 [00:00<00:00, 3.76it/s]\n",
+ "jbig-2.1 | 43 KB | : 100% 1.0/1 [00:00<00:00, 20.01it/s]\n",
+ "zlib-1.2.11 | 86 KB | : 100% 1.0/1 [00:00<00:00, 19.96it/s]\n",
+ "x264-1!161.3030 | 2.5 MB | : 100% 1.0/1 [00:00<00:00, 2.35it/s]\n",
+ "importlib_metadata-4 | 4 KB | : 100% 1.0/1 [00:00<00:00, 19.23it/s]\n",
+ "scikit-learn-1.0.2 | 7.8 MB | : 100% 1.0/1 [00:01<00:00, 1.61s/it] \n",
+ "libwebp-1.2.1 | 84 KB | : 100% 1.0/1 [00:00<00:00, 17.17it/s]\n",
+ "s3transfer-0.5.0 | 55 KB | : 100% 1.0/1 [00:00<00:00, 14.56it/s]\n",
+ "gnutls-3.6.15 | 1.0 MB | : 100% 1.0/1 [00:00<00:00, 7.94it/s]\n",
+ "lame-3.100 | 498 KB | : 100% 1.0/1 [00:00<00:00, 8.04it/s]\n",
+ "libpciaccess-0.16 | 37 KB | : 100% 1.0/1 [00:00<00:00, 23.57it/s]\n",
+ "_openmp_mutex-4.5 | 5 KB | : 100% 1.0/1 [00:00<00:00, 26.90it/s]\n",
+ "zstd-1.5.1 | 463 KB | : 100% 1.0/1 [00:00<00:00, 8.12it/s]\n",
+ "quadprog-0.1.11 | 106 KB | : 100% 1.0/1 [00:00<00:00, 10.03it/s]\n",
+ "libgcc-ng-11.2.0 | 887 KB | : 100% 1.0/1 [00:00<00:00, 5.27it/s]\n",
+ "python-3.7.11 | 45.3 MB | : 100% 1.0/1 [00:02<00:00, 2.89s/it]\n",
+ "transformers-4.10.2 | 1.3 MB | : 100% 1.0/1 [00:00<00:00, 1.52it/s]\n",
+ "libgomp-11.2.0 | 427 KB | : 100% 1.0/1 [00:00<00:00, 10.06it/s]\n",
+ "zipp-3.6.0 | 12 KB | : 100% 1.0/1 [00:00<00:00, 23.79it/s]\n",
+ "blas-1.0 | 6 KB | : 100% 1.0/1 [00:00<00:00, 12.83it/s]\n",
+ "ninja-1.10.2 | 2.4 MB | : 100% 1.0/1 [00:00<00:00, 2.30it/s]\n",
+ "mkl-2021.4.0 | 219.1 MB | : 100% 1.0/1 [00:40<00:00, 40.70s/it] \n",
+ "openssl-1.1.1l | 2.1 MB | : 100% 1.0/1 [00:00<00:00, 2.21it/s]\n",
+ "lcms2-2.12 | 443 KB | : 100% 1.0/1 [00:00<00:00, 8.03it/s]\n",
+ "dataclasses-0.8 | 10 KB | : 100% 1.0/1 [00:00<00:00, 16.97it/s]\n",
+ "typing_extensions-4. | 26 KB | : 100% 1.0/1 [00:00<00:00, 22.57it/s]\n",
+ "libcblas-3.9.0 | 12 KB | : 100% 1.0/1 [00:00<00:00, 23.54it/s]\n",
+ "openh264-2.1.1 | 1.5 MB | : 100% 1.0/1 [00:00<00:00, 3.35it/s]\n",
+ "pytorch-1.7.0 | 661.4 MB | : 100% 1.0/1 [01:41<00:00, 101.98s/it] \n",
+ "libarchive-3.5.2 | 1.6 MB | : 100% 1.0/1 [00:00<00:00, 1.83it/s]\n",
+ "pyyaml-6.0 | 187 KB | : 100% 1.0/1 [00:00<00:00, 12.94it/s]\n",
+ "intel-openmp-2021.4. | 4.2 MB | : 100% 1.0/1 [00:00<00:00, 3.81it/s]\n",
+ "importlib-metadata-4 | 32 KB | : 100% 1.0/1 [00:00<00:00, 19.21it/s]\n",
+ "nettle-3.7.3 | 809 KB | : 100% 1.0/1 [00:00<00:00, 8.74it/s]\n",
+ "libprotobuf-3.19.1 | 2.6 MB | : 100% 1.0/1 [00:00<00:00, 1.78it/s]\n",
+ "libgfortran4-7.5.0 | 1.3 MB | : 100% 1.0/1 [00:00<00:00, 3.43it/s]\n",
+ "mkl_random-1.2.2 | 361 KB | : 100% 1.0/1 [00:00<00:00, 10.56it/s]\n",
+ "joblib-1.1.0 | 210 KB | : 100% 1.0/1 [00:00<00:00, 9.03it/s]\n",
+ "freetype-2.11.0 | 618 KB | : 100% 1.0/1 [00:00<00:00, 4.87it/s]\n",
+ "sacremoses-0.0.46 | 466 KB | : 100% 1.0/1 [00:00<00:00, 7.59it/s]\n",
+ "x265-3.5 | 6.7 MB | : 100% 1.0/1 [00:01<00:00, 1.56s/it]\n",
+ "typing-extensions-4. | 8 KB | : 100% 1.0/1 [00:00<00:00, 27.50it/s]\n",
+ "boto3-1.20.26 | 71 KB | : 100% 1.0/1 [00:00<00:00, 14.23it/s]\n",
+ "pytorch-mutex-1.0 | 3 KB | : 100% 1.0/1 [00:00<00:00, 2.80it/s]\n",
+ "libdeflate-1.8 | 67 KB | : 100% 1.0/1 [00:00<00:00, 20.23it/s]\n",
+ "ffmpeg-4.4.1 | 10.0 MB | : 100% 1.0/1 [00:01<00:00, 1.51s/it]\n",
+ "numpy-1.21.2 | 23 KB | : 100% 1.0/1 [00:00<00:00, 12.12it/s]\n",
+ "olefile-0.46 | 32 KB | : 100% 1.0/1 [00:00<00:00, 19.75it/s]\n",
+ "Preparing transaction: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\bdone\n",
+ "Verifying transaction: / \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\bdone\n",
+ "Executing transaction: \\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ By downloading and using the CUDA Toolkit conda packages, you accept the terms and conditions of the CUDA End User License Agreement (EULA): https://docs.nvidia.com/cuda/eula/index.html\n",
+ "\n",
+ "\b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\bdone\n",
+ "#\n",
+ "# To activate this environment, use\n",
+ "#\n",
+ "# $ conda activate base\n",
+ "#\n",
+ "# To deactivate an active environment, use\n",
+ "#\n",
+ "# $ conda deactivate\n",
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "%%shell\n",
+ "eval \"$(conda shell.bash hook)\" # copy conda command to shell\n",
+ "conda activate base\n",
+ "python --version\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "QZ8cYxLKkKKp",
+ "outputId": "d76858a9-3315-43d4-eb19-80a7a3ad6da1"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Python 3.7.11\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!cp data_prep/asc_random ../"
+ ],
+ "metadata": {
+ "id": "9q4EnaNqrN3W"
+ },
+ "execution_count": 12,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!conda install -c pytorch torchvision cudatoolkit=10.1 pytorch"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "g5OM5VemZZrO",
+ "outputId": "6fa7566f-8b26-4f85-8a35-751712af5414"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting package metadata (current_repodata.json): - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\bdone\n",
+ "Solving environment: - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ WARNING conda.core.solve:_add_specs(611): pinned spec cudatoolkit=11.1 conflicts with explicit specs. Overriding pinned spec.\n",
+ "\b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\bdone\n",
+ "\n",
+ "# All requested packages already installed.\n",
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "xq5XI1NGZZcA"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!conda install -c huggingface tokenizers=0.10.1 transformers=4.6.1"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "cH27SpviddOf",
+ "outputId": "7f50f0d4-d5e7-4943-f1e5-63a576d2c230"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting package metadata (current_repodata.json): - \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\bdone\n",
+ "Solving environment: | \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\bdone\n",
+ "\n",
+ "## Package Plan ##\n",
+ "\n",
+ " environment location: /usr/local\n",
+ "\n",
+ " added / updated specs:\n",
+ " - tokenizers=0.10.1\n",
+ " - transformers=4.6.1\n",
+ "\n",
+ "\n",
+ "The following packages will be downloaded:\n",
+ "\n",
+ " package | build\n",
+ " ---------------------------|-----------------\n",
+ " tokenizers-0.10.1 | py37_0 2.8 MB huggingface\n",
+ " transformers-4.6.1 | py_0 2.1 MB huggingface\n",
+ " ------------------------------------------------------------\n",
+ " Total: 4.9 MB\n",
+ "\n",
+ "The following packages will be DOWNGRADED:\n",
+ "\n",
+ " tokenizers 0.10.3-py37_0 --> 0.10.1-py37_0\n",
+ " transformers 4.10.2-py_0 --> 4.6.1-py_0\n",
+ "\n",
+ "\n",
+ "\n",
+ "Downloading and Extracting Packages\n",
+ "transformers-4.6.1 | 2.1 MB | : 100% 1.0/1 [00:00<00:00, 1.20it/s]\n",
+ "tokenizers-0.10.1 | 2.8 MB | : 100% 1.0/1 [00:01<00:00, 1.06s/it]\n",
+ "Preparing transaction: \\ \b\bdone\n",
+ "Verifying transaction: / \b\bdone\n",
+ "Executing transaction: \\ \b\bdone\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!python cuda_python_test.py"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "sSbTkL3zci9f",
+ "outputId": "96e3eb9c-c768-4c87-e193-6da06f1b1794"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "1\n",
+ "Tesla K80\n",
+ "[CUDA available]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!python run.py --bert_model 'bert-base-uncased' --backbone bert_adapter --baseline ctr \t--task asc --eval_batch_size 128 \t--train_batch_size 32 --scenario til_classification --idrandom 0 --use_predefine_args"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6THjeD2XuJwf",
+ "outputId": "5d8c26e1-073e-4457-e084-cb8b17ed996d"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Load data...\n",
+ "random_sep: ['XuSemEval14_laptop', 'XuSemEval14_rest', 'Bing9domains_HitachiRouter', 'Bing9domains_CanonS100', 'Bing9domains_ipod', 'Bing5domains_ApexAD2600Progressive', 'Bing9domains_Nokia6600', 'Bing9domains_DiaperChamp', 'Bing9domains_CanonPowerShotSD500', 'Bing5domains_CreativeLabsNomadJukeboxZenXtra40GB', 'Bing9domains_Norton', 'Bing9domains_MicroMP3', 'Bing3domains_Speaker', 'Bing9domains_LinksysRouter', 'Bing5domains_Nokia6610', 'Bing5domains_NikonCoolpix4300', 'Bing5domains_CanonG3', 'Bing3domains_Computer', 'Bing3domains_Router']\n",
+ "domains: ['XuSemEval14_rest', 'XuSemEval14_laptop', 'Bing3domains_Speaker', 'Bing3domains_Router', 'Bing3domains_Computer', 'Bing5domains_Nokia6610', 'Bing5domains_NikonCoolpix4300', 'Bing5domains_CreativeLabsNomadJukeboxZenXtra40GB', 'Bing5domains_CanonG3', 'Bing5domains_ApexAD2600Progressive', 'Bing9domains_CanonPowerShotSD500', 'Bing9domains_CanonS100', 'Bing9domains_DiaperChamp', 'Bing9domains_HitachiRouter', 'Bing9domains_ipod', 'Bing9domains_LinksysRouter', 'Bing9domains_MicroMP3', 'Bing9domains_Nokia6600', 'Bing9domains_Norton']\n",
+ "random_sep: 19\n",
+ "domains: 19\n",
+ "dataset: ./dat/absa/XuSemEval/asc/14/laptop\n",
+ "Downloading: 100% 232k/232k [00:00<00:00, 2.01MB/s]\n",
+ "Downloading: 100% 28.0/28.0 [00:00<00:00, 22.5kB/s]\n",
+ "Downloading: 100% 466k/466k [00:00<00:00, 3.21MB/s]\n",
+ "12/28/2021 23:28:19 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:19 - INFO - preparation - Num examples = 2163\n",
+ "12/28/2021 23:28:19 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:19 - INFO - preparation - Num steps = 680\n",
+ "12/28/2021 23:28:19 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:19 - INFO - preparation - Num orig examples = 150\n",
+ "12/28/2021 23:28:19 - INFO - preparation - Num split examples = 150\n",
+ "12/28/2021 23:28:19 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:21 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:21 - INFO - preparation - Num examples = 638\n",
+ "12/28/2021 23:28:21 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/XuSemEval/asc/14/rest\n",
+ "12/28/2021 23:28:24 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:24 - INFO - preparation - Num examples = 3452\n",
+ "12/28/2021 23:28:24 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:24 - INFO - preparation - Num steps = 1080\n",
+ "12/28/2021 23:28:24 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:24 - INFO - preparation - Num orig examples = 150\n",
+ "12/28/2021 23:28:24 - INFO - preparation - Num split examples = 150\n",
+ "12/28/2021 23:28:24 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:26 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:26 - INFO - preparation - Num examples = 1120\n",
+ "12/28/2021 23:28:26 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/HitachiRouter\n",
+ "12/28/2021 23:28:27 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:27 - INFO - preparation - Num examples = 212\n",
+ "12/28/2021 23:28:27 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:27 - INFO - preparation - Num steps = 70\n",
+ "12/28/2021 23:28:27 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:27 - INFO - preparation - Num orig examples = 26\n",
+ "12/28/2021 23:28:27 - INFO - preparation - Num split examples = 26\n",
+ "12/28/2021 23:28:27 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:28 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:28 - INFO - preparation - Num examples = 27\n",
+ "12/28/2021 23:28:28 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/CanonS100\n",
+ "12/28/2021 23:28:29 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:29 - INFO - preparation - Num examples = 175\n",
+ "12/28/2021 23:28:29 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:29 - INFO - preparation - Num steps = 60\n",
+ "12/28/2021 23:28:29 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:29 - INFO - preparation - Num orig examples = 22\n",
+ "12/28/2021 23:28:29 - INFO - preparation - Num split examples = 22\n",
+ "12/28/2021 23:28:29 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:30 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:30 - INFO - preparation - Num examples = 22\n",
+ "12/28/2021 23:28:30 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/ipod\n",
+ "12/28/2021 23:28:31 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:31 - INFO - preparation - Num examples = 153\n",
+ "12/28/2021 23:28:31 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:31 - INFO - preparation - Num steps = 50\n",
+ "12/28/2021 23:28:31 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:31 - INFO - preparation - Num orig examples = 19\n",
+ "12/28/2021 23:28:31 - INFO - preparation - Num split examples = 19\n",
+ "12/28/2021 23:28:31 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:32 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:32 - INFO - preparation - Num examples = 20\n",
+ "12/28/2021 23:28:32 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing5Domains/asc/ApexAD2600Progressive\n",
+ "12/28/2021 23:28:33 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:33 - INFO - preparation - Num examples = 343\n",
+ "12/28/2021 23:28:33 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:33 - INFO - preparation - Num steps = 110\n",
+ "12/28/2021 23:28:33 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:33 - INFO - preparation - Num orig examples = 43\n",
+ "12/28/2021 23:28:33 - INFO - preparation - Num split examples = 43\n",
+ "12/28/2021 23:28:33 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:34 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:34 - INFO - preparation - Num examples = 43\n",
+ "12/28/2021 23:28:34 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/Nokia6600\n",
+ "12/28/2021 23:28:36 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:36 - INFO - preparation - Num examples = 362\n",
+ "12/28/2021 23:28:36 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:36 - INFO - preparation - Num steps = 120\n",
+ "12/28/2021 23:28:36 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:36 - INFO - preparation - Num orig examples = 45\n",
+ "12/28/2021 23:28:36 - INFO - preparation - Num split examples = 45\n",
+ "12/28/2021 23:28:36 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:37 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:37 - INFO - preparation - Num examples = 46\n",
+ "12/28/2021 23:28:37 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/DiaperChamp\n",
+ "12/28/2021 23:28:38 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:38 - INFO - preparation - Num examples = 191\n",
+ "12/28/2021 23:28:38 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:38 - INFO - preparation - Num steps = 60\n",
+ "12/28/2021 23:28:38 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:38 - INFO - preparation - Num orig examples = 24\n",
+ "12/28/2021 23:28:38 - INFO - preparation - Num split examples = 24\n",
+ "12/28/2021 23:28:38 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:39 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:39 - INFO - preparation - Num examples = 24\n",
+ "12/28/2021 23:28:39 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/CanonPowerShotSD500\n",
+ "12/28/2021 23:28:40 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:40 - INFO - preparation - Num examples = 118\n",
+ "12/28/2021 23:28:40 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:40 - INFO - preparation - Num steps = 40\n",
+ "12/28/2021 23:28:40 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:40 - INFO - preparation - Num orig examples = 15\n",
+ "12/28/2021 23:28:40 - INFO - preparation - Num split examples = 15\n",
+ "12/28/2021 23:28:40 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:41 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:41 - INFO - preparation - Num examples = 15\n",
+ "12/28/2021 23:28:41 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing5Domains/asc/CreativeLabsNomadJukeboxZenXtra40GB\n",
+ "12/28/2021 23:28:42 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:42 - INFO - preparation - Num examples = 677\n",
+ "12/28/2021 23:28:42 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:42 - INFO - preparation - Num steps = 220\n",
+ "12/28/2021 23:28:43 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:43 - INFO - preparation - Num orig examples = 85\n",
+ "12/28/2021 23:28:43 - INFO - preparation - Num split examples = 85\n",
+ "12/28/2021 23:28:43 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:43 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:43 - INFO - preparation - Num examples = 85\n",
+ "12/28/2021 23:28:43 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/Norton\n",
+ "12/28/2021 23:28:44 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:44 - INFO - preparation - Num examples = 194\n",
+ "12/28/2021 23:28:44 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:44 - INFO - preparation - Num steps = 70\n",
+ "12/28/2021 23:28:45 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:45 - INFO - preparation - Num orig examples = 24\n",
+ "12/28/2021 23:28:45 - INFO - preparation - Num split examples = 24\n",
+ "12/28/2021 23:28:45 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:46 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:46 - INFO - preparation - Num examples = 25\n",
+ "12/28/2021 23:28:46 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/MicroMP3\n",
+ "12/28/2021 23:28:47 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:47 - INFO - preparation - Num examples = 484\n",
+ "12/28/2021 23:28:47 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:47 - INFO - preparation - Num steps = 160\n",
+ "12/28/2021 23:28:47 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:47 - INFO - preparation - Num orig examples = 61\n",
+ "12/28/2021 23:28:47 - INFO - preparation - Num split examples = 61\n",
+ "12/28/2021 23:28:47 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:48 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:48 - INFO - preparation - Num examples = 61\n",
+ "12/28/2021 23:28:48 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing3Domains/asc/Speaker\n",
+ "12/28/2021 23:28:49 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:49 - INFO - preparation - Num examples = 352\n",
+ "12/28/2021 23:28:49 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:49 - INFO - preparation - Num steps = 110\n",
+ "12/28/2021 23:28:49 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:49 - INFO - preparation - Num orig examples = 44\n",
+ "12/28/2021 23:28:49 - INFO - preparation - Num split examples = 44\n",
+ "12/28/2021 23:28:49 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:50 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:50 - INFO - preparation - Num examples = 44\n",
+ "12/28/2021 23:28:50 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing9Domains/asc/LinksysRouter\n",
+ "12/28/2021 23:28:51 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:51 - INFO - preparation - Num examples = 176\n",
+ "12/28/2021 23:28:51 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:51 - INFO - preparation - Num steps = 60\n",
+ "12/28/2021 23:28:51 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:51 - INFO - preparation - Num orig examples = 22\n",
+ "12/28/2021 23:28:51 - INFO - preparation - Num split examples = 22\n",
+ "12/28/2021 23:28:51 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:52 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:52 - INFO - preparation - Num examples = 23\n",
+ "12/28/2021 23:28:52 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing5Domains/asc/Nokia6610\n",
+ "12/28/2021 23:28:53 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:53 - INFO - preparation - Num examples = 271\n",
+ "12/28/2021 23:28:53 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:53 - INFO - preparation - Num steps = 90\n",
+ "12/28/2021 23:28:54 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:54 - INFO - preparation - Num orig examples = 34\n",
+ "12/28/2021 23:28:54 - INFO - preparation - Num split examples = 34\n",
+ "12/28/2021 23:28:54 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:55 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:55 - INFO - preparation - Num examples = 34\n",
+ "12/28/2021 23:28:55 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing5Domains/asc/NikonCoolpix4300\n",
+ "12/28/2021 23:28:56 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:56 - INFO - preparation - Num examples = 162\n",
+ "12/28/2021 23:28:56 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:56 - INFO - preparation - Num steps = 60\n",
+ "12/28/2021 23:28:56 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:56 - INFO - preparation - Num orig examples = 20\n",
+ "12/28/2021 23:28:56 - INFO - preparation - Num split examples = 20\n",
+ "12/28/2021 23:28:56 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:57 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:57 - INFO - preparation - Num examples = 21\n",
+ "12/28/2021 23:28:57 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing5Domains/asc/CanonG3\n",
+ "12/28/2021 23:28:58 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:28:58 - INFO - preparation - Num examples = 228\n",
+ "12/28/2021 23:28:58 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:58 - INFO - preparation - Num steps = 80\n",
+ "12/28/2021 23:28:58 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:28:58 - INFO - preparation - Num orig examples = 29\n",
+ "12/28/2021 23:28:58 - INFO - preparation - Num split examples = 29\n",
+ "12/28/2021 23:28:58 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:28:59 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:28:59 - INFO - preparation - Num examples = 29\n",
+ "12/28/2021 23:28:59 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing3Domains/asc/Computer\n",
+ "12/28/2021 23:29:00 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:29:00 - INFO - preparation - Num examples = 283\n",
+ "12/28/2021 23:29:00 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:29:00 - INFO - preparation - Num steps = 90\n",
+ "12/28/2021 23:29:00 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:29:00 - INFO - preparation - Num orig examples = 35\n",
+ "12/28/2021 23:29:00 - INFO - preparation - Num split examples = 35\n",
+ "12/28/2021 23:29:00 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:29:01 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:29:01 - INFO - preparation - Num examples = 36\n",
+ "12/28/2021 23:29:01 - INFO - preparation - Batch size = 128\n",
+ "dataset: ./dat/absa/Bing3Domains/asc/Router\n",
+ "12/28/2021 23:29:02 - INFO - preparation - ***** Running training *****\n",
+ "12/28/2021 23:29:02 - INFO - preparation - Num examples = 245\n",
+ "12/28/2021 23:29:02 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:29:02 - INFO - preparation - Num steps = 80\n",
+ "12/28/2021 23:29:02 - INFO - preparation - ***** Running validations *****\n",
+ "12/28/2021 23:29:02 - INFO - preparation - Num orig examples = 31\n",
+ "12/28/2021 23:29:02 - INFO - preparation - Num split examples = 31\n",
+ "12/28/2021 23:29:02 - INFO - preparation - Batch size = 32\n",
+ "12/28/2021 23:29:03 - INFO - preparation - ***** Running evaluation *****\n",
+ "12/28/2021 23:29:03 - INFO - preparation - Num examples = 31\n",
+ "12/28/2021 23:29:03 - INFO - preparation - Batch size = 128\n",
+ "\n",
+ "Task info = [(0, 3), (1, 3), (2, 2), (3, 2), (4, 2), (5, 2), (6, 2), (7, 2), (8, 2), (9, 2), (10, 2), (11, 2), (12, 2), (13, 2), (14, 2), (15, 2), (16, 2), (17, 2), (18, 2)]\n",
+ "Inits...\n",
+ "Downloading: 100% 570/570 [00:00<00:00, 525kB/s]\n",
+ "Downloading: 100% 440M/440M [00:29<00:00, 14.9MB/s]\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to attention\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "apply to output\n",
+ "BertAdapter\n",
+ "BertAdapterMask\n",
+ "apply_one_layer_shared \n",
+ "CapsuleLayer\n",
+ "CapsNet\n",
+ "BertAdapterCapsuleMaskImp\n",
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']\n",
+ "- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 
'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 
'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 
'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 
'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 
'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 
'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc1.weight', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 
'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 
'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 
'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 
'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 
'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 
'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 
'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc1.weight', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc1.bias', 
'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 
'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.efc2.weight', 
'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 
'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.efc2.weight', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 
'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 
'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 
'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 
'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 
'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 
'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 
'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 
'bert.encoder.layer.3.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 
'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 
'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 
'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 
'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 
'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 
'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.0.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 
'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 
'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.bias', 
'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 
'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.efc2.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.6.weight', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.fc1.weight', 'bert.encoder.layer.8.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 
'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.6.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.bias', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.9.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.tsv_capsules.elarger.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 
'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.0.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.9.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.weight', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.4.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.elarger.weight', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.11.weight', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.bias', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.weight', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.12.weight', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.route_weights', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.7.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.1.bias', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.1.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.weight', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.10.weight', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.transfer_capsules.larger.weight', 'bert.encoder.layer.5.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.2.weight', 
'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.bias', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.13.bias', 'bert.encoder.layer.3.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.3.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.0.bias', 'bert.encoder.layer.4.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.14.weight', 'bert.encoder.layer.4.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.17.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs3.1.weight', 'bert.encoder.layer.6.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.5.output.adapter_capsule_mask.efc1.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.5.weight', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.fc2.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.2.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.bias', 'bert.encoder.layer.8.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs4.bias', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.10.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.0.bias', 'bert.encoder.layer.9.output.adapter_capsule_mask.fc1.bias', 'bert.encoder.layer.2.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 
'bert.encoder.layer.5.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.2.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.15.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.11.output.adapter_capsule_mask.capsule_net.transfer_capsules.convs2.2.weight', 'bert.encoder.layer.7.attention.output.adapter_capsule_mask.fc2.bias', 'bert.encoder.layer.9.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.3.attention.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.8.bias', 'bert.encoder.layer.0.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.16.bias', 'bert.encoder.layer.1.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_sim.bias', 'bert.encoder.layer.11.attention.output.adapter_capsule_mask.capsule_net.transfer_capsules.fc_cur.weight', 'bert.encoder.layer.1.output.adapter_capsule_mask.capsule_net.tsv_capsules.larger.bias', 'bert.encoder.layer.10.output.adapter_capsule_mask.capsule_net.semantic_capsules.fc1.18.weight', 'bert.encoder.layer.7.output.adapter_capsule_mask.capsule_net.tsv_capsules.route_weights', 'bert.encoder.layer.7.output.adapter_capsule_mask.efc2.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "BERT ADAPTER CAPSULE MASK\n",
+ "12/28/2021 23:29:43 - INFO - preparation - device: cuda n_gpu: 1\n",
+ "DIL BERT ADAPTER MASK BASE\n",
+ "BERT ADAPTER CAPSULE MASK NCL\n",
+ "12/28/2021 23:29:43 - INFO - preparation - ****************************************************************************************************\n",
+ "12/28/2021 23:29:43 - INFO - preparation - Task 0 (./dat/absa/XuSemEval/asc/14/laptop)\n",
+ "12/28/2021 23:29:43 - INFO - preparation - ****************************************************************************************************\n",
+ "12/28/2021 23:29:43 - INFO - preparation - Start Training and Set the clock\n",
+ "train\n",
+ "Train Iter (loss=X.XXX): 0% 0/68 [00:57, ?it/s]\n",
+ "Traceback (most recent call last):\n",
+ " File \"run.py\", line 186, in \n",
+ " appr.train(task,train_dataloader,valid_dataloader,num_train_steps,train,valid)\n",
+ " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/approaches/classification/bert_adapter_capsule_mask.py\", line 60, in train\n",
+ " global_step=self.train_epoch(t,train,iter_bar, optimizer,t_total,global_step)\n",
+ " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/approaches/classification/bert_adapter_capsule_mask.py\", line 118, in train_epoch\n",
+ " output_dict = self.model.forward(t,input_ids, segment_ids, input_mask,targets,s=s)\n",
+ " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/networks/classification/bert_adapter_capsule_mask.py\", line 68, in forward\n",
+ " targets=None,t=t,s=s)\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n",
+ " result = self.forward(*input, **kwargs)\n",
+ " File \"./networks/base/my_transformers.py\", line 796, in forward\n",
+ " output_hidden_states,return_dict,t=t,s=s,x_list=x_list,h_list=h_list)\n",
+ " File \"./networks/base/my_transformers.py\", line 853, in compute_encoder_outputs\n",
+ " return_dict=return_dict, t=t,s=s\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n",
+ " result = self.forward(*input, **kwargs)\n",
+ " File \"./networks/base/my_transformers.py\", line 664, in forward\n",
+ " t=t,s=s,x_list=x_list,h_list=h_list\n",
+ " File \"./networks/base/my_transformers.py\", line 601, in compute_layer_outputs\n",
+ " t=t,s=s\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n",
+ " result = self.forward(*input, **kwargs)\n",
+ " File \"./networks/base/my_transformers.py\", line 474, in forward\n",
+ " t=t,s=s,\n",
+ " File \"./networks/base/my_transformers.py\", line 963, in apply_chunking_to_forward\n",
+ " t=t,s=s)\n",
+ " File \"./networks/base/my_transformers.py\", line 521, in feed_forward_chunk\n",
+ " t=t,s=s,)\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n",
+ " result = self.forward(*input, **kwargs)\n",
+ " File \"./networks/base/my_transformers.py\", line 361, in forward\n",
+ " output_dict = self.adapter_capsule_mask(hidden_states,t,s)\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n",
+ " result = self.forward(*input, **kwargs)\n",
+ " File \"/content/gdrive/My Drive/PyContinual-main/PyContinual-main/src/networks/base/adapters.py\", line 118, in forward\n",
+ " h=self.activation(self.fc1(h))\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 727, in _call_impl\n",
+ " result = self.forward(*input, **kwargs)\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/modules/linear.py\", line 93, in forward\n",
+ " return F.linear(input, self.weight, self.bias)\n",
+ " File \"/usr/local/lib/python3.7/site-packages/torch/nn/functional.py\", line 1692, in linear\n",
+ " output = input.matmul(weight.t())\n",
+ "RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 11.17 GiB total capacity; 10.45 GiB already allocated; 26.81 MiB free; 10.82 GiB reserved in total by PyTorch)\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..c02a1af8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+et-xmlfile==1.1.0
+numpy==1.24.2
+openpyxl==3.1.2
+pandas==1.5.3
+python-dateutil==2.8.2
+pytz==2022.7.1
+six==1.16.0
diff --git a/src/_onlyrequirements.txt b/src/_onlyrequirements.txt
new file mode 100644
index 00000000..63108059
--- /dev/null
+++ b/src/_onlyrequirements.txt
@@ -0,0 +1,107 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+#_libgcc_mutex=0.1=main
+#_openmp_mutex=4.5=1_gnu
+blas
+boto3
+botocore
+brotlipy
+bzip2
+ca-certificates
+certifi
+cffi
+charset-normalizer
+click
+cryptography
+cudatoolkit
+ffmpeg
+filelock
+freetype
+giflib
+gmp
+gnutls
+idna
+importlib-metadata
+intel-openmp
+jmespath
+joblib
+jpeg
+lame
+lcms2
+ld_impl_linux-64
+libffi
+libgcc-ng
+libgfortran-ng
+libgfortran4
+libgomp
+libiconv
+libidn2
+libpng
+libstdcxx-ng
+libtasn1
+libtiff
+libunistring
+libuv
+libwebp
+libwebp-base
+lz4-c
+mkl
+mkl-service
+mkl_fft
+mkl_random
+ncurses
+nettle
+ninja
+numpy
+numpy-base
+olefile
+openh264
+openssl
+packaging
+pillow
+pip
+pycparser
+pyopenssl
+pyparsing
+pysocks
+python==3.7.11
+python-dateutil==2.8.2
+pytorch==1.7.0
+pytorch-mutex==1.0
+pyyaml
+quadprog
+readline
+regex
+requests
+s3transfer
+sacremoses
+scikit-learn
+scipy
+setuptools
+six
+sqlite
+threadpoolctl
+tk
+tokenizers
+torchaudio
+torchvision
+tqdm
+transformers
+typing_extensions
+urllib3
+wheel
+xz
+zipp
+zlib
+zstd
+transformers==4.10.2
+zipp==3.6.0
+tokenizers==0.10.3
+pyparsing==2.4.7
+packaging==21.2
+quadprog==0.1.10
+pyyaml==6.0
+filelock==3.3.2
+importlib-metadata==4.8.1
+huggingface-hub==0.1.1
\ No newline at end of file
diff --git a/src/_requirements.txt b/src/_requirements.txt
new file mode 100644
index 00000000..d90f4e52
--- /dev/null
+++ b/src/_requirements.txt
@@ -0,0 +1,99 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+#_libgcc_mutex=0.1=main
+#_openmp_mutex=4.5=1_gnu
+blas=1.0=mkl
+boto3=1.18.21=pyhd3eb1b0_0
+
+botocore=1.21.41=pyhd3eb1b0_1
+brotlipy=0.7.0=py37h27cfd23_1003
+bzip2=1.0.8=h7b6447c_0
+ca-certificates=2021.10.26=h06a4308_2
+certifi=2021.10.8=py37h06a4308_0
+cffi=1.14.6=py37h400218f_0
+charset-normalizer=2.0.4=pyhd3eb1b0_0
+click=8.0.3=pyhd3eb1b0_0
+cryptography=35.0.0=py37hd23ed53_0
+cudatoolkit=11.0.221=h6bb024c_0
+ffmpeg=4.3=hf484d3e_0
+filelock=3.3.2=pypi_0
+freetype=2.11.0=h70c0345_0
+giflib=5.2.1=h7b6447c_0
+gmp=6.2.1=h2531618_2
+gnutls=3.6.15=he1e5248_0
+huggingface-hub=0.1.1=pypi_0
+idna=3.2=pyhd3eb1b0_0
+importlib-metadata=4.8.1=pypi_0
+intel-openmp=2021.4.0=h06a4308_3561
+jmespath=0.10.0=pyhd3eb1b0_0
+joblib=1.1.0=pyhd3eb1b0_0
+jpeg=9d=h7f8727e_0
+lame=3.100=h7b6447c_0
+lcms2=2.12=h3be6417_0
+ld_impl_linux-64=2.35.1=h7274673_9
+libffi=3.3=he6710b0_2
+libgcc-ng=9.3.0=h5101ec6_17
+libgfortran-ng=7.5.0=ha8ba4b0_17
+libgfortran4=7.5.0=ha8ba4b0_17
+libgomp=9.3.0=h5101ec6_17
+libiconv=1.15=h63c8f33_5
+libidn2=2.3.2=h7f8727e_0
+libpng=1.6.37=hbc83047_0
+libstdcxx-ng=9.3.0=hd4cf53a_17
+libtasn1=4.16.0=h27cfd23_0
+libtiff=4.2.0=h85742a9_0
+libunistring=0.9.10=h27cfd23_0
+libuv=1.40.0=h7b6447c_0
+libwebp=1.2.0=h89dd481_0
+libwebp-base=1.2.0=h27cfd23_0
+lz4-c=1.9.3=h295c915_1
+mkl=2021.4.0=h06a4308_640
+mkl-service=2.4.0=py37h7f8727e_0
+mkl_fft=1.3.1=py37hd3c417c_0
+mkl_random=1.2.2=py37h51133e4_0
+ncurses=6.3=heee7806_1
+nettle=3.7.3=hbbd107a_1
+ninja=1.10.2=hff7bd54_1
+numpy=1.21.2=py37h20f2e39_0
+numpy-base=1.21.2=py37h79a1101_0
+olefile=0.46=py37_0
+openh264=2.1.0=hd408876_0
+openssl=1.1.1l=h7f8727e_0
+packaging=21.2=pypi_0
+pillow=8.4.0=py37h5aabda8_0
+pip=21.0.1=py37h06a4308_0
+pycparser=2.20=py_2
+pyopenssl=21.0.0=pyhd3eb1b0_1
+pyparsing=2.4.7=pypi_0
+pysocks=1.7.1=py37_1
+python=3.7.11=h12debd9_0
+python-dateutil=2.8.2=pyhd3eb1b0_0
+pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0
+pytorch-mutex=1.0=cuda
+pyyaml=6.0=pypi_0
+quadprog=0.1.10=pypi_0
+readline=8.1=h27cfd23_0
+regex=2021.8.3=py37h7f8727e_0
+requests=2.26.0=pyhd3eb1b0_0
+s3transfer=0.5.0=pyhd3eb1b0_0
+sacremoses=0.0.43=pyhd3eb1b0_0
+scikit-learn=1.0.1=py37h51133e4_0
+scipy=1.7.1=py37h292c36d_2
+setuptools=58.0.4=py37h06a4308_0
+six=1.16.0=pyhd3eb1b0_0
+sqlite=3.36.0=hc218d9a_0
+threadpoolctl=2.2.0=pyh0d69192_0
+tk=8.6.11=h1ccaba5_0
+tokenizers=0.10.3=pypi_0
+torchaudio=0.7.0=py37
+torchvision=0.8.1=py37_cu110
+tqdm=4.62.3=pyhd3eb1b0_1
+transformers=4.10.2=pypi_0
+typing_extensions=3.10.0.2=pyh06a4308_0
+urllib3=1.26.7=pyhd3eb1b0_0
+wheel=0.37.0=pyhd3eb1b0_1
+xz=5.2.5=h7b6447c_0
+zipp=3.6.0=pypi_0
+zlib=1.2.11=h7b6447c_3
+zstd=1.4.9=haebb681_0
diff --git a/src/_testrequirements.txt b/src/_testrequirements.txt
new file mode 100644
index 00000000..7757e80d
--- /dev/null
+++ b/src/_testrequirements.txt
@@ -0,0 +1,97 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+#_libgcc_mutex==0.1
+#_openmp_mutex==4.5
+blas
+boto3
+botocore
+brotlipy
+bzip2
+ca-certificates
+certifi
+cffi
+charset-normalizer
+click
+cryptography
+cudatoolkit
+ffmpeg
+filelock
+freetype
+giflib
+gmp
+gnutls
+idna
+importlib-metadata
+intel-openmp
+jmespath
+joblib
+jpeg
+lame
+lcms2
+ld_impl_linux-64
+libffi
+libgcc-ng
+libgfortran-ng
+libgfortran4
+libgomp
+libiconv
+libidn2
+libpng
+libstdcxx-ng
+libtasn1
+libtiff
+libunistring
+libuv
+libwebp
+libwebp-base
+lz4-c
+mkl
+mkl-service
+mkl_fft
+mkl_random
+ncurses
+nettle
+ninja
+numpy
+numpy-base
+olefile
+openh264
+openssl
+packaging
+pillow
+pip
+pycparser
+pyopenssl
+pyparsing
+pysocks
+python==3.7.11
+python-dateutil==2.8.2
+pytorch=1.7.0=py3.7_cuda11.0.221_cudnn8.0.3_0
+pytorch-mutex=1.0=cuda
+pyyaml
+quadprog
+readline
+regex
+requests
+s3transfer
+sacremoses
+scikit-learn
+scipy
+setuptools
+six
+sqlite
+threadpoolctl
+tk
+tokenizers
+torchaudio
+torchvision
+tqdm
+transformers=4.10.2
+typing_extensions
+urllib3
+wheel
+xz
+zipp
+zlib
+zstd
\ No newline at end of file
diff --git a/src/piprequirements.txt b/src/piprequirements.txt
new file mode 100644
index 00000000..e8897635
--- /dev/null
+++ b/src/piprequirements.txt
@@ -0,0 +1,13 @@
+# This file may be used to create an environment using:
+# $ pip install -r piprequirements.txt
+# platform: linux-64
+transformers==4.10.2
+zipp==3.6.0
+tokenizers==0.10.3
+pyparsing==2.4.7
+packaging==21.2
+quadprog==0.1.10
+pyyaml==6.0
+filelock==3.3.2
+importlib-metadata==4.8.1
+huggingface-hub==0.1.1
\ No newline at end of file
diff --git a/src/serialization/README.MD b/src/serialization/README.MD
new file mode 100644
index 00000000..fe280bf7
--- /dev/null
+++ b/src/serialization/README.MD
@@ -0,0 +1,5 @@
+# INSTRUCCIONES
+1. Copiar los 2 archivos Excel en esta carpeta
+2. Tener instalado Python 3.11
+3. Instalar las dependencias necesarias, que están al inicio del repositorio en el archivo requirements.txt
+4. Ejecutar los scripts
diff --git a/src/serialization/classify_to_json.py b/src/serialization/classify_to_json.py
new file mode 100644
index 00000000..fe0c702b
--- /dev/null
+++ b/src/serialization/classify_to_json.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+"""
+Split the 2023 Rest-Mex training set into one JSON file per venue type (written for Python 3.11, as it's faster than previous versions).
+"""
+# Read the full training dataset from Excel (pandas needs openpyxl for .xlsx)
+df = pd.read_excel("Rest_Mex_Sentiment_Analysis_2023_Train.xlsx")
+
+# Filter on the "Type" column and export each subset as index-oriented JSON, keeping non-ASCII text.
+with open("classified\\hotel.json", "w", encoding='utf-8') as file:  # NOTE(review): backslash path assumes Windows and an existing "classified" folder -- confirm
+ file.write(df[df["Type"] == "Hotel"].to_json(force_ascii=False, orient='index'))
+with open("classified\\restaurant.json", "w", encoding='utf-8') as file:
+ file.write(df[df["Type"] == "Restaurant"].to_json(force_ascii=False, orient='index'))
+with open("classified\\attractive.json", "w", encoding='utf-8') as file:
+ file.write(df[df["Type"] == "Attractive"].to_json(force_ascii=False, orient='index'))
diff --git a/src/serialization/find_diferents.py b/src/serialization/find_diferents.py
new file mode 100644
index 00000000..b8c998a2
--- /dev/null
+++ b/src/serialization/find_diferents.py
@@ -0,0 +1,10 @@
+import pandas as pd
+
+df2022 = pd.read_excel('Track_Train.xlsx')  # 2022 training set
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')  # 2023 training set
+
+df = pd.concat([df2023, df2022]).drop_duplicates(keep=False)  # keep=False drops BOTH copies of any duplicated row -> rows present in only one dataset (requires exact full-row equality)
+
+df = df[~df.astype(str).apply(lambda x: x.str.contains('Attractive')).any(axis=1)]  # drop every row where ANY cell (stringified) contains 'Attractive'
+
+df.to_excel('dataset_filtrado.xlsx', index=False)  # export the filtered ("filtrado") dataset
\ No newline at end of file
diff --git a/src/serialization/find_equals.py b/src/serialization/find_equals.py
new file mode 100644
index 00000000..7964fe8a
--- /dev/null
+++ b/src/serialization/find_equals.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+# Cargando los datasets
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+
+# Filtrando hasta quedarse con las filas unicas
+reviews_ds22 = df2022['Review'].unique().tolist()
+reviews_ds23 = df2023['Review'].unique().tolist()
+
+# Esta query permite saber si el contenido de la columna review esta en el dataset
+# con el q se le esta comparando
+rows_only_in_ds22_df = df2023[~df2023['Review'].isin(reviews_ds22)]
+rows_only_in_ds23_df = df2022[~df2022['Review'].isin(reviews_ds23)]
+
+# Exportando los datasets resultantes
+rows_only_in_ds22_df.to_excel('only_2022.xlsx', index=False)
+rows_only_in_ds23_df.to_excel('only_2023.xlsx', index=False)
\ No newline at end of file
diff --git a/src/serialization/find_intersection.py b/src/serialization/find_intersection.py
new file mode 100644
index 00000000..625328c0
--- /dev/null
+++ b/src/serialization/find_intersection.py
@@ -0,0 +1,25 @@
+import pandas as pd
+import json  # NOTE(review): unused in this script
+
+df2022 = pd.read_excel('Track_Train.xlsx')
+df2023 = pd.read_excel('Rest_Mex_Sentiment_Analysis_2023_Train.xlsx')
+
+# filas2022, columnas2022 = df2022.shape
+# filas2023, columnas2023 = df2023.shape
+
+# print('Dataset de 2022', filas2022, 'filas.')
+# print('Dataset de 2023', filas2023, 'filas.')
+
+
+# Find the intersection: inner join on the 'Review' text
+interseccion1 = pd.merge( df2023,df2022, on='Review')  # NOTE(review): repeated Review values multiply rows in a merge -- confirm intended
+interseccion2 = pd.merge( df2022,df2023, on='Review')  # same matched pairs as above, columns in the opposite order
+
+# Saving...
+interseccion1.to_excel('interseccion2023-2022.xlsx', index=False)
+interseccion2.to_excel('interseccion2022-2023.xlsx', index=False)
+
+
+
+
+
diff --git a/src/serialization/model_export_json.py b/src/serialization/model_export_json.py
new file mode 100644
index 00000000..fc87fca5
--- /dev/null
+++ b/src/serialization/model_export_json.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+"""
+Export the 2023 Rest-Mex training set to a single JSON file (written for Python 3.11, as it's faster than previous versions).
+"""
+# Read the training dataset from Excel (pandas needs openpyxl for .xlsx)
+df = pd.read_excel("Rest_Mex_Sentiment_Analysis_2023_Train.xlsx")
+
+# Serialize the whole DataFrame as index-oriented JSON, keeping non-ASCII text
+payload = df.to_json(force_ascii=False, orient='index')
+
+# Write the serialized payload to disk as UTF-8
+with open("Train.json", "w", encoding='utf-8') as file:
+ file.write(payload)
+