Small fixes

sgugger · sgugger · commit 69ac1182df9f · 2021-07-28T09:13:51.000-04:00
diff --git a/examples/question_answering.ipynb b/examples/question_answering.ipynb
@@ -959,6 +959,11 @@
    "outputs": [],
    "source": [
     "def prepare_train_features(examples):\n",
+    "    # Some of the questions have lots of whitespace on the left, which is not useful and will make the\n",
+    "    # truncation of the context fail (the tokenized question will take a lot of space). So we remove that\n",
+    "    # left whitespace\n",
+    "    examples[\"question\"] = [q.lstrip() for q in examples[\"question\"]]\n",
+    "\n",
     "    # Tokenize our examples with truncation and padding, but keep the overflows using a stride. This results\n",
     "    # in one example possible giving several features when a context is long, each of those features having a\n",
     "    # context that overlaps a bit the context of the previous feature.\n",
diff --git a/examples/text_classification.ipynb b/examples/text_classification.ipynb
@@ -977,6 +977,7 @@
     "args = TrainingArguments(\n",
     "    \"test-glue\",\n",
     "    evaluation_strategy = \"epoch\",\n",
+    "    save_strategy = \"epoch\",\n",
     "    learning_rate=2e-5,\n",
     "    per_device_train_batch_size=batch_size,\n",
     "    per_device_eval_batch_size=batch_size,\n",