Skip to content

Commit 5886534

Browse files
committed
Add datasets dependency to transformers doc notebooks
1 parent 3a3918a commit 5886534

28 files changed

+572
-144
lines changed

transformers_doc/benchmarks.ipynb

+4-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]
@@ -486,7 +486,9 @@
486486
"available [here](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing).\n",
487487
"\n",
488488
"With the new *benchmark* tools, it is easier than ever to share your benchmark results with the community:\n",
489-
":prefix_link:*here <examples/benchmarking/README.md>*."
489+
"\n",
490+
"- :prefix_link:*PyTorch Benchmarking Results<examples/pytorch/benchmarking/README.md>*.\n",
491+
"- :prefix_link:*TensorFlow Benchmarking Results<examples/tensorflow/benchmarking/README.md>*."
490492
]
491493
}
492494
],

transformers_doc/custom_datasets.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]

transformers_doc/multilingual.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]

transformers_doc/perplexity.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]

transformers_doc/preprocessing.ipynb

+19-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]
@@ -67,6 +67,15 @@
6767
"## Base use"
6868
]
6969
},
70+
{
71+
"cell_type": "markdown",
72+
"metadata": {},
73+
"source": [
74+
"> **RAW HTML:** <iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/Yffk5aydLzg\" title=\"YouTube video player\"\n",
75+
"> frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;\n",
76+
"> picture-in-picture\" allowfullscreen></iframe>"
77+
]
78+
},
7079
{
7180
"cell_type": "markdown",
7281
"metadata": {},
@@ -276,6 +285,15 @@
276285
"## Preprocessing pairs of sentences"
277286
]
278287
},
288+
{
289+
"cell_type": "markdown",
290+
"metadata": {},
291+
"source": [
292+
"> **RAW HTML:** <iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/0u3ioSwev3s\" title=\"YouTube video player\"\n",
293+
"> frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;\n",
294+
"> picture-in-picture\" allowfullscreen></iframe>"
295+
]
296+
},
279297
{
280298
"cell_type": "markdown",
281299
"metadata": {},

transformers_doc/pytorch/benchmarks.ipynb

+4-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]
@@ -317,7 +317,9 @@
317317
"available [here](https://docs.google.com/spreadsheets/d/1sryqufw2D0XlUH4sq3e9Wnxu5EAQkaohzrJbd5HdQ_w/edit?usp=sharing).\n",
318318
"\n",
319319
"With the new *benchmark* tools, it is easier than ever to share your benchmark results with the community:\n",
320-
":prefix_link:*here <examples/benchmarking/README.md>*."
320+
"\n",
321+
"- :prefix_link:*PyTorch Benchmarking Results<examples/pytorch/benchmarking/README.md>*.\n",
322+
"- :prefix_link:*TensorFlow Benchmarking Results<examples/tensorflow/benchmarking/README.md>*."
321323
]
322324
}
323325
],

transformers_doc/pytorch/custom_datasets.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]

transformers_doc/pytorch/multilingual.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]

transformers_doc/pytorch/perplexity.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]

transformers_doc/pytorch/preprocessing.ipynb

+19-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]
@@ -67,6 +67,15 @@
6767
"## Base use"
6868
]
6969
},
70+
{
71+
"cell_type": "markdown",
72+
"metadata": {},
73+
"source": [
74+
"> **RAW HTML:** <iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/Yffk5aydLzg\" title=\"YouTube video player\"\n",
75+
"> frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;\n",
76+
"> picture-in-picture\" allowfullscreen></iframe>"
77+
]
78+
},
7079
{
7180
"cell_type": "markdown",
7281
"metadata": {},
@@ -245,6 +254,15 @@
245254
"## Preprocessing pairs of sentences"
246255
]
247256
},
257+
{
258+
"cell_type": "markdown",
259+
"metadata": {},
260+
"source": [
261+
"> **RAW HTML:** <iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/0u3ioSwev3s\" title=\"YouTube video player\"\n",
262+
"> frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;\n",
263+
"> picture-in-picture\" allowfullscreen></iframe>"
264+
]
265+
},
248266
{
249267
"cell_type": "markdown",
250268
"metadata": {},

transformers_doc/pytorch/quicktour.ipynb

+37-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]
@@ -50,8 +50,23 @@
5050
"cell_type": "markdown",
5151
"metadata": {},
5252
"source": [
53-
"The easiest way to use a pretrained model on a given task is to use `pipeline`. 🤗 Transformers\n",
54-
"provides the following tasks out of the box:\n",
53+
"The easiest way to use a pretrained model on a given task is to use `pipeline`."
54+
]
55+
},
56+
{
57+
"cell_type": "markdown",
58+
"metadata": {},
59+
"source": [
60+
"> **RAW HTML:** <iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/tiZFewofSLM\" title=\"YouTube video player\"\n",
61+
"> frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;\n",
62+
"> picture-in-picture\" allowfullscreen></iframe>"
63+
]
64+
},
65+
{
66+
"cell_type": "markdown",
67+
"metadata": {},
68+
"source": [
69+
"🤗 Transformers provides the following tasks out of the box:\n",
5570
"\n",
5671
"- Sentiment analysis: is a text positive or negative?\n",
5772
"- Text generation (in English): provide a prompt and the model will generate what follows.\n",
@@ -241,8 +256,23 @@
241256
"cell_type": "markdown",
242257
"metadata": {},
243258
"source": [
244-
"Let's now see what happens beneath the hood when using those pipelines. As we saw, the model and tokenizer are created\n",
245-
"using the `from_pretrained` method:"
259+
"Let's now see what happens beneath the hood when using those pipelines."
260+
]
261+
},
262+
{
263+
"cell_type": "markdown",
264+
"metadata": {},
265+
"source": [
266+
"> **RAW HTML:** <iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/AhChOFRegn4\" title=\"YouTube video player\"\n",
267+
"> frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope;\n",
268+
"> picture-in-picture\" allowfullscreen></iframe>"
269+
]
270+
},
271+
{
272+
"cell_type": "markdown",
273+
"metadata": {},
274+
"source": [
275+
"As we saw, the model and tokenizer are created using the `from_pretrained` method:"
246276
]
247277
},
248278
{
@@ -463,8 +493,8 @@
463493
"metadata": {},
464494
"outputs": [],
465495
"source": [
466-
"import torch.nn.functional as F\n",
467-
"pt_predictions = F.softmax(pt_outputs.logits, dim=-1)"
496+
"from torch import nn\n",
497+
"pt_predictions = nn.functional.softmax(pt_outputs.logits, dim=-1)"
468498
]
469499
},
470500
{

transformers_doc/pytorch/task_summary.ipynb

+19-19
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"outputs": [],
88
"source": [
99
"# Transformers installation\n",
10-
"! pip install transformers\n",
10+
"! pip install transformers datasets\n",
1111
"# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
1212
"# ! pip install git+https://github.com/huggingface/transformers.git\n"
1313
]
@@ -105,10 +105,10 @@
105105
],
106106
"source": [
107107
"from transformers import pipeline\n",
108-
"nlp = pipeline(\"sentiment-analysis\")\n",
109-
"result = nlp(\"I hate you\")[0]\n",
108+
"classifier = pipeline(\"sentiment-analysis\")\n",
109+
"result = classifier(\"I hate you\")[0]\n",
110110
"print(f\"label: {result['label']}, with score: {round(result['score'], 4)}\")\n",
111-
"result = nlp(\"I love you\")[0]\n",
111+
"result = classifier(\"I love you\")[0]\n",
112112
"print(f\"label: {result['label']}, with score: {round(result['score'], 4)}\")"
113113
]
114114
},
@@ -201,7 +201,7 @@
201201
"outputs": [],
202202
"source": [
203203
"from transformers import pipeline\n",
204-
"nlp = pipeline(\"question-answering\")\n",
204+
"question_answerer = pipeline(\"question-answering\")\n",
205205
"context = r\"\"\"\n",
206206
"Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a\n",
207207
"question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune\n",
@@ -235,9 +235,9 @@
235235
}
236236
],
237237
"source": [
238-
"result = nlp(question=\"What is extractive question answering?\", context=context)\n",
238+
"result = question_answerer(question=\"What is extractive question answering?\", context=context)\n",
239239
"print(f\"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}\")\n",
240-
"result = nlp(question=\"What is a good example of a question answering dataset?\", context=context)\n",
240+
"result = question_answerer(question=\"What is a good example of a question answering dataset?\", context=context)\n",
241241
"print(f\"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}\")"
242242
]
243243
},
@@ -362,7 +362,7 @@
362362
"outputs": [],
363363
"source": [
364364
"from transformers import pipeline\n",
365-
"nlp = pipeline(\"fill-mask\")"
365+
"unmasker = pipeline(\"fill-mask\")"
366366
]
367367
},
368368
{
@@ -414,7 +414,7 @@
414414
],
415415
"source": [
416416
"from pprint import pprint\n",
417-
"pprint(nlp(f\"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.\"))"
417+
"pprint(unmasker(f\"HuggingFace is creating a {unmasker.tokenizer.mask_token} that the community uses to solve NLP tasks.\"))"
418418
]
419419
},
420420
{
@@ -516,7 +516,7 @@
516516
"source": [
517517
"from transformers import AutoModelWithLMHead, AutoTokenizer, top_k_top_p_filtering\n",
518518
"import torch\n",
519-
"from torch.nn import functional as F\n",
519+
"from torch import nn\n",
520520
"tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
521521
"model = AutoModelWithLMHead.from_pretrained(\"gpt2\")\n",
522522
"sequence = f\"Hugging Face is based in DUMBO, New York City, and\"\n",
@@ -526,7 +526,7 @@
526526
"# filter\n",
527527
"filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)\n",
528528
"# sample\n",
529-
"probs = F.softmax(filtered_next_token_logits, dim=-1)\n",
529+
"probs = nn.functional.softmax(filtered_next_token_logits, dim=-1)\n",
530530
"next_token = torch.multinomial(probs, num_samples=1)\n",
531531
"generated = torch.cat([input_ids, next_token], dim=-1)\n",
532532
"resulting_string = tokenizer.decode(generated.tolist()[0])"
@@ -563,8 +563,8 @@
563563
"cell_type": "markdown",
564564
"metadata": {},
565565
"source": [
566-
"In the next section, we show how `PreTrainedModel.generate` can be used to generate multiple tokens\n",
567-
"up to a specified length instead of one token at a time."
566+
"In the next section, we show how `GenerationMixin.generate` can be used to\n",
567+
"generate multiple tokens up to a specified length instead of one token at a time."
568568
]
569569
},
570570
{
@@ -717,8 +717,8 @@
717717
"outputs": [],
718718
"source": [
719719
"from transformers import pipeline\n",
720-
"nlp = pipeline(\"ner\")\n",
721-
"sequence = \"\"\"Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, \n",
720+
"ner_pipe = pipeline(\"ner\")\n",
721+
"sequence = \"\"\"Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO,\n",
722722
"therefore very close to the Manhattan Bridge which is visible from the window.\"\"\""
723723
]
724724
},
@@ -760,7 +760,7 @@
760760
}
761761
],
762762
"source": [
763-
"print(nlp(sequence))"
763+
"print(ner_pipe(sequence))"
764764
]
765765
},
766766
{
@@ -982,11 +982,11 @@
982982
"metadata": {},
983983
"outputs": [],
984984
"source": [
985-
"from transformers import AutoModelWithLMHead, AutoTokenizer\n",
986-
"model = AutoModelWithLMHead.from_pretrained(\"t5-base\")\n",
985+
"from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
986+
"model = AutoModelForSeq2SeqLM.from_pretrained(\"t5-base\")\n",
987987
"tokenizer = AutoTokenizer.from_pretrained(\"t5-base\")\n",
988988
"# T5 uses a max_length of 512 so we cut the article to 512 tokens.\n",
989-
"inputs = tokenizer.encode(\"summarize: \" + ARTICLE, return_tensors=\"pt\", max_length=512)\n",
989+
"inputs = tokenizer.encode(\"summarize: \" + ARTICLE, return_tensors=\"pt\", max_length=512, truncation=True)\n",
990990
"outputs = model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)"
991991
]
992992
},

0 commit comments

Comments
 (0)