7 | 7 | "outputs": [],
8 | 8 | "source": [
9 | 9 | "# Transformers installation\n",
10 | | - "! pip install transformers\n",
| 10 | + "! pip install transformers datasets\n",
11 | 11 | "# To install from source instead of the last release, comment the command above and uncomment the following one.\n",
12 | 12 | "# ! pip install git+https://github.com/huggingface/transformers.git\n"
13 | 13 | ]

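Note: after the updated install cell runs, a quick sanity check such as the sketch below confirms that both packages import cleanly (version numbers will vary by environment):

    # both libraries should import and report a version
    import transformers, datasets
    print(transformers.__version__, datasets.__version__)
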
105 | 105 | ],
106 | 106 | "source": [
107 | 107 | "from transformers import pipeline\n",
108 | | - "nlp = pipeline(\"sentiment-analysis\")\n",
109 | | - "result = nlp(\"I hate you\")[0]\n",
| 108 | + "classifier = pipeline(\"sentiment-analysis\")\n",
| 109 | + "result = classifier(\"I hate you\")[0]\n",
110 | 110 | "print(f\"label: {result['label']}, with score: {round(result['score'], 4)}\")\n",
111 | | - "result = nlp(\"I love you\")[0]\n",
| 111 | + "result = classifier(\"I love you\")[0]\n",
112 | 112 | "print(f\"label: {result['label']}, with score: {round(result['score'], 4)}\")"
113 | 113 | ]
114 | 114 | },

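Note: the renamed pipeline runs end to end as sketched below; pipelines also accept a list of inputs and return one result dict per input (scores depend on the default model):

    from transformers import pipeline

    classifier = pipeline("sentiment-analysis")
    # a list input yields one {'label', 'score'} dict per sentence
    for result in classifier(["I hate you", "I love you"]):
        print(f"label: {result['label']}, with score: {round(result['score'], 4)}")
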
201 | 201 | "outputs": [],
202 | 202 | "source": [
203 | 203 | "from transformers import pipeline\n",
204 | | - "nlp = pipeline(\"question-answering\")\n",
| 204 | + "question_answerer = pipeline(\"question-answering\")\n",
205 | 205 | "context = r\"\"\"\n",
206 | 206 | "Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a\n",
207 | 207 | "question answering dataset is the SQuAD dataset, which is entirely based on that task. If you would like to fine-tune\n",

235 | 235 | }
236 | 236 | ],
237 | 237 | "source": [
238 | | - "result = nlp(question=\"What is extractive question answering?\", context=context)\n",
| 238 | + "result = question_answerer(question=\"What is extractive question answering?\", context=context)\n",
239 | 239 | "print(f\"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}\")\n",
240 | | - "result = nlp(question=\"What is a good example of a question answering dataset?\", context=context)\n",
| 240 | + "result = question_answerer(question=\"What is a good example of a question answering dataset?\", context=context)\n",
241 | 241 | "print(f\"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}\")"
242 | 242 | ]
243 | 243 | },

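Note: each call to `question_answerer` returns a dict with `answer`, `score`, `start`, and `end` keys, so the two calls above can also be folded into a loop, as in this minimal sketch (reusing `context` from the cell above):

    for question in [
        "What is extractive question answering?",
        "What is a good example of a question answering dataset?",
    ]:
        result = question_answerer(question=question, context=context)
        print(f"Answer: '{result['answer']}' (score {round(result['score'], 4)})")
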
362 | 362 | "outputs": [],
363 | 363 | "source": [
364 | 364 | "from transformers import pipeline\n",
365 | | - "nlp = pipeline(\"fill-mask\")"
| 365 | + "unmasker = pipeline(\"fill-mask\")"
366 | 366 | ]
367 | 367 | },
368 | 368 | {

414 | 414 | ],
415 | 415 | "source": [
416 | 416 | "from pprint import pprint\n",
417 | | - "pprint(nlp(f\"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks.\"))"
| 417 | + "pprint(unmasker(f\"HuggingFace is creating a {unmasker.tokenizer.mask_token} that the community uses to solve NLP tasks.\"))"
418 | 418 | ]
419 | 419 | },
420 | 420 | {

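Note: the fill-mask pipeline exposes the model's mask token via `unmasker.tokenizer.mask_token`, and in recent versions the number of returned candidates can be limited with `top_k` (an assumption about the installed version). A brief sketch:

    # request only the top 3 completions instead of the default 5
    masked = f"HuggingFace is creating a {unmasker.tokenizer.mask_token} that the community uses to solve NLP tasks."
    for candidate in unmasker(masked, top_k=3):
        print(candidate["token_str"], round(candidate["score"], 4))
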
516 | 516 | "source": [
517 | 517 | "from transformers import AutoModelWithLMHead, AutoTokenizer, top_k_top_p_filtering\n",
518 | 518 | "import torch\n",
519 | | - "from torch.nn import functional as F\n",
| 519 | + "from torch import nn\n",
520 | 520 | "tokenizer = AutoTokenizer.from_pretrained(\"gpt2\")\n",
521 | 521 | "model = AutoModelWithLMHead.from_pretrained(\"gpt2\")\n",
522 | 522 | "sequence = f\"Hugging Face is based in DUMBO, New York City, and\"\n",

526 | 526 | "# filter\n",
527 | 527 | "filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=50, top_p=1.0)\n",
528 | 528 | "# sample\n",
529 | | - "probs = F.softmax(filtered_next_token_logits, dim=-1)\n",
| 529 | + "probs = nn.functional.softmax(filtered_next_token_logits, dim=-1)\n",
530 | 530 | "next_token = torch.multinomial(probs, num_samples=1)\n",
531 | 531 | "generated = torch.cat([input_ids, next_token], dim=-1)\n",
532 | 532 | "resulting_string = tokenizer.decode(generated.tolist()[0])"

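Note: for readers unfamiliar with the helper, `top_k_top_p_filtering` masks all but the `top_k` highest-scoring logits (and, when `top_p` < 1.0, the low-probability tail of the nucleus) by setting them to -inf. A minimal hand-rolled equivalent of the top-k part, for illustration only (the helper itself remains the call to use):

    import torch

    def top_k_filter(logits, k):
        # keep the k largest logits; everything below the k-th largest
        # becomes -inf, so softmax assigns it zero probability
        kth_value = torch.topk(logits, k).values[..., -1, None]
        return logits.masked_fill(logits < kth_value, float("-inf"))
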
563 | 563 | "cell_type": "markdown",
564 | 564 | "metadata": {},
565 | 565 | "source": [
566 | | - "In the next section, we show how `PreTrainedModel.generate` can be used to generate multiple tokens\n",
567 | | - "up to a specified length instead of one token at a time."
| 566 | + "In the next section, we show how `GenerationMixin.generate` can be used to\n",
| 567 | + "generate multiple tokens up to a specified length instead of one token at a time."
568 | 568 | ]
569 | 569 | },
570 | 570 | {

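Note: as a preview, a minimal sketch of such a `generate` call, reusing the tokenizer, model, and sequence from the sampling cell above (parameter values are illustrative):

    # sample up to 50 tokens in one call instead of looping one token at a time
    input_ids = tokenizer.encode(sequence, return_tensors="pt")
    output_ids = model.generate(input_ids, do_sample=True, top_k=50, max_length=50)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
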
717 | 717 | "outputs": [],
718 | 718 | "source": [
719 | 719 | "from transformers import pipeline\n",
720 | | - "nlp = pipeline(\"ner\")\n",
721 | | - "sequence = \"\"\"Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO, \n",
| 720 | + "ner_pipe = pipeline(\"ner\")\n",
| 721 | + "sequence = \"\"\"Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO,\n",
722 | 722 | "therefore very close to the Manhattan Bridge which is visible from the window.\"\"\""
723 | 723 | ]
724 | 724 | },

760 | 760 | }
761 | 761 | ],
762 | 762 | "source": [
763 | | - "print(nlp(sequence))"
| 763 | + "print(ner_pipe(sequence))"
764 | 764 | ]
765 | 765 | },
766 | 766 | {

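Note: `ner_pipe(sequence)` returns one dict per tagged token; printing selected fields makes the output easier to read than the raw list (a small sketch using the `word`, `entity`, and `score` keys of the pipeline's output):

    for entity in ner_pipe(sequence):
        print(f"{entity['word']}: {entity['entity']} ({round(entity['score'], 4)})")
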
982 | 982 | "metadata": {},
983 | 983 | "outputs": [],
984 | 984 | "source": [
985 | | - "from transformers import AutoModelWithLMHead, AutoTokenizer\n",
986 | | - "model = AutoModelWithLMHead.from_pretrained(\"t5-base\")\n",
| 985 | + "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
| 986 | + "model = AutoModelForSeq2SeqLM.from_pretrained(\"t5-base\")\n",
987 | 987 | "tokenizer = AutoTokenizer.from_pretrained(\"t5-base\")\n",
988 | 988 | "# T5 uses a max_length of 512 so we cut the article to 512 tokens.\n",
989 | | - "inputs = tokenizer.encode(\"summarize: \" + ARTICLE, return_tensors=\"pt\", max_length=512)\n",
| 989 | + "inputs = tokenizer.encode(\"summarize: \" + ARTICLE, return_tensors=\"pt\", max_length=512, truncation=True)\n",
990 | 990 | "outputs = model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)"
991 | 991 | ]
992 | 992 | },

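Note: `outputs` holds token ids, not text; decoding is the natural follow-up once the cell above has run:

    # decode the beam-search output back to text
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))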