Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement bert-base-japanese-sentiment #975

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ The collection of pre-trained, state-of-the-art AI models.
|[bert_tweets_sentiment](/natural_language_processing/bert_tweets_sentiment) | [huggingface/transformers](https://github.com/huggingface/transformers) | Pytorch | 1.2.5 and later |
|[gpt2](/natural_language_processing/gpt2) | [GPT-2](https://github.com/onnx/models/blob/master/text/machine_comprehension/gpt-2/README.md) | Pytorch | 1.2.7 and later |
|[rinna_gpt2](/natural_language_processing/rinna_gpt2) | [japanese-pretrained-models](https://github.com/rinnakk/japanese-pretrained-models) | Pytorch | 1.2.7 and later |
|[bert_base_japanese_sentiment](/natural_language_processing/bert_base_japanese_sentiment) | [bert-base-japanese-sentiment](https://huggingface.co/daigo/bert-base-japanese-sentiment) | Pytorch | 1.2.7 and later |

## Neural Rendering

Expand Down
47 changes: 47 additions & 0 deletions natural_language_processing/bert_base_japanese_sentiment/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Hugging Face - daigo/bert-base-japanese-sentiment

## Input

A Japanese sentence to be classified.

- Sample
```
私は幸せである。
```

## Output

Recognized sentiment with its confidence score
```
ポジティブ : 0.9903476238250732
```

## Usage
Automatically downloads the onnx and prototxt files on the first run.
It is necessary to be connected to the Internet while downloading.

For the sample sentence,
```bash
$ python3 bert_base_japanese_sentiment.py
```

If you want to specify the input sentence, put it after the `--input` option.
```bash
$ python3 bert_base_japanese_sentiment.py --input SENTENCE
```

## Reference

[Hugging Face - daigo/bert-base-japanese-sentiment](https://huggingface.co/daigo/bert-base-japanese-sentiment)

## Framework

Pytorch

## Model Format

ONNX opset=11

## Netron

[model.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/bert_base_japanese_sentiment/model.onnx.prototxt)
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import sys
import time

import numpy as np
import cv2
from PIL import Image

from transformers import AutoTokenizer

import ailia

# import original modules
sys.path.append('../../util')
from utils import get_base_parser, update_parser, get_savepath # noqa
from model_utils import check_and_download_models # noqa
from math_utils import softmax
# logger
from logging import getLogger # noqa

logger = getLogger(__name__)

# ======================
# Parameters
# ======================

# ONNX weight / prototxt network definition, downloaded from REMOTE_PATH
# on first run (see check_and_download_models in main()).
WEIGHT_PATH = "model.onnx"
MODEL_PATH = "model.onnx.prototxt"
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/bert_base_japanese_sentiment/'

# Sentence used when --input is not given ("I am happy.").
DEFAULT_TEXT = "私は幸せである。"

# ======================
# Argument Parser Config
# ======================

parser = get_base_parser(
    'bert-base-japanese-sentiment', None, None
)
parser.add_argument(
    '--input', '-i', default=DEFAULT_TEXT
)
parser.add_argument(
    '--onnx',
    action='store_true',
    help='execute onnxruntime version.'
)
# check_input_type=False: the input is a raw text string, not an image/video path.
args = update_parser(parser, check_input_type=False)


# ======================
# Main functions
# ======================

def preprocess(tokenizer, sequence):
    """Tokenize *sequence* into a (1, seq_len) int64 id array for the model."""
    token_ids = np.asarray(tokenizer.encode(sequence), dtype=np.int64)
    # Add the batch dimension expected by the network.
    return token_ids[np.newaxis, :]


def post_processing(classifier, output):
    """Apply the final linear classification head to a pooled BERT output.

    classifier is a dict holding the head's "weight" matrix and "bias"
    vector (loaded from classifier.npy); returns softmax class scores.
    """
    logits = np.dot(classifier["weight"], output) + classifier["bias"]
    return softmax(logits)


def predict(model_info, sequence):
    """Run sentiment classification on *sequence*.

    model_info holds the tokenizer, the network (ailia or onnxruntime),
    and the classification-head weights. Returns softmax scores.
    """
    tokenizer = model_info["tokenizer"]
    net = model_info["model"]
    classifier = model_info["classifier"]

    input_ids = preprocess(tokenizer, sequence)
    # Every token is real (no padding) and belongs to a single segment.
    attention_mask = np.ones_like(input_ids)
    token_type_ids = np.zeros_like(input_ids)

    # feedforward
    if args.onnx:
        feed = {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'token_type_ids': token_type_ids,
        }
        output = net.run(None, feed)
    else:
        output = net.predict([input_ids, attention_mask, token_type_ids])

    # The model outputs (last_hidden_state, pooled_output); only the
    # pooled [CLS] representation feeds the classification head.
    _, pooled_output = output
    return post_processing(classifier, pooled_output[0])


def recognize_from_text(model_info):
    """Classify the sentiment of the --input sentence and log the result."""
    text = args.input

    logger.info("Input : " + text)

    # inference
    logger.info('Start inference...')
    if args.benchmark:
        logger.info('BENCHMARK mode')
        total = 0
        for run in range(args.benchmark_count):
            t0 = int(round(time.time() * 1000))
            scores = predict(model_info, text)
            t1 = int(round(time.time() * 1000))
            elapsed = t1 - t0

            logger.info(f'\tailia processing estimation time {elapsed} ms')
            # The first run warms caches, so it is excluded from the average.
            if run != 0:
                total += elapsed

        logger.info(f'\taverage time estimation {total / (args.benchmark_count - 1)} ms')
    else:
        scores = predict(model_info, text)

    # Label order follows daigo/bert-base-japanese-sentiment:
    # index 0 = positive, index 1 = negative.
    id2label = {
        0: 'ポジティブ',
        1: 'ネガティブ'
    }
    top = np.argmax(scores)
    logger.info(f'{id2label[top]} : {scores[top]}')

    logger.info('Script finished successfully.')


def main():
    """Download model files if needed, build the pipeline, and run inference."""
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # Tokenizer files are expected in the local "tokenizer" directory.
    tokenizer = AutoTokenizer.from_pretrained("tokenizer")

    if args.onnx:
        import onnxruntime
        net = onnxruntime.InferenceSession(WEIGHT_PATH)
    else:
        net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # Classification-head weights exported separately from the BERT body.
    # NOTE(review): np.load with allow_pickle=True runs pickle on the file —
    # only safe because classifier.npy ships with the model, not user input.
    classifier = np.load("classifier.npy", allow_pickle=True).item()

    recognize_from_text({
        "tokenizer": tokenizer,
        "model": net,
        "classifier": classifier,
    })


if __name__ == '__main__':
    main()
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"cls_token": "[CLS]",
"mask_token": "[MASK]",
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"unk_token": "[UNK]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"cls_token": "[CLS]",
"do_lower_case": false,
"do_subword_tokenize": true,
"do_word_tokenize": true,
"jumanpp_kwargs": null,
"mask_token": "[MASK]",
"max_len": 512,
"mecab_kwargs": null,
"name_or_path": "daigo/bert-base-japanese-sentiment",
"never_split": null,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"special_tokens_map_file": "/home/ooe/.cache/huggingface/hub/models--daigo--bert-base-japanese-sentiment/snapshots/51ac2d2c0a5645d77ca26078fc5f02c349fbb93d/special_tokens_map.json",
"subword_tokenizer_type": "wordpiece",
"sudachi_kwargs": null,
"tokenizer_class": "BertJapaneseTokenizer",
"unk_token": "[UNK]",
"word_tokenizer_type": "mecab"
}
Loading