
Commit cd99341

etcylfleet and matthew.fleetwood authored

Fix docs and notebooks linting, add new makefile target (#403)

* Fix docs and notebooks linting, add new makefile target for make lint to docs/ and notebooks/
* Update conf.py
* Update model_utils.py
* Update model_util.py
* Update TLT_TF_Image_Classification_Transfer_Learning.ipynb
* Update TLT_TF_Image_Classification_Transfer_Learning.ipynb
* Update TLT_TF_Image_Classification_Transfer_Learning.ipynb
* Update dataset_utils.py
* Update model_utils.py
* Update inc_utils.py
* Update plot_utils.py
* Update bert_utils.py
* Update bert_utils.py
* Update llm_utils.py
* Update model_utils.py
* Update data_utils.py
* Update Makefile
* Update BERT_Question_Answering.ipynb
* Update BERT_Question_Answering.ipynb
* Remove unused imports
* Move imports to after license
* Move imports to after sys append
* Fix incorrect formatting
* Move imports to top of file
* Fix blank lines
* Move imports to after license

---------

Co-authored-by: matthew.fleetwood <[email protected]>
1 parent f209471 commit cd99341

File tree

13 files changed: +48 -106 lines changed

Makefile

Lines changed: 4 additions & 1 deletion
@@ -57,7 +57,10 @@ integration: tlt_test_venv
 
 lint: tlt_test_venv
 	@echo "Style checks..."
-	@. $(ACTIVATE_TEST_VENV) && flake8 tlt tests downloader
+	@. $(ACTIVATE_TEST_VENV) && \
+	flake8 tlt tests downloader && \
+	flake8 notebooks && \
+	flake8 docs
 
 clean:
 	rm -rf tlt_test_venv
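
The new target chains three flake8 invocations with `&&`, so linting stops at the first failing directory. A minimal Python sketch of the same behavior, for running the checks outside of make (it assumes flake8 is installed in the current environment; not part of this commit):

import subprocess
import sys

# Mirror the Makefile target: lint each area and stop at the first failure,
# just as the shell's `&&` chaining does.
for target in ("tlt", "tests", "downloader", "notebooks", "docs"):
    result = subprocess.run([sys.executable, "-m", "flake8", target])
    if result.returncode != 0:
        sys.exit(result.returncode)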

docs/conf.py

Lines changed: 7 additions & 7 deletions
@@ -30,14 +30,12 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+from datetime import datetime
 import os
+import sphinx_rtd_theme
 import sys
-import shutil
-import glob
 sys.path.insert(0, os.path.abspath('../..'))
 sys.setrecursionlimit(1500)
-import sphinx_rtd_theme
-from datetime import datetime
 
 # -- Project information -----------------------------------------------------
 
@@ -91,9 +89,11 @@
 html_static_path = ['_static']
 templates_path = ['_templates']
 
+
 def setup(app):
-    app.add_css_file("tlt-custom.css")
-    app.add_js_file("tlt-custom.js")
+    app.add_css_file("tlt-custom.css")
+    app.add_js_file("tlt-custom.js")
+
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@@ -108,4 +108,4 @@ def setup(app):
 suppress_warnings = ["myst.xref_missing", "myst.header"]
 
 # ask the myst parser to process <img> tags so Sphinx can handle the properly
-myst_enable_extensions = [ "html_image" ]
+myst_enable_extensions = ["html_image"]
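
The import reshuffle above is what flake8 asks for: module-level imports sorted and placed at the top of the file, before any executable statement (rule E402). An illustrative fragment, not taken from the repository:

# Module-level imports first, alphabetized...
import os
import sys

# ...then executable statements such as path manipulation; an import placed
# below this line would trigger E402 ("module level import not at top of file").
sys.path.insert(0, os.path.abspath('../..'))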

notebooks/image_classification/pytorch_image_classification/model_utils.py

Lines changed: 3 additions & 4 deletions
@@ -17,7 +17,6 @@
 #
 
 import torch
-import torchvision
 from pydoc import locate
 
 
@@ -55,11 +54,12 @@
     }
 }
 
+
 def get_retrainable_model(model_name, num_classes, do_fine_tuning=False):
     # Load an image classification model pretrained on ImageNet
     pretrained_model_class = locate('torchvision.models.{}'.format(model_name))
     classifier_layer = torchvision_model_map[model_name]['classifier']
-
+
     model = pretrained_model_class(pretrained=True)
 
     if not do_fine_tuning:
@@ -74,6 +74,5 @@ def get_retrainable_model(model_name, num_classes, do_fine_tuning=False):
     classifier = getattr(model, classifier_layer)
     num_features = classifier.in_features
     setattr(model, classifier_layer, torch.nn.Linear(num_features, num_classes))
-
-    return model
 
+    return model
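
A hedged usage sketch of the helper above; 'resnet18' is an assumed key, since the commit does not show which models torchvision_model_map contains:

from model_utils import get_retrainable_model

# Swap the ImageNet classifier head for a fresh 10-class linear layer;
# backbone weights stay frozen unless do_fine_tuning=True.
model = get_retrainable_model('resnet18', num_classes=10, do_fine_tuning=False)
print(model)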

notebooks/image_classification/tf_image_classification/model_util.py

Lines changed: 0 additions & 1 deletion
@@ -69,4 +69,3 @@
         "image_size": 331
     }
 }
-

notebooks/object_detection/pytorch_object_detection/dataset_utils.py

Lines changed: 8 additions & 8 deletions
@@ -18,12 +18,11 @@
 
 import os
 import csv
-import glob
 import numpy as np
 import torch
 import torchvision
 from PIL import Image
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Tuple
 
 
 COCO_LABELS = [
@@ -45,6 +44,7 @@
     '__background__', 'Person', 'Vehicle'
 ]
 
+
 class PennFudanDataset(torch.utils.data.Dataset):
     def __init__(self, root, transforms):
         self.root = root
@@ -103,12 +103,13 @@ def __getitem__(self, idx):
         if self.transforms is not None:
             img = self.transforms(img)
         # target needs augmentations
-
+
         return img, target
 
     def __len__(self):
         return len(self.imgs)
 
+
 class Kitti(torchvision.datasets.Kitti):
     def _parse_target(self, index: int):
         labels = []
@@ -118,12 +119,12 @@ def _parse_target(self, index: int):
         for line in content:
             if line[0] in ['Pedestrian', 'Person_sitting', 'Cyclist']:
                 boxes.append([float(x) for x in line[4:8]])
-                labels.append(1) # Re-label Pedestrian, Person_sitting, Cyclist to Person=1
+                labels.append(1)  # Re-label Pedestrian, Person_sitting, Cyclist to Person=1
             if line[0] in ['Car', 'Truck', 'Van', 'Tram']:
                 boxes.append([float(x) for x in line[4:8]])
-                labels.append(2) # Re-label Car, Truck, Van, Tram to Vehicle=2
-
-        boxes = torch.FloatTensor(boxes)
+                labels.append(2)  # Re-label Car, Truck, Van, Tram to Vehicle=2
+
+        boxes = torch.FloatTensor(boxes)
         target = {}
         target['image_id'] = torch.tensor([index])
         target['boxes'] = boxes
@@ -133,7 +134,6 @@ def _parse_target(self, index: int):
 
         return target
 
-
    def __getitem__(self, index: int) -> Tuple[Any, Any]:
         """Get item at a given index.
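
A sketch of how the relabeled Kitti subclass might be exercised; the root path and download flag are placeholders, and the target keys shown are the ones visible in the _parse_target body above:

from dataset_utils import Kitti

# After relabeling: 1 = Person (Pedestrian/Person_sitting/Cyclist),
# 2 = Vehicle (Car/Truck/Van/Tram).
dataset = Kitti(root='/tmp/kitti', download=True)
img, target = dataset[0]
print(target['image_id'], target['boxes'])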

notebooks/object_detection/pytorch_object_detection/model_utils.py

Lines changed: 2 additions & 4 deletions
@@ -18,8 +18,6 @@
 
 import math
 import torch
-import torchvision
-import torchvision.models.detection as detection
 
 
 # Dictionary of Torchvision object detection models
@@ -38,6 +36,7 @@
     }
 }
 
+
 def get_retrainable_model(model_name, num_classes, pretrained_model_class, predictor_class):
     # Load an object detection model pre-trained on COCO
     model = pretrained_model_class(pretrained=True)
@@ -51,10 +50,9 @@ def get_retrainable_model(model_name, num_classes, pretrained_model_class, predi
     in_features = model.head.classification_head.conv[0].in_channels
     num_anchors = model.head.classification_head.num_anchors
     model.head.classification_head.num_classes = num_classes
-    cls_logits = torch.nn.Conv2d(in_features, num_anchors * num_classes, kernel_size = 3, stride=1, padding=1)
+    cls_logits = torch.nn.Conv2d(in_features, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
     torch.nn.init.normal_(cls_logits.weight, std=0.01)
     torch.nn.init.constant_(cls_logits.bias, -math.log((1 - 0.01) / 0.01))
     model.head.classification_head.cls_logits = cls_logits
 
     return model
-
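
The bias constant above is the standard RetinaNet focal-loss initialization: with prior probability pi = 0.01, the fresh head's logits start at -log((1 - pi) / pi), so the untrained detector predicts mostly background. A hedged call sketch (the model name and the predictor_class value are assumptions; the commit does not show how the notebook invokes this helper):

import torchvision.models.detection as detection

from model_utils import get_retrainable_model

# Rebuild the RetinaNet classification head for three classes
# ('__background__', 'Person', 'Vehicle').
model = get_retrainable_model('retinanet_resnet50_fpn', num_classes=3,
                              pretrained_model_class=detection.retinanet_resnet50_fpn,
                              predictor_class=None)  # assumed unused on the RetinaNet path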

notebooks/performance/utils/inc_utils.py

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,7 @@ def performance(saved_model_dir, batch_size, image_size, dataset_dir, framework,
                 cores_per_instance=None, num_of_instance=None, inter_num_of_threads=None, intra_num_of_threads=None):
     """
     Uses the Intel Neural Compressor to get performance metrics for the specified model.
-
+
     :param saved_model_dir: Model to load
     :param batch_size: Batch size
     :param image_size: Image input size
@@ -66,7 +66,7 @@ def performance(saved_model_dir, batch_size, image_size, dataset_dir, framework,
 def calculate_latency_and_throughput(results):
     """
     Parses the results from the benchmarking function and returns the latency (ms) and throughput (samples/sec)
-
+
     :param results: Return value from calling the performance util function
     :param batch_size: batch size
     :return: latency (ms) and throughput (images/sec)
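
A hedged sketch chaining the two utilities whose docstrings were tidied above; every argument value is a placeholder:

from inc_utils import performance, calculate_latency_and_throughput

# Benchmark a saved model with Intel Neural Compressor, then reduce the
# raw results to latency (ms) and throughput (images/sec).
results = performance('saved_model_dir', batch_size=32, image_size=224,
                      dataset_dir='/tmp/dataset', framework='tensorflow')
latency, throughput = calculate_latency_and_throughput(results)
print('latency: {} ms, throughput: {} images/sec'.format(latency, throughput))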

notebooks/question_answering/tfhub_question_answering/BERT_Question_Answering.ipynb

Lines changed: 1 addition & 2 deletions
@@ -120,8 +120,7 @@
     "\n",
     "from bert_qa_utils import create_mini_dataset_file, \\\n",
     "    display_predictions, \\\n",
-    "    get_config_and_vocab_from_zip, \\\n",
-    "    predict_squad_customized\n",
+    "    get_config_and_vocab_from_zip \n",
     "\n",
     "# Extract the vocab.txt and bert_config.json from the checkpoint zip file\n",
     "vocab_txt, bert_config = get_config_and_vocab_from_zip(checkpoint_url, bert_dir)\n",

notebooks/question_answering/tfhub_question_answering/bert_qa_utils.py

Lines changed: 16 additions & 66 deletions
@@ -21,14 +21,11 @@
 import os
 import pandas as pd
 import sys
-import tensorflow as tf
+from tlt.utils.file_utils import download_file
+from zipfile import ZipFile
 
 sys.path.append(os.environ["TF_MODELS_DIR"])
 
-from official.common import distribute_utils
-from official.legacy.bert.run_squad_helper import get_dataset_fn
-from tlt.utils.file_utils import download_file
-from zipfile import ZipFile
 
 def create_mini_dataset_file(original_file, output_file, num_dataset_items, overwrite=False):
     """
@@ -41,37 +38,37 @@ def create_mini_dataset_file(original_file, output_file, num_dataset_items, over
     """
     if not os.path.exists(output_file) or overwrite:
         import random
-
+
         with open(original_file) as f:
             original_data = json.load(f)
 
         total_len = len(original_data["data"])
-
+
         if num_dataset_items > total_len:
             raise ValueError("The number of dataset items ({}) cannot be more than the total "
                              "dataset length ({}).".format(num_dataset_items, total_len))
-
+
         item_indicies = random.sample(range(0, total_len), num_dataset_items)
         print("Total dataset length:", total_len)
        print("Randomly selected dataset indices:", item_indicies)
-
+
         articles = []
-
+
         for data_index in item_indicies:
             article = {}
             article["paragraphs"] = original_data["data"][data_index]["paragraphs"]
             article["title"] = original_data["data"][data_index]["title"]
-
+
             for p in article["paragraphs"]:
                 for qas in p["qas"]:
                     qas["id"] = str(qas["id"])
-
+
             articles.append(article)
 
         # Add the article to a dictionary for the mini dataset
         mini_data = {}
         mini_data["data"] = articles
-
+
         # Add on a version
         mini_data["version"] = original_data["version"] if "version" in original_data.keys() else "1.0"
 
@@ -86,7 +83,7 @@ def create_mini_dataset_file(original_file, output_file, num_dataset_items, over
 
 def display_predictions(predict_data_path, results_file_path, n=10):
     """ Displays n number of predictions along with the actual value """
-
+
     def get_data_list():
         count = 0
         data_list = []
@@ -116,7 +113,7 @@ def get_data_list():
                                            "Predicted Answer",
                                            "Actual Answer(s)"])
     return predict_df.style.hide(axis="index")
-
+
 
 def get_config_and_vocab_from_zip(zip_url, bert_dir):
     """
@@ -132,7 +129,7 @@ def get_config_and_vocab_from_zip(zip_url, bert_dir):
     """
     vocab_txt = os.path.join(bert_dir, "vocab.txt")
     bert_config = os.path.join(bert_dir, "bert_config.json")
-
+
     if not os.path.exists(vocab_txt) or not os.path.exists(bert_config):
         downloaded_file = download_file(zip_url, bert_dir)
         with ZipFile(downloaded_file, "r") as checkpoint_zip:
@@ -149,60 +146,13 @@ def get_file_from_zip(file_path):
             if matches:
                 os.replace(matches[0], file_path)
                 break
-
+
         if not os.path.exists(vocab_txt):
             get_file_from_zip(vocab_txt)
-
+
         if not os.path.exists(bert_config):
             get_file_from_zip(bert_config)
 
         os.remove(downloaded_file)
-
-    return vocab_txt, bert_config
 
-
-
-# This function was taken from the TensorFlow Model Garden repo and adapted
-# to be a utility function that has a string for the strategy, directly passes
-# in the max_seq_length instead of a metadata object, and removes the need for FLAGS
-# being defined (instead just passes in the predict_batch_size as an arg).
-
-# https://github.com/tensorflow/models/blob/v2.7.0/official/nlp/bert/run_squad_helper.py#L176
-def predict_squad_customized(strategy_str, max_seq_length, predict_batch_size,
-                             predict_tfrecord_path, num_steps, squad_model):
-
-    strategy = distribute_utils.get_distribution_strategy(distribution_strategy=strategy_str)
-
-    """Make predictions using a Bert-based squad model."""
-    predict_dataset_fn = get_dataset_fn(
-        predict_tfrecord_path,
-        max_seq_length,
-        predict_batch_size,
-        is_training=False)
-    predict_iterator = iter(
-        strategy.distribute_datasets_from_function(predict_dataset_fn))
-
-    @tf.function
-    def predict_step(iterator):
-        """Predicts on distributed devices."""
-
-        def _replicated_step(inputs):
-            """Replicated prediction calculation."""
-            x, _ = inputs
-            unique_ids = x.pop('unique_ids')
-            start_logits, end_logits = squad_model(x, training=False)
-            return dict(
-                unique_ids=unique_ids,
-                start_logits=start_logits,
-                end_logits=end_logits)
-
-        outputs = strategy.run(_replicated_step, args=(next(iterator),))
-        return tf.nest.map_structure(strategy.experimental_local_results, outputs)
-
-    all_results = []
-    for _ in range(num_steps):
-        predictions = predict_step(predict_iterator)
-        for result in get_raw_results(predictions):
-            all_results.append(result)
-        if len(all_results) % 100 == 0:
-            print('Made predictions for %d records.', len(all_results))
-    return all_results
+    return vocab_txt, bert_config
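
The helpers that survive this cleanup can be exercised as below; a sketch only, with placeholder SQuAD file paths (note the module expects the TF_MODELS_DIR environment variable to be set at import time):

from bert_qa_utils import create_mini_dataset_file

# Carve a 5-article subset out of a SQuAD-format JSON file for quick runs.
create_mini_dataset_file(original_file='dev-v1.1.json',
                         output_file='mini-dev-v1.1.json',
                         num_dataset_items=5,
                         overwrite=True)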

notebooks/question_answering/tfhub_question_answering/bert_utils.py

Lines changed: 0 additions & 3 deletions
@@ -17,11 +17,8 @@
 #
 
 import json
-import os
 import pandas as pd
 
-from zipfile import ZipFile
-
 
 def get_model_map(json_path, return_data_frame=False):
     """

notebooks/text_classification/tfhub_text_classification/bert_utils.py

Lines changed: 0 additions & 3 deletions
@@ -17,11 +17,8 @@
 #
 
 import json
-import os
 import pandas as pd
 
-from zipfile import ZipFile
-
 
 def get_model_map(json_path, return_data_frame=False):
     """

notebooks/text_generation/pytorch_text_generation/llm_utils.py

Lines changed: 2 additions & 2 deletions
@@ -39,8 +39,9 @@
     },
 }
 
+
 class INCDataloader:
-    def __init__(self, dataset, tokenizer, batch_size=1, device='cpu',
+    def __init__(self, dataset, tokenizer, batch_size=1, device='cpu',
                  max_seq_length=512, for_calib=False):
         self.dataset = dataset
         self.tokenizer = tokenizer
@@ -98,4 +99,3 @@ def __iter__(self):
 
     def __len__(self):
         return self.length
-
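
A sketch of constructing the dataloader defined above; the Hugging Face dataset and tokenizer are assumptions about what this file expects, since the diff shows only the constructor signature:

from datasets import load_dataset
from transformers import AutoTokenizer

from llm_utils import INCDataloader

tokenizer = AutoTokenizer.from_pretrained('gpt2')
dataset = load_dataset('lambada', split='validation')

# for_calib is assumed to toggle calibration-style batching; not shown in the diff.
loader = INCDataloader(dataset, tokenizer, batch_size=1, device='cpu',
                       max_seq_length=512, for_calib=True)
print(len(loader))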
