
Commit cd99341

etcylfleet and matthew.fleetwood authored

Fix docs and notebooks linting, add new makefile target (#403)

* Fix docs and notebooks linting, add new makefile target for make lint to docs/ and notebooks/
* Update conf.py
* Update model_utils.py
* Update model_util.py
* Update TLT_TF_Image_Classification_Transfer_Learning.ipynb
* Update TLT_TF_Image_Classification_Transfer_Learning.ipynb
* Update TLT_TF_Image_Classification_Transfer_Learning.ipynb
* Update dataset_utils.py
* Update model_utils.py
* Update inc_utils.py
* Update plot_utils.py
* Update bert_utils.py
* Update bert_utils.py
* Update llm_utils.py
* Update model_utils.py
* Update data_utils.py
* Update Makefile
* Update BERT_Question_Answering.ipynb
* Update BERT_Question_Answering.ipynb
* Remove unused imports
* Move imports to after license
* Move imports to after sys append
* Fix incorrect formatting
* Move imports to top of file
* Fix blank lines
* Move imports to after license

---------

Co-authored-by: matthew.fleetwood <[email protected]>
1 parent f209471 commit cd99341

File tree

13 files changed: +48 -106 lines changed

Makefile

Lines changed: 4 additions & 1 deletion
@@ -57,7 +57,10 @@ integration: tlt_test_venv
 
 lint: tlt_test_venv
 	@echo "Style checks..."
-	@. $(ACTIVATE_TEST_VENV) && flake8 tlt tests downloader
+	@. $(ACTIVATE_TEST_VENV) && \
+	flake8 tlt tests downloader && \
+	flake8 notebooks && \
+	flake8 docs
 
 clean:
 	rm -rf tlt_test_venv
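
The new target chains three flake8 invocations with `&&`, so linting stops at the first failing directory. A minimal Python sketch of the same behavior, for running the checks outside of make (it assumes flake8 is installed in the current environment; not part of this commit):

import subprocess
import sys

# Mirror the Makefile target: lint each area and stop at the first failure,
# just as the shell's `&&` chaining does.
for target in ("tlt", "tests", "downloader", "notebooks", "docs"):
    result = subprocess.run([sys.executable, "-m", "flake8", target])
    if result.returncode != 0:
        sys.exit(result.returncode)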

docs/conf.py

Lines changed: 7 additions & 7 deletions
@@ -30,14 +30,12 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+from datetime import datetime
 import os
+import sphinx_rtd_theme
 import sys
-import shutil
-import glob
 sys.path.insert(0, os.path.abspath('../..'))
 sys.setrecursionlimit(1500)
-import sphinx_rtd_theme
-from datetime import datetime
 
 # -- Project information -----------------------------------------------------
 
@@ -91,9 +89,11 @@
 html_static_path = ['_static']
 templates_path = ['_templates']
 
+
 def setup(app):
-    app.add_css_file("tlt-custom.css")
-    app.add_js_file("tlt-custom.js")
+    app.add_css_file("tlt-custom.css")
+    app.add_js_file("tlt-custom.js")
+
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@@ -108,4 +108,4 @@ def setup(app):
 suppress_warnings = ["myst.xref_missing", "myst.header"]
 
 # ask the myst parser to process <img> tags so Sphinx can handle the properly
-myst_enable_extensions = [ "html_image" ]
+myst_enable_extensions = ["html_image"]
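
The import reshuffle above is what flake8 asks for: module-level imports sorted and placed at the top of the file, before any executable statement (rule E402). An illustrative fragment, not taken from the repository:

# Module-level imports first, alphabetized...
import os
import sys

# ...then executable statements such as path manipulation; an import placed
# below this line would trigger E402 ("module level import not at top of file").
sys.path.insert(0, os.path.abspath('../..'))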

notebooks/image_classification/pytorch_image_classification/model_utils.py

Lines changed: 3 additions & 4 deletions
@@ -17,7 +17,6 @@
 #
 
 import torch
-import torchvision
 from pydoc import locate
 
 
@@ -55,11 +54,12 @@
     }
 }
 
+
 def get_retrainable_model(model_name, num_classes, do_fine_tuning=False):
     # Load an image classification model pretrained on ImageNet
     pretrained_model_class = locate('torchvision.models.{}'.format(model_name))
     classifier_layer = torchvision_model_map[model_name]['classifier']
-
+
     model = pretrained_model_class(pretrained=True)
 
     if not do_fine_tuning:
@@ -74,6 +74,5 @@ def get_retrainable_model(model_name, num_classes, do_fine_tuning=False):
     classifier = getattr(model, classifier_layer)
     num_features = classifier.in_features
     setattr(model, classifier_layer, torch.nn.Linear(num_features, num_classes))
-
-    return model
 
+    return model
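
A hedged usage sketch of the helper above; 'resnet18' is an assumed key, since the commit does not show which models torchvision_model_map contains:

from model_utils import get_retrainable_model

# Swap the ImageNet classifier head for a fresh 10-class linear layer;
# backbone weights stay frozen unless do_fine_tuning=True.
model = get_retrainable_model('resnet18', num_classes=10, do_fine_tuning=False)
print(model)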

notebooks/image_classification/tf_image_classification/model_util.py

Lines changed: 0 additions & 1 deletion
@@ -69,4 +69,3 @@
         "image_size": 331
     }
 }
-

notebooks/object_detection/pytorch_object_detection/dataset_utils.py

Lines changed: 8 additions & 8 deletions
@@ -18,12 +18,11 @@
 
 import os
 import csv
-import glob
 import numpy as np
 import torch
 import torchvision
 from PIL import Image
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Tuple
 
 
 COCO_LABELS = [
@@ -45,6 +44,7 @@
     '__background__', 'Person', 'Vehicle'
 ]
 
+
 class PennFudanDataset(torch.utils.data.Dataset):
     def __init__(self, root, transforms):
         self.root = root
@@ -103,12 +103,13 @@ def __getitem__(self, idx):
         if self.transforms is not None:
             img = self.transforms(img)
         # target needs augmentations
-
+
         return img, target
 
     def __len__(self):
         return len(self.imgs)
 
+
 class Kitti(torchvision.datasets.Kitti):
     def _parse_target(self, index: int):
         labels = []
@@ -118,12 +119,12 @@ def _parse_target(self, index: int):
         for line in content:
             if line[0] in ['Pedestrian', 'Person_sitting', 'Cyclist']:
                 boxes.append([float(x) for x in line[4:8]])
-                labels.append(1) # Re-label Pedestrian, Person_sitting, Cyclist to Person=1
+                labels.append(1)  # Re-label Pedestrian, Person_sitting, Cyclist to Person=1
             if line[0] in ['Car', 'Truck', 'Van', 'Tram']:
                 boxes.append([float(x) for x in line[4:8]])
-                labels.append(2) # Re-label Car, Truck, Van, Tram to Vehicle=2
-
-        boxes = torch.FloatTensor(boxes)
+                labels.append(2)  # Re-label Car, Truck, Van, Tram to Vehicle=2
+
+        boxes = torch.FloatTensor(boxes)
         target = {}
         target['image_id'] = torch.tensor([index])
         target['boxes'] = boxes
@@ -133,7 +134,6 @@ def _parse_target(self, index: int):
 
         return target
 
-
    def __getitem__(self, index: int) -> Tuple[Any, Any]:
         """Get item at a given index.
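
A sketch of how the relabeled Kitti subclass might be exercised; the root path and download flag are placeholders, and the target keys shown are the ones visible in the _parse_target body above:

from dataset_utils import Kitti

# After relabeling: 1 = Person (Pedestrian/Person_sitting/Cyclist),
# 2 = Vehicle (Car/Truck/Van/Tram).
dataset = Kitti(root='/tmp/kitti', download=True)
img, target = dataset[0]
print(target['image_id'], target['boxes'])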

notebooks/object_detection/pytorch_object_detection/model_utils.py

Lines changed: 2 additions & 4 deletions
@@ -18,8 +18,6 @@
 
 import math
 import torch
-import torchvision
-import torchvision.models.detection as detection
 
 
 # Dictionary of Torchvision object detection models
@@ -38,6 +36,7 @@
     }
 }
 
+
 def get_retrainable_model(model_name, num_classes, pretrained_model_class, predictor_class):
     # Load an object detection model pre-trained on COCO
     model = pretrained_model_class(pretrained=True)
@@ -51,10 +50,9 @@ def get_retrainable_model(model_name, num_classes, pretrained_model_class, predi
     in_features = model.head.classification_head.conv[0].in_channels
     num_anchors = model.head.classification_head.num_anchors
     model.head.classification_head.num_classes = num_classes
-    cls_logits = torch.nn.Conv2d(in_features, num_anchors * num_classes, kernel_size = 3, stride=1, padding=1)
+    cls_logits = torch.nn.Conv2d(in_features, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
     torch.nn.init.normal_(cls_logits.weight, std=0.01)
     torch.nn.init.constant_(cls_logits.bias, -math.log((1 - 0.01) / 0.01))
     model.head.classification_head.cls_logits = cls_logits
 
     return model
-
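
The bias constant above is the standard RetinaNet focal-loss initialization: with prior probability pi = 0.01, the fresh head's logits start at -log((1 - pi) / pi), so the untrained detector predicts mostly background. A hedged call sketch (the model name and the predictor_class value are assumptions; the commit does not show how the notebook invokes this helper):

import torchvision.models.detection as detection

from model_utils import get_retrainable_model

# Rebuild the RetinaNet classification head for three classes
# ('__background__', 'Person', 'Vehicle').
model = get_retrainable_model('retinanet_resnet50_fpn', num_classes=3,
                              pretrained_model_class=detection.retinanet_resnet50_fpn,
                              predictor_class=None)  # assumed unused on the RetinaNet path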

notebooks/performance/utils/inc_utils.py

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,7 @@ def performance(saved_model_dir, batch_size, image_size, dataset_dir, framework,
                 cores_per_instance=None, num_of_instance=None, inter_num_of_threads=None, intra_num_of_threads=None):
     """
     Uses the Intel Neural Compressor to get performance metrics for the specified model.
-
+
     :param saved_model_dir: Model to load
     :param batch_size: Batch size
     :param image_size: Image input size
@@ -66,7 +66,7 @@ def performance(saved_model_dir, batch_size, image_size, dataset_dir, framework,
 def calculate_latency_and_throughput(results):
     """
     Parses the results from the benchmarking function and returns the latency (ms) and throughput (samples/sec)
-
+
     :param results: Return value from calling the performance util function
     :param batch_size: batch size
     :return: latency (ms) and throughput (images/sec)
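
A hedged sketch chaining the two utilities whose docstrings were tidied above; every argument value is a placeholder:

from inc_utils import performance, calculate_latency_and_throughput

# Benchmark a saved model with Intel Neural Compressor, then reduce the
# raw results to latency (ms) and throughput (images/sec).
results = performance('saved_model_dir', batch_size=32, image_size=224,
                      dataset_dir='/tmp/dataset', framework='tensorflow')
latency, throughput = calculate_latency_and_throughput(results)
print('latency: {} ms, throughput: {} images/sec'.format(latency, throughput))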

notebooks/question_answering/tfhub_question_answering/BERT_Question_Answering.ipynb

Lines changed: 1 addition & 2 deletions
@@ -120,8 +120,7 @@
     "\n",
     "from bert_qa_utils import create_mini_dataset_file, \\\n",
     "    display_predictions, \\\n",
-    "    get_config_and_vocab_from_zip, \\\n",
-    "    predict_squad_customized\n",
+    "    get_config_and_vocab_from_zip \n",
     "\n",
     "# Extract the vocab.txt and bert_config.json from the checkpoint zip file\n",
     "vocab_txt, bert_config = get_config_and_vocab_from_zip(checkpoint_url, bert_dir)\n",

notebooks/question_answering/tfhub_question_answering/bert_qa_utils.py

Lines changed: 16 additions & 66 deletions
@@ -21,14 +21,11 @@
 import os
 import pandas as pd
 import sys
-import tensorflow as tf
+from tlt.utils.file_utils import download_file
+from zipfile import ZipFile
 
 sys.path.append(os.environ["TF_MODELS_DIR"])
 
-from official.common import distribute_utils
-from official.legacy.bert.run_squad_helper import get_dataset_fn
-from tlt.utils.file_utils import download_file
-from zipfile import ZipFile
 
 def create_mini_dataset_file(original_file, output_file, num_dataset_items, overwrite=False):
     """
@@ -41,37 +38,37 @@ def create_mini_dataset_file(original_file, output_file, num_dataset_items, over
     """
     if not os.path.exists(output_file) or overwrite:
         import random
-
+
         with open(original_file) as f:
             original_data = json.load(f)
 
         total_len = len(original_data["data"])
-
+
         if num_dataset_items > total_len:
             raise ValueError("The number of dataset items ({}) cannot be more than the total "
                              "dataset length ({}).".format(num_dataset_items, total_len))
-
+
         item_indicies = random.sample(range(0, total_len), num_dataset_items)
         print("Total dataset length:", total_len)
        print("Randomly selected dataset indices:", item_indicies)
-
+
         articles = []
-
+
         for data_index in item_indicies:
             article = {}
             article["paragraphs"] = original_data["data"][data_index]["paragraphs"]
             article["title"] = original_data["data"][data_index]["title"]
-
+
             for p in article["paragraphs"]:
                 for qas in p["qas"]:
                     qas["id"] = str(qas["id"])
-
+
             articles.append(article)
 
         # Add the article to a dictionary for the mini dataset
         mini_data = {}
         mini_data["data"] = articles
-
+
         # Add on a version
         mini_data["version"] = original_data["version"] if "version" in original_data.keys() else "1.0"
 
@@ -86,7 +83,7 @@ def create_mini_dataset_file(original_file, output_file, num_dataset_items, over
 
 def display_predictions(predict_data_path, results_file_path, n=10):
     """ Displays n number of predictions along with the actual value """
-
+
     def get_data_list():
         count = 0
         data_list = []
@@ -116,7 +113,7 @@ def get_data_list():
                                            "Predicted Answer",
                                            "Actual Answer(s)"])
     return predict_df.style.hide(axis="index")
-
+
 
 def get_config_and_vocab_from_zip(zip_url, bert_dir):
     """
@@ -132,7 +129,7 @@ def get_config_and_vocab_from_zip(zip_url, bert_dir):
     """
     vocab_txt = os.path.join(bert_dir, "vocab.txt")
     bert_config = os.path.join(bert_dir, "bert_config.json")
-
+
     if not os.path.exists(vocab_txt) or not os.path.exists(bert_config):
         downloaded_file = download_file(zip_url, bert_dir)
         with ZipFile(downloaded_file, "r") as checkpoint_zip:
@@ -149,60 +146,13 @@ def get_file_from_zip(file_path):
             if matches:
                 os.replace(matches[0], file_path)
                 break
-
+
         if not os.path.exists(vocab_txt):
             get_file_from_zip(vocab_txt)
-
+
         if not os.path.exists(bert_config):
             get_file_from_zip(bert_config)
 
         os.remove(downloaded_file)
-
-    return vocab_txt, bert_config
 
-
-
-# This function was taken from the TensorFlow Model Garden repo and adapted
-# to be a utility function that has a string for the strategy, directly passes
-# in the max_seq_length instead of a metadata object, and removes the need for FLAGS
-# being defined (instead just passes in the predict_batch_size as an arg).
-
-# https://github.com/tensorflow/models/blob/v2.7.0/official/nlp/bert/run_squad_helper.py#L176
-def predict_squad_customized(strategy_str, max_seq_length, predict_batch_size,
-                             predict_tfrecord_path, num_steps, squad_model):
-
-    strategy = distribute_utils.get_distribution_strategy(distribution_strategy=strategy_str)
-
-    """Make predictions using a Bert-based squad model."""
-    predict_dataset_fn = get_dataset_fn(
-        predict_tfrecord_path,
-        max_seq_length,
-        predict_batch_size,
-        is_training=False)
-    predict_iterator = iter(
-        strategy.distribute_datasets_from_function(predict_dataset_fn))
-
-    @tf.function
-    def predict_step(iterator):
-        """Predicts on distributed devices."""
-
-        def _replicated_step(inputs):
-            """Replicated prediction calculation."""
-            x, _ = inputs
-            unique_ids = x.pop('unique_ids')
-            start_logits, end_logits = squad_model(x, training=False)
-            return dict(
-                unique_ids=unique_ids,
-                start_logits=start_logits,
-                end_logits=end_logits)
-
-        outputs = strategy.run(_replicated_step, args=(next(iterator),))
-        return tf.nest.map_structure(strategy.experimental_local_results, outputs)
-
-    all_results = []
-    for _ in range(num_steps):
-        predictions = predict_step(predict_iterator)
-        for result in get_raw_results(predictions):
-            all_results.append(result)
-        if len(all_results) % 100 == 0:
-            print('Made predictions for %d records.', len(all_results))
-    return all_results
+    return vocab_txt, bert_config
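
The helpers that survive this cleanup can be exercised as below; a sketch only, with placeholder SQuAD file paths (note the module expects the TF_MODELS_DIR environment variable to be set at import time):

from bert_qa_utils import create_mini_dataset_file

# Carve a 5-article subset out of a SQuAD-format JSON file for quick runs.
create_mini_dataset_file(original_file='dev-v1.1.json',
                         output_file='mini-dev-v1.1.json',
                         num_dataset_items=5,
                         overwrite=True)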

notebooks/question_answering/tfhub_question_answering/bert_utils.py

Lines changed: 0 additions & 3 deletions
@@ -17,11 +17,8 @@
 #
 
 import json
-import os
 import pandas as pd
 
-from zipfile import ZipFile
-
 
 def get_model_map(json_path, return_data_frame=False):
     """

notebooks/text_classification/tfhub_text_classification/bert_utils.py

Lines changed: 0 additions & 3 deletions
@@ -17,11 +17,8 @@
 #
 
 import json
-import os
 import pandas as pd
 
-from zipfile import ZipFile
-
 
 def get_model_map(json_path, return_data_frame=False):
     """

notebooks/text_generation/pytorch_text_generation/llm_utils.py

Lines changed: 2 additions & 2 deletions
@@ -39,8 +39,9 @@
     },
 }
 
+
 class INCDataloader:
-    def __init__(self, dataset, tokenizer, batch_size=1, device='cpu',
+    def __init__(self, dataset, tokenizer, batch_size=1, device='cpu',
                  max_seq_length=512, for_calib=False):
         self.dataset = dataset
         self.tokenizer = tokenizer
@@ -98,4 +99,3 @@ def __iter__(self):
 
     def __len__(self):
         return self.length
-
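
A sketch of constructing the dataloader defined above; the Hugging Face dataset and tokenizer are assumptions about what this file expects, since the diff shows only the constructor signature:

from datasets import load_dataset
from transformers import AutoTokenizer

from llm_utils import INCDataloader

tokenizer = AutoTokenizer.from_pretrained('gpt2')
dataset = load_dataset('lambada', split='validation')

# for_calib is assumed to toggle calibration-style batching; not shown in the diff.
loader = INCDataloader(dataset, tokenizer, batch_size=1, device='cpu',
                       max_seq_length=512, for_calib=True)
print(len(loader))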
