Skip to content

Commit 08a6722

Browse files
authored
Clarify the type annotations for ndarrays (#31)
All mypy checks still pass, and I also added a more detailed description of the `Boundary` type. * Update types with ndarray (not done yet) * Finish updating type annotations
1 parent 43f9465 commit 08a6722

File tree

6 files changed

+54
-24
lines changed

6 files changed

+54
-24
lines changed

find_duplicates.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy
2+
import numpy.typing
23
from typing import Iterable, Optional, Self
34

45

@@ -71,7 +72,7 @@ def __str__(self) -> str: # Used solely for debugging
7172

7273

7374
def _initialize_segments(
74-
matrix: numpy.ndarray, is_single_file: bool
75+
matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool
7576
) -> tuple[list[_SegmentUnionFind], dict[_Coordinates, _SegmentUnionFind]]:
7677
"""
7778
Each _SegmentUnionFind we return has size at least 2: these have already
@@ -111,7 +112,7 @@ def _initialize_segments(
111112

112113

113114
def _get_pixel_to_segment(
114-
matrix: numpy.ndarray, is_single_file: bool
115+
matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool
115116
) -> dict[_Coordinates, _SegmentUnionFind]:
116117
"""
117118
If is_single_file is set, we do not include pixels on the main diagonal,
@@ -158,7 +159,9 @@ def key(
158159
return pixel_to_segment
159160

160161

161-
def get_lengths(matrix: numpy.ndarray, is_single_file: bool) -> numpy.ndarray:
162+
def get_lengths(
163+
matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool
164+
) -> numpy.typing.NDArray[numpy.uint32]:
162165
"""
163166
We return an image whose pixels indicate how long a chain of nonzero values
164167
from the original matrix is. If is_single_file is set, the main diagonal
@@ -175,7 +178,7 @@ def get_lengths(matrix: numpy.ndarray, is_single_file: bool) -> numpy.ndarray:
175178

176179

177180
def get_segments(
178-
matrix: numpy.ndarray, is_single_file: bool
181+
matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool
179182
) -> set[_SegmentUnionFind]:
180183
"""
181184
We return a set of _SegmentUnionFinds describing all the segments we found in
@@ -247,7 +250,11 @@ def update_candidate(candidate: _SegmentUnionFind) -> None:
247250
return best_candidate
248251

249252

250-
def get_hues(matrix: numpy.ndarray, is_single_file: bool) -> numpy.ndarray:
253+
def get_hues(
254+
matrix: numpy.typing.NDArray[numpy.uint8], is_single_file: bool
255+
) -> numpy.typing.NDArray[numpy.uint8]:
256+
# Scores are going to start out as uint32's, but get turned into floats.
257+
scores: numpy.typing.NDArray
251258
scores = get_lengths(matrix, is_single_file)
252259
# Cut everything off at the max, then divide by the max to put all values
253260
# between 0 and 1.

gui.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from math import ceil
22
import numpy
3+
import numpy.typing
34
import tkinter as tk
45
import tkinter.font as tkfont
56
from typing import Optional
@@ -130,8 +131,8 @@ def display(self, pixel: int) -> None:
130131
class _Gui(tk.Frame):
131132
def __init__(
132133
self,
133-
matrix: numpy.ndarray,
134-
hues: Optional[numpy.ndarray],
134+
matrix: numpy.typing.NDArray[numpy.uint8],
135+
hues: Optional[numpy.typing.NDArray[numpy.uint8]],
135136
data_a: FileInfo,
136137
data_b: FileInfo,
137138
map_width: int,
@@ -154,8 +155,8 @@ def _on_motion(self, event: tk.Event) -> None:
154155

155156

156157
def launch(
157-
matrix: numpy.ndarray,
158-
hues: Optional[numpy.ndarray],
158+
matrix: numpy.typing.NDArray[numpy.uint8],
159+
hues: Optional[numpy.typing.NDArray[numpy.uint8]],
159160
data_a: FileInfo,
160161
data_b: FileInfo,
161162
map_width: int,

image_pyramid.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy
2+
import numpy.typing
23
from typing import Optional
34

45
import utils
@@ -9,8 +10,8 @@ class ImagePyramid:
910

1011
def __init__(
1112
self,
12-
matrix: numpy.ndarray,
13-
hues: Optional[numpy.ndarray],
13+
matrix: numpy.typing.NDArray[numpy.uint8],
14+
hues: Optional[numpy.typing.NDArray[numpy.uint8]],
1415
sidelength: int,
1516
) -> None:
1617
"""
@@ -20,7 +21,7 @@ def __init__(
2021
self._pyramid.append(matrix)
2122
self._sidelength = sidelength
2223

23-
self._hue_pyramid: Optional[list[numpy.ndarray]]
24+
self._hue_pyramid: Optional[list[numpy.typing.NDArray[numpy.uint8]]]
2425
if hues is None:
2526
self._hue_pyramid = None
2627
else:
@@ -85,7 +86,7 @@ def __init__(
8586

8687
def get_submatrix(
8788
self, top_left_x: int, top_left_y: int
88-
) -> tuple[numpy.ndarray, int, int]:
89+
) -> tuple[numpy.typing.NDArray[numpy.uint8], int, int]:
8990
"""
9091
We return a sidelength-by-sidelength-by-3 ndarray containing an HSV
9192
image of the relevant region, and the indices of the top-left corner.

tokenizer.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import code_tokenize
22
import numpy
3+
import numpy.typing
34
from typing import NamedTuple, Optional
45

56
from code_tokenize.tokens import ASTToken
@@ -9,11 +10,27 @@
910

1011
# Syntactic sugar: a Boundary contains the start and end of a token, where
1112
# each position is described by its line number and the column within the line.
13+
# The first line of the file is line 1, but the first column of the line is
14+
# column 0. The end is the first location *after* the end of the token (which
15+
# might be 1 character past the end of the current line, if this is the last
16+
# token on the line).
17+
# Example: This file:
18+
# print("hi")
19+
# print("bye")
20+
# Likely has these boundaries:
21+
# ((1, 0), (1, 5)) print
22+
# ((1, 5), (1, 6)) (
23+
# ((1, 6), (1, 10)) "hi"
24+
# ((1, 10), (1, 11)) )
25+
# ((2, 0), (2, 5)) print
26+
# ((2, 5), (2, 6)) (
27+
# ((2, 6), (2, 11)) "bye"
28+
# ((2, 11), (2, 12)) )
1229
Boundary = tuple[tuple[int, int], tuple[int, int]]
1330

1431

1532
class FileInfo(NamedTuple):
16-
tokens: numpy.ndarray # Really a list[code_tokenize.tokens.Token]
33+
tokens: numpy.typing.NDArray[numpy.str_]
1734
lines: list[str]
1835
boundaries: list[Boundary]
1936
filename: str

utils.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import numpy
2+
import numpy.typing
23
from typing import Optional
34

45

56
PIXELS_IN_BIG_FILE = 50 * 1000 * 1000 # 50 megapixels
67

78

8-
def to_hsv_matrix(matrix: numpy.ndarray,
9-
hues: Optional[numpy.ndarray]) -> numpy.ndarray:
9+
def to_hsv_matrix(
10+
matrix: numpy.typing.NDArray[numpy.uint8],
11+
hues: Optional[numpy.typing.NDArray[numpy.uint8]],
12+
) -> numpy.typing.NDArray[numpy.uint8]:
1013
"""
1114
The matrix is a 2D array of uint8's. The hues are either None or another 2D
1215
array of the same shape.
@@ -22,12 +25,10 @@ def to_hsv_matrix(matrix: numpy.ndarray,
2225
return result
2326

2427

25-
# The two arguments to make_matrix both have type
26-
# list[code_tokenize.tokens.ASTToken], but that module does not have type
27-
# annotations and adding them in would be annoying.
2828
def make_matrix(
29-
tokens_a: numpy.ndarray, tokens_b: numpy.ndarray
30-
) -> numpy.ndarray:
29+
tokens_a: numpy.typing.NDArray[numpy.str_],
30+
tokens_b: numpy.typing.NDArray[numpy.str_]
31+
) -> numpy.typing.NDArray[numpy.uint8]:
3132
matrix = numpy.zeros([len(tokens_a), len(tokens_b)], dtype=numpy.uint8)
3233
for i, value in enumerate(tokens_a):
3334
matrix[i, :] = (tokens_b == value)

zoom_map.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from functools import partial
22
import numpy
3+
import numpy.typing
34
import PIL.Image
45
import PIL.ImageTk
56
import tkinter as tk
@@ -11,8 +12,8 @@ class ZoomMap(tk.Canvas):
1112
def __init__(
1213
self,
1314
tk_parent: tk.Widget,
14-
matrix: numpy.ndarray,
15-
hues: Optional[numpy.ndarray],
15+
matrix: numpy.typing.NDArray[numpy.uint8],
16+
hues: Optional[numpy.typing.NDArray[numpy.uint8]],
1617
sidelength: int,
1718
) -> None:
1819
super().__init__(tk_parent, height=sidelength, width=sidelength,
@@ -110,7 +111,9 @@ def _on_unclick(self, event: tk.Event) -> None:
110111
self._set_image()
111112

112113
@staticmethod
113-
def _to_image(matrix: numpy.ndarray) -> PIL.ImageTk.PhotoImage:
114+
def _to_image(
115+
matrix: numpy.typing.NDArray[numpy.uint8]
116+
) -> PIL.ImageTk.PhotoImage:
114117
image = PIL.Image.fromarray(matrix, mode="HSV")
115118
return PIL.ImageTk.PhotoImage(image)
116119

0 commit comments

Comments
 (0)