Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit c4ca5a4

Browse files
authored
Merge pull request #692 from brettkoonce/minor_sp
tensor2tensor: minor spelling tweaks
2 parents 56bac6d + 2180390 commit c4ca5a4

19 files changed

+41
-41
lines changed

tensor2tensor/bin/t2t_bleu.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
flags.DEFINE_string("translation", None,
7575
"Path to the MT system translation file")
7676
flags.DEFINE_string("translations_dir", None,
77-
"Directory with translated files to be evaulated.")
77+
"Directory with translated files to be evaluated.")
7878
flags.DEFINE_string("event_dir", None, "Where to store the event file.")
7979

8080
flags.DEFINE_string("bleu_variant", "both",

tensor2tensor/data_generators/algorithmic_math.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def algebra_inverse_solve(left, right, var, solve_ops):
181181
right- Expression on the right side of the op.
182182
to_tree- The tree on the other side of the equal sign. The canceled
183183
out expression will be moved here.
184-
new_from_tree- The resuling from_tree after the algebraic
184+
new_from_tree- The resulting from_tree after the algebraic
185185
manipulation.
186186
new_to_tree- The resulting to_tree after the algebraic manipulation.
187187
@@ -355,7 +355,7 @@ def generate_calculus_integrate_sample(vlist, ops, min_depth, max_depth,
355355
# functions: Dict of special function names. Maps human readable string names to
356356
# single char names used in flist.
357357
# ops: Dict mapping op symbols (chars) to ExprOp instances.
358-
# solve_ops: Encodes rules for how to algebraicly cancel out each operation. See
358+
# solve_ops: Encodes rules for how to algebraically cancel out each operation. See
359359
# doc-string for `algebra_inverse_solve`.
360360
# int_encoder: Function that maps a string to a list of tokens. Use this to
361361
# encode an expression to feed into a model.
@@ -377,7 +377,7 @@ def math_dataset_init(alphabet_size=26, digits=None, functions=None):
377377
378378
Args:
379379
alphabet_size: How many possible variables there are. Max 52.
380-
digits: How many numerical digits to encode as tokens, "0" throuh
380+
digits: How many numerical digits to encode as tokens, "0" through
381381
str(digits-1), or None to encode no digits.
382382
functions: Defines special functions. A dict mapping human readable string
383383
names, like "log", "exp", "sin", "cos", etc., to single chars. Each

tensor2tensor/data_generators/speech_recognition.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
"""Common classes for automatic speech recogntion (ASR) datasets.
16+
"""Common classes for automatic speech recognition (ASR) datasets.
1717
1818
The audio import uses sox to generate normalized waveforms, please install
1919
it as appropriate (e.g. using apt-get or yum).

tensor2tensor/data_generators/text_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def store_to_file(self, filename):
348348
def _escape_token(token, alphabet):
349349
"""Escape away underscores and OOV characters and append '_'.
350350
351-
This allows the token to be experessed as the concatenation of a list
351+
This allows the token to be expressed as the concatenation of a list
352352
of subtokens from the vocabulary. The underscore acts as a sentinel
353353
which allows us to invertibly concatenate multiple such lists.
354354

tensor2tensor/data_generators/translate_enzh.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ class TranslateEnzhWmt32k(translate.TranslateProblem):
172172
173173
CWMT:
174174
- http://nlp.nju.edu.cn/cwmt-wmt/
175-
- Website contrains instructions for FTP server access.
175+
- Website contains instructions for FTP server access.
176176
- You'll need to download CASIA, CASICT, DATUM2015, DATUM2017,
177177
NEU datasets
178178

tensor2tensor/data_generators/wiki.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def scramble_fraction(self):
190190

191191
@registry.register_problem
192192
class LanguagemodelWikiScrambleL1k(LanguagemodelWikiScramble):
193-
"""Sequence length 1024, 50% scrambed."""
193+
"""Sequence length 1024, 50% scrambled."""
194194

195195
@property
196196
def sequence_length(self):

tensor2tensor/layers/common_attention.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def register_layer(
8383
default_args (list): The default parameters to add to the function.
8484
default_kwargs (dict): The default parameters to add to the function.
8585
Those arguments can be overwritten when calling the function.
86-
use_dp (bool): Wrap the function call within a dataparalellism object if
86+
use_dp (bool): Wrap the function call within a dataparallelism object if
8787
dp is available. Some layers (like MOE) must be called without dp.
8888
recompute_grad (bool): If True, recompute the function during the
8989
backward pass to save memory
@@ -1378,7 +1378,7 @@ def _relative_attention_inner(x, y, z, transpose):
13781378
x: Tensor with shape [batch_size, heads, length, length or depth].
13791379
y: Tensor with shape [batch_size, heads, length, depth].
13801380
z: Tensor with shape [length, length, depth].
1381-
transpose: Whether to tranpose inner matrices of y and z. Should be true if
1381+
transpose: Whether to transpose inner matrices of y and z. Should be true if
13821382
last dimension of x is depth, not length.
13831383
13841384
Returns:
@@ -1422,7 +1422,7 @@ def dot_product_attention_relative(q,
14221422
k: a Tensor with shape [batch, heads, length, depth].
14231423
v: a Tensor with shape [batch, heads, length, depth].
14241424
bias: bias Tensor.
1425-
max_relative_position: an integer specifying the maxmimum distance between
1425+
max_relative_position: an integer specifying the maximum distance between
14261426
inputs that unique position embeddings should be learned for.
14271427
dropout_rate: a floating point number.
14281428
image_shapes: optional tuple of integer scalars.
@@ -2141,7 +2141,7 @@ def gather_indices_2d(x, block_shape, block_stride):
21412141

21422142

21432143
def make_2d_block_raster_mask(query_shape, memory_flange):
2144-
"""creates a mask for 2d block raster scany.
2144+
"""creates a mask for 2d block raster scan.
21452145
21462146
The query mask can look to the left, top left, top, and top right, but
21472147
not to the right. Inside the query, we have the standard raster scan
@@ -2661,7 +2661,7 @@ def ffn_self_attention_layer(x,
26612661
We use self-attention to do feedforward computations. We apply this function
26622662
positionwise where for each position, we linearly transform the output to have
26632663
depth filter_depth, and break up the result depth-wise into num_parts
2664-
contiguous parts. The parts self-attentd, we concatenate the results
2664+
contiguous parts. The parts self-attend, we concatenate the results
26652665
depth-wise, and we linearly transform to a depth of output_depth. The
26662666
goal is to get multiplicative interactions between components of a
26672667
representation.
@@ -2764,7 +2764,7 @@ def parameter_attention(x,
27642764
x, total_key_depth, use_bias=False, name="q_transform")
27652765
if dropout_rate:
27662766
# This is a cheaper form of attention dropout where we use
2767-
# the same dropout decisions across batch elemets and query positions,
2767+
# the same dropout decisions across batch elements and query positions,
27682768
# but different decisions across heads and memory positions.
27692769
v = tf.nn.dropout(
27702770
v, 1.0 - dropout_rate, noise_shape=[num_heads, memory_rows, 1])

tensor2tensor/layers/common_hparams.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,13 @@ def basic_params1():
102102
moe_loss_coef=1e-2,
103103
# Sequences of operations to perform on layer input and layer output.
104104
# Used by common_layers.layer_preprocess, common_layers.layer_postprocess
105-
# Each character repsesnts an operation:
105+
# Each character represents an operation:
106106
# none: no preprocessing
107107
# d: apply dropout
108108
# n: apply normalization (see norm_type and norm_epsilon)
109109
# a: add layer input (residual connection - only during postprocess)
110110
# The special string "none" is used instead of the empty string
111-
# to indicate no pre/postprocesisng, since the empty string causes
111+
# to indicate no pre/postprocessing, since the empty string causes
112112
# trouble for hyperparameter tuning.
113113
# TODO(noam): The current settings ("", "dan") are the published version
114114
# of the transformer. ("n", "da") seems better for harder-to-learn
@@ -174,13 +174,13 @@ def basic_params1():
174174
# The maximum length of "input" sequence.
175175
# Sequences longer than this value will be truncated. 0 or negative values
176176
# mean there is no maximum or truncation.
177-
# You can change this behavior by overridding preprocess_example() method
177+
# You can change this behavior by overriding preprocess_example() method
178178
# in your problem class.
179179
max_input_seq_length=0,
180180
# The maximum length of "target" sequence.
181181
# Sequences longer than this value will be truncated. 0 or negative values
182182
# mean there is no maximum or truncation.
183-
# You can change this behavior by overridding preprocess_example() method
183+
# You can change this behavior by overriding preprocess_example() method
184184
# in your problem class.
185185
max_target_seq_length=0,
186186
# if nonzero, we split the target sequences on example read.

tensor2tensor/layers/common_layers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1201,7 +1201,7 @@ def add_timing_signal(x, min_timescale=1, max_timescale=1e4, num_timescales=16):
12011201
and the target of the attention.
12021202
12031203
The use of relative position is possible because sin(x+y) and cos(x+y) can be
1204-
experessed in terms of y, sin(x) and cos(x).
1204+
expressed in terms of y, sin(x) and cos(x).
12051205
12061206
In particular, we use a geometric sequence of timescales starting with
12071207
min_timescale and ending with max_timescale. For each timescale, we

tensor2tensor/models/research/attention_lm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def attention_lm_prepare_decoder(targets, hparams):
7070
Returns:
7171
decoder_input: a Tensor, bottom of decoder stack
7272
decoder_self_attention_bias: a Tensor, containing large negative values
73-
to implement masked attention and possibly baises for diagonal alignments
73+
to implement masked attention and possibly biases for diagonal alignments
7474
"""
7575
if hparams.prepend_mode == "prepend_inputs_full_attention":
7676
decoder_self_attention_bias = (

tensor2tensor/models/research/attention_lm_moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def _diet_expert(x):
163163
def print_shape(x, suffix, debug=False):
164164
# To help debugging, print the input/output shapes at inference and eval
165165
# Inference for long sequences can take a long time, so it's helpful to
166-
# see the progession of the generation
166+
# see the progression of the generation
167167
if not debug and hparams.mode == ModeKeys.TRAIN:
168168
return x
169169
return tf.Print(x, [tf.shape(x)], "shape_x_{}".format(suffix))
@@ -368,7 +368,7 @@ def attention_lm_moe_prepare_decoder(targets, hparams):
368368
Returns:
369369
decoder_input: a Tensor, bottom of decoder stack
370370
decoder_self_attention_bias: a Tensor, containing large negative values
371-
to implement masked attention and possibly baises for diagonal alignments
371+
to implement masked attention and possibly biases for diagonal alignments
372372
pad_remover (expert_utils.PadRemover): an util object to remove padding
373373
"""
374374
targets_pad_mask = common_attention.embedding_to_padding(targets)

tensor2tensor/models/research/transformer_moe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
# "a/a/a#": Encoder only model (3 layers)
4747
# "#a/a/a": Decoder only model (3 layers)
4848
# "a/a-moe#a/a/a": Encoder (2 layers with 1 moe), decoder (3 layers)
49-
# Note that all combinaisons are not necessarily possibles (some attention
49+
# Note that not all combinations are necessarily possible (some attention
5050
# types are not necessarily compatible with the encoder, or can't accept certain
5151
# types of masking)
5252

tensor2tensor/models/transformer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class Transformer(t2t_model.T2TModel):
5151

5252
def __init__(self, *args, **kwargs):
5353
super(Transformer, self).__init__(*args, **kwargs)
54-
self.attention_weights = dict() # For vizualizing attention heads.
54+
self.attention_weights = dict() # For visualizing attention heads.
5555

5656
def encode(self, inputs, target_space, hparams, features=None):
5757
"""Encode transformer inputs.
@@ -60,7 +60,7 @@ def encode(self, inputs, target_space, hparams, features=None):
6060
inputs: Transformer inputs [batch_size, input_length, input_height,
6161
hidden_dim] which will be flattened along the two spatial dimensions.
6262
target_space: scalar, target space ID.
63-
hparams: hyperparmeters for model.
63+
hparams: hyperparameters for model.
6464
features: optionally pass the entire features dictionary as well.
6565
This is needed now for "packed" datasets.
6666
@@ -106,7 +106,7 @@ def decode(self,
106106
encoder-decoder attention. [batch_size, input_length]
107107
decoder_self_attention_bias: Bias and mask weights for decoder
108108
self-attention. [batch_size, decoder_length]
109-
hparams: hyperparmeters for model.
109+
hparams: hyperparameters for model.
110110
cache: dict, containing tensors which are the results of previous
111111
attentions, used for fast decoding.
112112
nonpadding: optional Tensor with shape [batch_size, decoder_length]
@@ -142,7 +142,7 @@ def body(self, features):
142142
Args:
143143
features: Map of features to the model. Should contain the following:
144144
"inputs": Transformer inputs [batch_size, input_length, hidden_dim]
145-
"tragets": Target decoder outputs.
145+
"targets": Target decoder outputs.
146146
[batch_size, decoder_length, hidden_dim]
147147
"target_space_id"
148148
@@ -832,7 +832,7 @@ def transformer_ffn_layer(x,
832832
833833
Args:
834834
x: a Tensor of shape [batch_size, length, hparams.hidden_size]
835-
hparams: hyperparmeters for model
835+
hparams: hyperparameters for model
836836
pad_remover: an expert_utils.PadRemover object tracking the padding
837837
positions. If provided, when using convolutional settings, the padding
838838
is removed before applying the convolution, and restored afterward. This

tensor2tensor/models/vanilla_gan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def body(self, features):
125125
features: a dictionary with the tensors.
126126
127127
Returns:
128-
A pair (predictions, losses) where preditions is the generated image
128+
A pair (predictions, losses) where predictions is the generated image
129129
and losses is a dictionary of losses (that get added for the final loss).
130130
"""
131131
features["targets"] = features["inputs"]

tensor2tensor/rl/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Tensor2Tensor experimental Model-Based Reinforcement Learning.
22

3-
The rl package intention is to provide possibility to run reinforcement
3+
The rl package intention is to provide the ability to run reinforcement
44
algorithms within TensorFlow's computation graph, in order to do model-based
55
RL using environment models from Tensor2Tensor. It's very experimental
66
for now and under heavy development.
77

8-
Currently the only supported algorithm is Proximy Policy Optimization - PPO.
8+
Currently the only supported algorithm is Proximal Policy Optimization - PPO.
99

1010
# Sample usages
1111

tensor2tensor/utils/beam_search.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def compute_topk_scores_and_seq(sequences, scores, scores_to_gather, flags,
135135
[batch_size, beam_size]. We will return the gathered scores from here.
136136
Scores to gather is different from scores because for grow_alive, we will
137137
need to return log_probs, while for grow_finished, we will need to return
138-
the length penalized scors.
138+
the length penalized scores.
139139
flags: Tensor of bools for sequences that say whether a sequence has reached
140140
EOS or not
141141
beam_size: int
@@ -229,7 +229,7 @@ def beam_search(symbols_to_logits_fn,
229229
Returns:
230230
Tuple of
231231
(decoded beams [batch_size, beam_size, decode_length]
232-
decoding probablities [batch_size, beam_size])
232+
decoding probabilities [batch_size, beam_size])
233233
"""
234234
batch_size = common_layers.shape_list(initial_ids)[0]
235235

@@ -495,17 +495,17 @@ def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq,
495495
# If the sequence isn't finished, we multiply its score by 0. since
496496
# scores are all -ve, taking the min will give us the score of the lowest
497497
# finished item.
498-
lowest_score_of_fininshed_in_finished = tf.reduce_min(
498+
lowest_score_of_finished_in_finished = tf.reduce_min(
499499
finished_scores * tf.to_float(finished_in_finished), axis=1)
500500
# If none of the sequences have finished, then the min will be 0 and
501501
# we have to replace it by -ve INF if it is. The score of any seq in alive
502502
# will be much higher than -ve INF and the termination condition will not
503503
# be met.
504-
lowest_score_of_fininshed_in_finished += (
504+
lowest_score_of_finished_in_finished += (
505505
(1. - tf.to_float(tf.reduce_any(finished_in_finished, 1))) * -INF)
506506

507507
bound_is_met = tf.reduce_all(
508-
tf.greater(lowest_score_of_fininshed_in_finished,
508+
tf.greater(lowest_score_of_finished_in_finished,
509509
lower_bound_alive_scores))
510510

511511
return tf.logical_and(

tensor2tensor/utils/data_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def _batching_scheme(batch_size,
136136
min_length=0):
137137
"""A batching scheme based on model hyperparameters.
138138
139-
Every batch containins a number of sequences divisible by `shard_multiplier`.
139+
Every batch contains a number of sequences divisible by `shard_multiplier`.
140140
141141
Args:
142142
batch_size: int, total number of tokens in a batch.

tensor2tensor/utils/expert_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def add_scope(scope=None, scope_fn=None):
6666
"""Return a decorator which add a TF name/variable scope to a function.
6767
6868
Note that the function returned by the decorator accepts an additional 'name'
69-
parameter, which can overwritte the name scope given when the function is
69+
parameter, which can overwrite the name scope given when the function is
7070
created.
7171
7272
Args:
@@ -587,12 +587,12 @@ def restore(self, x):
587587

588588
@add_name_scope("map_ids")
589589
def map_ids(x, indices, map_fn):
590-
"""Apply a function to each coordinate ids of a multidimentional tensor.
590+
"""Apply a function to each coordinate ids of a multidimensional tensor.
591591
592592
This allows to process each sequence of a batch independently. This is
593593
similar to tf.map_fn but with tensor where the batch dim has been flattened.
594594
595-
Warning: The indices ids have to be contigous and orderd in memory as the
595+
Warning: The indices ids have to be contiguous and ordered in memory as the
596596
output vector for each of the ids are simply concatenated after being
597597
processed.
598598
Ex: if your indices are [0,2,2,1,2,0], the output will contain the processed

tensor2tensor/utils/metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ def image_summary(predictions, features, hparams):
254254
255255
Returns:
256256
summary_proto: containing the summary images.
257-
weights: A Tensor of zeros of the same shape as preditions.
257+
weights: A Tensor of zeros of the same shape as predictions.
258258
"""
259259
del hparams
260260
results = tf.cast(tf.argmax(predictions, axis=-1), tf.uint8)

0 commit comments

Comments
 (0)