diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8f583fc1c4..3812ceb399 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,5 @@ +# Ignore test linting to avoid conflicting changes to version stability. +exclude: ^tests/testdata/ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 diff --git a/lm_eval/decontamination/decontaminate.py b/lm_eval/decontamination/decontaminate.py index c78d4a6977..ce81446006 100644 --- a/lm_eval/decontamination/decontaminate.py +++ b/lm_eval/decontamination/decontaminate.py @@ -9,6 +9,7 @@ from .janitor import Janitor, word_ngrams from .archiver import ZStdTextReader + # Was used for testing the evaluator decoupled from the full logic below def get_train_overlap_stub(docs, ngrams_path, ngrams_n_size): simulated_overlap = 0.1 diff --git a/lm_eval/decontamination/janitor.py b/lm_eval/decontamination/janitor.py index 64db588fe8..458ee223c2 100644 --- a/lm_eval/decontamination/janitor.py +++ b/lm_eval/decontamination/janitor.py @@ -11,7 +11,7 @@ import janitor_util JANITOR_CPP = True -except Exception as e: +except Exception: print("WARNING: C++ module could not be loaded. Janitor running in python mode") traceback.print_exc() JANITOR_CPP = False diff --git a/lm_eval/tasks/__init__.py b/lm_eval/tasks/__init__.py index 358d4c7002..41b85c10da 100644 --- a/lm_eval/tasks/__init__.py +++ b/lm_eval/tasks/__init__.py @@ -22,14 +22,12 @@ from . import sat from . import arithmetic from . import lambada -from . import race from . import piqa from . import prost from . import mc_taco from . import triviaqa from . import pubmedqa from . import sciq -from . import webqs from . import qasper from . import qa4mre from . import translation @@ -294,7 +292,7 @@ def get_task(task_name): try: return TASK_REGISTRY[task_name] - except KeyError as e: + except KeyError: print("Available tasks:") pprint(TASK_REGISTRY) raise KeyError(f"Missing task {task_name}") diff --git a/lm_eval/tasks/blimp.py b/lm_eval/tasks/blimp.py index 2460b10bfa..356b7f05f7 100644 --- a/lm_eval/tasks/blimp.py +++ b/lm_eval/tasks/blimp.py @@ -28,7 +28,7 @@ eprint = {https://doi.org/10.1162/tacl_a_00321}, abstract = { We introduce The Benchmark of Linguistic Minimal Pairs (BLiMP),1 a challenge set for evaluating the linguistic knowledge of language models (LMs) on major grammatical phenomena in English. BLiMP consists of 67 individual datasets, each containing 1,000 minimal pairs—that is, pairs of minimally different sentences that contrast in grammatical acceptability and isolate specific phenomenon in syntax, morphology, or semantics. We generate the data according to linguist-crafted grammar templates, and human aggregate agreement with the labels is 96.4\%. We evaluate n-gram, LSTM, and Transformer (GPT-2 and Transformer-XL) LMs by observing whether they assign a higher probability to the acceptable sentence in each minimal pair. We find that state-of-the-art models identify morphological contrasts related to agreement reliably, but they struggle with some subtle semantic and syntactic phenomena, such as negative polarity items and extraction islands. } } -""" +""" # noqa: W605 class BlimpTask(Task): diff --git a/lm_eval/tasks/hendrycks_math.py b/lm_eval/tasks/hendrycks_math.py index c805af0f85..6b13f57511 100644 --- a/lm_eval/tasks/hendrycks_math.py +++ b/lm_eval/tasks/hendrycks_math.py @@ -98,7 +98,7 @@ def is_equiv(self, str1, str2, verbose=False): if verbose: print(ss1, ss2) return ss1 == ss2 - except: + except Exception: return str1 == str2 def remove_boxed(self, s): @@ -246,7 +246,7 @@ def strip_string(self, string): # remove percentage string = string.replace("\\%", "") - string = string.replace("\%", "") + string = string.replace("\%", "") # noqa: W605 # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string string = string.replace(" .", " 0.") diff --git a/lm_eval/tasks/naturalqs.py b/lm_eval/tasks/naturalqs.py index 4a2d526f9e..29e7c8fccc 100644 --- a/lm_eval/tasks/naturalqs.py +++ b/lm_eval/tasks/naturalqs.py @@ -71,7 +71,7 @@ def doc_to_decontamination_query(self, doc): def doc_to_target(self, doc): # There's a short answer and a long answer. Based on the paper, I'm using the long answer. - short_answer = doc["annotations"]["short_answers"][0]["text"] + # short_answer = doc["annotations"]["short_answers"][0]["text"] long_answer_start = doc["annotations"]["long_answer"][0]["start_token"] long_answer_end = doc["annotations"]["long_answer"][0]["end_token"] long_answer_span = doc["document"]["tokens"]["token"][ diff --git a/lm_eval/tasks/qa4mre.py b/lm_eval/tasks/qa4mre.py index 26dbed6f96..682959c9d1 100644 --- a/lm_eval/tasks/qa4mre.py +++ b/lm_eval/tasks/qa4mre.py @@ -23,7 +23,7 @@ booktitle={CLEF}, year={2013} } -""" +""" # noqa: W605 class QA4MRE(MultipleChoiceTask): diff --git a/lm_eval/tasks/squad.py b/lm_eval/tasks/squad.py index 2a987ea7eb..010c87e2a4 100644 --- a/lm_eval/tasks/squad.py +++ b/lm_eval/tasks/squad.py @@ -144,7 +144,7 @@ def process_results(self, doc, results): "f1": ( predictions, references, - ), # The F-score of predicted tokens versus the gold answer + ), # The F-score of predicted tokens versus the gold answer "HasAns_exact": ( predictions, references, @@ -180,7 +180,7 @@ def aggregation(self): ), # Exact match (the normalized answer exactly match the gold answer) "f1": partial( _squad_agg, "f1" - ), # The F-score of predicted tokens versus the gold answer + ), # The F-score of predicted tokens versus the gold answer "HasAns_exact": partial( _squad_agg, "HasAns_exact" ), # Exact match (the normalized answer exactly match the gold answer) @@ -209,7 +209,7 @@ def higher_is_better(self): """ return { "exact": True, # Exact match (the normalized answer exactly match the gold answer) - "f1": True, # The F-score of predicted tokens versus the gold answer + "f1": True, # The F-score of predicted tokens versus the gold answer "HasAns_exact": True, # Exact match (the normalized answer exactly match the gold answer) "HasAns_f1": True, # The F-score of predicted tokens versus the gold answer "NoAns_exact": True, # Exact match (the normalized answer exactly match the gold answer) diff --git a/lm_eval/tasks/truthfulqa.py b/lm_eval/tasks/truthfulqa.py index 4e61e80496..254dc3a210 100644 --- a/lm_eval/tasks/truthfulqa.py +++ b/lm_eval/tasks/truthfulqa.py @@ -390,6 +390,7 @@ def rouge(self, refs, preds): rouge_types = ["rouge1", "rouge2", "rougeLsum"] scorer = rouge_scorer.RougeScorer(rouge_types) # Add newlines between sentences to correctly compute `rougeLsum`. + def _prepare_summary(summary): summary = summary.replace(" . ", ".\n") return summary diff --git a/scripts/clean_training_data/compress_and_package.py b/scripts/clean_training_data/compress_and_package.py index dfa23e42d9..c9e7f2593c 100644 --- a/scripts/clean_training_data/compress_and_package.py +++ b/scripts/clean_training_data/compress_and_package.py @@ -42,8 +42,12 @@ def compress_and_move(working_directory, output_directory, process_count): tasks.append(task) pool = TqdmMultiProcessPool(process_count) - on_done = lambda _: None - on_error = lambda _: None + + def on_done(_): + return None + + def on_error(_): + return None global_progress = tqdm( total=len(bucket_file_paths), dynamic_ncols=True, unit="file" diff --git a/scripts/clean_training_data/investigate_pile.py b/scripts/clean_training_data/investigate_pile.py index dd6bd11d06..dccd3abe70 100644 --- a/scripts/clean_training_data/investigate_pile.py +++ b/scripts/clean_training_data/investigate_pile.py @@ -51,8 +51,12 @@ def get_stats(): # Generate minhashes with pool tasks = [(get_file_stats, (file,)) for file in files] - on_done = lambda _: None - on_error = lambda _: None + def on_done(_): + return None + + def on_error(_): + return None + results = pool.map(global_tqdm, tasks, on_error, on_done) total_documents, total_size = reduce( diff --git a/scripts/clean_training_data/process_sorted_buckets.py b/scripts/clean_training_data/process_sorted_buckets.py index 35b6950b92..1e145f9198 100644 --- a/scripts/clean_training_data/process_sorted_buckets.py +++ b/scripts/clean_training_data/process_sorted_buckets.py @@ -30,12 +30,13 @@ logger = logging.getLogger(__name__) + # Multiprocessed def process_bucket( bucket_file_path, processed_directory, move_dir, tqdm_func, global_tqdm ): - bucket_id = re.sub("\D", "", os.path.basename(bucket_file_path)) + bucket_id = re.sub("\D", "", os.path.basename(bucket_file_path)) # noqa: W605 done_file = os.path.join( processed_directory, f"ngram_bucket_processing_{bucket_id}.done" ) @@ -106,8 +107,13 @@ def process_sorted_buckets(working_directory, move_dir, process_count): ] global_tqdm = tqdm(total=len(bucket_file_paths), dynamic_ncols=True, unit="bucket") - on_done = lambda _: None - on_error = lambda _: None + + def on_done(_): + return None + + def on_error(_): + return None + _ = pool.map(global_tqdm, tasks, on_error, on_done) diff --git a/tests/test_generate_13_grams.py b/tests/test_generate_13_grams.py index 5c6757e67a..7a2e9f41cf 100644 --- a/tests/test_generate_13_grams.py +++ b/tests/test_generate_13_grams.py @@ -3,7 +3,7 @@ import shutil import glob -from lm_eval.decontamination.janitor import * +from lm_eval.decontamination.janitor import Janitor, word_ngrams from scripts.clean_training_data.generate_13_grams import do_ngrams_in_buckets from lm_eval.decontamination.archiver import Archive, TextReader diff --git a/tests/test_janitor.py b/tests/test_janitor.py index 784198825b..b496bfadd1 100644 --- a/tests/test_janitor.py +++ b/tests/test_janitor.py @@ -1,7 +1,13 @@ import re from collections import defaultdict -from lm_eval.decontamination.janitor import * +from lm_eval.decontamination.janitor import ( + Janitor, + form_ngrams, + word_ngrams, + split_indices, + word_ngrams_indices, +) def simple_ngram(sequence, n): diff --git a/tests/testdata/anagrams1-v0-greedy_until b/tests/testdata/anagrams1-v0-greedy_until index 2195ebfbf5..5536425002 100644 --- a/tests/testdata/anagrams1-v0-greedy_until +++ b/tests/testdata/anagrams1-v0-greedy_until @@ -1 +1 @@ -7c0c5246d3f751f39119a5629ac1d4b2c6fd2a315f78d6de9b2c387e24e3fef1 +7c0c5246d3f751f39119a5629ac1d4b2c6fd2a315f78d6de9b2c387e24e3fef1 \ No newline at end of file diff --git a/tests/testdata/anagrams1-v0-res.json b/tests/testdata/anagrams1-v0-res.json index 1dde182b14..c89528892a 100644 --- a/tests/testdata/anagrams1-v0-res.json +++ b/tests/testdata/anagrams1-v0-res.json @@ -1 +1 @@ -{"results": {"anagrams1": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams1": 0}} +{"results": {"anagrams1": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams1": 0}} \ No newline at end of file diff --git a/tests/testdata/anagrams2-v0-greedy_until b/tests/testdata/anagrams2-v0-greedy_until index 6349c22e73..9db9d158dc 100644 --- a/tests/testdata/anagrams2-v0-greedy_until +++ b/tests/testdata/anagrams2-v0-greedy_until @@ -1 +1 @@ -6700a3c44e48abe8337238dcbe3b54cf4abafe0c204c52d921e590872fbd05e7 +6700a3c44e48abe8337238dcbe3b54cf4abafe0c204c52d921e590872fbd05e7 \ No newline at end of file diff --git a/tests/testdata/anagrams2-v0-res.json b/tests/testdata/anagrams2-v0-res.json index cdf7e295e6..f74887fe16 100644 --- a/tests/testdata/anagrams2-v0-res.json +++ b/tests/testdata/anagrams2-v0-res.json @@ -1 +1 @@ -{"results": {"anagrams2": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams2": 0}} +{"results": {"anagrams2": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"anagrams2": 0}} \ No newline at end of file diff --git a/tests/testdata/anli_r1-v0-loglikelihood b/tests/testdata/anli_r1-v0-loglikelihood index b48619eddb..4450c0628e 100644 --- a/tests/testdata/anli_r1-v0-loglikelihood +++ b/tests/testdata/anli_r1-v0-loglikelihood @@ -1 +1 @@ -3a84baf2f170e138c6ce0bc9f06f905def35d705fa2b8781f10c87aef404c4cb +3a84baf2f170e138c6ce0bc9f06f905def35d705fa2b8781f10c87aef404c4cb \ No newline at end of file diff --git a/tests/testdata/anli_r1-v0-res.json b/tests/testdata/anli_r1-v0-res.json index 7b2c32c2b9..b6f6b35018 100644 --- a/tests/testdata/anli_r1-v0-res.json +++ b/tests/testdata/anli_r1-v0-res.json @@ -1 +1 @@ -{"results": {"anli_r1": {"acc": 0.334, "acc_stderr": 0.014922019523732967}}, "versions": {"anli_r1": 0}} +{"results": {"anli_r1": {"acc": 0.334, "acc_stderr": 0.014922019523732967}}, "versions": {"anli_r1": 0}} \ No newline at end of file diff --git a/tests/testdata/anli_r2-v0-loglikelihood b/tests/testdata/anli_r2-v0-loglikelihood index 1f312526f2..4a437fc8a8 100644 --- a/tests/testdata/anli_r2-v0-loglikelihood +++ b/tests/testdata/anli_r2-v0-loglikelihood @@ -1 +1 @@ -d0ea3c3e09d533982c15b4c034439896d6af4bbafb2254d305e20215534a251d +d0ea3c3e09d533982c15b4c034439896d6af4bbafb2254d305e20215534a251d \ No newline at end of file diff --git a/tests/testdata/anli_r2-v0-res.json b/tests/testdata/anli_r2-v0-res.json index 81e68ebb47..6dc08ebbaa 100644 --- a/tests/testdata/anli_r2-v0-res.json +++ b/tests/testdata/anli_r2-v0-res.json @@ -1 +1 @@ -{"results": {"anli_r2": {"acc": 0.356, "acc_stderr": 0.015149042659306628}}, "versions": {"anli_r2": 0}} +{"results": {"anli_r2": {"acc": 0.356, "acc_stderr": 0.015149042659306628}}, "versions": {"anli_r2": 0}} \ No newline at end of file diff --git a/tests/testdata/anli_r3-v0-loglikelihood b/tests/testdata/anli_r3-v0-loglikelihood index 75bd1be817..29d3d67c8b 100644 --- a/tests/testdata/anli_r3-v0-loglikelihood +++ b/tests/testdata/anli_r3-v0-loglikelihood @@ -1 +1 @@ -6b6e5c6a794f2fbff78b7aa24fe0c90156039334bbd1cb34f7af9fc6e6183845 +6b6e5c6a794f2fbff78b7aa24fe0c90156039334bbd1cb34f7af9fc6e6183845 \ No newline at end of file diff --git a/tests/testdata/anli_r3-v0-res.json b/tests/testdata/anli_r3-v0-res.json index b41cf014b7..548dea1e22 100644 --- a/tests/testdata/anli_r3-v0-res.json +++ b/tests/testdata/anli_r3-v0-res.json @@ -1 +1 @@ -{"results": {"anli_r3": {"acc": 0.31916666666666665, "acc_stderr": 0.01346230971200514}}, "versions": {"anli_r3": 0}} +{"results": {"anli_r3": {"acc": 0.31916666666666665, "acc_stderr": 0.01346230971200514}}, "versions": {"anli_r3": 0}} \ No newline at end of file diff --git a/tests/testdata/arc_challenge-v0-loglikelihood b/tests/testdata/arc_challenge-v0-loglikelihood index 9722e1bd9b..91a3560635 100644 --- a/tests/testdata/arc_challenge-v0-loglikelihood +++ b/tests/testdata/arc_challenge-v0-loglikelihood @@ -1 +1 @@ -41c34c96cca8ace661911d0033d630c554b283f5a3953bcdc50720ae6b00a9c1 +41c34c96cca8ace661911d0033d630c554b283f5a3953bcdc50720ae6b00a9c1 \ No newline at end of file diff --git a/tests/testdata/arc_challenge-v0-res.json b/tests/testdata/arc_challenge-v0-res.json index e6b74ebaec..49f34a7306 100644 --- a/tests/testdata/arc_challenge-v0-res.json +++ b/tests/testdata/arc_challenge-v0-res.json @@ -1 +1 @@ -{"results": {"arc_challenge": {"acc": 0.24488054607508533, "acc_norm": 0.2440273037542662, "acc_norm_stderr": 0.012551447627856257, "acc_stderr": 0.012566273985131354}}, "versions": {"arc_challenge": 0}} +{"results": {"arc_challenge": {"acc": 0.24488054607508533, "acc_norm": 0.2440273037542662, "acc_norm_stderr": 0.012551447627856257, "acc_stderr": 0.012566273985131354}}, "versions": {"arc_challenge": 0}} \ No newline at end of file diff --git a/tests/testdata/arc_easy-v0-loglikelihood b/tests/testdata/arc_easy-v0-loglikelihood index 090cb9eb6c..d82be433ab 100644 --- a/tests/testdata/arc_easy-v0-loglikelihood +++ b/tests/testdata/arc_easy-v0-loglikelihood @@ -1 +1 @@ -ffa6e39a35a16299dcb015f17f986aaa598ad8b4840c4cebe0339a7042232741 +ffa6e39a35a16299dcb015f17f986aaa598ad8b4840c4cebe0339a7042232741 \ No newline at end of file diff --git a/tests/testdata/arc_easy-v0-res.json b/tests/testdata/arc_easy-v0-res.json index de15549b7a..f217448594 100644 --- a/tests/testdata/arc_easy-v0-res.json +++ b/tests/testdata/arc_easy-v0-res.json @@ -1 +1 @@ -{"results": {"arc_easy": {"acc": 0.2474747474747475, "acc_norm": 0.24074074074074073, "acc_norm_stderr": 0.008772796145221907, "acc_stderr": 0.008855114414834707}}, "versions": {"arc_easy": 0}} +{"results": {"arc_easy": {"acc": 0.2474747474747475, "acc_norm": 0.24074074074074073, "acc_norm_stderr": 0.008772796145221907, "acc_stderr": 0.008855114414834707}}, "versions": {"arc_easy": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_1dc-v0-loglikelihood b/tests/testdata/arithmetic_1dc-v0-loglikelihood index 4e4b4919ac..01756b4d47 100644 --- a/tests/testdata/arithmetic_1dc-v0-loglikelihood +++ b/tests/testdata/arithmetic_1dc-v0-loglikelihood @@ -1 +1 @@ -04c3a63a6b3c579bd3775d92b3076ba9130041d5ce7cf9244d3f86e95c804387 +04c3a63a6b3c579bd3775d92b3076ba9130041d5ce7cf9244d3f86e95c804387 \ No newline at end of file diff --git a/tests/testdata/arithmetic_1dc-v0-res.json b/tests/testdata/arithmetic_1dc-v0-res.json index 6c8b47d066..29e447d578 100644 --- a/tests/testdata/arithmetic_1dc-v0-res.json +++ b/tests/testdata/arithmetic_1dc-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_1dc": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_1dc": 0}} +{"results": {"arithmetic_1dc": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_1dc": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_2da-v0-loglikelihood b/tests/testdata/arithmetic_2da-v0-loglikelihood index 0792aacf28..fd95bb231e 100644 --- a/tests/testdata/arithmetic_2da-v0-loglikelihood +++ b/tests/testdata/arithmetic_2da-v0-loglikelihood @@ -1 +1 @@ -6ca1ca6ebd7cac4420d5005f7f35b0edbc921377f5e4f8874cc176e4fb6d79d4 +6ca1ca6ebd7cac4420d5005f7f35b0edbc921377f5e4f8874cc176e4fb6d79d4 \ No newline at end of file diff --git a/tests/testdata/arithmetic_2da-v0-res.json b/tests/testdata/arithmetic_2da-v0-res.json index 34a9d37d6a..874256a0b8 100644 --- a/tests/testdata/arithmetic_2da-v0-res.json +++ b/tests/testdata/arithmetic_2da-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_2da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2da": 0}} +{"results": {"arithmetic_2da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2da": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_2dm-v0-loglikelihood b/tests/testdata/arithmetic_2dm-v0-loglikelihood index e9d660a0c7..7b7adaf862 100644 --- a/tests/testdata/arithmetic_2dm-v0-loglikelihood +++ b/tests/testdata/arithmetic_2dm-v0-loglikelihood @@ -1 +1 @@ -14ac5e510cdf82967d6827a9ca059906ee1db2e347be1b17f36403a157e73552 +14ac5e510cdf82967d6827a9ca059906ee1db2e347be1b17f36403a157e73552 \ No newline at end of file diff --git a/tests/testdata/arithmetic_2dm-v0-res.json b/tests/testdata/arithmetic_2dm-v0-res.json index 086c2ce6e1..8fc5d47310 100644 --- a/tests/testdata/arithmetic_2dm-v0-res.json +++ b/tests/testdata/arithmetic_2dm-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_2dm": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2dm": 0}} +{"results": {"arithmetic_2dm": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2dm": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_2ds-v0-loglikelihood b/tests/testdata/arithmetic_2ds-v0-loglikelihood index ec2090bcb9..28f32c92c6 100644 --- a/tests/testdata/arithmetic_2ds-v0-loglikelihood +++ b/tests/testdata/arithmetic_2ds-v0-loglikelihood @@ -1 +1 @@ -66f7ff3b40251ee38fadcbee658e309a200224356fc3efa07d0a490a2c24bfa3 +66f7ff3b40251ee38fadcbee658e309a200224356fc3efa07d0a490a2c24bfa3 \ No newline at end of file diff --git a/tests/testdata/arithmetic_2ds-v0-res.json b/tests/testdata/arithmetic_2ds-v0-res.json index 79209c8edd..a18e6eec6e 100644 --- a/tests/testdata/arithmetic_2ds-v0-res.json +++ b/tests/testdata/arithmetic_2ds-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_2ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2ds": 0}} +{"results": {"arithmetic_2ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_2ds": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_3da-v0-loglikelihood b/tests/testdata/arithmetic_3da-v0-loglikelihood index 1048cb0a8a..6c99dece22 100644 --- a/tests/testdata/arithmetic_3da-v0-loglikelihood +++ b/tests/testdata/arithmetic_3da-v0-loglikelihood @@ -1 +1 @@ -c421f9cd5a5001b80e528441da925128177a04db8526ebcdab543a90b33c9ce2 +c421f9cd5a5001b80e528441da925128177a04db8526ebcdab543a90b33c9ce2 \ No newline at end of file diff --git a/tests/testdata/arithmetic_3da-v0-res.json b/tests/testdata/arithmetic_3da-v0-res.json index 596733ef8a..1bbb3eb0c2 100644 --- a/tests/testdata/arithmetic_3da-v0-res.json +++ b/tests/testdata/arithmetic_3da-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_3da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_3da": 0}} +{"results": {"arithmetic_3da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_3da": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_3ds-v0-loglikelihood b/tests/testdata/arithmetic_3ds-v0-loglikelihood index ad55700d82..6bc029c520 100644 --- a/tests/testdata/arithmetic_3ds-v0-loglikelihood +++ b/tests/testdata/arithmetic_3ds-v0-loglikelihood @@ -1 +1 @@ -d3d8bad8827d4530945a1d8b3c7589c0235bbed0bc89e7561a6fdac678f6ce5c +d3d8bad8827d4530945a1d8b3c7589c0235bbed0bc89e7561a6fdac678f6ce5c \ No newline at end of file diff --git a/tests/testdata/arithmetic_3ds-v0-res.json b/tests/testdata/arithmetic_3ds-v0-res.json index 1d84cdbd0b..d76cc9bdf5 100644 --- a/tests/testdata/arithmetic_3ds-v0-res.json +++ b/tests/testdata/arithmetic_3ds-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_3ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_3ds": 0}} +{"results": {"arithmetic_3ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_3ds": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_4da-v0-loglikelihood b/tests/testdata/arithmetic_4da-v0-loglikelihood index 4b14518f94..b52790c74b 100644 --- a/tests/testdata/arithmetic_4da-v0-loglikelihood +++ b/tests/testdata/arithmetic_4da-v0-loglikelihood @@ -1 +1 @@ -d3557beb8b9e5704122c2fc6362b11fbe2c3f2f3cb72aed4462b208767c40e01 +d3557beb8b9e5704122c2fc6362b11fbe2c3f2f3cb72aed4462b208767c40e01 \ No newline at end of file diff --git a/tests/testdata/arithmetic_4da-v0-res.json b/tests/testdata/arithmetic_4da-v0-res.json index 698c584bbd..57ce0e3007 100644 --- a/tests/testdata/arithmetic_4da-v0-res.json +++ b/tests/testdata/arithmetic_4da-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_4da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_4da": 0}} +{"results": {"arithmetic_4da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_4da": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_4ds-v0-loglikelihood b/tests/testdata/arithmetic_4ds-v0-loglikelihood index 2c5e3bcae3..154cf9c594 100644 --- a/tests/testdata/arithmetic_4ds-v0-loglikelihood +++ b/tests/testdata/arithmetic_4ds-v0-loglikelihood @@ -1 +1 @@ -d915830b8621e66331383bb2ae4c60acebf008e2f94741092ef4c33ea5441037 +d915830b8621e66331383bb2ae4c60acebf008e2f94741092ef4c33ea5441037 \ No newline at end of file diff --git a/tests/testdata/arithmetic_4ds-v0-res.json b/tests/testdata/arithmetic_4ds-v0-res.json index 4408839724..4321db2604 100644 --- a/tests/testdata/arithmetic_4ds-v0-res.json +++ b/tests/testdata/arithmetic_4ds-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_4ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_4ds": 0}} +{"results": {"arithmetic_4ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_4ds": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_5da-v0-loglikelihood b/tests/testdata/arithmetic_5da-v0-loglikelihood index c977e8d084..a751332bc6 100644 --- a/tests/testdata/arithmetic_5da-v0-loglikelihood +++ b/tests/testdata/arithmetic_5da-v0-loglikelihood @@ -1 +1 @@ -49edb1e735660631ea6cc309721e6c0b80b7106a613a6959514852ca48f1130e +49edb1e735660631ea6cc309721e6c0b80b7106a613a6959514852ca48f1130e \ No newline at end of file diff --git a/tests/testdata/arithmetic_5da-v0-res.json b/tests/testdata/arithmetic_5da-v0-res.json index 44816832f5..fb9a5671e8 100644 --- a/tests/testdata/arithmetic_5da-v0-res.json +++ b/tests/testdata/arithmetic_5da-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_5da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_5da": 0}} +{"results": {"arithmetic_5da": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_5da": 0}} \ No newline at end of file diff --git a/tests/testdata/arithmetic_5ds-v0-loglikelihood b/tests/testdata/arithmetic_5ds-v0-loglikelihood index d83afd50b3..0f959c21f6 100644 --- a/tests/testdata/arithmetic_5ds-v0-loglikelihood +++ b/tests/testdata/arithmetic_5ds-v0-loglikelihood @@ -1 +1 @@ -2888d6d098a5ef8c1e7f0d8295ba80826e2e04e431f57508dfb71d53e1cd4604 +2888d6d098a5ef8c1e7f0d8295ba80826e2e04e431f57508dfb71d53e1cd4604 \ No newline at end of file diff --git a/tests/testdata/arithmetic_5ds-v0-res.json b/tests/testdata/arithmetic_5ds-v0-res.json index f5580eb045..c7773f373d 100644 --- a/tests/testdata/arithmetic_5ds-v0-res.json +++ b/tests/testdata/arithmetic_5ds-v0-res.json @@ -1 +1 @@ -{"results": {"arithmetic_5ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_5ds": 0}} +{"results": {"arithmetic_5ds": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"arithmetic_5ds": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_adjunct_island-v0-loglikelihood b/tests/testdata/blimp_adjunct_island-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_adjunct_island-v0-loglikelihood rename to tests/testdata/blimp_adjunct_island-v0-loglikelihood index 04dcda8865..85f0e8fb2a 100644 --- a/tests/tests/testdata/blimp_adjunct_island-v0-loglikelihood +++ b/tests/testdata/blimp_adjunct_island-v0-loglikelihood @@ -1 +1 @@ -976a5cac4bdb724632eebd4cb9e522203ce3da8d5525288a597c86e80469f3f2 +976a5cac4bdb724632eebd4cb9e522203ce3da8d5525288a597c86e80469f3f2 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_adjunct_island-v0-res.json b/tests/testdata/blimp_adjunct_island-v0-res.json similarity index 99% rename from tests/tests/testdata/blimp_adjunct_island-v0-res.json rename to tests/testdata/blimp_adjunct_island-v0-res.json index 163ce5a628..39e2517bbc 100644 --- a/tests/tests/testdata/blimp_adjunct_island-v0-res.json +++ b/tests/testdata/blimp_adjunct_island-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_adjunct_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_adjunct_island": 0}} +{"results": {"blimp_adjunct_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_adjunct_island": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_anaphor_gender_agreement-v0-loglikelihood b/tests/testdata/blimp_anaphor_gender_agreement-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_anaphor_gender_agreement-v0-loglikelihood rename to tests/testdata/blimp_anaphor_gender_agreement-v0-loglikelihood index ea9de13a17..32b700ea9e 100644 --- a/tests/tests/testdata/blimp_anaphor_gender_agreement-v0-loglikelihood +++ b/tests/testdata/blimp_anaphor_gender_agreement-v0-loglikelihood @@ -1 +1 @@ -2d8964e56a17661502ecf3f09c0befba63915360ddf2145b0bd845816950515d +2d8964e56a17661502ecf3f09c0befba63915360ddf2145b0bd845816950515d \ No newline at end of file diff --git a/tests/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json b/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json similarity index 85% rename from tests/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json rename to tests/testdata/blimp_anaphor_gender_agreement-v0-res.json index 9a748a2b8c..1c39ab7045 100644 --- a/tests/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json +++ b/tests/testdata/blimp_anaphor_gender_agreement-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_anaphor_gender_agreement": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_anaphor_gender_agreement": 0}} +{"results": {"blimp_anaphor_gender_agreement": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_anaphor_gender_agreement": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood b/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood rename to tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood index 512b36da8e..347570f3a6 100644 --- a/tests/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood +++ b/tests/testdata/blimp_anaphor_number_agreement-v0-loglikelihood @@ -1 +1 @@ -0bdad31c974ba064e1f1ba931841ec2ba7461e8b0ca54ea5f79f08b6bae0bab5 +0bdad31c974ba064e1f1ba931841ec2ba7461e8b0ca54ea5f79f08b6bae0bab5 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_anaphor_number_agreement-v0-res.json b/tests/testdata/blimp_anaphor_number_agreement-v0-res.json similarity index 85% rename from tests/tests/testdata/blimp_anaphor_number_agreement-v0-res.json rename to tests/testdata/blimp_anaphor_number_agreement-v0-res.json index 5391c08491..68bbe21379 100644 --- a/tests/tests/testdata/blimp_anaphor_number_agreement-v0-res.json +++ b/tests/testdata/blimp_anaphor_number_agreement-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_anaphor_number_agreement": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_anaphor_number_agreement": 0}} +{"results": {"blimp_anaphor_number_agreement": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_anaphor_number_agreement": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood b/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood rename to tests/testdata/blimp_animate_subject_passive-v0-loglikelihood index 719a6eb0f8..47cd3d3be1 100644 --- a/tests/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood +++ b/tests/testdata/blimp_animate_subject_passive-v0-loglikelihood @@ -1 +1 @@ -064c38fcd072b8bd12f54ea4f8e41599ed4e11dc386e93b77e1fc07967d1f960 +064c38fcd072b8bd12f54ea4f8e41599ed4e11dc386e93b77e1fc07967d1f960 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_animate_subject_passive-v0-res.json b/tests/testdata/blimp_animate_subject_passive-v0-res.json similarity index 87% rename from tests/tests/testdata/blimp_animate_subject_passive-v0-res.json rename to tests/testdata/blimp_animate_subject_passive-v0-res.json index 6c0d6d45ea..96a7ed5e2a 100644 --- a/tests/tests/testdata/blimp_animate_subject_passive-v0-res.json +++ b/tests/testdata/blimp_animate_subject_passive-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_animate_subject_passive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_animate_subject_passive": 0}} +{"results": {"blimp_animate_subject_passive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_animate_subject_passive": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood b/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood rename to tests/testdata/blimp_animate_subject_trans-v0-loglikelihood index 70cff68a1a..07106a9058 100644 --- a/tests/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood +++ b/tests/testdata/blimp_animate_subject_trans-v0-loglikelihood @@ -1 +1 @@ -2a84231e7b79f517427e57e2099c88fed3d60a7efab4ef9506e263b4091d5cfa +2a84231e7b79f517427e57e2099c88fed3d60a7efab4ef9506e263b4091d5cfa \ No newline at end of file diff --git a/tests/tests/testdata/blimp_animate_subject_trans-v0-res.json b/tests/testdata/blimp_animate_subject_trans-v0-res.json similarity index 89% rename from tests/tests/testdata/blimp_animate_subject_trans-v0-res.json rename to tests/testdata/blimp_animate_subject_trans-v0-res.json index de987875a5..480cf29a4d 100644 --- a/tests/tests/testdata/blimp_animate_subject_trans-v0-res.json +++ b/tests/testdata/blimp_animate_subject_trans-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_animate_subject_trans": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_animate_subject_trans": 0}} +{"results": {"blimp_animate_subject_trans": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_animate_subject_trans": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_causative-v0-loglikelihood b/tests/testdata/blimp_causative-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_causative-v0-loglikelihood rename to tests/testdata/blimp_causative-v0-loglikelihood index 475d483914..5a0f6a3559 100644 --- a/tests/tests/testdata/blimp_causative-v0-loglikelihood +++ b/tests/testdata/blimp_causative-v0-loglikelihood @@ -1 +1 @@ -3d67ad025185dbb0808ebd7f508edcb5750c18fc3c01ad91f20fda80780c916c +3d67ad025185dbb0808ebd7f508edcb5750c18fc3c01ad91f20fda80780c916c \ No newline at end of file diff --git a/tests/tests/testdata/blimp_causative-v0-res.json b/tests/testdata/blimp_causative-v0-res.json similarity index 53% rename from tests/tests/testdata/blimp_causative-v0-res.json rename to tests/testdata/blimp_causative-v0-res.json index d84658aed6..90dc95da81 100644 --- a/tests/tests/testdata/blimp_causative-v0-res.json +++ b/tests/testdata/blimp_causative-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_causative": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_causative": 0}} +{"results": {"blimp_causative": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_causative": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_complex_NP_island-v0-loglikelihood b/tests/testdata/blimp_complex_NP_island-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_complex_NP_island-v0-loglikelihood rename to tests/testdata/blimp_complex_NP_island-v0-loglikelihood index cbc8ba8117..3a6d0875c6 100644 --- a/tests/tests/testdata/blimp_complex_NP_island-v0-loglikelihood +++ b/tests/testdata/blimp_complex_NP_island-v0-loglikelihood @@ -1 +1 @@ -f46cfcc7e43050a235fd2a6b989cabbfbcce76786df74db9f0d4a9cd1caa1628 +f46cfcc7e43050a235fd2a6b989cabbfbcce76786df74db9f0d4a9cd1caa1628 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_complex_NP_island-v0-res.json b/tests/testdata/blimp_complex_NP_island-v0-res.json similarity index 94% rename from tests/tests/testdata/blimp_complex_NP_island-v0-res.json rename to tests/testdata/blimp_complex_NP_island-v0-res.json index 86754d2f25..5bfbffb6e4 100644 --- a/tests/tests/testdata/blimp_complex_NP_island-v0-res.json +++ b/tests/testdata/blimp_complex_NP_island-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_complex_NP_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_complex_NP_island": 0}} +{"results": {"blimp_complex_NP_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_complex_NP_island": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood b/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood rename to tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood index 742e7e16cd..8970b32aff 100644 --- a/tests/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood +++ b/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-loglikelihood @@ -1 +1 @@ -7e1cc5b9f71abfbe56c4bdf343a1e5632785b66a986b8e904a41ed8f45a2c33e +7e1cc5b9f71abfbe56c4bdf343a1e5632785b66a986b8e904a41ed8f45a2c33e \ No newline at end of file diff --git a/tests/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json b/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json similarity index 94% rename from tests/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json rename to tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json index b7807d77fa..2750fcda2a 100644 --- a/tests/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json +++ b/tests/testdata/blimp_coordinate_structure_constraint_complex_left_branch-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_coordinate_structure_constraint_complex_left_branch": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_coordinate_structure_constraint_complex_left_branch": 0}} +{"results": {"blimp_coordinate_structure_constraint_complex_left_branch": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_coordinate_structure_constraint_complex_left_branch": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood b/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood rename to tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood index 5f64d037ca..f1edb69cb1 100644 --- a/tests/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood +++ b/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-loglikelihood @@ -1 +1 @@ -23ddafdff7b1ebe331b146e23b2c21aa109fe57aa1ce8ca201a0d239fcbdd166 +23ddafdff7b1ebe331b146e23b2c21aa109fe57aa1ce8ca201a0d239fcbdd166 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json b/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json similarity index 96% rename from tests/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json rename to tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json index 271d758092..80f2c6a7a0 100644 --- a/tests/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json +++ b/tests/testdata/blimp_coordinate_structure_constraint_object_extraction-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_coordinate_structure_constraint_object_extraction": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_coordinate_structure_constraint_object_extraction": 0}} +{"results": {"blimp_coordinate_structure_constraint_object_extraction": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_coordinate_structure_constraint_object_extraction": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood index 6f7397c31c..5fe9e64bc6 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_1-v0-loglikelihood @@ -1 +1 @@ -2df8cc7f17089f7e8c7d974dcb324c809d30ef059a5be22aed6b69f44230809f +2df8cc7f17089f7e8c7d974dcb324c809d30ef059a5be22aed6b69f44230809f \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json similarity index 82% rename from tests/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json index c1d74c8373..a245755067 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_1": 0}} +{"results": {"blimp_determiner_noun_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_2-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_2-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_2-v0-loglikelihood index 4fa30bc200..72ab237e58 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_2-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_2-v0-loglikelihood @@ -1 +1 @@ -123e2acd00fbba60aba1fbae607c79a062e512c9e79c7d8dfafff63e30111d76 +123e2acd00fbba60aba1fbae607c79a062e512c9e79c7d8dfafff63e30111d76 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_2-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_2-v0-res.json similarity index 82% rename from tests/tests/testdata/blimp_determiner_noun_agreement_2-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_2-v0-res.json index 53b0dceb6c..bc2dc6e1ed 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_2-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_2": 0}} +{"results": {"blimp_determiner_noun_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood index 650d52cc4e..f808af4605 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-loglikelihood @@ -1 +1 @@ -7fab9f02e71a224ae7931aa77f8a9a61d887a7480756adc965d4746e97fb04a5 +7fab9f02e71a224ae7931aa77f8a9a61d887a7480756adc965d4746e97fb04a5 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-res.json index 3921ff3b88..8caeecf43d 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_irregular_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_irregular_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_irregular_1": 0}} +{"results": {"blimp_determiner_noun_agreement_irregular_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_irregular_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood index a89c854b6e..12a4ebe1d2 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-loglikelihood @@ -1 +1 @@ -ddb24ddfaebe076b3aa7107937d71bf5f4503a78283bc889e39200368603681e +ddb24ddfaebe076b3aa7107937d71bf5f4503a78283bc889e39200368603681e \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-res.json index ab47b016e6..c04ead4577 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_irregular_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_irregular_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_irregular_2": 0}} +{"results": {"blimp_determiner_noun_agreement_irregular_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_irregular_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood index 6cf9e371c6..a260838746 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-loglikelihood @@ -1 +1 @@ -95acb74fac7d57ae2c9d208361a5f8ad36b0b19a055f02e648ed8e99505f4b43 +95acb74fac7d57ae2c9d208361a5f8ad36b0b19a055f02e648ed8e99505f4b43 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json index 831302ad4a..67ea47559d 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adj_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_with_adj_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_2": 0}} +{"results": {"blimp_determiner_noun_agreement_with_adj_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood index 9d47dd4928..6756cc4020 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-loglikelihood @@ -1 +1 @@ -ad61c619aa79433d02f1aeacde2ab87291fd5d5c370032c24d41c4f0065ed1f9 +ad61c619aa79433d02f1aeacde2ab87291fd5d5c370032c24d41c4f0065ed1f9 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json similarity index 99% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json index 765d2cf8ea..defc3560d9 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_with_adj_irregular_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_irregular_1": 0}} +{"results": {"blimp_determiner_noun_agreement_with_adj_irregular_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_irregular_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood index e6e2a4497d..13176ac613 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-loglikelihood @@ -1 +1 @@ -ccc64b4d5e80c081d5161aae5828212ba49d277ca8c5a4281f181744727a6a99 +ccc64b4d5e80c081d5161aae5828212ba49d277ca8c5a4281f181744727a6a99 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json similarity index 99% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json index 7b57aaaed2..276f03f76d 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adj_irregular_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_with_adj_irregular_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_irregular_2": 0}} +{"results": {"blimp_determiner_noun_agreement_with_adj_irregular_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adj_irregular_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood b/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood rename to tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood index 9f139b4b16..d765bb5906 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-loglikelihood @@ -1 +1 @@ -007c47e5fbf88119c5180feef75e1345d448e56adcd4c7ab2d52fb8d67350d34 +007c47e5fbf88119c5180feef75e1345d448e56adcd4c7ab2d52fb8d67350d34 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-res.json b/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-res.json similarity index 69% rename from tests/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-res.json rename to tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-res.json index 3f8cb59ad3..66b30be1b8 100644 --- a/tests/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-res.json +++ b/tests/testdata/blimp_determiner_noun_agreement_with_adjective_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_determiner_noun_agreement_with_adjective_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adjective_1": 0}} +{"results": {"blimp_determiner_noun_agreement_with_adjective_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_determiner_noun_agreement_with_adjective_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_distractor_agreement_relational_noun-v0-loglikelihood b/tests/testdata/blimp_distractor_agreement_relational_noun-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_distractor_agreement_relational_noun-v0-loglikelihood rename to tests/testdata/blimp_distractor_agreement_relational_noun-v0-loglikelihood index cdf9c7b657..f926cf3d4b 100644 --- a/tests/tests/testdata/blimp_distractor_agreement_relational_noun-v0-loglikelihood +++ b/tests/testdata/blimp_distractor_agreement_relational_noun-v0-loglikelihood @@ -1 +1 @@ -8aab641bd5933f84f46a14f5c1208a3c855cace7e67b44abcd5aff8fec96717d +8aab641bd5933f84f46a14f5c1208a3c855cace7e67b44abcd5aff8fec96717d \ No newline at end of file diff --git a/tests/tests/testdata/blimp_distractor_agreement_relational_noun-v0-res.json b/tests/testdata/blimp_distractor_agreement_relational_noun-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_distractor_agreement_relational_noun-v0-res.json rename to tests/testdata/blimp_distractor_agreement_relational_noun-v0-res.json index cb2599b9e0..d8ce0672c2 100644 --- a/tests/tests/testdata/blimp_distractor_agreement_relational_noun-v0-res.json +++ b/tests/testdata/blimp_distractor_agreement_relational_noun-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_distractor_agreement_relational_noun": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_distractor_agreement_relational_noun": 0}} +{"results": {"blimp_distractor_agreement_relational_noun": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_distractor_agreement_relational_noun": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood b/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood rename to tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood index d1057c6947..1fddc2190c 100644 --- a/tests/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood +++ b/tests/testdata/blimp_distractor_agreement_relative_clause-v0-loglikelihood @@ -1 +1 @@ -bf78e2b53c0f3531303c668c96bd3897a0a35e960da37439e63724ecba4e371a +bf78e2b53c0f3531303c668c96bd3897a0a35e960da37439e63724ecba4e371a \ No newline at end of file diff --git a/tests/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json b/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json rename to tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json index f892031db4..cf08b036b9 100644 --- a/tests/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json +++ b/tests/testdata/blimp_distractor_agreement_relative_clause-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_distractor_agreement_relative_clause": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_distractor_agreement_relative_clause": 0}} +{"results": {"blimp_distractor_agreement_relative_clause": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_distractor_agreement_relative_clause": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_drop_argument-v0-loglikelihood b/tests/testdata/blimp_drop_argument-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_drop_argument-v0-loglikelihood rename to tests/testdata/blimp_drop_argument-v0-loglikelihood index 616b099d98..1d6bea95e1 100644 --- a/tests/tests/testdata/blimp_drop_argument-v0-loglikelihood +++ b/tests/testdata/blimp_drop_argument-v0-loglikelihood @@ -1 +1 @@ -616109e63f162dcd31a632943e7ef0c9e0431afeb179e83e9b04b39007b16f5b +616109e63f162dcd31a632943e7ef0c9e0431afeb179e83e9b04b39007b16f5b \ No newline at end of file diff --git a/tests/tests/testdata/blimp_drop_argument-v0-res.json b/tests/testdata/blimp_drop_argument-v0-res.json similarity index 50% rename from tests/tests/testdata/blimp_drop_argument-v0-res.json rename to tests/testdata/blimp_drop_argument-v0-res.json index ed70ea9350..853a4d2f92 100644 --- a/tests/tests/testdata/blimp_drop_argument-v0-res.json +++ b/tests/testdata/blimp_drop_argument-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_drop_argument": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_drop_argument": 0}} +{"results": {"blimp_drop_argument": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_drop_argument": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_ellipsis_n_bar_1-v0-loglikelihood b/tests/testdata/blimp_ellipsis_n_bar_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_ellipsis_n_bar_1-v0-loglikelihood rename to tests/testdata/blimp_ellipsis_n_bar_1-v0-loglikelihood index 4fc62005c5..611211bec0 100644 --- a/tests/tests/testdata/blimp_ellipsis_n_bar_1-v0-loglikelihood +++ b/tests/testdata/blimp_ellipsis_n_bar_1-v0-loglikelihood @@ -1 +1 @@ -d14e4b7fcdd68991eb39b9cf3ade4b37dee9ddd39b688f861d81a327e47a969f +d14e4b7fcdd68991eb39b9cf3ade4b37dee9ddd39b688f861d81a327e47a969f \ No newline at end of file diff --git a/tests/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json b/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json similarity index 96% rename from tests/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json rename to tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json index 6169eef26c..82f320ce8f 100644 --- a/tests/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json +++ b/tests/testdata/blimp_ellipsis_n_bar_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_ellipsis_n_bar_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_ellipsis_n_bar_1": 0}} +{"results": {"blimp_ellipsis_n_bar_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_ellipsis_n_bar_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_ellipsis_n_bar_2-v0-loglikelihood b/tests/testdata/blimp_ellipsis_n_bar_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_ellipsis_n_bar_2-v0-loglikelihood rename to tests/testdata/blimp_ellipsis_n_bar_2-v0-loglikelihood index 3675cffd8b..1005f68060 100644 --- a/tests/tests/testdata/blimp_ellipsis_n_bar_2-v0-loglikelihood +++ b/tests/testdata/blimp_ellipsis_n_bar_2-v0-loglikelihood @@ -1 +1 @@ -0523771a217759f0b22b89807694ee7f6381ce98a584b1fd070ba96194a3273b +0523771a217759f0b22b89807694ee7f6381ce98a584b1fd070ba96194a3273b \ No newline at end of file diff --git a/tests/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json b/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json similarity index 96% rename from tests/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json rename to tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json index f30b7b4704..5b721ca152 100644 --- a/tests/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json +++ b/tests/testdata/blimp_ellipsis_n_bar_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_ellipsis_n_bar_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_ellipsis_n_bar_2": 0}} +{"results": {"blimp_ellipsis_n_bar_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_ellipsis_n_bar_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood b/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood rename to tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood index e1599102ba..d23fba902a 100644 --- a/tests/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood +++ b/tests/testdata/blimp_existential_there_object_raising-v0-loglikelihood @@ -1 +1 @@ -63567712076256f373131971676c1c6d711efef73cd0e4de3cc639bc631a2413 +63567712076256f373131971676c1c6d711efef73cd0e4de3cc639bc631a2413 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_object_raising-v0-res.json b/tests/testdata/blimp_existential_there_object_raising-v0-res.json similarity index 77% rename from tests/tests/testdata/blimp_existential_there_object_raising-v0-res.json rename to tests/testdata/blimp_existential_there_object_raising-v0-res.json index 4556caf48b..da3deb1aaf 100644 --- a/tests/tests/testdata/blimp_existential_there_object_raising-v0-res.json +++ b/tests/testdata/blimp_existential_there_object_raising-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_existential_there_object_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_object_raising": 0}} +{"results": {"blimp_existential_there_object_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_object_raising": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood b/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood rename to tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood index c288c7bb55..7697713f85 100644 --- a/tests/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood +++ b/tests/testdata/blimp_existential_there_quantifiers_1-v0-loglikelihood @@ -1 +1 @@ -d77594382e6d9af31a8b8ef00ba1ef6c29d6be6d0ddb7a9c27ef25ace654e05a +d77594382e6d9af31a8b8ef00ba1ef6c29d6be6d0ddb7a9c27ef25ace654e05a \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json b/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json similarity index 78% rename from tests/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json rename to tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json index 99c821bab2..076319f01e 100644 --- a/tests/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json +++ b/tests/testdata/blimp_existential_there_quantifiers_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_existential_there_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_quantifiers_1": 0}} +{"results": {"blimp_existential_there_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_quantifiers_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_quantifiers_2-v0-loglikelihood b/tests/testdata/blimp_existential_there_quantifiers_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_existential_there_quantifiers_2-v0-loglikelihood rename to tests/testdata/blimp_existential_there_quantifiers_2-v0-loglikelihood index 0b8d9be879..4b1a428c4d 100644 --- a/tests/tests/testdata/blimp_existential_there_quantifiers_2-v0-loglikelihood +++ b/tests/testdata/blimp_existential_there_quantifiers_2-v0-loglikelihood @@ -1 +1 @@ -6e6add7baff4217f383425bef58288202018e041b24084edcaa5df8af08f820c +6e6add7baff4217f383425bef58288202018e041b24084edcaa5df8af08f820c \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_quantifiers_2-v0-res.json b/tests/testdata/blimp_existential_there_quantifiers_2-v0-res.json similarity index 78% rename from tests/tests/testdata/blimp_existential_there_quantifiers_2-v0-res.json rename to tests/testdata/blimp_existential_there_quantifiers_2-v0-res.json index 80c6fbb97f..b8500d68b5 100644 --- a/tests/tests/testdata/blimp_existential_there_quantifiers_2-v0-res.json +++ b/tests/testdata/blimp_existential_there_quantifiers_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_existential_there_quantifiers_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_quantifiers_2": 0}} +{"results": {"blimp_existential_there_quantifiers_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_quantifiers_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_subject_raising-v0-loglikelihood b/tests/testdata/blimp_existential_there_subject_raising-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_existential_there_subject_raising-v0-loglikelihood rename to tests/testdata/blimp_existential_there_subject_raising-v0-loglikelihood index 54aa63c903..925e5b4680 100644 --- a/tests/tests/testdata/blimp_existential_there_subject_raising-v0-loglikelihood +++ b/tests/testdata/blimp_existential_there_subject_raising-v0-loglikelihood @@ -1 +1 @@ -9b324b28ae3e1b5d49ecf4b7b2a16c7bbc8ff38d000cf216fab75df633da2084 +9b324b28ae3e1b5d49ecf4b7b2a16c7bbc8ff38d000cf216fab75df633da2084 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_existential_there_subject_raising-v0-res.json b/tests/testdata/blimp_existential_there_subject_raising-v0-res.json similarity index 76% rename from tests/tests/testdata/blimp_existential_there_subject_raising-v0-res.json rename to tests/testdata/blimp_existential_there_subject_raising-v0-res.json index e7483274c8..00c913dcd3 100644 --- a/tests/tests/testdata/blimp_existential_there_subject_raising-v0-res.json +++ b/tests/testdata/blimp_existential_there_subject_raising-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_existential_there_subject_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_subject_raising": 0}} +{"results": {"blimp_existential_there_subject_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_existential_there_subject_raising": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood b/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood rename to tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood index 236950e3aa..31772c9a1c 100644 --- a/tests/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood +++ b/tests/testdata/blimp_expletive_it_object_raising-v0-loglikelihood @@ -1 +1 @@ -ceede5b38248a62125a74a8332602b8eac5ef40864f071ad8d86e7971e07219d +ceede5b38248a62125a74a8332602b8eac5ef40864f071ad8d86e7971e07219d \ No newline at end of file diff --git a/tests/tests/testdata/blimp_expletive_it_object_raising-v0-res.json b/tests/testdata/blimp_expletive_it_object_raising-v0-res.json similarity index 82% rename from tests/tests/testdata/blimp_expletive_it_object_raising-v0-res.json rename to tests/testdata/blimp_expletive_it_object_raising-v0-res.json index 808b3f9b8d..735dc09826 100644 --- a/tests/tests/testdata/blimp_expletive_it_object_raising-v0-res.json +++ b/tests/testdata/blimp_expletive_it_object_raising-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_expletive_it_object_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_expletive_it_object_raising": 0}} +{"results": {"blimp_expletive_it_object_raising": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_expletive_it_object_raising": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_inchoative-v0-loglikelihood b/tests/testdata/blimp_inchoative-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_inchoative-v0-loglikelihood rename to tests/testdata/blimp_inchoative-v0-loglikelihood index 26f8f1fcb9..b494980087 100644 --- a/tests/tests/testdata/blimp_inchoative-v0-loglikelihood +++ b/tests/testdata/blimp_inchoative-v0-loglikelihood @@ -1 +1 @@ -3ff73629fb4473986a0e8ae2fcb7c40e88292189ab0d8755d20836c5aa5a2f99 +3ff73629fb4473986a0e8ae2fcb7c40e88292189ab0d8755d20836c5aa5a2f99 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_inchoative-v0-res.json b/tests/testdata/blimp_inchoative-v0-res.json similarity index 52% rename from tests/tests/testdata/blimp_inchoative-v0-res.json rename to tests/testdata/blimp_inchoative-v0-res.json index 2f0cc1a7ca..8d1b39c2d4 100644 --- a/tests/tests/testdata/blimp_inchoative-v0-res.json +++ b/tests/testdata/blimp_inchoative-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_inchoative": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_inchoative": 0}} +{"results": {"blimp_inchoative": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_inchoative": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_intransitive-v0-loglikelihood b/tests/testdata/blimp_intransitive-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_intransitive-v0-loglikelihood rename to tests/testdata/blimp_intransitive-v0-loglikelihood index 8dd8c4f2c6..b16238545d 100644 --- a/tests/tests/testdata/blimp_intransitive-v0-loglikelihood +++ b/tests/testdata/blimp_intransitive-v0-loglikelihood @@ -1 +1 @@ -6469ae3b0d46b008846b5fd132f2d2b26ea2858745d056df1470b89aa97a790f +6469ae3b0d46b008846b5fd132f2d2b26ea2858745d056df1470b89aa97a790f \ No newline at end of file diff --git a/tests/tests/testdata/blimp_intransitive-v0-res.json b/tests/testdata/blimp_intransitive-v0-res.json similarity index 51% rename from tests/tests/testdata/blimp_intransitive-v0-res.json rename to tests/testdata/blimp_intransitive-v0-res.json index d4dc91ae4b..d5b2f91179 100644 --- a/tests/tests/testdata/blimp_intransitive-v0-res.json +++ b/tests/testdata/blimp_intransitive-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_intransitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_intransitive": 0}} +{"results": {"blimp_intransitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_intransitive": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood b/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood rename to tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood index 008745c366..a030be1d72 100644 --- a/tests/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood +++ b/tests/testdata/blimp_irregular_past_participle_adjectives-v0-loglikelihood @@ -1 +1 @@ -47c56f336df11924d8b97feb46339ce55bea4b216b6fd13946cc999ea36a4a95 +47c56f336df11924d8b97feb46339ce55bea4b216b6fd13946cc999ea36a4a95 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json b/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json rename to tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json index 5819ba6a6f..e3b8718ff8 100644 --- a/tests/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json +++ b/tests/testdata/blimp_irregular_past_participle_adjectives-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_irregular_past_participle_adjectives": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_past_participle_adjectives": 0}} +{"results": {"blimp_irregular_past_participle_adjectives": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_past_participle_adjectives": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_past_participle_verbs-v0-loglikelihood b/tests/testdata/blimp_irregular_past_participle_verbs-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_irregular_past_participle_verbs-v0-loglikelihood rename to tests/testdata/blimp_irregular_past_participle_verbs-v0-loglikelihood index 5c01c94ef7..1ff9f6b991 100644 --- a/tests/tests/testdata/blimp_irregular_past_participle_verbs-v0-loglikelihood +++ b/tests/testdata/blimp_irregular_past_participle_verbs-v0-loglikelihood @@ -1 +1 @@ -63ec733873f94ace71cb34112d1c3cd5bb768c26b975fb90acc9b8ba3f4e938e +63ec733873f94ace71cb34112d1c3cd5bb768c26b975fb90acc9b8ba3f4e938e \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json b/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json similarity index 78% rename from tests/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json rename to tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json index 13a6167484..94d73d41da 100644 --- a/tests/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json +++ b/tests/testdata/blimp_irregular_past_participle_verbs-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_irregular_past_participle_verbs": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_past_participle_verbs": 0}} +{"results": {"blimp_irregular_past_participle_verbs": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_past_participle_verbs": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood rename to tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood index 023096be2c..bd7f4bd9ea 100644 --- a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood +++ b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-loglikelihood @@ -1 +1 @@ -7084358b1b7dd7fb5ead1a58f4b499d6f7610eca897bfac25a986d0f9a91aa5d +7084358b1b7dd7fb5ead1a58f4b499d6f7610eca897bfac25a986d0f9a91aa5d \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-res.json b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-res.json similarity index 69% rename from tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-res.json rename to tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-res.json index d5f38df686..d70bd8bad3 100644 --- a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-res.json +++ b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_irregular_plural_subject_verb_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_plural_subject_verb_agreement_1": 0}} +{"results": {"blimp_irregular_plural_subject_verb_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_plural_subject_verb_agreement_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood rename to tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood index 68ef732797..187b79e94c 100644 --- a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood +++ b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-loglikelihood @@ -1 +1 @@ -9534751f83a86b6cbe1fb12fb9feb827b0b7836a663108928b4ecc1d70b08871 +9534751f83a86b6cbe1fb12fb9feb827b0b7836a663108928b4ecc1d70b08871 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json similarity index 69% rename from tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json rename to tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json index 02f042e827..b0289b9dea 100644 --- a/tests/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json +++ b/tests/testdata/blimp_irregular_plural_subject_verb_agreement_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_irregular_plural_subject_verb_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_plural_subject_verb_agreement_2": 0}} +{"results": {"blimp_irregular_plural_subject_verb_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_irregular_plural_subject_verb_agreement_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_left_branch_island_echo_question-v0-loglikelihood b/tests/testdata/blimp_left_branch_island_echo_question-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_left_branch_island_echo_question-v0-loglikelihood rename to tests/testdata/blimp_left_branch_island_echo_question-v0-loglikelihood index 6846155766..da909529e5 100644 --- a/tests/tests/testdata/blimp_left_branch_island_echo_question-v0-loglikelihood +++ b/tests/testdata/blimp_left_branch_island_echo_question-v0-loglikelihood @@ -1 +1 @@ -9852b38612db8c6adf938a5d8a7a9e5ce9e655259d6cc806b142506fcaff0ed4 +9852b38612db8c6adf938a5d8a7a9e5ce9e655259d6cc806b142506fcaff0ed4 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json b/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json similarity index 77% rename from tests/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json rename to tests/testdata/blimp_left_branch_island_echo_question-v0-res.json index 0c1723b0f9..198f9a289c 100644 --- a/tests/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json +++ b/tests/testdata/blimp_left_branch_island_echo_question-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_left_branch_island_echo_question": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_left_branch_island_echo_question": 0}} +{"results": {"blimp_left_branch_island_echo_question": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_left_branch_island_echo_question": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_left_branch_island_simple_question-v0-loglikelihood b/tests/testdata/blimp_left_branch_island_simple_question-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_left_branch_island_simple_question-v0-loglikelihood rename to tests/testdata/blimp_left_branch_island_simple_question-v0-loglikelihood index 585a1dab73..22adb2995e 100644 --- a/tests/tests/testdata/blimp_left_branch_island_simple_question-v0-loglikelihood +++ b/tests/testdata/blimp_left_branch_island_simple_question-v0-loglikelihood @@ -1 +1 @@ -6cb36bbdae7754f8832f50872c3dd511ce12547e00fa0771deb747be3355eb85 +6cb36bbdae7754f8832f50872c3dd511ce12547e00fa0771deb747be3355eb85 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json b/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json similarity index 75% rename from tests/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json rename to tests/testdata/blimp_left_branch_island_simple_question-v0-res.json index 27b71d0de9..057af2db85 100644 --- a/tests/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json +++ b/tests/testdata/blimp_left_branch_island_simple_question-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_left_branch_island_simple_question": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_left_branch_island_simple_question": 0}} +{"results": {"blimp_left_branch_island_simple_question": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_left_branch_island_simple_question": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood b/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood rename to tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood index 38824111a0..a5c4bc6ca2 100644 --- a/tests/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood +++ b/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-loglikelihood @@ -1 +1 @@ -a3a702a3335c79b02b36caf37c68069050c2a8a3a03c3610c09afc39d2b83fb1 +a3a702a3335c79b02b36caf37c68069050c2a8a3a03c3610c09afc39d2b83fb1 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json b/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json similarity index 73% rename from tests/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json rename to tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json index 77211f8352..4fba717b88 100644 --- a/tests/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json +++ b/tests/testdata/blimp_matrix_question_npi_licensor_present-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_matrix_question_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_matrix_question_npi_licensor_present": 0}} +{"results": {"blimp_matrix_question_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_matrix_question_npi_licensor_present": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_npi_present_1-v0-loglikelihood b/tests/testdata/blimp_npi_present_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_npi_present_1-v0-loglikelihood rename to tests/testdata/blimp_npi_present_1-v0-loglikelihood index 697a296562..910e490a98 100644 --- a/tests/tests/testdata/blimp_npi_present_1-v0-loglikelihood +++ b/tests/testdata/blimp_npi_present_1-v0-loglikelihood @@ -1 +1 @@ -3ef532a85e0ee8f8ff779bc7ddc873d515969a708da84a4eb4a85b7c843cf244 +3ef532a85e0ee8f8ff779bc7ddc873d515969a708da84a4eb4a85b7c843cf244 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_npi_present_1-v0-res.json b/tests/testdata/blimp_npi_present_1-v0-res.json similarity index 50% rename from tests/tests/testdata/blimp_npi_present_1-v0-res.json rename to tests/testdata/blimp_npi_present_1-v0-res.json index 3db6b3cdb4..8e4ae8d6ef 100644 --- a/tests/tests/testdata/blimp_npi_present_1-v0-res.json +++ b/tests/testdata/blimp_npi_present_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_npi_present_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_npi_present_1": 0}} +{"results": {"blimp_npi_present_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_npi_present_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_npi_present_2-v0-loglikelihood b/tests/testdata/blimp_npi_present_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_npi_present_2-v0-loglikelihood rename to tests/testdata/blimp_npi_present_2-v0-loglikelihood index d7f4e66e70..543fdc0614 100644 --- a/tests/tests/testdata/blimp_npi_present_2-v0-loglikelihood +++ b/tests/testdata/blimp_npi_present_2-v0-loglikelihood @@ -1 +1 @@ -fdb688ac6259bb65d234ef0a36e9a9ee449f9608f633b12e1943b462aead8e17 +fdb688ac6259bb65d234ef0a36e9a9ee449f9608f633b12e1943b462aead8e17 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_npi_present_2-v0-res.json b/tests/testdata/blimp_npi_present_2-v0-res.json similarity index 50% rename from tests/tests/testdata/blimp_npi_present_2-v0-res.json rename to tests/testdata/blimp_npi_present_2-v0-res.json index 58f75fa16a..efe40ced37 100644 --- a/tests/tests/testdata/blimp_npi_present_2-v0-res.json +++ b/tests/testdata/blimp_npi_present_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_npi_present_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_npi_present_2": 0}} +{"results": {"blimp_npi_present_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_npi_present_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood b/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood rename to tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood index 70d74d84ab..03f45fd619 100644 --- a/tests/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood +++ b/tests/testdata/blimp_only_npi_licensor_present-v0-loglikelihood @@ -1 +1 @@ -d2d0711611b5b218c6fa8c7278494749252b7868c396451919b761303556bd66 +d2d0711611b5b218c6fa8c7278494749252b7868c396451919b761303556bd66 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_only_npi_licensor_present-v0-res.json b/tests/testdata/blimp_only_npi_licensor_present-v0-res.json similarity index 84% rename from tests/tests/testdata/blimp_only_npi_licensor_present-v0-res.json rename to tests/testdata/blimp_only_npi_licensor_present-v0-res.json index b934ea7a77..321702a66e 100644 --- a/tests/tests/testdata/blimp_only_npi_licensor_present-v0-res.json +++ b/tests/testdata/blimp_only_npi_licensor_present-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_only_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_only_npi_licensor_present": 0}} +{"results": {"blimp_only_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_only_npi_licensor_present": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_only_npi_scope-v0-loglikelihood b/tests/testdata/blimp_only_npi_scope-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_only_npi_scope-v0-loglikelihood rename to tests/testdata/blimp_only_npi_scope-v0-loglikelihood index 7b0445e9db..f1846d3e93 100644 --- a/tests/tests/testdata/blimp_only_npi_scope-v0-loglikelihood +++ b/tests/testdata/blimp_only_npi_scope-v0-loglikelihood @@ -1 +1 @@ -fc0be817478c212327050fa297ef61ad214f4847dbff61d4e0fe7914c06a1691 +fc0be817478c212327050fa297ef61ad214f4847dbff61d4e0fe7914c06a1691 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_only_npi_scope-v0-res.json b/tests/testdata/blimp_only_npi_scope-v0-res.json similarity index 99% rename from tests/tests/testdata/blimp_only_npi_scope-v0-res.json rename to tests/testdata/blimp_only_npi_scope-v0-res.json index bec2e7d33b..82fbbab07d 100644 --- a/tests/tests/testdata/blimp_only_npi_scope-v0-res.json +++ b/tests/testdata/blimp_only_npi_scope-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_only_npi_scope": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_only_npi_scope": 0}} +{"results": {"blimp_only_npi_scope": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_only_npi_scope": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_passive_1-v0-loglikelihood b/tests/testdata/blimp_passive_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_passive_1-v0-loglikelihood rename to tests/testdata/blimp_passive_1-v0-loglikelihood index 52f5b2332f..183b815d22 100644 --- a/tests/tests/testdata/blimp_passive_1-v0-loglikelihood +++ b/tests/testdata/blimp_passive_1-v0-loglikelihood @@ -1 +1 @@ -fa4addddd8e380031b8e0871776cabcb707c0f21dcaf5d8b3defec66cce55043 +fa4addddd8e380031b8e0871776cabcb707c0f21dcaf5d8b3defec66cce55043 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_passive_1-v0-res.json b/tests/testdata/blimp_passive_1-v0-res.json similarity index 53% rename from tests/tests/testdata/blimp_passive_1-v0-res.json rename to tests/testdata/blimp_passive_1-v0-res.json index 3dd08a649f..64070cf58d 100644 --- a/tests/tests/testdata/blimp_passive_1-v0-res.json +++ b/tests/testdata/blimp_passive_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_passive_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_passive_1": 0}} +{"results": {"blimp_passive_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_passive_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_passive_2-v0-loglikelihood b/tests/testdata/blimp_passive_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_passive_2-v0-loglikelihood rename to tests/testdata/blimp_passive_2-v0-loglikelihood index d8cab002c4..d667f46946 100644 --- a/tests/tests/testdata/blimp_passive_2-v0-loglikelihood +++ b/tests/testdata/blimp_passive_2-v0-loglikelihood @@ -1 +1 @@ -755bdfe2c89737c43001ff1dc83d68ad33e444aaf0669af66aaf82dcd09f2eca +755bdfe2c89737c43001ff1dc83d68ad33e444aaf0669af66aaf82dcd09f2eca \ No newline at end of file diff --git a/tests/tests/testdata/blimp_passive_2-v0-res.json b/tests/testdata/blimp_passive_2-v0-res.json similarity index 53% rename from tests/tests/testdata/blimp_passive_2-v0-res.json rename to tests/testdata/blimp_passive_2-v0-res.json index 5205b31eb7..5a4dd092c4 100644 --- a/tests/tests/testdata/blimp_passive_2-v0-res.json +++ b/tests/testdata/blimp_passive_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_passive_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_passive_2": 0}} +{"results": {"blimp_passive_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_passive_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood b/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood rename to tests/testdata/blimp_principle_A_c_command-v0-loglikelihood index a9c864d0a4..87b49c5de9 100644 --- a/tests/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_c_command-v0-loglikelihood @@ -1 +1 @@ -7c2ed82612af9175052cd44d8e178b6dd084c04eb462a3d88fcacfad2df8be8e +7c2ed82612af9175052cd44d8e178b6dd084c04eb462a3d88fcacfad2df8be8e \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_c_command-v0-res.json b/tests/testdata/blimp_principle_A_c_command-v0-res.json similarity index 89% rename from tests/tests/testdata/blimp_principle_A_c_command-v0-res.json rename to tests/testdata/blimp_principle_A_c_command-v0-res.json index 34113bf8d1..43fadc2e0b 100644 --- a/tests/tests/testdata/blimp_principle_A_c_command-v0-res.json +++ b/tests/testdata/blimp_principle_A_c_command-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_c_command": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_c_command": 0}} +{"results": {"blimp_principle_A_c_command": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_c_command": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_case_1-v0-loglikelihood b/tests/testdata/blimp_principle_A_case_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_case_1-v0-loglikelihood rename to tests/testdata/blimp_principle_A_case_1-v0-loglikelihood index 5812c4732c..ce8166c460 100644 --- a/tests/tests/testdata/blimp_principle_A_case_1-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_case_1-v0-loglikelihood @@ -1 +1 @@ -49d2b8ce6667a6166fdc2a2e5dbe7ff07d9b8415e9f33482aef15956b3ebc24a +49d2b8ce6667a6166fdc2a2e5dbe7ff07d9b8415e9f33482aef15956b3ebc24a \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_case_1-v0-res.json b/tests/testdata/blimp_principle_A_case_1-v0-res.json similarity index 93% rename from tests/tests/testdata/blimp_principle_A_case_1-v0-res.json rename to tests/testdata/blimp_principle_A_case_1-v0-res.json index 952f4fb4ee..f325c2e3e3 100644 --- a/tests/tests/testdata/blimp_principle_A_case_1-v0-res.json +++ b/tests/testdata/blimp_principle_A_case_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_case_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_case_1": 0}} +{"results": {"blimp_principle_A_case_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_case_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_case_2-v0-loglikelihood b/tests/testdata/blimp_principle_A_case_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_case_2-v0-loglikelihood rename to tests/testdata/blimp_principle_A_case_2-v0-loglikelihood index ad1aac68e3..8c043857d4 100644 --- a/tests/tests/testdata/blimp_principle_A_case_2-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_case_2-v0-loglikelihood @@ -1 +1 @@ -cd68adb65c891d672e22bf53c054b2083ab08bc1da43951732b409c942d14bc7 +cd68adb65c891d672e22bf53c054b2083ab08bc1da43951732b409c942d14bc7 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_case_2-v0-res.json b/tests/testdata/blimp_principle_A_case_2-v0-res.json similarity index 93% rename from tests/tests/testdata/blimp_principle_A_case_2-v0-res.json rename to tests/testdata/blimp_principle_A_case_2-v0-res.json index e8dd8fad63..ec8108c88d 100644 --- a/tests/tests/testdata/blimp_principle_A_case_2-v0-res.json +++ b/tests/testdata/blimp_principle_A_case_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_case_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_case_2": 0}} +{"results": {"blimp_principle_A_case_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_case_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_domain_1-v0-loglikelihood b/tests/testdata/blimp_principle_A_domain_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_domain_1-v0-loglikelihood rename to tests/testdata/blimp_principle_A_domain_1-v0-loglikelihood index 0ac838c09e..6b900d05f4 100644 --- a/tests/tests/testdata/blimp_principle_A_domain_1-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_domain_1-v0-loglikelihood @@ -1 +1 @@ -290e7eddacea4ec16989af697f2ee3373fdd9aef4b452bf887184c6e2f6e7d9d +290e7eddacea4ec16989af697f2ee3373fdd9aef4b452bf887184c6e2f6e7d9d \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_domain_1-v0-res.json b/tests/testdata/blimp_principle_A_domain_1-v0-res.json similarity index 90% rename from tests/tests/testdata/blimp_principle_A_domain_1-v0-res.json rename to tests/testdata/blimp_principle_A_domain_1-v0-res.json index 546ccc8b22..9efbffb50f 100644 --- a/tests/tests/testdata/blimp_principle_A_domain_1-v0-res.json +++ b/tests/testdata/blimp_principle_A_domain_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_domain_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_1": 0}} +{"results": {"blimp_principle_A_domain_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_domain_2-v0-loglikelihood b/tests/testdata/blimp_principle_A_domain_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_domain_2-v0-loglikelihood rename to tests/testdata/blimp_principle_A_domain_2-v0-loglikelihood index c227edddc2..0e201fe3c8 100644 --- a/tests/tests/testdata/blimp_principle_A_domain_2-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_domain_2-v0-loglikelihood @@ -1 +1 @@ -eb5ddf0a97982373ab1a4e58267cfcdebdecdb86c376dfd5ebf46737c9d3ee12 +eb5ddf0a97982373ab1a4e58267cfcdebdecdb86c376dfd5ebf46737c9d3ee12 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_domain_2-v0-res.json b/tests/testdata/blimp_principle_A_domain_2-v0-res.json similarity index 90% rename from tests/tests/testdata/blimp_principle_A_domain_2-v0-res.json rename to tests/testdata/blimp_principle_A_domain_2-v0-res.json index 763f00c1da..1bda1a2aa9 100644 --- a/tests/tests/testdata/blimp_principle_A_domain_2-v0-res.json +++ b/tests/testdata/blimp_principle_A_domain_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_domain_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_2": 0}} +{"results": {"blimp_principle_A_domain_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_domain_3-v0-loglikelihood b/tests/testdata/blimp_principle_A_domain_3-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_domain_3-v0-loglikelihood rename to tests/testdata/blimp_principle_A_domain_3-v0-loglikelihood index d525bd3c1a..c37e936401 100644 --- a/tests/tests/testdata/blimp_principle_A_domain_3-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_domain_3-v0-loglikelihood @@ -1 +1 @@ -38454befedcf1f3f6ef27d3bef9ccfdfb3e94a7ab32d86a63493a920d2d50093 +38454befedcf1f3f6ef27d3bef9ccfdfb3e94a7ab32d86a63493a920d2d50093 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_domain_3-v0-res.json b/tests/testdata/blimp_principle_A_domain_3-v0-res.json similarity index 90% rename from tests/tests/testdata/blimp_principle_A_domain_3-v0-res.json rename to tests/testdata/blimp_principle_A_domain_3-v0-res.json index 9e11de23aa..77c4bf916a 100644 --- a/tests/tests/testdata/blimp_principle_A_domain_3-v0-res.json +++ b/tests/testdata/blimp_principle_A_domain_3-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_domain_3": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_3": 0}} +{"results": {"blimp_principle_A_domain_3": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_domain_3": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_reconstruction-v0-loglikelihood b/tests/testdata/blimp_principle_A_reconstruction-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_principle_A_reconstruction-v0-loglikelihood rename to tests/testdata/blimp_principle_A_reconstruction-v0-loglikelihood index 2a19fcc794..f8d1d1f87f 100644 --- a/tests/tests/testdata/blimp_principle_A_reconstruction-v0-loglikelihood +++ b/tests/testdata/blimp_principle_A_reconstruction-v0-loglikelihood @@ -1 +1 @@ -894efedfd8750d5b8de6157f9b2ed2b51b5290d3a78ea9b041fc62d34e96efbc +894efedfd8750d5b8de6157f9b2ed2b51b5290d3a78ea9b041fc62d34e96efbc \ No newline at end of file diff --git a/tests/tests/testdata/blimp_principle_A_reconstruction-v0-res.json b/tests/testdata/blimp_principle_A_reconstruction-v0-res.json similarity index 83% rename from tests/tests/testdata/blimp_principle_A_reconstruction-v0-res.json rename to tests/testdata/blimp_principle_A_reconstruction-v0-res.json index 8d0a8ed044..0e7d8db1e2 100644 --- a/tests/tests/testdata/blimp_principle_A_reconstruction-v0-res.json +++ b/tests/testdata/blimp_principle_A_reconstruction-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_principle_A_reconstruction": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_reconstruction": 0}} +{"results": {"blimp_principle_A_reconstruction": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_principle_A_reconstruction": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood b/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood rename to tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood index 69aea9868d..0a32ca7f97 100644 --- a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood +++ b/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-loglikelihood @@ -1 +1 @@ -5bc0441f31e32443cf761bca6e961d504e1e84b15aa4e1d79e5c8ed5b4c2aa3a +5bc0441f31e32443cf761bca6e961d504e1e84b15aa4e1d79e5c8ed5b4c2aa3a \ No newline at end of file diff --git a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json b/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json similarity index 71% rename from tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json rename to tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json index f26f44b74d..16fed715d4 100644 --- a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json +++ b/tests/testdata/blimp_regular_plural_subject_verb_agreement_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_regular_plural_subject_verb_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_regular_plural_subject_verb_agreement_1": 0}} +{"results": {"blimp_regular_plural_subject_verb_agreement_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_regular_plural_subject_verb_agreement_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood b/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood rename to tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood index 4f1faa5602..4b6525a10e 100644 --- a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood +++ b/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-loglikelihood @@ -1 +1 @@ -f69d9891f59872538962221fccc425b07df7cfbd83cdc546ce83e6b0e9a93f7c +f69d9891f59872538962221fccc425b07df7cfbd83cdc546ce83e6b0e9a93f7c \ No newline at end of file diff --git a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json b/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json similarity index 71% rename from tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json rename to tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json index d3710f6dca..6d64b97e20 100644 --- a/tests/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json +++ b/tests/testdata/blimp_regular_plural_subject_verb_agreement_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_regular_plural_subject_verb_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_regular_plural_subject_verb_agreement_2": 0}} +{"results": {"blimp_regular_plural_subject_verb_agreement_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_regular_plural_subject_verb_agreement_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood b/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood rename to tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood index bc436683b1..8e254de7a7 100644 --- a/tests/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood +++ b/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-loglikelihood @@ -1 +1 @@ -e6666c5657215ff4bfd646b8ee3ae6df956e71c0be9ab1c287fb1b68291dd0d1 +e6666c5657215ff4bfd646b8ee3ae6df956e71c0be9ab1c287fb1b68291dd0d1 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json b/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json similarity index 70% rename from tests/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json rename to tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json index 5147e675df..4305bb313c 100644 --- a/tests/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json +++ b/tests/testdata/blimp_sentential_negation_npi_licensor_present-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_sentential_negation_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_negation_npi_licensor_present": 0}} +{"results": {"blimp_sentential_negation_npi_licensor_present": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_negation_npi_licensor_present": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood b/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood rename to tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood index c8ceed839e..c7aa260f91 100644 --- a/tests/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood +++ b/tests/testdata/blimp_sentential_negation_npi_scope-v0-loglikelihood @@ -1 +1 @@ -32fcbd0a1c6e664af2751bad552587b5ca3911973b07f4fb2cf0a2acd3de5349 +32fcbd0a1c6e664af2751bad552587b5ca3911973b07f4fb2cf0a2acd3de5349 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json b/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json similarity index 80% rename from tests/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json rename to tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json index c5c869e576..fcaf915f36 100644 --- a/tests/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json +++ b/tests/testdata/blimp_sentential_negation_npi_scope-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_sentential_negation_npi_scope": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_negation_npi_scope": 0}} +{"results": {"blimp_sentential_negation_npi_scope": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_negation_npi_scope": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_sentential_subject_island-v0-loglikelihood b/tests/testdata/blimp_sentential_subject_island-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_sentential_subject_island-v0-loglikelihood rename to tests/testdata/blimp_sentential_subject_island-v0-loglikelihood index 796f881d3c..6220172936 100644 --- a/tests/tests/testdata/blimp_sentential_subject_island-v0-loglikelihood +++ b/tests/testdata/blimp_sentential_subject_island-v0-loglikelihood @@ -1 +1 @@ -80f5f98fad26240de2767fe58c4b18d864df41cbfa76f06c84c3fce9f14f4833 +80f5f98fad26240de2767fe58c4b18d864df41cbfa76f06c84c3fce9f14f4833 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_sentential_subject_island-v0-res.json b/tests/testdata/blimp_sentential_subject_island-v0-res.json similarity index 84% rename from tests/tests/testdata/blimp_sentential_subject_island-v0-res.json rename to tests/testdata/blimp_sentential_subject_island-v0-res.json index d2e011f01c..a7f8f1825a 100644 --- a/tests/tests/testdata/blimp_sentential_subject_island-v0-res.json +++ b/tests/testdata/blimp_sentential_subject_island-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_sentential_subject_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_subject_island": 0}} +{"results": {"blimp_sentential_subject_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_sentential_subject_island": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_superlative_quantifiers_1-v0-loglikelihood b/tests/testdata/blimp_superlative_quantifiers_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_superlative_quantifiers_1-v0-loglikelihood rename to tests/testdata/blimp_superlative_quantifiers_1-v0-loglikelihood index 31f880fb55..b7d2819cb3 100644 --- a/tests/tests/testdata/blimp_superlative_quantifiers_1-v0-loglikelihood +++ b/tests/testdata/blimp_superlative_quantifiers_1-v0-loglikelihood @@ -1 +1 @@ -8a01f6a5ea87a01c0c9b0c7b3bc4de4711bf0ff050976976651182b9ed34a0d4 +8a01f6a5ea87a01c0c9b0c7b3bc4de4711bf0ff050976976651182b9ed34a0d4 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json b/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json similarity index 84% rename from tests/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json rename to tests/testdata/blimp_superlative_quantifiers_1-v0-res.json index 99045a76c6..b69d445f3c 100644 --- a/tests/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json +++ b/tests/testdata/blimp_superlative_quantifiers_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_superlative_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_superlative_quantifiers_1": 0}} +{"results": {"blimp_superlative_quantifiers_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_superlative_quantifiers_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_superlative_quantifiers_2-v0-loglikelihood b/tests/testdata/blimp_superlative_quantifiers_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_superlative_quantifiers_2-v0-loglikelihood rename to tests/testdata/blimp_superlative_quantifiers_2-v0-loglikelihood index c72ce9158f..4a8317f0b3 100644 --- a/tests/tests/testdata/blimp_superlative_quantifiers_2-v0-loglikelihood +++ b/tests/testdata/blimp_superlative_quantifiers_2-v0-loglikelihood @@ -1 +1 @@ -59c20ff0f632cf42afc74ecc682cf92e5e740417b01e6cf9a610a3bc544d2ea5 +59c20ff0f632cf42afc74ecc682cf92e5e740417b01e6cf9a610a3bc544d2ea5 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_superlative_quantifiers_2-v0-res.json b/tests/testdata/blimp_superlative_quantifiers_2-v0-res.json similarity index 84% rename from tests/tests/testdata/blimp_superlative_quantifiers_2-v0-res.json rename to tests/testdata/blimp_superlative_quantifiers_2-v0-res.json index 02f83fd3b8..2733d251cf 100644 --- a/tests/tests/testdata/blimp_superlative_quantifiers_2-v0-res.json +++ b/tests/testdata/blimp_superlative_quantifiers_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_superlative_quantifiers_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_superlative_quantifiers_2": 0}} +{"results": {"blimp_superlative_quantifiers_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_superlative_quantifiers_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood b/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood rename to tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood index 34c4914813..a26cb174a0 100644 --- a/tests/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood +++ b/tests/testdata/blimp_tough_vs_raising_1-v0-loglikelihood @@ -1 +1 @@ -973fe56534fdef1207f0fc08dd09a210304c55f33c6cbb17552754bf54f11c86 +973fe56534fdef1207f0fc08dd09a210304c55f33c6cbb17552754bf54f11c86 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_tough_vs_raising_1-v0-res.json b/tests/testdata/blimp_tough_vs_raising_1-v0-res.json similarity index 93% rename from tests/tests/testdata/blimp_tough_vs_raising_1-v0-res.json rename to tests/testdata/blimp_tough_vs_raising_1-v0-res.json index 68575cfb71..44ea10c138 100644 --- a/tests/tests/testdata/blimp_tough_vs_raising_1-v0-res.json +++ b/tests/testdata/blimp_tough_vs_raising_1-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_tough_vs_raising_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_tough_vs_raising_1": 0}} +{"results": {"blimp_tough_vs_raising_1": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_tough_vs_raising_1": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_tough_vs_raising_2-v0-loglikelihood b/tests/testdata/blimp_tough_vs_raising_2-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_tough_vs_raising_2-v0-loglikelihood rename to tests/testdata/blimp_tough_vs_raising_2-v0-loglikelihood index 376afc1f89..3b0f976352 100644 --- a/tests/tests/testdata/blimp_tough_vs_raising_2-v0-loglikelihood +++ b/tests/testdata/blimp_tough_vs_raising_2-v0-loglikelihood @@ -1 +1 @@ -d255a10a34f14d77d9526604a17b0f6747d32f62fc2e3a09e9ab10054535fd45 +d255a10a34f14d77d9526604a17b0f6747d32f62fc2e3a09e9ab10054535fd45 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_tough_vs_raising_2-v0-res.json b/tests/testdata/blimp_tough_vs_raising_2-v0-res.json similarity index 93% rename from tests/tests/testdata/blimp_tough_vs_raising_2-v0-res.json rename to tests/testdata/blimp_tough_vs_raising_2-v0-res.json index 9db116b8a0..c9b8c7d061 100644 --- a/tests/tests/testdata/blimp_tough_vs_raising_2-v0-res.json +++ b/tests/testdata/blimp_tough_vs_raising_2-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_tough_vs_raising_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_tough_vs_raising_2": 0}} +{"results": {"blimp_tough_vs_raising_2": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_tough_vs_raising_2": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_transitive-v0-loglikelihood b/tests/testdata/blimp_transitive-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_transitive-v0-loglikelihood rename to tests/testdata/blimp_transitive-v0-loglikelihood index 0d464eaed5..98156dcf1e 100644 --- a/tests/tests/testdata/blimp_transitive-v0-loglikelihood +++ b/tests/testdata/blimp_transitive-v0-loglikelihood @@ -1 +1 @@ -d0d47fe40a7ee558ba782edbc4f49f7d9123c8472a36decc97f8ab142b45b9d8 +d0d47fe40a7ee558ba782edbc4f49f7d9123c8472a36decc97f8ab142b45b9d8 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_transitive-v0-res.json b/tests/testdata/blimp_transitive-v0-res.json similarity index 52% rename from tests/tests/testdata/blimp_transitive-v0-res.json rename to tests/testdata/blimp_transitive-v0-res.json index e93acad90a..d2c99ab803 100644 --- a/tests/tests/testdata/blimp_transitive-v0-res.json +++ b/tests/testdata/blimp_transitive-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_transitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_transitive": 0}} +{"results": {"blimp_transitive": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_transitive": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_island-v0-loglikelihood b/tests/testdata/blimp_wh_island-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_island-v0-loglikelihood rename to tests/testdata/blimp_wh_island-v0-loglikelihood index c3006b2b22..d27f1316dc 100644 --- a/tests/tests/testdata/blimp_wh_island-v0-loglikelihood +++ b/tests/testdata/blimp_wh_island-v0-loglikelihood @@ -1 +1 @@ -91a9e4b60b0f3572a7fdbd7648d0e69f36e5eb34db715315b0082558d7ed8b65 +91a9e4b60b0f3572a7fdbd7648d0e69f36e5eb34db715315b0082558d7ed8b65 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_island-v0-res.json b/tests/testdata/blimp_wh_island-v0-res.json similarity index 53% rename from tests/tests/testdata/blimp_wh_island-v0-res.json rename to tests/testdata/blimp_wh_island-v0-res.json index 73dc8131e1..1d50683774 100644 --- a/tests/tests/testdata/blimp_wh_island-v0-res.json +++ b/tests/testdata/blimp_wh_island-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_island": 0}} +{"results": {"blimp_wh_island": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_island": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood b/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood rename to tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood index dca61bd8a0..c3e6af12f2 100644 --- a/tests/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood +++ b/tests/testdata/blimp_wh_questions_object_gap-v0-loglikelihood @@ -1 +1 @@ -4d4aaa0274ccd485ff8430ed61b8f83806febe18c16616c7d050f637a0463eba +4d4aaa0274ccd485ff8430ed61b8f83806febe18c16616c7d050f637a0463eba \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_questions_object_gap-v0-res.json b/tests/testdata/blimp_wh_questions_object_gap-v0-res.json similarity index 87% rename from tests/tests/testdata/blimp_wh_questions_object_gap-v0-res.json rename to tests/testdata/blimp_wh_questions_object_gap-v0-res.json index 3acb9d0117..60228b7918 100644 --- a/tests/tests/testdata/blimp_wh_questions_object_gap-v0-res.json +++ b/tests/testdata/blimp_wh_questions_object_gap-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_questions_object_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_object_gap": 0}} +{"results": {"blimp_wh_questions_object_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_object_gap": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood b/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood rename to tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood index 48ad6a252e..1a88f8fa87 100644 --- a/tests/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood +++ b/tests/testdata/blimp_wh_questions_subject_gap-v0-loglikelihood @@ -1 +1 @@ -d5486ffcc075cad4302e37ece9bbf5b2063c0b5a48e76c8e1dd365e22a5a48fc +d5486ffcc075cad4302e37ece9bbf5b2063c0b5a48e76c8e1dd365e22a5a48fc \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_questions_subject_gap-v0-res.json b/tests/testdata/blimp_wh_questions_subject_gap-v0-res.json similarity index 85% rename from tests/tests/testdata/blimp_wh_questions_subject_gap-v0-res.json rename to tests/testdata/blimp_wh_questions_subject_gap-v0-res.json index 50869d2c68..4b21da71d5 100644 --- a/tests/tests/testdata/blimp_wh_questions_subject_gap-v0-res.json +++ b/tests/testdata/blimp_wh_questions_subject_gap-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_questions_subject_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_subject_gap": 0}} +{"results": {"blimp_wh_questions_subject_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_subject_gap": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood b/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood rename to tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood index ada08f3908..f83ed1fb74 100644 --- a/tests/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood +++ b/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-loglikelihood @@ -1 +1 @@ -37483dfda688b62ad27161c9fc1e1e7710c5a6e6a7cd3474df119bcafd30e97f +37483dfda688b62ad27161c9fc1e1e7710c5a6e6a7cd3474df119bcafd30e97f \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json b/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json similarity index 72% rename from tests/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json rename to tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json index c487505f5b..fe6bbf95e5 100644 --- a/tests/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json +++ b/tests/testdata/blimp_wh_questions_subject_gap_long_distance-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_questions_subject_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_subject_gap_long_distance": 0}} +{"results": {"blimp_wh_questions_subject_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_questions_subject_gap_long_distance": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood b/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood rename to tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood index 4db9b2ae80..5f40ea63f1 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood +++ b/tests/testdata/blimp_wh_vs_that_no_gap-v0-loglikelihood @@ -1 +1 @@ -d1d3e439b2020ef5ed232bfebbcc9634adc5117e9eb61e38fdbbe2c8ea128d54 +d1d3e439b2020ef5ed232bfebbcc9634adc5117e9eb61e38fdbbe2c8ea128d54 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json b/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json similarity index 94% rename from tests/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json rename to tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json index c7112f5e57..dfd3f66b77 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json +++ b/tests/testdata/blimp_wh_vs_that_no_gap-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_vs_that_no_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_no_gap": 0}} +{"results": {"blimp_wh_vs_that_no_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_no_gap": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood b/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood rename to tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood index e377494473..13359ac3d2 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood +++ b/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-loglikelihood @@ -1 +1 @@ -a142cc2a6fcd93230b650927b07367cad957b8f3f42cb4072151da53dea301df +a142cc2a6fcd93230b650927b07367cad957b8f3f42cb4072151da53dea301df \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json b/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json similarity index 78% rename from tests/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json rename to tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json index f23846a765..de9e800718 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json +++ b/tests/testdata/blimp_wh_vs_that_no_gap_long_distance-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_vs_that_no_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_no_gap_long_distance": 0}} +{"results": {"blimp_wh_vs_that_no_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_no_gap_long_distance": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood b/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood rename to tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood index 0c5c8f1235..4c15f2283e 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood +++ b/tests/testdata/blimp_wh_vs_that_with_gap-v0-loglikelihood @@ -1 +1 @@ -d41a9b85e4c31e445bf9b46b8642df02203ccc02b4a9b254bf76066d5c54b4b7 +d41a9b85e4c31e445bf9b46b8642df02203ccc02b4a9b254bf76066d5c54b4b7 \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json b/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json similarity index 92% rename from tests/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json rename to tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json index 3a3888e719..14befd4ab6 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json +++ b/tests/testdata/blimp_wh_vs_that_with_gap-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_vs_that_with_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_with_gap": 0}} +{"results": {"blimp_wh_vs_that_with_gap": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_with_gap": 0}} \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood b/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood similarity index 98% rename from tests/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood rename to tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood index 976005c758..34b9591396 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood +++ b/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-loglikelihood @@ -1 +1 @@ -eed67491bdf493a1dad8f1d9766bc7bd0e79946365b833c0f7eb81ac998e3dca +eed67491bdf493a1dad8f1d9766bc7bd0e79946365b833c0f7eb81ac998e3dca \ No newline at end of file diff --git a/tests/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-res.json b/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-res.json similarity index 76% rename from tests/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-res.json rename to tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-res.json index c5d2393594..95a2c0c7e1 100644 --- a/tests/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-res.json +++ b/tests/testdata/blimp_wh_vs_that_with_gap_long_distance-v0-res.json @@ -1 +1 @@ -{"results": {"blimp_wh_vs_that_with_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_with_gap_long_distance": 0}} +{"results": {"blimp_wh_vs_that_with_gap_long_distance": {"acc": 0.485, "acc_stderr": 0.0158121796418149}}, "versions": {"blimp_wh_vs_that_with_gap_long_distance": 0}} \ No newline at end of file diff --git a/tests/testdata/boolq-v0-loglikelihood b/tests/testdata/boolq-v0-loglikelihood index 9e546c722d..14c1bf5f5e 100644 --- a/tests/testdata/boolq-v0-loglikelihood +++ b/tests/testdata/boolq-v0-loglikelihood @@ -1 +1 @@ -de5aa6f77a2e0fd050b9c272f10c4d5d5581e4f75ffa60926f79e60ae1738960 +de5aa6f77a2e0fd050b9c272f10c4d5d5581e4f75ffa60926f79e60ae1738960 \ No newline at end of file diff --git a/tests/testdata/boolq-v0-res.json b/tests/testdata/boolq-v0-res.json index 9c858c4ea5..2b459d8b28 100644 --- a/tests/testdata/boolq-v0-res.json +++ b/tests/testdata/boolq-v0-res.json @@ -1 +1 @@ -{"results": {"boolq": {"acc": 0.5048929663608562, "acc_stderr": 0.00874463623355505}}, "versions": {"boolq": 0}} +{"results": {"boolq": {"acc": 0.5048929663608562, "acc_stderr": 0.00874463623355505}}, "versions": {"boolq": 0}} \ No newline at end of file diff --git a/tests/testdata/boolq-v1-loglikelihood b/tests/testdata/boolq-v1-loglikelihood index ebd0f5ec32..7811121c9f 100644 --- a/tests/testdata/boolq-v1-loglikelihood +++ b/tests/testdata/boolq-v1-loglikelihood @@ -1 +1 @@ -6577e0d88572772ef08e64f624c0e3df0953286ae1f118ccef15623b59ffeabf +6577e0d88572772ef08e64f624c0e3df0953286ae1f118ccef15623b59ffeabf \ No newline at end of file diff --git a/tests/testdata/boolq-v1-res.json b/tests/testdata/boolq-v1-res.json index ec53d0b932..291b9f122d 100644 --- a/tests/testdata/boolq-v1-res.json +++ b/tests/testdata/boolq-v1-res.json @@ -1 +1 @@ -{"results": {"boolq": {"acc": 0.5048929663608562, "acc_stderr": 0.00874463623355505}}, "versions": {"boolq": 1}} +{"results": {"boolq": {"acc": 0.5048929663608562, "acc_stderr": 0.00874463623355505}}, "versions": {"boolq": 1}} \ No newline at end of file diff --git a/tests/testdata/cb-v0-loglikelihood b/tests/testdata/cb-v0-loglikelihood index 01e69edb0c..6fa6f6dae6 100644 --- a/tests/testdata/cb-v0-loglikelihood +++ b/tests/testdata/cb-v0-loglikelihood @@ -1 +1 @@ -ec3b1bbb9561e39c43c6f77a23b4060b15c606141c5346e3d0791b3e92aaa5d0 +ec3b1bbb9561e39c43c6f77a23b4060b15c606141c5346e3d0791b3e92aaa5d0 \ No newline at end of file diff --git a/tests/testdata/cb-v0-res.json b/tests/testdata/cb-v0-res.json index 6adeae3439..ba386fd6c7 100644 --- a/tests/testdata/cb-v0-res.json +++ b/tests/testdata/cb-v0-res.json @@ -1 +1 @@ -{"results": {"cb": {"acc": 0.3392857142857143, "acc_stderr": 0.06384226561930825, "f1": 0.2819143819143819}}, "versions": {"cb": 0}} +{"results": {"cb": {"acc": 0.3392857142857143, "acc_stderr": 0.06384226561930825, "f1": 0.2819143819143819}}, "versions": {"cb": 0}} \ No newline at end of file diff --git a/tests/testdata/cb-v1-loglikelihood b/tests/testdata/cb-v1-loglikelihood index 6052306670..ad7e928fe6 100644 --- a/tests/testdata/cb-v1-loglikelihood +++ b/tests/testdata/cb-v1-loglikelihood @@ -1 +1 @@ -77b11f4348eb8a7f57faf95c531fda01ab4bf0e729f91a82451ed8e71ec8e66d +77b11f4348eb8a7f57faf95c531fda01ab4bf0e729f91a82451ed8e71ec8e66d \ No newline at end of file diff --git a/tests/testdata/cb-v1-res.json b/tests/testdata/cb-v1-res.json index 44cca02ed9..1cff410b2c 100644 --- a/tests/testdata/cb-v1-res.json +++ b/tests/testdata/cb-v1-res.json @@ -1 +1 @@ -{"results": {"cb": {"acc": 0.3392857142857143, "acc_stderr": 0.06384226561930825, "f1": 0.2819143819143819}}, "versions": {"cb": 1}} +{"results": {"cb": {"acc": 0.3392857142857143, "acc_stderr": 0.06384226561930825, "f1": 0.2819143819143819}}, "versions": {"cb": 1}} \ No newline at end of file diff --git a/tests/testdata/cola-v0-loglikelihood b/tests/testdata/cola-v0-loglikelihood index 396e1f1560..45737909e7 100644 --- a/tests/testdata/cola-v0-loglikelihood +++ b/tests/testdata/cola-v0-loglikelihood @@ -1 +1 @@ -e8635578ed8ee70b707a666d35e468b9321db24470f80c92080651e2bfa01751 +e8635578ed8ee70b707a666d35e468b9321db24470f80c92080651e2bfa01751 \ No newline at end of file diff --git a/tests/testdata/cola-v0-res.json b/tests/testdata/cola-v0-res.json index 11d13c0d8c..462e5d9401 100644 --- a/tests/testdata/cola-v0-res.json +++ b/tests/testdata/cola-v0-res.json @@ -1 +1 @@ -{"results": {"cola": {"mcc": -0.04538802810223175, "mcc_stderr": 0.023100371589225246}}, "versions": {"cola": 0}} +{"results": {"cola": {"mcc": -0.04538802810223175, "mcc_stderr": 0.023100371589225246}}, "versions": {"cola": 0}} \ No newline at end of file diff --git a/tests/testdata/copa-v0-loglikelihood b/tests/testdata/copa-v0-loglikelihood index 9636e9f8bd..ebe4c6512a 100644 --- a/tests/testdata/copa-v0-loglikelihood +++ b/tests/testdata/copa-v0-loglikelihood @@ -1 +1 @@ -66276b9045b5300cba4b81340db06f674f031fa0b8883714ad0d03be464cd799 +66276b9045b5300cba4b81340db06f674f031fa0b8883714ad0d03be464cd799 \ No newline at end of file diff --git a/tests/testdata/copa-v0-res.json b/tests/testdata/copa-v0-res.json index 659b3bddce..9a537ec768 100644 --- a/tests/testdata/copa-v0-res.json +++ b/tests/testdata/copa-v0-res.json @@ -1 +1 @@ -{"results": {"copa": {"acc": 0.48, "acc_stderr": 0.050211673156867795}}, "versions": {"copa": 0}} +{"results": {"copa": {"acc": 0.48, "acc_stderr": 0.050211673156867795}}, "versions": {"copa": 0}} \ No newline at end of file diff --git a/tests/testdata/coqa-v0-greedy_until b/tests/testdata/coqa-v0-greedy_until index fc8eff9390..c1a9e165a7 100644 --- a/tests/testdata/coqa-v0-greedy_until +++ b/tests/testdata/coqa-v0-greedy_until @@ -1 +1 @@ -4a8605d5deed0423ec095700251ed93325b45d320aca35d4ce1e94702094435e +4a8605d5deed0423ec095700251ed93325b45d320aca35d4ce1e94702094435e \ No newline at end of file diff --git a/tests/testdata/coqa-v0-res.json b/tests/testdata/coqa-v0-res.json index d91e4e6113..9ca8024e3b 100644 --- a/tests/testdata/coqa-v0-res.json +++ b/tests/testdata/coqa-v0-res.json @@ -1 +1 @@ -{"results": {"coqa": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"coqa": 0}} +{"results": {"coqa": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"coqa": 0}} \ No newline at end of file diff --git a/tests/testdata/coqa-v1-greedy_until b/tests/testdata/coqa-v1-greedy_until index 0669bc24c5..f6e3f64b18 100644 --- a/tests/testdata/coqa-v1-greedy_until +++ b/tests/testdata/coqa-v1-greedy_until @@ -1 +1 @@ -57581470b921435d40da97872bb1cfda6ecf963ccc4b0240a3b04e3fea8c8e3a +57581470b921435d40da97872bb1cfda6ecf963ccc4b0240a3b04e3fea8c8e3a \ No newline at end of file diff --git a/tests/testdata/coqa-v1-res.json b/tests/testdata/coqa-v1-res.json index 4778173060..7941ad6299 100644 --- a/tests/testdata/coqa-v1-res.json +++ b/tests/testdata/coqa-v1-res.json @@ -1 +1 @@ -{"results": {"coqa": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"coqa": 1}} +{"results": {"coqa": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"coqa": 1}} \ No newline at end of file diff --git a/tests/testdata/cycle_letters-v0-greedy_until b/tests/testdata/cycle_letters-v0-greedy_until index 5dcb1c46f0..9068a24ef5 100644 --- a/tests/testdata/cycle_letters-v0-greedy_until +++ b/tests/testdata/cycle_letters-v0-greedy_until @@ -1 +1 @@ -eb23f7d5de7528eefd8ed5f8054c402ff947319cccfef7195995946f99389201 +eb23f7d5de7528eefd8ed5f8054c402ff947319cccfef7195995946f99389201 \ No newline at end of file diff --git a/tests/testdata/cycle_letters-v0-res.json b/tests/testdata/cycle_letters-v0-res.json index 0048e6b709..5b05a9430e 100644 --- a/tests/testdata/cycle_letters-v0-res.json +++ b/tests/testdata/cycle_letters-v0-res.json @@ -1 +1 @@ -{"results": {"cycle_letters": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"cycle_letters": 0}} +{"results": {"cycle_letters": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"cycle_letters": 0}} \ No newline at end of file diff --git a/tests/testdata/drop-v0-greedy_until b/tests/testdata/drop-v0-greedy_until index 6a6f4da4ea..6470b349d2 100644 --- a/tests/testdata/drop-v0-greedy_until +++ b/tests/testdata/drop-v0-greedy_until @@ -1 +1 @@ -ca566c630d8ac853d5785d4b5c40a5137172c34b48af3350e1f79e6d548b36ba +ca566c630d8ac853d5785d4b5c40a5137172c34b48af3350e1f79e6d548b36ba \ No newline at end of file diff --git a/tests/testdata/drop-v0-res.json b/tests/testdata/drop-v0-res.json index a60d623006..9384ca72fe 100644 --- a/tests/testdata/drop-v0-res.json +++ b/tests/testdata/drop-v0-res.json @@ -1 +1 @@ -{"results": {"drop": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"drop": 0}} +{"results": {"drop": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"drop": 0}} \ No newline at end of file diff --git a/tests/testdata/drop-v1-greedy_until b/tests/testdata/drop-v1-greedy_until index 7695a37e7c..3b2b697c91 100644 --- a/tests/testdata/drop-v1-greedy_until +++ b/tests/testdata/drop-v1-greedy_until @@ -1 +1 @@ -a670f911ab2999d72db15f534b22703d19e7837edbda4f9f199ad587f7aae6b2 +a670f911ab2999d72db15f534b22703d19e7837edbda4f9f199ad587f7aae6b2 \ No newline at end of file diff --git a/tests/testdata/drop-v1-res.json b/tests/testdata/drop-v1-res.json index d11936576c..8f397b410d 100644 --- a/tests/testdata/drop-v1-res.json +++ b/tests/testdata/drop-v1-res.json @@ -1 +1 @@ -{"results": {"drop": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"drop": 1}} +{"results": {"drop": {"em": 0.0, "em_stderr": 0.0, "f1": 0.0, "f1_stderr": 0.0}}, "versions": {"drop": 1}} \ No newline at end of file diff --git a/tests/testdata/ethics_cm-v0-loglikelihood b/tests/testdata/ethics_cm-v0-loglikelihood index 208dbc2829..69289144e0 100644 --- a/tests/testdata/ethics_cm-v0-loglikelihood +++ b/tests/testdata/ethics_cm-v0-loglikelihood @@ -1 +1 @@ -92d136ebb2bd86cd036e61699ad9a1417dbb48651f0a3afa5045cf57cef5a3f6 +92d136ebb2bd86cd036e61699ad9a1417dbb48651f0a3afa5045cf57cef5a3f6 \ No newline at end of file diff --git a/tests/testdata/ethics_cm-v0-res.json b/tests/testdata/ethics_cm-v0-res.json index 5234987304..f81a700903 100644 --- a/tests/testdata/ethics_cm-v0-res.json +++ b/tests/testdata/ethics_cm-v0-res.json @@ -1 +1 @@ -{"results": {"ethics_cm": {"acc": 0.49987129987129986, "acc_stderr": 0.008022881531793336}}, "versions": {"ethics_cm": 0}} +{"results": {"ethics_cm": {"acc": 0.49987129987129986, "acc_stderr": 0.008022881531793336}}, "versions": {"ethics_cm": 0}} \ No newline at end of file diff --git a/tests/testdata/ethics_deontology-v0-loglikelihood b/tests/testdata/ethics_deontology-v0-loglikelihood index 94ba432a0c..ab01349737 100644 --- a/tests/testdata/ethics_deontology-v0-loglikelihood +++ b/tests/testdata/ethics_deontology-v0-loglikelihood @@ -1 +1 @@ -74ecebe322457d70afc16fde848978410a09b854dc65c47f428d100bd1593248 +74ecebe322457d70afc16fde848978410a09b854dc65c47f428d100bd1593248 \ No newline at end of file diff --git a/tests/testdata/ethics_deontology-v0-res.json b/tests/testdata/ethics_deontology-v0-res.json index c8988b2188..3af24f414a 100644 --- a/tests/testdata/ethics_deontology-v0-res.json +++ b/tests/testdata/ethics_deontology-v0-res.json @@ -1 +1 @@ -{"results": {"ethics_deontology": {"acc": 0.503615127919911, "acc_stderr": 0.008338908432085105, "em": 0.07119021134593993}}, "versions": {"ethics_deontology": 0}} +{"results": {"ethics_deontology": {"acc": 0.503615127919911, "acc_stderr": 0.008338908432085105, "em": 0.07119021134593993}}, "versions": {"ethics_deontology": 0}} \ No newline at end of file diff --git a/tests/testdata/ethics_justice-v0-loglikelihood b/tests/testdata/ethics_justice-v0-loglikelihood index 4688ceb090..cc18a7e67b 100644 --- a/tests/testdata/ethics_justice-v0-loglikelihood +++ b/tests/testdata/ethics_justice-v0-loglikelihood @@ -1 +1 @@ -d7dfc44fea507b5c5c3a8218f79ed8197da8599ebb396d85feb91c25512126b6 +d7dfc44fea507b5c5c3a8218f79ed8197da8599ebb396d85feb91c25512126b6 \ No newline at end of file diff --git a/tests/testdata/ethics_justice-v0-res.json b/tests/testdata/ethics_justice-v0-res.json index 3a82972c3b..39efbc506a 100644 --- a/tests/testdata/ethics_justice-v0-res.json +++ b/tests/testdata/ethics_justice-v0-res.json @@ -1 +1 @@ -{"results": {"ethics_justice": {"acc": 0.49556213017751477, "acc_stderr": 0.009616784279885177, "em": 0.057692307692307696}}, "versions": {"ethics_justice": 0}} +{"results": {"ethics_justice": {"acc": 0.49556213017751477, "acc_stderr": 0.009616784279885177, "em": 0.057692307692307696}}, "versions": {"ethics_justice": 0}} \ No newline at end of file diff --git a/tests/testdata/ethics_utilitarianism-v0-loglikelihood b/tests/testdata/ethics_utilitarianism-v0-loglikelihood index ba1de12b7b..0c01f54880 100644 --- a/tests/testdata/ethics_utilitarianism-v0-loglikelihood +++ b/tests/testdata/ethics_utilitarianism-v0-loglikelihood @@ -1 +1 @@ -88872f1ed1b203f9649a4ced4fb4627d18c17af455d713de6e17c05eced4ec60 +88872f1ed1b203f9649a4ced4fb4627d18c17af455d713de6e17c05eced4ec60 \ No newline at end of file diff --git a/tests/testdata/ethics_utilitarianism-v0-res.json b/tests/testdata/ethics_utilitarianism-v0-res.json index a2bb3786bb..857af346b4 100644 --- a/tests/testdata/ethics_utilitarianism-v0-res.json +++ b/tests/testdata/ethics_utilitarianism-v0-res.json @@ -1 +1 @@ -{"results": {"ethics_utilitarianism": {"acc": 0.49771214642262895, "acc_stderr": 0.007211546310787838}}, "versions": {"ethics_utilitarianism": 0}} +{"results": {"ethics_utilitarianism": {"acc": 0.49771214642262895, "acc_stderr": 0.007211546310787838}}, "versions": {"ethics_utilitarianism": 0}} \ No newline at end of file diff --git a/tests/testdata/ethics_utilitarianism_original-v0-loglikelihood b/tests/testdata/ethics_utilitarianism_original-v0-loglikelihood index 34f7c0b13a..bd3ff6c459 100644 --- a/tests/testdata/ethics_utilitarianism_original-v0-loglikelihood +++ b/tests/testdata/ethics_utilitarianism_original-v0-loglikelihood @@ -1 +1 @@ -5b42ba1faf5ece6a6ec9a3976ce79c1fac8df5b98272aab85457188c2142693c +5b42ba1faf5ece6a6ec9a3976ce79c1fac8df5b98272aab85457188c2142693c \ No newline at end of file diff --git a/tests/testdata/ethics_utilitarianism_original-v0-res.json b/tests/testdata/ethics_utilitarianism_original-v0-res.json index b16cc34831..16940c8f5a 100644 --- a/tests/testdata/ethics_utilitarianism_original-v0-res.json +++ b/tests/testdata/ethics_utilitarianism_original-v0-res.json @@ -1 +1 @@ -{"results": {"ethics_utilitarianism_original": {"acc": 0.5214226289517471, "acc_stderr": 0.007204999520618661}}, "versions": {"ethics_utilitarianism_original": 0}} +{"results": {"ethics_utilitarianism_original": {"acc": 0.5214226289517471, "acc_stderr": 0.007204999520618661}}, "versions": {"ethics_utilitarianism_original": 0}} \ No newline at end of file diff --git a/tests/testdata/ethics_virtue-v0-loglikelihood b/tests/testdata/ethics_virtue-v0-loglikelihood index eb892a6cf6..48652c4689 100644 --- a/tests/testdata/ethics_virtue-v0-loglikelihood +++ b/tests/testdata/ethics_virtue-v0-loglikelihood @@ -1 +1 @@ -8021db8de46850090ddae6e6ec2d382029c3027b7c69884607503f916d09b709 +8021db8de46850090ddae6e6ec2d382029c3027b7c69884607503f916d09b709 \ No newline at end of file diff --git a/tests/testdata/ethics_virtue-v0-res.json b/tests/testdata/ethics_virtue-v0-res.json index cb98c99e62..cf3e02d826 100644 --- a/tests/testdata/ethics_virtue-v0-res.json +++ b/tests/testdata/ethics_virtue-v0-res.json @@ -1 +1 @@ -{"results": {"ethics_virtue": {"acc": 0.5035175879396985, "acc_stderr": 0.0070893491553555765, "em": 0.036180904522613064}}, "versions": {"ethics_virtue": 0}} +{"results": {"ethics_virtue": {"acc": 0.5035175879396985, "acc_stderr": 0.0070893491553555765, "em": 0.036180904522613064}}, "versions": {"ethics_virtue": 0}} \ No newline at end of file diff --git a/tests/testdata/gsm8k-v0-greedy_until b/tests/testdata/gsm8k-v0-greedy_until index 43a57cab39..d49400007f 100644 --- a/tests/testdata/gsm8k-v0-greedy_until +++ b/tests/testdata/gsm8k-v0-greedy_until @@ -1 +1 @@ -e7292dbdd7fd8419ba954f2e0701e04c8d0e8842fe053dbf2fe47d926630e35e +e7292dbdd7fd8419ba954f2e0701e04c8d0e8842fe053dbf2fe47d926630e35e \ No newline at end of file diff --git a/tests/testdata/gsm8k-v0-res.json b/tests/testdata/gsm8k-v0-res.json index f542395c78..fb6514a0e7 100644 --- a/tests/testdata/gsm8k-v0-res.json +++ b/tests/testdata/gsm8k-v0-res.json @@ -1 +1 @@ -{"results": {"gsm8k": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"gsm8k": 0}} +{"results": {"gsm8k": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"gsm8k": 0}} \ No newline at end of file diff --git a/tests/testdata/headqa-v0-loglikelihood b/tests/testdata/headqa-v0-loglikelihood index e991bf5a0b..9129d834b6 100644 --- a/tests/testdata/headqa-v0-loglikelihood +++ b/tests/testdata/headqa-v0-loglikelihood @@ -1 +1 @@ -767ca34d9714edd9fb030ddbcc35a64e5180d1e247b0cb557fbb22fdf971ad1f +767ca34d9714edd9fb030ddbcc35a64e5180d1e247b0cb557fbb22fdf971ad1f \ No newline at end of file diff --git a/tests/testdata/headqa-v0-res.json b/tests/testdata/headqa-v0-res.json index dba0624600..adc093cf62 100644 --- a/tests/testdata/headqa-v0-res.json +++ b/tests/testdata/headqa-v0-res.json @@ -1 +1 @@ -{"results": {"headqa": {"acc": 0.23559445660102116, "acc_norm": 0.25018234865062, "acc_norm_stderr": 0.008272783230806014, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa": 0}} +{"results": {"headqa": {"acc": 0.23559445660102116, "acc_norm": 0.25018234865062, "acc_norm_stderr": 0.008272783230806014, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa": 0}} \ No newline at end of file diff --git a/tests/testdata/headqa_en-v0-loglikelihood b/tests/testdata/headqa_en-v0-loglikelihood index c8d26b86e2..11f07878fb 100644 --- a/tests/testdata/headqa_en-v0-loglikelihood +++ b/tests/testdata/headqa_en-v0-loglikelihood @@ -1 +1 @@ -09da45119b12a0144e3081f8fb790c2a22af7b9c3aac42f54423d348a711fbf5 +09da45119b12a0144e3081f8fb790c2a22af7b9c3aac42f54423d348a711fbf5 \ No newline at end of file diff --git a/tests/testdata/headqa_en-v0-res.json b/tests/testdata/headqa_en-v0-res.json index b2353a4d44..6ac5a9c0b8 100644 --- a/tests/testdata/headqa_en-v0-res.json +++ b/tests/testdata/headqa_en-v0-res.json @@ -1 +1 @@ -{"results": {"headqa_en": {"acc": 0.23559445660102116, "acc_norm": 0.2447118891320204, "acc_norm_stderr": 0.008211629406841468, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa_en": 0}} +{"results": {"headqa_en": {"acc": 0.23559445660102116, "acc_norm": 0.2447118891320204, "acc_norm_stderr": 0.008211629406841468, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa_en": 0}} \ No newline at end of file diff --git a/tests/testdata/headqa_es-v0-loglikelihood b/tests/testdata/headqa_es-v0-loglikelihood index e991bf5a0b..9129d834b6 100644 --- a/tests/testdata/headqa_es-v0-loglikelihood +++ b/tests/testdata/headqa_es-v0-loglikelihood @@ -1 +1 @@ -767ca34d9714edd9fb030ddbcc35a64e5180d1e247b0cb557fbb22fdf971ad1f +767ca34d9714edd9fb030ddbcc35a64e5180d1e247b0cb557fbb22fdf971ad1f \ No newline at end of file diff --git a/tests/testdata/headqa_es-v0-res.json b/tests/testdata/headqa_es-v0-res.json index 878dd5197a..0964db9bbb 100644 --- a/tests/testdata/headqa_es-v0-res.json +++ b/tests/testdata/headqa_es-v0-res.json @@ -1 +1 @@ -{"results": {"headqa_es": {"acc": 0.23559445660102116, "acc_norm": 0.25018234865062, "acc_norm_stderr": 0.008272783230806014, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa_es": 0}} +{"results": {"headqa_es": {"acc": 0.23559445660102116, "acc_norm": 0.25018234865062, "acc_norm_stderr": 0.008272783230806014, "acc_stderr": 0.008105688874297972}}, "versions": {"headqa_es": 0}} \ No newline at end of file diff --git a/tests/testdata/hellaswag-v0-loglikelihood b/tests/testdata/hellaswag-v0-loglikelihood index 96ad6cac9a..c679a3e311 100644 --- a/tests/testdata/hellaswag-v0-loglikelihood +++ b/tests/testdata/hellaswag-v0-loglikelihood @@ -1 +1 @@ -abb808c97d6529eda6c11067837a132c62d25cba0394d720f80cca6df9f7196e +abb808c97d6529eda6c11067837a132c62d25cba0394d720f80cca6df9f7196e \ No newline at end of file diff --git a/tests/testdata/hellaswag-v0-res.json b/tests/testdata/hellaswag-v0-res.json index 8f24fceed4..6be94a6409 100644 --- a/tests/testdata/hellaswag-v0-res.json +++ b/tests/testdata/hellaswag-v0-res.json @@ -1 +1 @@ -{"results": {"hellaswag": {"acc": 0.24965146385182235, "acc_norm": 0.24756024696275641, "acc_norm_stderr": 0.004307128573285236, "acc_stderr": 0.004319267432460666}}, "versions": {"hellaswag": 0}} +{"results": {"hellaswag": {"acc": 0.24965146385182235, "acc_norm": 0.24756024696275641, "acc_norm_stderr": 0.004307128573285236, "acc_stderr": 0.004319267432460666}}, "versions": {"hellaswag": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood b/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood index 7563b14750..d0d0fe872b 100644 --- a/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-abstract_algebra-v0-loglikelihood @@ -1 +1 @@ -e35d1eeb356ac1084d4e9773f028cb3c81ba1c6e5574d598ac4a78aa467cd797 +e35d1eeb356ac1084d4e9773f028cb3c81ba1c6e5574d598ac4a78aa467cd797 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-abstract_algebra-v0-res.json b/tests/testdata/hendrycksTest-abstract_algebra-v0-res.json index 7fc82d2528..dc2c9a0d7d 100644 --- a/tests/testdata/hendrycksTest-abstract_algebra-v0-res.json +++ b/tests/testdata/hendrycksTest-abstract_algebra-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-abstract_algebra": {"acc": 0.32, "acc_norm": 0.34, "acc_norm_stderr": 0.04760952285695235, "acc_stderr": 0.04688261722621504}}, "versions": {"hendrycksTest-abstract_algebra": 0}} +{"results": {"hendrycksTest-abstract_algebra": {"acc": 0.32, "acc_norm": 0.34, "acc_norm_stderr": 0.04760952285695235, "acc_stderr": 0.04688261722621504}}, "versions": {"hendrycksTest-abstract_algebra": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood b/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood index fe9b56c6eb..a7ae5fa705 100644 --- a/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-anatomy-v0-loglikelihood @@ -1 +1 @@ -bf05e04ed8cf61cf3aad294ed3f5a16137775ffdd20f1b129022ddffc1251768 +bf05e04ed8cf61cf3aad294ed3f5a16137775ffdd20f1b129022ddffc1251768 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-anatomy-v0-res.json b/tests/testdata/hendrycksTest-anatomy-v0-res.json index 32086e1f03..67bc2e7be6 100644 --- a/tests/testdata/hendrycksTest-anatomy-v0-res.json +++ b/tests/testdata/hendrycksTest-anatomy-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-anatomy": {"acc": 0.2222222222222222, "acc_norm": 0.23703703703703705, "acc_norm_stderr": 0.03673731683969506, "acc_stderr": 0.0359144408419697}}, "versions": {"hendrycksTest-anatomy": 0}} +{"results": {"hendrycksTest-anatomy": {"acc": 0.2222222222222222, "acc_norm": 0.23703703703703705, "acc_norm_stderr": 0.03673731683969506, "acc_stderr": 0.0359144408419697}}, "versions": {"hendrycksTest-anatomy": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-astronomy-v0-loglikelihood b/tests/testdata/hendrycksTest-astronomy-v0-loglikelihood index 1a2082a1cb..8ecb637cfe 100644 --- a/tests/testdata/hendrycksTest-astronomy-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-astronomy-v0-loglikelihood @@ -1 +1 @@ -bed1e47127cc2893c6aef63b9a0909cca31aa351a703da2a166b01cae03c3311 +bed1e47127cc2893c6aef63b9a0909cca31aa351a703da2a166b01cae03c3311 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-astronomy-v0-res.json b/tests/testdata/hendrycksTest-astronomy-v0-res.json index c9754d0619..d3626ccf80 100644 --- a/tests/testdata/hendrycksTest-astronomy-v0-res.json +++ b/tests/testdata/hendrycksTest-astronomy-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-astronomy": {"acc": 0.2565789473684211, "acc_norm": 0.29605263157894735, "acc_norm_stderr": 0.03715062154998904, "acc_stderr": 0.0355418036802569}}, "versions": {"hendrycksTest-astronomy": 0}} +{"results": {"hendrycksTest-astronomy": {"acc": 0.2565789473684211, "acc_norm": 0.29605263157894735, "acc_norm_stderr": 0.03715062154998904, "acc_stderr": 0.0355418036802569}}, "versions": {"hendrycksTest-astronomy": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-business_ethics-v0-loglikelihood b/tests/testdata/hendrycksTest-business_ethics-v0-loglikelihood index 6a4bc72e2a..a0f8b7c09b 100644 --- a/tests/testdata/hendrycksTest-business_ethics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-business_ethics-v0-loglikelihood @@ -1 +1 @@ -b3b27e9dbad587377d3c8cab1072782de883e245da93a563bd8b3099017b1fc0 +b3b27e9dbad587377d3c8cab1072782de883e245da93a563bd8b3099017b1fc0 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-business_ethics-v0-res.json b/tests/testdata/hendrycksTest-business_ethics-v0-res.json index b57f8c3e87..dcc5116204 100644 --- a/tests/testdata/hendrycksTest-business_ethics-v0-res.json +++ b/tests/testdata/hendrycksTest-business_ethics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-business_ethics": {"acc": 0.29, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.045604802157206845}}, "versions": {"hendrycksTest-business_ethics": 0}} +{"results": {"hendrycksTest-business_ethics": {"acc": 0.29, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.045604802157206845}}, "versions": {"hendrycksTest-business_ethics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-clinical_knowledge-v0-loglikelihood b/tests/testdata/hendrycksTest-clinical_knowledge-v0-loglikelihood index 6734238740..86f54245d5 100644 --- a/tests/testdata/hendrycksTest-clinical_knowledge-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-clinical_knowledge-v0-loglikelihood @@ -1 +1 @@ -fbcb7ce507e0675d811e71e10a67c8d05a6605e29036f46776e04a6588cefbda +fbcb7ce507e0675d811e71e10a67c8d05a6605e29036f46776e04a6588cefbda \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-clinical_knowledge-v0-res.json b/tests/testdata/hendrycksTest-clinical_knowledge-v0-res.json index 7277e99c86..596bb28a93 100644 --- a/tests/testdata/hendrycksTest-clinical_knowledge-v0-res.json +++ b/tests/testdata/hendrycksTest-clinical_knowledge-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-clinical_knowledge": {"acc": 0.23773584905660378, "acc_norm": 0.27169811320754716, "acc_norm_stderr": 0.027377706624670713, "acc_stderr": 0.02619980880756191}}, "versions": {"hendrycksTest-clinical_knowledge": 0}} +{"results": {"hendrycksTest-clinical_knowledge": {"acc": 0.23773584905660378, "acc_norm": 0.27169811320754716, "acc_norm_stderr": 0.027377706624670713, "acc_stderr": 0.02619980880756191}}, "versions": {"hendrycksTest-clinical_knowledge": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_biology-v0-loglikelihood b/tests/testdata/hendrycksTest-college_biology-v0-loglikelihood index 0041165ecb..7f665ef4a1 100644 --- a/tests/testdata/hendrycksTest-college_biology-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-college_biology-v0-loglikelihood @@ -1 +1 @@ -c29e4e67ff91af29b9434884874414d1b1b32ccc32903c6b1639469b19907419 +c29e4e67ff91af29b9434884874414d1b1b32ccc32903c6b1639469b19907419 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_biology-v0-res.json b/tests/testdata/hendrycksTest-college_biology-v0-res.json index fb3cd80392..6705b9cad2 100644 --- a/tests/testdata/hendrycksTest-college_biology-v0-res.json +++ b/tests/testdata/hendrycksTest-college_biology-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-college_biology": {"acc": 0.24305555555555555, "acc_norm": 0.2361111111111111, "acc_norm_stderr": 0.03551446610810826, "acc_stderr": 0.03586879280080341}}, "versions": {"hendrycksTest-college_biology": 0}} +{"results": {"hendrycksTest-college_biology": {"acc": 0.24305555555555555, "acc_norm": 0.2361111111111111, "acc_norm_stderr": 0.03551446610810826, "acc_stderr": 0.03586879280080341}}, "versions": {"hendrycksTest-college_biology": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_chemistry-v0-loglikelihood b/tests/testdata/hendrycksTest-college_chemistry-v0-loglikelihood index c35a97a4dc..52a255e82a 100644 --- a/tests/testdata/hendrycksTest-college_chemistry-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-college_chemistry-v0-loglikelihood @@ -1 +1 @@ -044752b21540db95118b8cbe7e75c4c9b8758e27df56543deaeadec7f749a28d +044752b21540db95118b8cbe7e75c4c9b8758e27df56543deaeadec7f749a28d \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_chemistry-v0-res.json b/tests/testdata/hendrycksTest-college_chemistry-v0-res.json index 91bbb16f1a..4dc95a151a 100644 --- a/tests/testdata/hendrycksTest-college_chemistry-v0-res.json +++ b/tests/testdata/hendrycksTest-college_chemistry-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-college_chemistry": {"acc": 0.28, "acc_norm": 0.26, "acc_norm_stderr": 0.04408440022768078, "acc_stderr": 0.04512608598542127}}, "versions": {"hendrycksTest-college_chemistry": 0}} +{"results": {"hendrycksTest-college_chemistry": {"acc": 0.28, "acc_norm": 0.26, "acc_norm_stderr": 0.04408440022768078, "acc_stderr": 0.04512608598542127}}, "versions": {"hendrycksTest-college_chemistry": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_computer_science-v0-loglikelihood b/tests/testdata/hendrycksTest-college_computer_science-v0-loglikelihood index c9076ffdd8..695bc8c315 100644 --- a/tests/testdata/hendrycksTest-college_computer_science-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-college_computer_science-v0-loglikelihood @@ -1 +1 @@ -4ea26ad780290429ac5a3317559c154848d662bd40532c966458ba6f2a32d0a3 +4ea26ad780290429ac5a3317559c154848d662bd40532c966458ba6f2a32d0a3 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_computer_science-v0-res.json b/tests/testdata/hendrycksTest-college_computer_science-v0-res.json index 82ba6d8d7f..aea595c09f 100644 --- a/tests/testdata/hendrycksTest-college_computer_science-v0-res.json +++ b/tests/testdata/hendrycksTest-college_computer_science-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-college_computer_science": {"acc": 0.22, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909282, "acc_stderr": 0.041633319989322695}}, "versions": {"hendrycksTest-college_computer_science": 0}} +{"results": {"hendrycksTest-college_computer_science": {"acc": 0.22, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909282, "acc_stderr": 0.041633319989322695}}, "versions": {"hendrycksTest-college_computer_science": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_mathematics-v0-loglikelihood b/tests/testdata/hendrycksTest-college_mathematics-v0-loglikelihood index 69d6270c27..a840b6b642 100644 --- a/tests/testdata/hendrycksTest-college_mathematics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-college_mathematics-v0-loglikelihood @@ -1 +1 @@ -e9fe80752686527281f834d2397875b4580581434b94799f9de6aaa450bd73ff +e9fe80752686527281f834d2397875b4580581434b94799f9de6aaa450bd73ff \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_mathematics-v0-res.json b/tests/testdata/hendrycksTest-college_mathematics-v0-res.json index d3b2fce1ed..766b3388ed 100644 --- a/tests/testdata/hendrycksTest-college_mathematics-v0-res.json +++ b/tests/testdata/hendrycksTest-college_mathematics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-college_mathematics": {"acc": 0.18, "acc_norm": 0.2, "acc_norm_stderr": 0.04020151261036844, "acc_stderr": 0.038612291966536955}}, "versions": {"hendrycksTest-college_mathematics": 0}} +{"results": {"hendrycksTest-college_mathematics": {"acc": 0.18, "acc_norm": 0.2, "acc_norm_stderr": 0.04020151261036844, "acc_stderr": 0.038612291966536955}}, "versions": {"hendrycksTest-college_mathematics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_medicine-v0-loglikelihood b/tests/testdata/hendrycksTest-college_medicine-v0-loglikelihood index 8ce2673f48..2fb96497d1 100644 --- a/tests/testdata/hendrycksTest-college_medicine-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-college_medicine-v0-loglikelihood @@ -1 +1 @@ -dd6e0a9be1407890e9f8cd4434fb6aa4752ab3d2473837fd465ad99f60ad685e +dd6e0a9be1407890e9f8cd4434fb6aa4752ab3d2473837fd465ad99f60ad685e \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_medicine-v0-res.json b/tests/testdata/hendrycksTest-college_medicine-v0-res.json index 2045d09d11..524552c9bb 100644 --- a/tests/testdata/hendrycksTest-college_medicine-v0-res.json +++ b/tests/testdata/hendrycksTest-college_medicine-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-college_medicine": {"acc": 0.27167630057803466, "acc_norm": 0.2543352601156069, "acc_norm_stderr": 0.0332055644308557, "acc_stderr": 0.03391750322321659}}, "versions": {"hendrycksTest-college_medicine": 0}} +{"results": {"hendrycksTest-college_medicine": {"acc": 0.27167630057803466, "acc_norm": 0.2543352601156069, "acc_norm_stderr": 0.0332055644308557, "acc_stderr": 0.03391750322321659}}, "versions": {"hendrycksTest-college_medicine": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_physics-v0-loglikelihood b/tests/testdata/hendrycksTest-college_physics-v0-loglikelihood index 176fd28e6b..7c2e2f4bf7 100644 --- a/tests/testdata/hendrycksTest-college_physics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-college_physics-v0-loglikelihood @@ -1 +1 @@ -704a7671ef981fb95594782bc446dd632e87ebdbe89436a0603b714fb5786c75 +704a7671ef981fb95594782bc446dd632e87ebdbe89436a0603b714fb5786c75 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-college_physics-v0-res.json b/tests/testdata/hendrycksTest-college_physics-v0-res.json index 5b31788208..97e56f2ae6 100644 --- a/tests/testdata/hendrycksTest-college_physics-v0-res.json +++ b/tests/testdata/hendrycksTest-college_physics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-college_physics": {"acc": 0.23529411764705882, "acc_norm": 0.23529411764705882, "acc_norm_stderr": 0.04220773659171453, "acc_stderr": 0.04220773659171452}}, "versions": {"hendrycksTest-college_physics": 0}} +{"results": {"hendrycksTest-college_physics": {"acc": 0.23529411764705882, "acc_norm": 0.23529411764705882, "acc_norm_stderr": 0.04220773659171453, "acc_stderr": 0.04220773659171452}}, "versions": {"hendrycksTest-college_physics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-computer_security-v0-loglikelihood b/tests/testdata/hendrycksTest-computer_security-v0-loglikelihood index 149f9afbfd..d4c0ee2d78 100644 --- a/tests/testdata/hendrycksTest-computer_security-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-computer_security-v0-loglikelihood @@ -1 +1 @@ -a8a1892d1906cc3e7ffd321043f0a60f3b8b69ef76e5c6ff03c6ea41dc87d0cb +a8a1892d1906cc3e7ffd321043f0a60f3b8b69ef76e5c6ff03c6ea41dc87d0cb \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-computer_security-v0-res.json b/tests/testdata/hendrycksTest-computer_security-v0-res.json index e624212b1d..60f02eba9c 100644 --- a/tests/testdata/hendrycksTest-computer_security-v0-res.json +++ b/tests/testdata/hendrycksTest-computer_security-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-computer_security": {"acc": 0.24, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.042923469599092816}}, "versions": {"hendrycksTest-computer_security": 0}} +{"results": {"hendrycksTest-computer_security": {"acc": 0.24, "acc_norm": 0.27, "acc_norm_stderr": 0.044619604333847394, "acc_stderr": 0.042923469599092816}}, "versions": {"hendrycksTest-computer_security": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-conceptual_physics-v0-loglikelihood b/tests/testdata/hendrycksTest-conceptual_physics-v0-loglikelihood index bffd2ada62..05c4db0e22 100644 --- a/tests/testdata/hendrycksTest-conceptual_physics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-conceptual_physics-v0-loglikelihood @@ -1 +1 @@ -622f191ccfc7a597d99f39897ebe3f95a9ddce0e662fcfb411aa554b289bb355 +622f191ccfc7a597d99f39897ebe3f95a9ddce0e662fcfb411aa554b289bb355 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json b/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json index d60c387ab7..1388bcdcd9 100644 --- a/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json +++ b/tests/testdata/hendrycksTest-conceptual_physics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-conceptual_physics": {"acc": 0.2680851063829787, "acc_norm": 0.2553191489361702, "acc_norm_stderr": 0.028504856470514185, "acc_stderr": 0.028957342788342347}}, "versions": {"hendrycksTest-conceptual_physics": 0}} +{"results": {"hendrycksTest-conceptual_physics": {"acc": 0.2680851063829787, "acc_norm": 0.2553191489361702, "acc_norm_stderr": 0.028504856470514185, "acc_stderr": 0.028957342788342347}}, "versions": {"hendrycksTest-conceptual_physics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-econometrics-v0-loglikelihood b/tests/testdata/hendrycksTest-econometrics-v0-loglikelihood index 80b70a8300..ed3332edda 100644 --- a/tests/testdata/hendrycksTest-econometrics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-econometrics-v0-loglikelihood @@ -1 +1 @@ -cde76ba2c7382b4876e17136c94f52aca2774e50342ab757b2a2d18da370dcb6 +cde76ba2c7382b4876e17136c94f52aca2774e50342ab757b2a2d18da370dcb6 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-econometrics-v0-res.json b/tests/testdata/hendrycksTest-econometrics-v0-res.json index d56eb5a560..4656fac3c3 100644 --- a/tests/testdata/hendrycksTest-econometrics-v0-res.json +++ b/tests/testdata/hendrycksTest-econometrics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-econometrics": {"acc": 0.24561403508771928, "acc_norm": 0.24561403508771928, "acc_norm_stderr": 0.04049339297748142, "acc_stderr": 0.040493392977481425}}, "versions": {"hendrycksTest-econometrics": 0}} +{"results": {"hendrycksTest-econometrics": {"acc": 0.24561403508771928, "acc_norm": 0.24561403508771928, "acc_norm_stderr": 0.04049339297748142, "acc_stderr": 0.040493392977481425}}, "versions": {"hendrycksTest-econometrics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-electrical_engineering-v0-loglikelihood b/tests/testdata/hendrycksTest-electrical_engineering-v0-loglikelihood index 7311e57c5a..9c9e72efdf 100644 --- a/tests/testdata/hendrycksTest-electrical_engineering-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-electrical_engineering-v0-loglikelihood @@ -1 +1 @@ -b9b5d8b8bb02696302ec6bc2a99bf987a5504d3bae0e529d2c8f263538c97518 +b9b5d8b8bb02696302ec6bc2a99bf987a5504d3bae0e529d2c8f263538c97518 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-electrical_engineering-v0-res.json b/tests/testdata/hendrycksTest-electrical_engineering-v0-res.json index 2dacd09ebb..13b76c1d5f 100644 --- a/tests/testdata/hendrycksTest-electrical_engineering-v0-res.json +++ b/tests/testdata/hendrycksTest-electrical_engineering-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-electrical_engineering": {"acc": 0.2689655172413793, "acc_norm": 0.2827586206896552, "acc_norm_stderr": 0.037528339580033376, "acc_stderr": 0.036951833116502325}}, "versions": {"hendrycksTest-electrical_engineering": 0}} +{"results": {"hendrycksTest-electrical_engineering": {"acc": 0.2689655172413793, "acc_norm": 0.2827586206896552, "acc_norm_stderr": 0.037528339580033376, "acc_stderr": 0.036951833116502325}}, "versions": {"hendrycksTest-electrical_engineering": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood b/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood index cac4a7eaa1..e281f72feb 100644 --- a/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-elementary_mathematics-v0-loglikelihood @@ -1 +1 @@ -6b21f5cd5606268421a667152ec989424b66905c02adbab8d4ff6bb9d21b77d1 +6b21f5cd5606268421a667152ec989424b66905c02adbab8d4ff6bb9d21b77d1 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-elementary_mathematics-v0-res.json b/tests/testdata/hendrycksTest-elementary_mathematics-v0-res.json index 19947fd21e..84cd983ee9 100644 --- a/tests/testdata/hendrycksTest-elementary_mathematics-v0-res.json +++ b/tests/testdata/hendrycksTest-elementary_mathematics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-elementary_mathematics": {"acc": 0.2724867724867725, "acc_norm": 0.2830687830687831, "acc_norm_stderr": 0.023201392938194978, "acc_stderr": 0.022930973071633345}}, "versions": {"hendrycksTest-elementary_mathematics": 0}} +{"results": {"hendrycksTest-elementary_mathematics": {"acc": 0.2724867724867725, "acc_norm": 0.2830687830687831, "acc_norm_stderr": 0.023201392938194978, "acc_stderr": 0.022930973071633345}}, "versions": {"hendrycksTest-elementary_mathematics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-formal_logic-v0-loglikelihood b/tests/testdata/hendrycksTest-formal_logic-v0-loglikelihood index 8ac034a4e5..ef6bec3f70 100644 --- a/tests/testdata/hendrycksTest-formal_logic-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-formal_logic-v0-loglikelihood @@ -1 +1 @@ -c0d0f0c008a5f3faf2f6f4268d87bbc09c40bb66ae08cf38eea0bf2e519c5a59 +c0d0f0c008a5f3faf2f6f4268d87bbc09c40bb66ae08cf38eea0bf2e519c5a59 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-formal_logic-v0-res.json b/tests/testdata/hendrycksTest-formal_logic-v0-res.json index 3ee6766b7f..acde01d4d7 100644 --- a/tests/testdata/hendrycksTest-formal_logic-v0-res.json +++ b/tests/testdata/hendrycksTest-formal_logic-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-formal_logic": {"acc": 0.25396825396825395, "acc_norm": 0.2698412698412698, "acc_norm_stderr": 0.03970158273235172, "acc_stderr": 0.03893259610604674}}, "versions": {"hendrycksTest-formal_logic": 0}} +{"results": {"hendrycksTest-formal_logic": {"acc": 0.25396825396825395, "acc_norm": 0.2698412698412698, "acc_norm_stderr": 0.03970158273235172, "acc_stderr": 0.03893259610604674}}, "versions": {"hendrycksTest-formal_logic": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-global_facts-v0-loglikelihood b/tests/testdata/hendrycksTest-global_facts-v0-loglikelihood index 8c92f96a48..a4751fdbfa 100644 --- a/tests/testdata/hendrycksTest-global_facts-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-global_facts-v0-loglikelihood @@ -1 +1 @@ -9fdc85240b8170839278b1e883ee0868611d84dce202cb8aa037c841ec76d089 +9fdc85240b8170839278b1e883ee0868611d84dce202cb8aa037c841ec76d089 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-global_facts-v0-res.json b/tests/testdata/hendrycksTest-global_facts-v0-res.json index 94a30256e1..d2fff47bcb 100644 --- a/tests/testdata/hendrycksTest-global_facts-v0-res.json +++ b/tests/testdata/hendrycksTest-global_facts-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-global_facts": {"acc": 0.23, "acc_norm": 0.23, "acc_norm_stderr": 0.04229525846816507, "acc_stderr": 0.04229525846816507}}, "versions": {"hendrycksTest-global_facts": 0}} +{"results": {"hendrycksTest-global_facts": {"acc": 0.23, "acc_norm": 0.23, "acc_norm_stderr": 0.04229525846816507, "acc_stderr": 0.04229525846816507}}, "versions": {"hendrycksTest-global_facts": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_biology-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_biology-v0-loglikelihood index d38bb991d9..1e2c01e2b1 100644 --- a/tests/testdata/hendrycksTest-high_school_biology-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_biology-v0-loglikelihood @@ -1 +1 @@ -d4dc051f37a49dc75c218741e87bc826fd44f31ee1309b55e0f33bd191c1bc78 +d4dc051f37a49dc75c218741e87bc826fd44f31ee1309b55e0f33bd191c1bc78 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_biology-v0-res.json b/tests/testdata/hendrycksTest-high_school_biology-v0-res.json index c581516ebb..a666d9ce9c 100644 --- a/tests/testdata/hendrycksTest-high_school_biology-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_biology-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_biology": {"acc": 0.23870967741935484, "acc_norm": 0.2709677419354839, "acc_norm_stderr": 0.025284416114900152, "acc_stderr": 0.024251071262208834}}, "versions": {"hendrycksTest-high_school_biology": 0}} +{"results": {"hendrycksTest-high_school_biology": {"acc": 0.23870967741935484, "acc_norm": 0.2709677419354839, "acc_norm_stderr": 0.025284416114900152, "acc_stderr": 0.024251071262208834}}, "versions": {"hendrycksTest-high_school_biology": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_chemistry-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_chemistry-v0-loglikelihood index a519bee854..d0ca97d6a5 100644 --- a/tests/testdata/hendrycksTest-high_school_chemistry-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_chemistry-v0-loglikelihood @@ -1 +1 @@ -f4f338e45415c4b5ee7f1d249155bcd910c8401bd1436760a5ec61cb6bb211b6 +f4f338e45415c4b5ee7f1d249155bcd910c8401bd1436760a5ec61cb6bb211b6 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_chemistry-v0-res.json b/tests/testdata/hendrycksTest-high_school_chemistry-v0-res.json index 00a28f1755..2d81594963 100644 --- a/tests/testdata/hendrycksTest-high_school_chemistry-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_chemistry-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_chemistry": {"acc": 0.2857142857142857, "acc_norm": 0.2660098522167488, "acc_norm_stderr": 0.031089826002937523, "acc_stderr": 0.031785297106427496}}, "versions": {"hendrycksTest-high_school_chemistry": 0}} +{"results": {"hendrycksTest-high_school_chemistry": {"acc": 0.2857142857142857, "acc_norm": 0.2660098522167488, "acc_norm_stderr": 0.031089826002937523, "acc_stderr": 0.031785297106427496}}, "versions": {"hendrycksTest-high_school_chemistry": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_computer_science-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_computer_science-v0-loglikelihood index 8d175c549b..a421564657 100644 --- a/tests/testdata/hendrycksTest-high_school_computer_science-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_computer_science-v0-loglikelihood @@ -1 +1 @@ -870d5a6300c527077aaf6baa3e750e75fa840b41657cf82549f39b768b14862d +870d5a6300c527077aaf6baa3e750e75fa840b41657cf82549f39b768b14862d \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_computer_science-v0-res.json b/tests/testdata/hendrycksTest-high_school_computer_science-v0-res.json index 2a27c1641d..bbc2dacf5f 100644 --- a/tests/testdata/hendrycksTest-high_school_computer_science-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_computer_science-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_computer_science": {"acc": 0.2, "acc_norm": 0.22, "acc_norm_stderr": 0.04163331998932269, "acc_stderr": 0.04020151261036845}}, "versions": {"hendrycksTest-high_school_computer_science": 0}} +{"results": {"hendrycksTest-high_school_computer_science": {"acc": 0.2, "acc_norm": 0.22, "acc_norm_stderr": 0.04163331998932269, "acc_stderr": 0.04020151261036845}}, "versions": {"hendrycksTest-high_school_computer_science": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_european_history-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_european_history-v0-loglikelihood index c900c590d6..eec5858ef9 100644 --- a/tests/testdata/hendrycksTest-high_school_european_history-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_european_history-v0-loglikelihood @@ -1 +1 @@ -d8070e113be9d420fef5578cb69c70df4ea5118f9b18553023fd9efd5ff0b7f4 +d8070e113be9d420fef5578cb69c70df4ea5118f9b18553023fd9efd5ff0b7f4 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_european_history-v0-res.json b/tests/testdata/hendrycksTest-high_school_european_history-v0-res.json index 0dcb282bcd..b5cea9cbe3 100644 --- a/tests/testdata/hendrycksTest-high_school_european_history-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_european_history-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_european_history": {"acc": 0.23636363636363636, "acc_norm": 0.24242424242424243, "acc_norm_stderr": 0.03346409881055953, "acc_stderr": 0.033175059300091805}}, "versions": {"hendrycksTest-high_school_european_history": 0}} +{"results": {"hendrycksTest-high_school_european_history": {"acc": 0.23636363636363636, "acc_norm": 0.24242424242424243, "acc_norm_stderr": 0.03346409881055953, "acc_stderr": 0.033175059300091805}}, "versions": {"hendrycksTest-high_school_european_history": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_geography-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_geography-v0-loglikelihood index 5d28be4d50..ac80d17880 100644 --- a/tests/testdata/hendrycksTest-high_school_geography-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_geography-v0-loglikelihood @@ -1 +1 @@ -add45970ea3865be7c7a31f788a835949f6937ac73f699b122ca56a3431e95f8 +add45970ea3865be7c7a31f788a835949f6937ac73f699b122ca56a3431e95f8 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_geography-v0-res.json b/tests/testdata/hendrycksTest-high_school_geography-v0-res.json index 2c18a78811..0fb76aa9ba 100644 --- a/tests/testdata/hendrycksTest-high_school_geography-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_geography-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_geography": {"acc": 0.2474747474747475, "acc_norm": 0.2777777777777778, "acc_norm_stderr": 0.03191178226713547, "acc_stderr": 0.03074630074212452}}, "versions": {"hendrycksTest-high_school_geography": 0}} +{"results": {"hendrycksTest-high_school_geography": {"acc": 0.2474747474747475, "acc_norm": 0.2777777777777778, "acc_norm_stderr": 0.03191178226713547, "acc_stderr": 0.03074630074212452}}, "versions": {"hendrycksTest-high_school_geography": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-loglikelihood index 462d8186bd..12ea726b4b 100644 --- a/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-loglikelihood @@ -1 +1 @@ -11f40d8f48ba5cd739e21d54c3c04d3761f81df5cb7ddd77df868d24ced44b49 +11f40d8f48ba5cd739e21d54c3c04d3761f81df5cb7ddd77df868d24ced44b49 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-res.json b/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-res.json index 4cf14d721d..16cc02ff0a 100644 --- a/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_government_and_politics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_government_and_politics": {"acc": 0.24352331606217617, "acc_norm": 0.23834196891191708, "acc_norm_stderr": 0.03074890536390988, "acc_stderr": 0.030975436386845436}}, "versions": {"hendrycksTest-high_school_government_and_politics": 0}} +{"results": {"hendrycksTest-high_school_government_and_politics": {"acc": 0.24352331606217617, "acc_norm": 0.23834196891191708, "acc_norm_stderr": 0.03074890536390988, "acc_stderr": 0.030975436386845436}}, "versions": {"hendrycksTest-high_school_government_and_politics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-loglikelihood index ef61269b60..c0106d373d 100644 --- a/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-loglikelihood @@ -1 +1 @@ -ce4faae2fb6628caa48f6fc74cbc848880db49e6ff51079392778a2322bcefef +ce4faae2fb6628caa48f6fc74cbc848880db49e6ff51079392778a2322bcefef \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-res.json b/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-res.json index 9d55ab39a1..fb6835039c 100644 --- a/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_macroeconomics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_macroeconomics": {"acc": 0.2230769230769231, "acc_norm": 0.22564102564102564, "acc_norm_stderr": 0.021193632525148522, "acc_stderr": 0.021107730127244}}, "versions": {"hendrycksTest-high_school_macroeconomics": 0}} +{"results": {"hendrycksTest-high_school_macroeconomics": {"acc": 0.2230769230769231, "acc_norm": 0.22564102564102564, "acc_norm_stderr": 0.021193632525148522, "acc_stderr": 0.021107730127244}}, "versions": {"hendrycksTest-high_school_macroeconomics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_mathematics-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_mathematics-v0-loglikelihood index 085c71f415..dc86769fa9 100644 --- a/tests/testdata/hendrycksTest-high_school_mathematics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_mathematics-v0-loglikelihood @@ -1 +1 @@ -ab368d16fc4648ad27940f71abd266366663f51db612f732a0b9b0eea28de9f8 +ab368d16fc4648ad27940f71abd266366663f51db612f732a0b9b0eea28de9f8 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_mathematics-v0-res.json b/tests/testdata/hendrycksTest-high_school_mathematics-v0-res.json index 711db35dd1..cb3a3ec068 100644 --- a/tests/testdata/hendrycksTest-high_school_mathematics-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_mathematics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_mathematics": {"acc": 0.22592592592592592, "acc_norm": 0.24814814814814815, "acc_norm_stderr": 0.0263357394040558, "acc_stderr": 0.025497532639609553}}, "versions": {"hendrycksTest-high_school_mathematics": 0}} +{"results": {"hendrycksTest-high_school_mathematics": {"acc": 0.22592592592592592, "acc_norm": 0.24814814814814815, "acc_norm_stderr": 0.0263357394040558, "acc_stderr": 0.025497532639609553}}, "versions": {"hendrycksTest-high_school_mathematics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_microeconomics-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_microeconomics-v0-loglikelihood index 5a806df0b3..37962bf9fb 100644 --- a/tests/testdata/hendrycksTest-high_school_microeconomics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_microeconomics-v0-loglikelihood @@ -1 +1 @@ -513b998585ebc1ebdefca6435b7c84fd73dc36fc80321a22503467f04efed23e +513b998585ebc1ebdefca6435b7c84fd73dc36fc80321a22503467f04efed23e \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_microeconomics-v0-res.json b/tests/testdata/hendrycksTest-high_school_microeconomics-v0-res.json index f5bd4ff6d0..cf698d181c 100644 --- a/tests/testdata/hendrycksTest-high_school_microeconomics-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_microeconomics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_microeconomics": {"acc": 0.24369747899159663, "acc_norm": 0.22268907563025211, "acc_norm_stderr": 0.027025433498882378, "acc_stderr": 0.027886828078380558}}, "versions": {"hendrycksTest-high_school_microeconomics": 0}} +{"results": {"hendrycksTest-high_school_microeconomics": {"acc": 0.24369747899159663, "acc_norm": 0.22268907563025211, "acc_norm_stderr": 0.027025433498882378, "acc_stderr": 0.027886828078380558}}, "versions": {"hendrycksTest-high_school_microeconomics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood index 7a2e1602f4..49a780bc97 100644 --- a/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_physics-v0-loglikelihood @@ -1 +1 @@ -dae59e82d3d4d8dec82239d9620b72cc47bb6efbe2f1c2f9b9d23e849c9c5e32 +dae59e82d3d4d8dec82239d9620b72cc47bb6efbe2f1c2f9b9d23e849c9c5e32 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_physics-v0-res.json b/tests/testdata/hendrycksTest-high_school_physics-v0-res.json index 3b49922213..b6b3bb9d01 100644 --- a/tests/testdata/hendrycksTest-high_school_physics-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_physics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_physics": {"acc": 0.2582781456953642, "acc_norm": 0.271523178807947, "acc_norm_stderr": 0.03631329803969653, "acc_stderr": 0.035737053147634576}}, "versions": {"hendrycksTest-high_school_physics": 0}} +{"results": {"hendrycksTest-high_school_physics": {"acc": 0.2582781456953642, "acc_norm": 0.271523178807947, "acc_norm_stderr": 0.03631329803969653, "acc_stderr": 0.035737053147634576}}, "versions": {"hendrycksTest-high_school_physics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_psychology-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_psychology-v0-loglikelihood index cb2c6e48b5..0f39ddfde7 100644 --- a/tests/testdata/hendrycksTest-high_school_psychology-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_psychology-v0-loglikelihood @@ -1 +1 @@ -0e4c8d13806d3696167e40544d2d114c557c10c74bc61fcb9c51bbfced0266ef +0e4c8d13806d3696167e40544d2d114c557c10c74bc61fcb9c51bbfced0266ef \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_psychology-v0-res.json b/tests/testdata/hendrycksTest-high_school_psychology-v0-res.json index a4fadea7ea..42b781149b 100644 --- a/tests/testdata/hendrycksTest-high_school_psychology-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_psychology-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_psychology": {"acc": 0.24587155963302754, "acc_norm": 0.23302752293577983, "acc_norm_stderr": 0.018125669180861493, "acc_stderr": 0.018461940968708436}}, "versions": {"hendrycksTest-high_school_psychology": 0}} +{"results": {"hendrycksTest-high_school_psychology": {"acc": 0.24587155963302754, "acc_norm": 0.23302752293577983, "acc_norm_stderr": 0.018125669180861493, "acc_stderr": 0.018461940968708436}}, "versions": {"hendrycksTest-high_school_psychology": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_statistics-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_statistics-v0-loglikelihood index 7b0eb829b0..8a915ef7fc 100644 --- a/tests/testdata/hendrycksTest-high_school_statistics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_statistics-v0-loglikelihood @@ -1 +1 @@ -33d1d6eaaa2c3a944bf49d3f220a4efc328d7c3b3465b7cec40ae36d8984b75f +33d1d6eaaa2c3a944bf49d3f220a4efc328d7c3b3465b7cec40ae36d8984b75f \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json b/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json index 77ca941749..4c6a21d7da 100644 --- a/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_statistics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_statistics": {"acc": 0.2962962962962963, "acc_norm": 0.3055555555555556, "acc_norm_stderr": 0.03141554629402544, "acc_stderr": 0.03114144782353604}}, "versions": {"hendrycksTest-high_school_statistics": 0}} +{"results": {"hendrycksTest-high_school_statistics": {"acc": 0.2962962962962963, "acc_norm": 0.3055555555555556, "acc_norm_stderr": 0.03141554629402544, "acc_stderr": 0.03114144782353604}}, "versions": {"hendrycksTest-high_school_statistics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_us_history-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_us_history-v0-loglikelihood index 386bedb860..e05b91503e 100644 --- a/tests/testdata/hendrycksTest-high_school_us_history-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_us_history-v0-loglikelihood @@ -1 +1 @@ -8c65c1a28330dd001d395ac11f1bb80c3b33f5935f503e74067aef6e9e1d9d9b +8c65c1a28330dd001d395ac11f1bb80c3b33f5935f503e74067aef6e9e1d9d9b \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_us_history-v0-res.json b/tests/testdata/hendrycksTest-high_school_us_history-v0-res.json index f6460f4248..5b7a76909c 100644 --- a/tests/testdata/hendrycksTest-high_school_us_history-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_us_history-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_us_history": {"acc": 0.29901960784313725, "acc_norm": 0.28431372549019607, "acc_norm_stderr": 0.03166009679399814, "acc_stderr": 0.03213325717373618}}, "versions": {"hendrycksTest-high_school_us_history": 0}} +{"results": {"hendrycksTest-high_school_us_history": {"acc": 0.29901960784313725, "acc_norm": 0.28431372549019607, "acc_norm_stderr": 0.03166009679399814, "acc_stderr": 0.03213325717373618}}, "versions": {"hendrycksTest-high_school_us_history": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood b/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood index c938b0a287..228dfe072c 100644 --- a/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-high_school_world_history-v0-loglikelihood @@ -1 +1 @@ -1c8b994bd9a63ec874fc8d0e3a27077118b7adc472306b2fd6c55635a78b9d52 +1c8b994bd9a63ec874fc8d0e3a27077118b7adc472306b2fd6c55635a78b9d52 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-high_school_world_history-v0-res.json b/tests/testdata/hendrycksTest-high_school_world_history-v0-res.json index 30ea7361db..ca1bf95b9d 100644 --- a/tests/testdata/hendrycksTest-high_school_world_history-v0-res.json +++ b/tests/testdata/hendrycksTest-high_school_world_history-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-high_school_world_history": {"acc": 0.23628691983122363, "acc_norm": 0.24472573839662448, "acc_norm_stderr": 0.02798569938703642, "acc_stderr": 0.027652153144159263}}, "versions": {"hendrycksTest-high_school_world_history": 0}} +{"results": {"hendrycksTest-high_school_world_history": {"acc": 0.23628691983122363, "acc_norm": 0.24472573839662448, "acc_norm_stderr": 0.02798569938703642, "acc_stderr": 0.027652153144159263}}, "versions": {"hendrycksTest-high_school_world_history": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-human_aging-v0-loglikelihood b/tests/testdata/hendrycksTest-human_aging-v0-loglikelihood index 9c6422494a..d34fa52980 100644 --- a/tests/testdata/hendrycksTest-human_aging-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-human_aging-v0-loglikelihood @@ -1 +1 @@ -0880b3a78f8d7b17ffc612031427b9085367cf65dabe2a68c4b64e3171d17e88 +0880b3a78f8d7b17ffc612031427b9085367cf65dabe2a68c4b64e3171d17e88 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-human_aging-v0-res.json b/tests/testdata/hendrycksTest-human_aging-v0-res.json index 95d8742924..061678f2e4 100644 --- a/tests/testdata/hendrycksTest-human_aging-v0-res.json +++ b/tests/testdata/hendrycksTest-human_aging-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-human_aging": {"acc": 0.21524663677130046, "acc_norm": 0.17937219730941703, "acc_norm_stderr": 0.025749819569192804, "acc_stderr": 0.02758406660220827}}, "versions": {"hendrycksTest-human_aging": 0}} +{"results": {"hendrycksTest-human_aging": {"acc": 0.21524663677130046, "acc_norm": 0.17937219730941703, "acc_norm_stderr": 0.025749819569192804, "acc_stderr": 0.02758406660220827}}, "versions": {"hendrycksTest-human_aging": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-human_sexuality-v0-loglikelihood b/tests/testdata/hendrycksTest-human_sexuality-v0-loglikelihood index 7626c89962..b3d3ae438c 100644 --- a/tests/testdata/hendrycksTest-human_sexuality-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-human_sexuality-v0-loglikelihood @@ -1 +1 @@ -4b07922fa1d549b655c21440b13d869263ce7dd9771d8147c450f11c91d26c10 +4b07922fa1d549b655c21440b13d869263ce7dd9771d8147c450f11c91d26c10 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-human_sexuality-v0-res.json b/tests/testdata/hendrycksTest-human_sexuality-v0-res.json index 960a6e3b52..091d7352ce 100644 --- a/tests/testdata/hendrycksTest-human_sexuality-v0-res.json +++ b/tests/testdata/hendrycksTest-human_sexuality-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-human_sexuality": {"acc": 0.22137404580152673, "acc_norm": 0.22900763358778625, "acc_norm_stderr": 0.036853466317118506, "acc_stderr": 0.0364129708131373}}, "versions": {"hendrycksTest-human_sexuality": 0}} +{"results": {"hendrycksTest-human_sexuality": {"acc": 0.22137404580152673, "acc_norm": 0.22900763358778625, "acc_norm_stderr": 0.036853466317118506, "acc_stderr": 0.0364129708131373}}, "versions": {"hendrycksTest-human_sexuality": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-international_law-v0-loglikelihood b/tests/testdata/hendrycksTest-international_law-v0-loglikelihood index 23c6b58e9d..2b6aa8d605 100644 --- a/tests/testdata/hendrycksTest-international_law-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-international_law-v0-loglikelihood @@ -1 +1 @@ -ea9b2cefd27959db564168f6ad1169a5eaa012fc5a5d5b8faf9e34d94e335dc1 +ea9b2cefd27959db564168f6ad1169a5eaa012fc5a5d5b8faf9e34d94e335dc1 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-international_law-v0-res.json b/tests/testdata/hendrycksTest-international_law-v0-res.json index 97c70d2f2e..bd4edd2394 100644 --- a/tests/testdata/hendrycksTest-international_law-v0-res.json +++ b/tests/testdata/hendrycksTest-international_law-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-international_law": {"acc": 0.2396694214876033, "acc_norm": 0.3140495867768595, "acc_norm_stderr": 0.042369647530410164, "acc_stderr": 0.03896878985070417}}, "versions": {"hendrycksTest-international_law": 0}} +{"results": {"hendrycksTest-international_law": {"acc": 0.2396694214876033, "acc_norm": 0.3140495867768595, "acc_norm_stderr": 0.042369647530410164, "acc_stderr": 0.03896878985070417}}, "versions": {"hendrycksTest-international_law": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-jurisprudence-v0-loglikelihood b/tests/testdata/hendrycksTest-jurisprudence-v0-loglikelihood index 37aea2c8a2..3d55d21e02 100644 --- a/tests/testdata/hendrycksTest-jurisprudence-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-jurisprudence-v0-loglikelihood @@ -1 +1 @@ -cac440189f1ec778e82f4975d88b74689553ecc5116aaa7f76587a50c1a610e0 +cac440189f1ec778e82f4975d88b74689553ecc5116aaa7f76587a50c1a610e0 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-jurisprudence-v0-res.json b/tests/testdata/hendrycksTest-jurisprudence-v0-res.json index 66203b63bf..4ef1819749 100644 --- a/tests/testdata/hendrycksTest-jurisprudence-v0-res.json +++ b/tests/testdata/hendrycksTest-jurisprudence-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-jurisprudence": {"acc": 0.25, "acc_norm": 0.3148148148148148, "acc_norm_stderr": 0.04489931073591312, "acc_stderr": 0.04186091791394607}}, "versions": {"hendrycksTest-jurisprudence": 0}} +{"results": {"hendrycksTest-jurisprudence": {"acc": 0.25, "acc_norm": 0.3148148148148148, "acc_norm_stderr": 0.04489931073591312, "acc_stderr": 0.04186091791394607}}, "versions": {"hendrycksTest-jurisprudence": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood b/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood index 56300c43a8..a5807b5831 100644 --- a/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-logical_fallacies-v0-loglikelihood @@ -1 +1 @@ -2e9449dd803f9e2334dc562d9f04031fd013ed36b883b44ab500533a5dbbface +2e9449dd803f9e2334dc562d9f04031fd013ed36b883b44ab500533a5dbbface \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-logical_fallacies-v0-res.json b/tests/testdata/hendrycksTest-logical_fallacies-v0-res.json index e8ce4b58a8..c5cf5cb467 100644 --- a/tests/testdata/hendrycksTest-logical_fallacies-v0-res.json +++ b/tests/testdata/hendrycksTest-logical_fallacies-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-logical_fallacies": {"acc": 0.20245398773006135, "acc_norm": 0.2147239263803681, "acc_norm_stderr": 0.03226219377286774, "acc_stderr": 0.03157065078911902}}, "versions": {"hendrycksTest-logical_fallacies": 0}} +{"results": {"hendrycksTest-logical_fallacies": {"acc": 0.20245398773006135, "acc_norm": 0.2147239263803681, "acc_norm_stderr": 0.03226219377286774, "acc_stderr": 0.03157065078911902}}, "versions": {"hendrycksTest-logical_fallacies": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-machine_learning-v0-loglikelihood b/tests/testdata/hendrycksTest-machine_learning-v0-loglikelihood index 681794c7dc..53e498ddd4 100644 --- a/tests/testdata/hendrycksTest-machine_learning-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-machine_learning-v0-loglikelihood @@ -1 +1 @@ -7a7138821a66ef946e427b40344cf7f1a916a2926995a85ef731a3bee40cb7ce +7a7138821a66ef946e427b40344cf7f1a916a2926995a85ef731a3bee40cb7ce \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-machine_learning-v0-res.json b/tests/testdata/hendrycksTest-machine_learning-v0-res.json index 9138d9c40a..26be724f24 100644 --- a/tests/testdata/hendrycksTest-machine_learning-v0-res.json +++ b/tests/testdata/hendrycksTest-machine_learning-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-machine_learning": {"acc": 0.23214285714285715, "acc_norm": 0.22321428571428573, "acc_norm_stderr": 0.039523019677025116, "acc_stderr": 0.04007341809755806}}, "versions": {"hendrycksTest-machine_learning": 0}} +{"results": {"hendrycksTest-machine_learning": {"acc": 0.23214285714285715, "acc_norm": 0.22321428571428573, "acc_norm_stderr": 0.039523019677025116, "acc_stderr": 0.04007341809755806}}, "versions": {"hendrycksTest-machine_learning": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-management-v0-loglikelihood b/tests/testdata/hendrycksTest-management-v0-loglikelihood index 02b34a2f8b..5718739857 100644 --- a/tests/testdata/hendrycksTest-management-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-management-v0-loglikelihood @@ -1 +1 @@ -355489f4bd176ab84db5ef4c03d56ddeeeb1b0ad69827122b2d800e1cdc7e5f0 +355489f4bd176ab84db5ef4c03d56ddeeeb1b0ad69827122b2d800e1cdc7e5f0 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-management-v0-res.json b/tests/testdata/hendrycksTest-management-v0-res.json index 7ddab6c17b..7a84623fab 100644 --- a/tests/testdata/hendrycksTest-management-v0-res.json +++ b/tests/testdata/hendrycksTest-management-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-management": {"acc": 0.24271844660194175, "acc_norm": 0.2621359223300971, "acc_norm_stderr": 0.043546310772605956, "acc_stderr": 0.04245022486384495}}, "versions": {"hendrycksTest-management": 0}} +{"results": {"hendrycksTest-management": {"acc": 0.24271844660194175, "acc_norm": 0.2621359223300971, "acc_norm_stderr": 0.043546310772605956, "acc_stderr": 0.04245022486384495}}, "versions": {"hendrycksTest-management": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-marketing-v0-loglikelihood b/tests/testdata/hendrycksTest-marketing-v0-loglikelihood index 809d76d0ab..1d241a9773 100644 --- a/tests/testdata/hendrycksTest-marketing-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-marketing-v0-loglikelihood @@ -1 +1 @@ -b4fa0681fe54671a80509779d4338d744097a7206687f62977df7145dfa74a66 +b4fa0681fe54671a80509779d4338d744097a7206687f62977df7145dfa74a66 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-marketing-v0-res.json b/tests/testdata/hendrycksTest-marketing-v0-res.json index 3bd328bbad..2cc7a93f1c 100644 --- a/tests/testdata/hendrycksTest-marketing-v0-res.json +++ b/tests/testdata/hendrycksTest-marketing-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-marketing": {"acc": 0.2863247863247863, "acc_norm": 0.2905982905982906, "acc_norm_stderr": 0.029745048572674043, "acc_stderr": 0.029614323690456648}}, "versions": {"hendrycksTest-marketing": 0}} +{"results": {"hendrycksTest-marketing": {"acc": 0.2863247863247863, "acc_norm": 0.2905982905982906, "acc_norm_stderr": 0.029745048572674043, "acc_stderr": 0.029614323690456648}}, "versions": {"hendrycksTest-marketing": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-medical_genetics-v0-loglikelihood b/tests/testdata/hendrycksTest-medical_genetics-v0-loglikelihood index 856f74e41c..48d49de839 100644 --- a/tests/testdata/hendrycksTest-medical_genetics-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-medical_genetics-v0-loglikelihood @@ -1 +1 @@ -db6141246889a19dd3f6b9109f314d49c1a70f7a98795858804378b095c4a2fe +db6141246889a19dd3f6b9109f314d49c1a70f7a98795858804378b095c4a2fe \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-medical_genetics-v0-res.json b/tests/testdata/hendrycksTest-medical_genetics-v0-res.json index 24258f7338..eac53bcf4a 100644 --- a/tests/testdata/hendrycksTest-medical_genetics-v0-res.json +++ b/tests/testdata/hendrycksTest-medical_genetics-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-medical_genetics": {"acc": 0.27, "acc_norm": 0.29, "acc_norm_stderr": 0.04560480215720684, "acc_stderr": 0.0446196043338474}}, "versions": {"hendrycksTest-medical_genetics": 0}} +{"results": {"hendrycksTest-medical_genetics": {"acc": 0.27, "acc_norm": 0.29, "acc_norm_stderr": 0.04560480215720684, "acc_stderr": 0.0446196043338474}}, "versions": {"hendrycksTest-medical_genetics": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-miscellaneous-v0-loglikelihood b/tests/testdata/hendrycksTest-miscellaneous-v0-loglikelihood index 2ec63e648a..b09e99721b 100644 --- a/tests/testdata/hendrycksTest-miscellaneous-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-miscellaneous-v0-loglikelihood @@ -1 +1 @@ -972dd88dbbaf09d14766e243cfc233425e7c01a26dbc61bdb9eeefa788822331 +972dd88dbbaf09d14766e243cfc233425e7c01a26dbc61bdb9eeefa788822331 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-miscellaneous-v0-res.json b/tests/testdata/hendrycksTest-miscellaneous-v0-res.json index 8eac1f20ea..5c7859eb3a 100644 --- a/tests/testdata/hendrycksTest-miscellaneous-v0-res.json +++ b/tests/testdata/hendrycksTest-miscellaneous-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-miscellaneous": {"acc": 0.23499361430395913, "acc_norm": 0.2515964240102171, "acc_norm_stderr": 0.015517322365529622, "acc_stderr": 0.015162024152278445}}, "versions": {"hendrycksTest-miscellaneous": 0}} +{"results": {"hendrycksTest-miscellaneous": {"acc": 0.23499361430395913, "acc_norm": 0.2515964240102171, "acc_norm_stderr": 0.015517322365529622, "acc_stderr": 0.015162024152278445}}, "versions": {"hendrycksTest-miscellaneous": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-moral_disputes-v0-loglikelihood b/tests/testdata/hendrycksTest-moral_disputes-v0-loglikelihood index b267c94234..953fc3be48 100644 --- a/tests/testdata/hendrycksTest-moral_disputes-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-moral_disputes-v0-loglikelihood @@ -1 +1 @@ -d6ef028022c02b69d1516973e08bebaa14d8debcf2589a2bb124823178202d20 +d6ef028022c02b69d1516973e08bebaa14d8debcf2589a2bb124823178202d20 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-moral_disputes-v0-res.json b/tests/testdata/hendrycksTest-moral_disputes-v0-res.json index 7e852dd1ae..26ea1c2a75 100644 --- a/tests/testdata/hendrycksTest-moral_disputes-v0-res.json +++ b/tests/testdata/hendrycksTest-moral_disputes-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-moral_disputes": {"acc": 0.24855491329479767, "acc_norm": 0.27167630057803466, "acc_norm_stderr": 0.023948512905468365, "acc_stderr": 0.023267528432100174}}, "versions": {"hendrycksTest-moral_disputes": 0}} +{"results": {"hendrycksTest-moral_disputes": {"acc": 0.24855491329479767, "acc_norm": 0.27167630057803466, "acc_norm_stderr": 0.023948512905468365, "acc_stderr": 0.023267528432100174}}, "versions": {"hendrycksTest-moral_disputes": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-moral_scenarios-v0-loglikelihood b/tests/testdata/hendrycksTest-moral_scenarios-v0-loglikelihood index 727957ef45..d5ea0d8156 100644 --- a/tests/testdata/hendrycksTest-moral_scenarios-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-moral_scenarios-v0-loglikelihood @@ -1 +1 @@ -a8e1882e77728b53c8b86312254d08320d8363fb606d746a8dd145b812f62cf5 +a8e1882e77728b53c8b86312254d08320d8363fb606d746a8dd145b812f62cf5 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-moral_scenarios-v0-res.json b/tests/testdata/hendrycksTest-moral_scenarios-v0-res.json index b66e588bac..62ec159712 100644 --- a/tests/testdata/hendrycksTest-moral_scenarios-v0-res.json +++ b/tests/testdata/hendrycksTest-moral_scenarios-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-moral_scenarios": {"acc": 0.2547486033519553, "acc_norm": 0.25251396648044694, "acc_norm_stderr": 0.014530330201468654, "acc_stderr": 0.014572650383409158}}, "versions": {"hendrycksTest-moral_scenarios": 0}} +{"results": {"hendrycksTest-moral_scenarios": {"acc": 0.2547486033519553, "acc_norm": 0.25251396648044694, "acc_norm_stderr": 0.014530330201468654, "acc_stderr": 0.014572650383409158}}, "versions": {"hendrycksTest-moral_scenarios": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-nutrition-v0-loglikelihood b/tests/testdata/hendrycksTest-nutrition-v0-loglikelihood index 4993a6ab57..2716bebe69 100644 --- a/tests/testdata/hendrycksTest-nutrition-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-nutrition-v0-loglikelihood @@ -1 +1 @@ -19e49d218f55ed5ec4bd1a6cd3f3388c6f620b81484e7abe8b298e5481c3044d +19e49d218f55ed5ec4bd1a6cd3f3388c6f620b81484e7abe8b298e5481c3044d \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-nutrition-v0-res.json b/tests/testdata/hendrycksTest-nutrition-v0-res.json index 9004159609..e2838f8805 100644 --- a/tests/testdata/hendrycksTest-nutrition-v0-res.json +++ b/tests/testdata/hendrycksTest-nutrition-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-nutrition": {"acc": 0.24509803921568626, "acc_norm": 0.28104575163398693, "acc_norm_stderr": 0.025738854797818723, "acc_stderr": 0.02463004897982476}}, "versions": {"hendrycksTest-nutrition": 0}} +{"results": {"hendrycksTest-nutrition": {"acc": 0.24509803921568626, "acc_norm": 0.28104575163398693, "acc_norm_stderr": 0.025738854797818723, "acc_stderr": 0.02463004897982476}}, "versions": {"hendrycksTest-nutrition": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-philosophy-v0-loglikelihood b/tests/testdata/hendrycksTest-philosophy-v0-loglikelihood index 1cd1e1604d..3ea8ef0a0e 100644 --- a/tests/testdata/hendrycksTest-philosophy-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-philosophy-v0-loglikelihood @@ -1 +1 @@ -a419204da36c2b7a70fa8909a3a804260cc3283c7e07917534dfb76216c77f46 +a419204da36c2b7a70fa8909a3a804260cc3283c7e07917534dfb76216c77f46 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-philosophy-v0-res.json b/tests/testdata/hendrycksTest-philosophy-v0-res.json index 77f82f7fcc..ec9c1e79c1 100644 --- a/tests/testdata/hendrycksTest-philosophy-v0-res.json +++ b/tests/testdata/hendrycksTest-philosophy-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-philosophy": {"acc": 0.26366559485530544, "acc_norm": 0.2733118971061093, "acc_norm_stderr": 0.02531176597542612, "acc_stderr": 0.02502553850053234}}, "versions": {"hendrycksTest-philosophy": 0}} +{"results": {"hendrycksTest-philosophy": {"acc": 0.26366559485530544, "acc_norm": 0.2733118971061093, "acc_norm_stderr": 0.02531176597542612, "acc_stderr": 0.02502553850053234}}, "versions": {"hendrycksTest-philosophy": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-prehistory-v0-loglikelihood b/tests/testdata/hendrycksTest-prehistory-v0-loglikelihood index c92c929cf4..4c01847ef5 100644 --- a/tests/testdata/hendrycksTest-prehistory-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-prehistory-v0-loglikelihood @@ -1 +1 @@ -6983c560a562749f4f702249a3a6ae51fa495acc0643a980bf2cf52c6c5d4b95 +6983c560a562749f4f702249a3a6ae51fa495acc0643a980bf2cf52c6c5d4b95 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-prehistory-v0-res.json b/tests/testdata/hendrycksTest-prehistory-v0-res.json index 85a6d79ca6..e0163dd555 100644 --- a/tests/testdata/hendrycksTest-prehistory-v0-res.json +++ b/tests/testdata/hendrycksTest-prehistory-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-prehistory": {"acc": 0.2623456790123457, "acc_norm": 0.26851851851851855, "acc_norm_stderr": 0.024659685185967277, "acc_stderr": 0.02447722285613511}}, "versions": {"hendrycksTest-prehistory": 0}} +{"results": {"hendrycksTest-prehistory": {"acc": 0.2623456790123457, "acc_norm": 0.26851851851851855, "acc_norm_stderr": 0.024659685185967277, "acc_stderr": 0.02447722285613511}}, "versions": {"hendrycksTest-prehistory": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood b/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood index 6e86a91554..fe5997427e 100644 --- a/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-professional_accounting-v0-loglikelihood @@ -1 +1 @@ -847418f7b22cd9b499e95fd73c40a2fbc40076895280cc2c560199c0c4c4f433 +847418f7b22cd9b499e95fd73c40a2fbc40076895280cc2c560199c0c4c4f433 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_accounting-v0-res.json b/tests/testdata/hendrycksTest-professional_accounting-v0-res.json index 45fee739cf..b665d57e23 100644 --- a/tests/testdata/hendrycksTest-professional_accounting-v0-res.json +++ b/tests/testdata/hendrycksTest-professional_accounting-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-professional_accounting": {"acc": 0.2553191489361702, "acc_norm": 0.26595744680851063, "acc_norm_stderr": 0.026358065698880582, "acc_stderr": 0.026011992930902006}}, "versions": {"hendrycksTest-professional_accounting": 0}} +{"results": {"hendrycksTest-professional_accounting": {"acc": 0.2553191489361702, "acc_norm": 0.26595744680851063, "acc_norm_stderr": 0.026358065698880582, "acc_stderr": 0.026011992930902006}}, "versions": {"hendrycksTest-professional_accounting": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_law-v0-loglikelihood b/tests/testdata/hendrycksTest-professional_law-v0-loglikelihood index b37d4bf2a5..23fbfcf78e 100644 --- a/tests/testdata/hendrycksTest-professional_law-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-professional_law-v0-loglikelihood @@ -1 +1 @@ -c38c9d5d84eeb7a5f3c4a34d6e70d7e15847b3c38f26e4b119c982bb935e118f +c38c9d5d84eeb7a5f3c4a34d6e70d7e15847b3c38f26e4b119c982bb935e118f \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_law-v0-res.json b/tests/testdata/hendrycksTest-professional_law-v0-res.json index 231e6b76a9..f15a9b34ff 100644 --- a/tests/testdata/hendrycksTest-professional_law-v0-res.json +++ b/tests/testdata/hendrycksTest-professional_law-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-professional_law": {"acc": 0.2561929595827901, "acc_norm": 0.2470664928292047, "acc_norm_stderr": 0.011015752255279352, "acc_stderr": 0.011149173153110582}}, "versions": {"hendrycksTest-professional_law": 0}} +{"results": {"hendrycksTest-professional_law": {"acc": 0.2561929595827901, "acc_norm": 0.2470664928292047, "acc_norm_stderr": 0.011015752255279352, "acc_stderr": 0.011149173153110582}}, "versions": {"hendrycksTest-professional_law": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_medicine-v0-loglikelihood b/tests/testdata/hendrycksTest-professional_medicine-v0-loglikelihood index c1c71f612b..cc3c3be8c6 100644 --- a/tests/testdata/hendrycksTest-professional_medicine-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-professional_medicine-v0-loglikelihood @@ -1 +1 @@ -7a30599858398169cde61430c18efdd7fb4dcd09c34aa9baba70f0f8cf17a9f1 +7a30599858398169cde61430c18efdd7fb4dcd09c34aa9baba70f0f8cf17a9f1 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_medicine-v0-res.json b/tests/testdata/hendrycksTest-professional_medicine-v0-res.json index 07daf13a24..801ea2d224 100644 --- a/tests/testdata/hendrycksTest-professional_medicine-v0-res.json +++ b/tests/testdata/hendrycksTest-professional_medicine-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-professional_medicine": {"acc": 0.23161764705882354, "acc_norm": 0.2536764705882353, "acc_norm_stderr": 0.02643132987078953, "acc_stderr": 0.025626533803777562}}, "versions": {"hendrycksTest-professional_medicine": 0}} +{"results": {"hendrycksTest-professional_medicine": {"acc": 0.23161764705882354, "acc_norm": 0.2536764705882353, "acc_norm_stderr": 0.02643132987078953, "acc_stderr": 0.025626533803777562}}, "versions": {"hendrycksTest-professional_medicine": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_psychology-v0-loglikelihood b/tests/testdata/hendrycksTest-professional_psychology-v0-loglikelihood index 70673e36ee..9865854da3 100644 --- a/tests/testdata/hendrycksTest-professional_psychology-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-professional_psychology-v0-loglikelihood @@ -1 +1 @@ -92a5fad6e9ec700f84946faeccd399dda3569fb71837c9fb0c5c87f5ec29c43e +92a5fad6e9ec700f84946faeccd399dda3569fb71837c9fb0c5c87f5ec29c43e \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-professional_psychology-v0-res.json b/tests/testdata/hendrycksTest-professional_psychology-v0-res.json index 90890c6230..c6b33f4be1 100644 --- a/tests/testdata/hendrycksTest-professional_psychology-v0-res.json +++ b/tests/testdata/hendrycksTest-professional_psychology-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-professional_psychology": {"acc": 0.27124183006535946, "acc_norm": 0.2826797385620915, "acc_norm_stderr": 0.01821726955205344, "acc_stderr": 0.01798661530403031}}, "versions": {"hendrycksTest-professional_psychology": 0}} +{"results": {"hendrycksTest-professional_psychology": {"acc": 0.27124183006535946, "acc_norm": 0.2826797385620915, "acc_norm_stderr": 0.01821726955205344, "acc_stderr": 0.01798661530403031}}, "versions": {"hendrycksTest-professional_psychology": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-public_relations-v0-loglikelihood b/tests/testdata/hendrycksTest-public_relations-v0-loglikelihood index 51b3d974eb..8f7b30ba88 100644 --- a/tests/testdata/hendrycksTest-public_relations-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-public_relations-v0-loglikelihood @@ -1 +1 @@ -ab70f500cf24e876f6ae6bdc27525a1d6074fa9b6ea97770255d9fc2559b36ff +ab70f500cf24e876f6ae6bdc27525a1d6074fa9b6ea97770255d9fc2559b36ff \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-public_relations-v0-res.json b/tests/testdata/hendrycksTest-public_relations-v0-res.json index c9922317a8..9ba711cca7 100644 --- a/tests/testdata/hendrycksTest-public_relations-v0-res.json +++ b/tests/testdata/hendrycksTest-public_relations-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-public_relations": {"acc": 0.3090909090909091, "acc_norm": 0.2636363636363636, "acc_norm_stderr": 0.04220224692971987, "acc_stderr": 0.044262946482000985}}, "versions": {"hendrycksTest-public_relations": 0}} +{"results": {"hendrycksTest-public_relations": {"acc": 0.3090909090909091, "acc_norm": 0.2636363636363636, "acc_norm_stderr": 0.04220224692971987, "acc_stderr": 0.044262946482000985}}, "versions": {"hendrycksTest-public_relations": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-security_studies-v0-loglikelihood b/tests/testdata/hendrycksTest-security_studies-v0-loglikelihood index 7bb47e4a55..6aa9b5ec00 100644 --- a/tests/testdata/hendrycksTest-security_studies-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-security_studies-v0-loglikelihood @@ -1 +1 @@ -92dfffe2acf3278256486d3e1cf1edb5a739ad0a54c0f9c67695f7a411ed5f76 +92dfffe2acf3278256486d3e1cf1edb5a739ad0a54c0f9c67695f7a411ed5f76 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-security_studies-v0-res.json b/tests/testdata/hendrycksTest-security_studies-v0-res.json index 109196000e..2c9de8886a 100644 --- a/tests/testdata/hendrycksTest-security_studies-v0-res.json +++ b/tests/testdata/hendrycksTest-security_studies-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-security_studies": {"acc": 0.2979591836734694, "acc_norm": 0.2693877551020408, "acc_norm_stderr": 0.02840125202902294, "acc_stderr": 0.029279567411065674}}, "versions": {"hendrycksTest-security_studies": 0}} +{"results": {"hendrycksTest-security_studies": {"acc": 0.2979591836734694, "acc_norm": 0.2693877551020408, "acc_norm_stderr": 0.02840125202902294, "acc_stderr": 0.029279567411065674}}, "versions": {"hendrycksTest-security_studies": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-sociology-v0-loglikelihood b/tests/testdata/hendrycksTest-sociology-v0-loglikelihood index 534d7c09b3..d3f581c9f2 100644 --- a/tests/testdata/hendrycksTest-sociology-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-sociology-v0-loglikelihood @@ -1 +1 @@ -f99a3caece11169f2a5cc951001f92027104afd25d29b2a399883bd4bf118605 +f99a3caece11169f2a5cc951001f92027104afd25d29b2a399883bd4bf118605 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-sociology-v0-res.json b/tests/testdata/hendrycksTest-sociology-v0-res.json index 0974f7e22f..8711cf195e 100644 --- a/tests/testdata/hendrycksTest-sociology-v0-res.json +++ b/tests/testdata/hendrycksTest-sociology-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-sociology": {"acc": 0.23383084577114427, "acc_norm": 0.24875621890547264, "acc_norm_stderr": 0.030567675938916707, "acc_stderr": 0.02992941540834838}}, "versions": {"hendrycksTest-sociology": 0}} +{"results": {"hendrycksTest-sociology": {"acc": 0.23383084577114427, "acc_norm": 0.24875621890547264, "acc_norm_stderr": 0.030567675938916707, "acc_stderr": 0.02992941540834838}}, "versions": {"hendrycksTest-sociology": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood b/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood index c315cc8b77..eed85dbaf9 100644 --- a/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-us_foreign_policy-v0-loglikelihood @@ -1 +1 @@ -a1a338d0083a21054f74d36a296d6bd8e2e457327c0fd630bebcc61ed758044d +a1a338d0083a21054f74d36a296d6bd8e2e457327c0fd630bebcc61ed758044d \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-us_foreign_policy-v0-res.json b/tests/testdata/hendrycksTest-us_foreign_policy-v0-res.json index f594f9bb49..1077380de8 100644 --- a/tests/testdata/hendrycksTest-us_foreign_policy-v0-res.json +++ b/tests/testdata/hendrycksTest-us_foreign_policy-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-us_foreign_policy": {"acc": 0.2, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909283, "acc_stderr": 0.040201512610368445}}, "versions": {"hendrycksTest-us_foreign_policy": 0}} +{"results": {"hendrycksTest-us_foreign_policy": {"acc": 0.2, "acc_norm": 0.24, "acc_norm_stderr": 0.04292346959909283, "acc_stderr": 0.040201512610368445}}, "versions": {"hendrycksTest-us_foreign_policy": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-virology-v0-loglikelihood b/tests/testdata/hendrycksTest-virology-v0-loglikelihood index 0af2342855..3555c2c535 100644 --- a/tests/testdata/hendrycksTest-virology-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-virology-v0-loglikelihood @@ -1 +1 @@ -0ffa491f7bad2abbb64ecd752a295729167599b3815238cab0ecf4cb08bba9b6 +0ffa491f7bad2abbb64ecd752a295729167599b3815238cab0ecf4cb08bba9b6 \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-virology-v0-res.json b/tests/testdata/hendrycksTest-virology-v0-res.json index eb2639a6b8..0004b19404 100644 --- a/tests/testdata/hendrycksTest-virology-v0-res.json +++ b/tests/testdata/hendrycksTest-virology-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-virology": {"acc": 0.27710843373493976, "acc_norm": 0.2710843373493976, "acc_norm_stderr": 0.03460579907553027, "acc_stderr": 0.034843315926805875}}, "versions": {"hendrycksTest-virology": 0}} +{"results": {"hendrycksTest-virology": {"acc": 0.27710843373493976, "acc_norm": 0.2710843373493976, "acc_norm_stderr": 0.03460579907553027, "acc_stderr": 0.034843315926805875}}, "versions": {"hendrycksTest-virology": 0}} \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-world_religions-v0-loglikelihood b/tests/testdata/hendrycksTest-world_religions-v0-loglikelihood index d7c0bd73b0..118c9b7435 100644 --- a/tests/testdata/hendrycksTest-world_religions-v0-loglikelihood +++ b/tests/testdata/hendrycksTest-world_religions-v0-loglikelihood @@ -1 +1 @@ -97a0f68ba30ea3a6ef1db1a2925c964b09ecc54455a0a930da083e52677815bd +97a0f68ba30ea3a6ef1db1a2925c964b09ecc54455a0a930da083e52677815bd \ No newline at end of file diff --git a/tests/testdata/hendrycksTest-world_religions-v0-res.json b/tests/testdata/hendrycksTest-world_religions-v0-res.json index 6c3ce29f0d..0fff75a7ea 100644 --- a/tests/testdata/hendrycksTest-world_religions-v0-res.json +++ b/tests/testdata/hendrycksTest-world_religions-v0-res.json @@ -1 +1 @@ -{"results": {"hendrycksTest-world_religions": {"acc": 0.21637426900584794, "acc_norm": 0.22807017543859648, "acc_norm_stderr": 0.03218093795602357, "acc_stderr": 0.03158149539338734}}, "versions": {"hendrycksTest-world_religions": 0}} +{"results": {"hendrycksTest-world_religions": {"acc": 0.21637426900584794, "acc_norm": 0.22807017543859648, "acc_norm_stderr": 0.03218093795602357, "acc_stderr": 0.03158149539338734}}, "versions": {"hendrycksTest-world_religions": 0}} \ No newline at end of file diff --git a/tests/testdata/iwslt17-ar-en-v0-greedy_until b/tests/testdata/iwslt17-ar-en-v0-greedy_until index 12c5803946..82921d1db0 100644 --- a/tests/testdata/iwslt17-ar-en-v0-greedy_until +++ b/tests/testdata/iwslt17-ar-en-v0-greedy_until @@ -1 +1 @@ -e94d310de91fad7ce36f4cf3305552020221482c5588f2efcefaa019893504f1 +e94d310de91fad7ce36f4cf3305552020221482c5588f2efcefaa019893504f1 \ No newline at end of file diff --git a/tests/testdata/iwslt17-ar-en-v0-res.json b/tests/testdata/iwslt17-ar-en-v0-res.json index 541ff17888..0f414a928b 100644 --- a/tests/testdata/iwslt17-ar-en-v0-res.json +++ b/tests/testdata/iwslt17-ar-en-v0-res.json @@ -1 +1 @@ -{"results": {"iwslt17-ar-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015049895477752772, "chrf_stderr": 0.0002940315671893584, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"iwslt17-ar-en": 0}} +{"results": {"iwslt17-ar-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015049895477752772, "chrf_stderr": 0.0002940315671893584, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"iwslt17-ar-en": 0}} \ No newline at end of file diff --git a/tests/testdata/iwslt17-en-ar-v0-greedy_until b/tests/testdata/iwslt17-en-ar-v0-greedy_until index 31e16e1f56..fc59546576 100644 --- a/tests/testdata/iwslt17-en-ar-v0-greedy_until +++ b/tests/testdata/iwslt17-en-ar-v0-greedy_until @@ -1 +1 @@ -b20adbcd2c6d135e28600b427113532c5df624cb3a90e8c5e48715c09a3a38fa +b20adbcd2c6d135e28600b427113532c5df624cb3a90e8c5e48715c09a3a38fa \ No newline at end of file diff --git a/tests/testdata/iwslt17-en-ar-v0-res.json b/tests/testdata/iwslt17-en-ar-v0-res.json index 27184cdd64..a22fa9036c 100644 --- a/tests/testdata/iwslt17-en-ar-v0-res.json +++ b/tests/testdata/iwslt17-en-ar-v0-res.json @@ -1 +1 @@ -{"results": {"iwslt17-en-ar": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0, "chrf_stderr": 0.0, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"iwslt17-en-ar": 0}} +{"results": {"iwslt17-en-ar": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0, "chrf_stderr": 0.0, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"iwslt17-en-ar": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada-v0-loglikelihood b/tests/testdata/lambada-v0-loglikelihood index 60dc7f7338..efd450a8f2 100644 --- a/tests/testdata/lambada-v0-loglikelihood +++ b/tests/testdata/lambada-v0-loglikelihood @@ -1 +1 @@ -6829e6a8aa5922e6c92dd31403cc060f242dc0ede4a775e085a70da095ab2e20 +6829e6a8aa5922e6c92dd31403cc060f242dc0ede4a775e085a70da095ab2e20 \ No newline at end of file diff --git a/tests/testdata/lambada-v0-res.json b/tests/testdata/lambada-v0-res.json index cf02bafc3b..ead0e9ce5d 100644 --- a/tests/testdata/lambada-v0-res.json +++ b/tests/testdata/lambada-v0-res.json @@ -1 +1 @@ -{"results": {"lambada": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada": 0}} +{"results": {"lambada": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada_cloze-v0-loglikelihood b/tests/testdata/lambada_cloze-v0-loglikelihood index 3657eb6e71..b599a89f7a 100644 --- a/tests/testdata/lambada_cloze-v0-loglikelihood +++ b/tests/testdata/lambada_cloze-v0-loglikelihood @@ -1 +1 @@ -7655e748b63ae7e9911411d2d2a2577221d6c861ca4448509992541294d689f3 +7655e748b63ae7e9911411d2d2a2577221d6c861ca4448509992541294d689f3 \ No newline at end of file diff --git a/tests/testdata/lambada_cloze-v0-res.json b/tests/testdata/lambada_cloze-v0-res.json index 7cd36e9763..f3f3f931ac 100644 --- a/tests/testdata/lambada_cloze-v0-res.json +++ b/tests/testdata/lambada_cloze-v0-res.json @@ -1 +1 @@ -{"results": {"lambada_cloze": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_cloze": 0}} +{"results": {"lambada_cloze": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_cloze": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada_mt_de-v0-loglikelihood b/tests/testdata/lambada_mt_de-v0-loglikelihood index cae8d9bc38..ae19de0e69 100644 --- a/tests/testdata/lambada_mt_de-v0-loglikelihood +++ b/tests/testdata/lambada_mt_de-v0-loglikelihood @@ -1 +1 @@ -5ad125e1708499832b2cee8c3388f89f9c0277010fd96fbd3359039ce8105984 +5ad125e1708499832b2cee8c3388f89f9c0277010fd96fbd3359039ce8105984 \ No newline at end of file diff --git a/tests/testdata/lambada_mt_de-v0-res.json b/tests/testdata/lambada_mt_de-v0-res.json index a7a0a44989..7267ea739a 100644 --- a/tests/testdata/lambada_mt_de-v0-res.json +++ b/tests/testdata/lambada_mt_de-v0-res.json @@ -1 +1 @@ -{"results": {"lambada_mt_de": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_de": 0}} +{"results": {"lambada_mt_de": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_de": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada_mt_en-v0-loglikelihood b/tests/testdata/lambada_mt_en-v0-loglikelihood index 60dc7f7338..efd450a8f2 100644 --- a/tests/testdata/lambada_mt_en-v0-loglikelihood +++ b/tests/testdata/lambada_mt_en-v0-loglikelihood @@ -1 +1 @@ -6829e6a8aa5922e6c92dd31403cc060f242dc0ede4a775e085a70da095ab2e20 +6829e6a8aa5922e6c92dd31403cc060f242dc0ede4a775e085a70da095ab2e20 \ No newline at end of file diff --git a/tests/testdata/lambada_mt_en-v0-res.json b/tests/testdata/lambada_mt_en-v0-res.json index eb948a0fb0..561b88ffe1 100644 --- a/tests/testdata/lambada_mt_en-v0-res.json +++ b/tests/testdata/lambada_mt_en-v0-res.json @@ -1 +1 @@ -{"results": {"lambada_mt_en": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_en": 0}} +{"results": {"lambada_mt_en": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_en": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada_mt_es-v0-loglikelihood b/tests/testdata/lambada_mt_es-v0-loglikelihood index ce6a044012..df895fe6d6 100644 --- a/tests/testdata/lambada_mt_es-v0-loglikelihood +++ b/tests/testdata/lambada_mt_es-v0-loglikelihood @@ -1 +1 @@ -4a88f4b316c72fe0396c382d6cbb33568ac4d0ad225150d3536635c085359fc9 +4a88f4b316c72fe0396c382d6cbb33568ac4d0ad225150d3536635c085359fc9 \ No newline at end of file diff --git a/tests/testdata/lambada_mt_es-v0-res.json b/tests/testdata/lambada_mt_es-v0-res.json index 107b63fa92..5f95957324 100644 --- a/tests/testdata/lambada_mt_es-v0-res.json +++ b/tests/testdata/lambada_mt_es-v0-res.json @@ -1 +1 @@ -{"results": {"lambada_mt_es": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_es": 0}} +{"results": {"lambada_mt_es": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_es": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada_mt_fr-v0-loglikelihood b/tests/testdata/lambada_mt_fr-v0-loglikelihood index b1495180e9..3c444f6661 100644 --- a/tests/testdata/lambada_mt_fr-v0-loglikelihood +++ b/tests/testdata/lambada_mt_fr-v0-loglikelihood @@ -1 +1 @@ -5d16f4a0c51dc6d7b6df2ebeba2bbfa51e700b843779b559b3d90183d7b02a11 +5d16f4a0c51dc6d7b6df2ebeba2bbfa51e700b843779b559b3d90183d7b02a11 \ No newline at end of file diff --git a/tests/testdata/lambada_mt_fr-v0-res.json b/tests/testdata/lambada_mt_fr-v0-res.json index ec0b038c81..637c23500b 100644 --- a/tests/testdata/lambada_mt_fr-v0-res.json +++ b/tests/testdata/lambada_mt_fr-v0-res.json @@ -1 +1 @@ -{"results": {"lambada_mt_fr": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_fr": 0}} +{"results": {"lambada_mt_fr": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_fr": 0}} \ No newline at end of file diff --git a/tests/testdata/lambada_mt_it-v0-loglikelihood b/tests/testdata/lambada_mt_it-v0-loglikelihood index 3885f316af..ca3fd80298 100644 --- a/tests/testdata/lambada_mt_it-v0-loglikelihood +++ b/tests/testdata/lambada_mt_it-v0-loglikelihood @@ -1 +1 @@ -fd87c6c5cf4e0499c5f9f80e5bd7ee6a4f3d2991902a0cc3ec9e6eaf22d6760a +fd87c6c5cf4e0499c5f9f80e5bd7ee6a4f3d2991902a0cc3ec9e6eaf22d6760a \ No newline at end of file diff --git a/tests/testdata/lambada_mt_it-v0-res.json b/tests/testdata/lambada_mt_it-v0-res.json index 79efb8675a..b652210ae3 100644 --- a/tests/testdata/lambada_mt_it-v0-res.json +++ b/tests/testdata/lambada_mt_it-v0-res.json @@ -1 +1 @@ -{"results": {"lambada_mt_it": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_it": 0}} +{"results": {"lambada_mt_it": {"acc": 0.0, "acc_stderr": 0.0, "ppl": 1.6479047769869253, "ppl_stderr": 0.006497321146240192}}, "versions": {"lambada_mt_it": 0}} \ No newline at end of file diff --git a/tests/testdata/logiqa-v0-loglikelihood b/tests/testdata/logiqa-v0-loglikelihood index 91fe00756a..9cd40fce0a 100644 --- a/tests/testdata/logiqa-v0-loglikelihood +++ b/tests/testdata/logiqa-v0-loglikelihood @@ -1 +1 @@ -12495c50454ba5e1ce0753bd18c09aaca516bebd27648d815e37b15229dbf198 +12495c50454ba5e1ce0753bd18c09aaca516bebd27648d815e37b15229dbf198 \ No newline at end of file diff --git a/tests/testdata/logiqa-v0-res.json b/tests/testdata/logiqa-v0-res.json index d76464cd63..7a80c24d1b 100644 --- a/tests/testdata/logiqa-v0-res.json +++ b/tests/testdata/logiqa-v0-res.json @@ -1 +1 @@ -{"results": {"logiqa": {"acc": 0.25806451612903225, "acc_norm": 0.2764976958525346, "acc_norm_stderr": 0.017543209075825194, "acc_stderr": 0.017162894755127077}}, "versions": {"logiqa": 0}} +{"results": {"logiqa": {"acc": 0.25806451612903225, "acc_norm": 0.2764976958525346, "acc_norm_stderr": 0.017543209075825194, "acc_stderr": 0.017162894755127077}}, "versions": {"logiqa": 0}} \ No newline at end of file diff --git a/tests/testdata/math_algebra-v0-greedy_until b/tests/testdata/math_algebra-v0-greedy_until index 48090ee978..ce881a0232 100644 --- a/tests/testdata/math_algebra-v0-greedy_until +++ b/tests/testdata/math_algebra-v0-greedy_until @@ -1 +1 @@ -f19182ce697a2c095d9e5b56ee6659dc38c93994b69ca75d7c3d3f5fd87572b4 +f19182ce697a2c095d9e5b56ee6659dc38c93994b69ca75d7c3d3f5fd87572b4 \ No newline at end of file diff --git a/tests/testdata/math_algebra-v0-res.json b/tests/testdata/math_algebra-v0-res.json index 8e2552d4cc..192cb9d852 100644 --- a/tests/testdata/math_algebra-v0-res.json +++ b/tests/testdata/math_algebra-v0-res.json @@ -1 +1 @@ -{"results": {"math_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_algebra": 0}} +{"results": {"math_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_algebra": 0}} \ No newline at end of file diff --git a/tests/testdata/math_algebra-v1-greedy_until b/tests/testdata/math_algebra-v1-greedy_until index 48090ee978..ce881a0232 100644 --- a/tests/testdata/math_algebra-v1-greedy_until +++ b/tests/testdata/math_algebra-v1-greedy_until @@ -1 +1 @@ -f19182ce697a2c095d9e5b56ee6659dc38c93994b69ca75d7c3d3f5fd87572b4 +f19182ce697a2c095d9e5b56ee6659dc38c93994b69ca75d7c3d3f5fd87572b4 \ No newline at end of file diff --git a/tests/testdata/math_algebra-v1-res.json b/tests/testdata/math_algebra-v1-res.json index 00b237d6c5..10d18c2f86 100644 --- a/tests/testdata/math_algebra-v1-res.json +++ b/tests/testdata/math_algebra-v1-res.json @@ -1 +1 @@ -{"results": {"math_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_algebra": 1}} +{"results": {"math_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_algebra": 1}} \ No newline at end of file diff --git a/tests/testdata/math_counting_and_prob-v0-greedy_until b/tests/testdata/math_counting_and_prob-v0-greedy_until index a81e7cc085..6f49557ecf 100644 --- a/tests/testdata/math_counting_and_prob-v0-greedy_until +++ b/tests/testdata/math_counting_and_prob-v0-greedy_until @@ -1 +1 @@ -2aa9ae43ee9dbb2457525247d7b65358632c5eaa9cbfc40cf95a4f17f5d942ad +2aa9ae43ee9dbb2457525247d7b65358632c5eaa9cbfc40cf95a4f17f5d942ad \ No newline at end of file diff --git a/tests/testdata/math_counting_and_prob-v0-res.json b/tests/testdata/math_counting_and_prob-v0-res.json index df7f4e47a6..8ee1d031de 100644 --- a/tests/testdata/math_counting_and_prob-v0-res.json +++ b/tests/testdata/math_counting_and_prob-v0-res.json @@ -1 +1 @@ -{"results": {"math_counting_and_prob": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_counting_and_prob": 0}} +{"results": {"math_counting_and_prob": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_counting_and_prob": 0}} \ No newline at end of file diff --git a/tests/testdata/math_counting_and_prob-v1-greedy_until b/tests/testdata/math_counting_and_prob-v1-greedy_until index a81e7cc085..6f49557ecf 100644 --- a/tests/testdata/math_counting_and_prob-v1-greedy_until +++ b/tests/testdata/math_counting_and_prob-v1-greedy_until @@ -1 +1 @@ -2aa9ae43ee9dbb2457525247d7b65358632c5eaa9cbfc40cf95a4f17f5d942ad +2aa9ae43ee9dbb2457525247d7b65358632c5eaa9cbfc40cf95a4f17f5d942ad \ No newline at end of file diff --git a/tests/testdata/math_counting_and_prob-v1-res.json b/tests/testdata/math_counting_and_prob-v1-res.json index 1eae75c7cd..240f7b6b42 100644 --- a/tests/testdata/math_counting_and_prob-v1-res.json +++ b/tests/testdata/math_counting_and_prob-v1-res.json @@ -1 +1 @@ -{"results": {"math_counting_and_prob": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_counting_and_prob": 1}} +{"results": {"math_counting_and_prob": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_counting_and_prob": 1}} \ No newline at end of file diff --git a/tests/testdata/math_geometry-v0-greedy_until b/tests/testdata/math_geometry-v0-greedy_until index 3ed1fe9f97..1c7362fe44 100644 --- a/tests/testdata/math_geometry-v0-greedy_until +++ b/tests/testdata/math_geometry-v0-greedy_until @@ -1 +1 @@ -46bc4cb219b6903397da782699a684bdbb982c0c954ff82e6beeed5c84878f42 +46bc4cb219b6903397da782699a684bdbb982c0c954ff82e6beeed5c84878f42 \ No newline at end of file diff --git a/tests/testdata/math_geometry-v0-res.json b/tests/testdata/math_geometry-v0-res.json index f50c311ab4..1b25dc283c 100644 --- a/tests/testdata/math_geometry-v0-res.json +++ b/tests/testdata/math_geometry-v0-res.json @@ -1 +1 @@ -{"results": {"math_geometry": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_geometry": 0}} +{"results": {"math_geometry": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_geometry": 0}} \ No newline at end of file diff --git a/tests/testdata/math_geometry-v1-greedy_until b/tests/testdata/math_geometry-v1-greedy_until index 3ed1fe9f97..1c7362fe44 100644 --- a/tests/testdata/math_geometry-v1-greedy_until +++ b/tests/testdata/math_geometry-v1-greedy_until @@ -1 +1 @@ -46bc4cb219b6903397da782699a684bdbb982c0c954ff82e6beeed5c84878f42 +46bc4cb219b6903397da782699a684bdbb982c0c954ff82e6beeed5c84878f42 \ No newline at end of file diff --git a/tests/testdata/math_geometry-v1-res.json b/tests/testdata/math_geometry-v1-res.json index 8a915069b5..eb6851fc63 100644 --- a/tests/testdata/math_geometry-v1-res.json +++ b/tests/testdata/math_geometry-v1-res.json @@ -1 +1 @@ -{"results": {"math_geometry": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_geometry": 1}} +{"results": {"math_geometry": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_geometry": 1}} \ No newline at end of file diff --git a/tests/testdata/math_intermediate_algebra-v0-greedy_until b/tests/testdata/math_intermediate_algebra-v0-greedy_until index ed4f5dc12a..3ab10de26a 100644 --- a/tests/testdata/math_intermediate_algebra-v0-greedy_until +++ b/tests/testdata/math_intermediate_algebra-v0-greedy_until @@ -1 +1 @@ -d53c699de272d517ed7ad783b4e692302be9f9f97a8d4ac7a6541e538a7cabe0 +d53c699de272d517ed7ad783b4e692302be9f9f97a8d4ac7a6541e538a7cabe0 \ No newline at end of file diff --git a/tests/testdata/math_intermediate_algebra-v0-res.json b/tests/testdata/math_intermediate_algebra-v0-res.json index e047aba684..7a195d9ac4 100644 --- a/tests/testdata/math_intermediate_algebra-v0-res.json +++ b/tests/testdata/math_intermediate_algebra-v0-res.json @@ -1 +1 @@ -{"results": {"math_intermediate_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_intermediate_algebra": 0}} +{"results": {"math_intermediate_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_intermediate_algebra": 0}} \ No newline at end of file diff --git a/tests/testdata/math_intermediate_algebra-v1-greedy_until b/tests/testdata/math_intermediate_algebra-v1-greedy_until index ed4f5dc12a..3ab10de26a 100644 --- a/tests/testdata/math_intermediate_algebra-v1-greedy_until +++ b/tests/testdata/math_intermediate_algebra-v1-greedy_until @@ -1 +1 @@ -d53c699de272d517ed7ad783b4e692302be9f9f97a8d4ac7a6541e538a7cabe0 +d53c699de272d517ed7ad783b4e692302be9f9f97a8d4ac7a6541e538a7cabe0 \ No newline at end of file diff --git a/tests/testdata/math_intermediate_algebra-v1-res.json b/tests/testdata/math_intermediate_algebra-v1-res.json index c6f1c39e18..63ab45b9ff 100644 --- a/tests/testdata/math_intermediate_algebra-v1-res.json +++ b/tests/testdata/math_intermediate_algebra-v1-res.json @@ -1 +1 @@ -{"results": {"math_intermediate_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_intermediate_algebra": 1}} +{"results": {"math_intermediate_algebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_intermediate_algebra": 1}} \ No newline at end of file diff --git a/tests/testdata/math_num_theory-v0-greedy_until b/tests/testdata/math_num_theory-v0-greedy_until index 8b9fae1314..82febb9f5d 100644 --- a/tests/testdata/math_num_theory-v0-greedy_until +++ b/tests/testdata/math_num_theory-v0-greedy_until @@ -1 +1 @@ -b920ccb507afdcf3ef6f4c04891913731e9f32ec914801791c6d9f8abf6e1897 +b920ccb507afdcf3ef6f4c04891913731e9f32ec914801791c6d9f8abf6e1897 \ No newline at end of file diff --git a/tests/testdata/math_num_theory-v0-res.json b/tests/testdata/math_num_theory-v0-res.json index f39ace0db5..a27a38fa9d 100644 --- a/tests/testdata/math_num_theory-v0-res.json +++ b/tests/testdata/math_num_theory-v0-res.json @@ -1 +1 @@ -{"results": {"math_num_theory": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_num_theory": 0}} +{"results": {"math_num_theory": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_num_theory": 0}} \ No newline at end of file diff --git a/tests/testdata/math_num_theory-v1-greedy_until b/tests/testdata/math_num_theory-v1-greedy_until index 8b9fae1314..82febb9f5d 100644 --- a/tests/testdata/math_num_theory-v1-greedy_until +++ b/tests/testdata/math_num_theory-v1-greedy_until @@ -1 +1 @@ -b920ccb507afdcf3ef6f4c04891913731e9f32ec914801791c6d9f8abf6e1897 +b920ccb507afdcf3ef6f4c04891913731e9f32ec914801791c6d9f8abf6e1897 \ No newline at end of file diff --git a/tests/testdata/math_num_theory-v1-res.json b/tests/testdata/math_num_theory-v1-res.json index 67799305de..00917b90dd 100644 --- a/tests/testdata/math_num_theory-v1-res.json +++ b/tests/testdata/math_num_theory-v1-res.json @@ -1 +1 @@ -{"results": {"math_num_theory": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_num_theory": 1}} +{"results": {"math_num_theory": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_num_theory": 1}} \ No newline at end of file diff --git a/tests/testdata/math_prealgebra-v0-greedy_until b/tests/testdata/math_prealgebra-v0-greedy_until index 570cf27876..5200f4cfa9 100644 --- a/tests/testdata/math_prealgebra-v0-greedy_until +++ b/tests/testdata/math_prealgebra-v0-greedy_until @@ -1 +1 @@ -752cdf343d7152e476b0273065024f6ea0e0f47ea385c6bdf9067736cb39724a +752cdf343d7152e476b0273065024f6ea0e0f47ea385c6bdf9067736cb39724a \ No newline at end of file diff --git a/tests/testdata/math_prealgebra-v0-res.json b/tests/testdata/math_prealgebra-v0-res.json index 8bbaef99a9..b3ada8a6be 100644 --- a/tests/testdata/math_prealgebra-v0-res.json +++ b/tests/testdata/math_prealgebra-v0-res.json @@ -1 +1 @@ -{"results": {"math_prealgebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_prealgebra": 0}} +{"results": {"math_prealgebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_prealgebra": 0}} \ No newline at end of file diff --git a/tests/testdata/math_prealgebra-v1-greedy_until b/tests/testdata/math_prealgebra-v1-greedy_until index 570cf27876..5200f4cfa9 100644 --- a/tests/testdata/math_prealgebra-v1-greedy_until +++ b/tests/testdata/math_prealgebra-v1-greedy_until @@ -1 +1 @@ -752cdf343d7152e476b0273065024f6ea0e0f47ea385c6bdf9067736cb39724a +752cdf343d7152e476b0273065024f6ea0e0f47ea385c6bdf9067736cb39724a \ No newline at end of file diff --git a/tests/testdata/math_prealgebra-v1-res.json b/tests/testdata/math_prealgebra-v1-res.json index 18b665567a..e3869faa80 100644 --- a/tests/testdata/math_prealgebra-v1-res.json +++ b/tests/testdata/math_prealgebra-v1-res.json @@ -1 +1 @@ -{"results": {"math_prealgebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_prealgebra": 1}} +{"results": {"math_prealgebra": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_prealgebra": 1}} \ No newline at end of file diff --git a/tests/testdata/math_precalc-v0-greedy_until b/tests/testdata/math_precalc-v0-greedy_until index 816534b355..71bbd8d9c2 100644 --- a/tests/testdata/math_precalc-v0-greedy_until +++ b/tests/testdata/math_precalc-v0-greedy_until @@ -1 +1 @@ -bc834b06fd79473ca6fe38a51b714aad0bf0478c1b0eec787eca34dbdf69cb71 +bc834b06fd79473ca6fe38a51b714aad0bf0478c1b0eec787eca34dbdf69cb71 \ No newline at end of file diff --git a/tests/testdata/math_precalc-v0-res.json b/tests/testdata/math_precalc-v0-res.json index f1f806d39e..699dc5fe38 100644 --- a/tests/testdata/math_precalc-v0-res.json +++ b/tests/testdata/math_precalc-v0-res.json @@ -1 +1 @@ -{"results": {"math_precalc": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_precalc": 0}} +{"results": {"math_precalc": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_precalc": 0}} \ No newline at end of file diff --git a/tests/testdata/math_precalc-v1-greedy_until b/tests/testdata/math_precalc-v1-greedy_until index 816534b355..71bbd8d9c2 100644 --- a/tests/testdata/math_precalc-v1-greedy_until +++ b/tests/testdata/math_precalc-v1-greedy_until @@ -1 +1 @@ -bc834b06fd79473ca6fe38a51b714aad0bf0478c1b0eec787eca34dbdf69cb71 +bc834b06fd79473ca6fe38a51b714aad0bf0478c1b0eec787eca34dbdf69cb71 \ No newline at end of file diff --git a/tests/testdata/math_precalc-v1-res.json b/tests/testdata/math_precalc-v1-res.json index c635e939b0..a5846590a3 100644 --- a/tests/testdata/math_precalc-v1-res.json +++ b/tests/testdata/math_precalc-v1-res.json @@ -1 +1 @@ -{"results": {"math_precalc": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_precalc": 1}} +{"results": {"math_precalc": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"math_precalc": 1}} \ No newline at end of file diff --git a/tests/testdata/mathqa-v0-loglikelihood b/tests/testdata/mathqa-v0-loglikelihood index b1481a24d0..9f33d79035 100644 --- a/tests/testdata/mathqa-v0-loglikelihood +++ b/tests/testdata/mathqa-v0-loglikelihood @@ -1 +1 @@ -a45260e49f02c7cb8886b3746db4d388890860b202dd8a9f0267e3c324e0af13 +a45260e49f02c7cb8886b3746db4d388890860b202dd8a9f0267e3c324e0af13 \ No newline at end of file diff --git a/tests/testdata/mathqa-v0-res.json b/tests/testdata/mathqa-v0-res.json index 07bfe685cc..dabd07c07c 100644 --- a/tests/testdata/mathqa-v0-res.json +++ b/tests/testdata/mathqa-v0-res.json @@ -1 +1 @@ -{"results": {"mathqa": {"acc": 0.20770519262981574, "acc_norm": 0.2050251256281407, "acc_norm_stderr": 0.007390619359738901, "acc_stderr": 0.007426217631188539}}, "versions": {"mathqa": 0}} +{"results": {"mathqa": {"acc": 0.20770519262981574, "acc_norm": 0.2050251256281407, "acc_norm_stderr": 0.007390619359738901, "acc_stderr": 0.007426217631188539}}, "versions": {"mathqa": 0}} \ No newline at end of file diff --git a/tests/testdata/mc_taco-v0-loglikelihood b/tests/testdata/mc_taco-v0-loglikelihood index 57258313b2..f0ce5c6458 100644 --- a/tests/testdata/mc_taco-v0-loglikelihood +++ b/tests/testdata/mc_taco-v0-loglikelihood @@ -1 +1 @@ -1811808ef05afd5f30ffc3471622a3dd7a1b681b17a2f7616695ad6b2a45943c +1811808ef05afd5f30ffc3471622a3dd7a1b681b17a2f7616695ad6b2a45943c \ No newline at end of file diff --git a/tests/testdata/mc_taco-v0-res.json b/tests/testdata/mc_taco-v0-res.json index 2fe07d4a3a..fc36d1ed3f 100644 --- a/tests/testdata/mc_taco-v0-res.json +++ b/tests/testdata/mc_taco-v0-res.json @@ -1 +1 @@ -{"results": {"mc_taco": {"em": 0.07732732732732733, "f1": 0.41600515965511614}}, "versions": {"mc_taco": 0}} +{"results": {"mc_taco": {"em": 0.07732732732732733, "f1": 0.41600515965511614}}, "versions": {"mc_taco": 0}} \ No newline at end of file diff --git a/tests/testdata/mnli-v0-loglikelihood b/tests/testdata/mnli-v0-loglikelihood index cb5e932cb5..433b76d010 100644 --- a/tests/testdata/mnli-v0-loglikelihood +++ b/tests/testdata/mnli-v0-loglikelihood @@ -1 +1 @@ -4fc7b56b8f1e37e38f4a052b227baec2df914c898c3405d3e994726ba4fba976 +4fc7b56b8f1e37e38f4a052b227baec2df914c898c3405d3e994726ba4fba976 \ No newline at end of file diff --git a/tests/testdata/mnli-v0-res.json b/tests/testdata/mnli-v0-res.json index d631581a60..d9dada7a02 100644 --- a/tests/testdata/mnli-v0-res.json +++ b/tests/testdata/mnli-v0-res.json @@ -1 +1 @@ -{"results": {"mnli": {"acc": 0.32868059093224655, "acc_stderr": 0.004741640290753859}}, "versions": {"mnli": 0}} +{"results": {"mnli": {"acc": 0.32868059093224655, "acc_stderr": 0.004741640290753859}}, "versions": {"mnli": 0}} \ No newline at end of file diff --git a/tests/testdata/mnli_mismatched-v0-loglikelihood b/tests/testdata/mnli_mismatched-v0-loglikelihood index fcd0e5b260..3fb242da3a 100644 --- a/tests/testdata/mnli_mismatched-v0-loglikelihood +++ b/tests/testdata/mnli_mismatched-v0-loglikelihood @@ -1 +1 @@ -3784acf322e79f31702a7a0612030e4ba5c4fc466ad976a34ee3f3d7278c01f0 +3784acf322e79f31702a7a0612030e4ba5c4fc466ad976a34ee3f3d7278c01f0 \ No newline at end of file diff --git a/tests/testdata/mnli_mismatched-v0-res.json b/tests/testdata/mnli_mismatched-v0-res.json index 5392bc600f..261deed962 100644 --- a/tests/testdata/mnli_mismatched-v0-res.json +++ b/tests/testdata/mnli_mismatched-v0-res.json @@ -1 +1 @@ -{"results": {"mnli_mismatched": {"acc": 0.3360455655004068, "acc_stderr": 0.004763973908606819}}, "versions": {"mnli_mismatched": 0}} +{"results": {"mnli_mismatched": {"acc": 0.3360455655004068, "acc_stderr": 0.004763973908606819}}, "versions": {"mnli_mismatched": 0}} \ No newline at end of file diff --git a/tests/testdata/mrpc-v0-loglikelihood b/tests/testdata/mrpc-v0-loglikelihood index 284b450fb0..95c849a153 100644 --- a/tests/testdata/mrpc-v0-loglikelihood +++ b/tests/testdata/mrpc-v0-loglikelihood @@ -1 +1 @@ -9f54cbff8d6accba99cfa2c4c4b359563313941018173d7dcf9e32dc28c06583 +9f54cbff8d6accba99cfa2c4c4b359563313941018173d7dcf9e32dc28c06583 \ No newline at end of file diff --git a/tests/testdata/mrpc-v0-res.json b/tests/testdata/mrpc-v0-res.json index 54d2dac47a..f141eaa0a4 100644 --- a/tests/testdata/mrpc-v0-res.json +++ b/tests/testdata/mrpc-v0-res.json @@ -1 +1 @@ -{"results": {"mrpc": {"acc": 0.5392156862745098, "acc_stderr": 0.024707732873723128, "f1": 0.5982905982905982, "f1_stderr": 0.028928325246283727}}, "versions": {"mrpc": 0}} +{"results": {"mrpc": {"acc": 0.5392156862745098, "acc_stderr": 0.024707732873723128, "f1": 0.5982905982905982, "f1_stderr": 0.028928325246283727}}, "versions": {"mrpc": 0}} \ No newline at end of file diff --git a/tests/testdata/multirc-v0-loglikelihood b/tests/testdata/multirc-v0-loglikelihood index 6391fe9464..b3681ec175 100644 --- a/tests/testdata/multirc-v0-loglikelihood +++ b/tests/testdata/multirc-v0-loglikelihood @@ -1 +1 @@ -cdb026c027437a8b4653212d0944d36fc16f49921dcb8e4bef899d15a55e9f80 +cdb026c027437a8b4653212d0944d36fc16f49921dcb8e4bef899d15a55e9f80 \ No newline at end of file diff --git a/tests/testdata/multirc-v0-res.json b/tests/testdata/multirc-v0-res.json index 864c987f55..87e9c532eb 100644 --- a/tests/testdata/multirc-v0-res.json +++ b/tests/testdata/multirc-v0-res.json @@ -1 +1 @@ -{"results": {"multirc": {"acc": 0.07450157397691501, "acc_stderr": 0.008510441526175931}}, "versions": {"multirc": 0}} +{"results": {"multirc": {"acc": 0.07450157397691501, "acc_stderr": 0.008510441526175931}}, "versions": {"multirc": 0}} \ No newline at end of file diff --git a/tests/testdata/multirc-v1-loglikelihood b/tests/testdata/multirc-v1-loglikelihood index 7a1d5b828f..52a89c6f9e 100644 --- a/tests/testdata/multirc-v1-loglikelihood +++ b/tests/testdata/multirc-v1-loglikelihood @@ -1 +1 @@ -0e793bd6f637a70a04c6f2cda080188fc037961b2f909095fe63f7bdbc4a90c6 +0e793bd6f637a70a04c6f2cda080188fc037961b2f909095fe63f7bdbc4a90c6 \ No newline at end of file diff --git a/tests/testdata/multirc-v1-res.json b/tests/testdata/multirc-v1-res.json index 2b782974d2..938141bbb8 100644 --- a/tests/testdata/multirc-v1-res.json +++ b/tests/testdata/multirc-v1-res.json @@ -1 +1 @@ -{"results": {"multirc": {"acc": 0.046169989506820566, "acc_stderr": 0.006801377886208738}}, "versions": {"multirc": 1}} +{"results": {"multirc": {"acc": 0.046169989506820566, "acc_stderr": 0.006801377886208738}}, "versions": {"multirc": 1}} \ No newline at end of file diff --git a/tests/testdata/mutual-v0-loglikelihood b/tests/testdata/mutual-v0-loglikelihood index 8d93380a2a..0022f466d2 100644 --- a/tests/testdata/mutual-v0-loglikelihood +++ b/tests/testdata/mutual-v0-loglikelihood @@ -1 +1 @@ -f759213a28f0412510bf1a24c9cab0dae64bdee902d42a26225295445e7779db +f759213a28f0412510bf1a24c9cab0dae64bdee902d42a26225295445e7779db \ No newline at end of file diff --git a/tests/testdata/mutual-v0-res.json b/tests/testdata/mutual-v0-res.json index aac1d3f4e2..2d240576b3 100644 --- a/tests/testdata/mutual-v0-res.json +++ b/tests/testdata/mutual-v0-res.json @@ -1 +1 @@ -{"results": {"mutual": {"mrr": 0.5023513920240772, "mrr_stderr": 0.009501864812936679, "r@1": 0.22573363431151242, "r@1_stderr": 0.014053085820407457, "r@2": 0.4221218961625282, "r@2_stderr": 0.016602191705517556}}, "versions": {"mutual": 0}} +{"results": {"mutual": {"mrr": 0.5023513920240772, "mrr_stderr": 0.009501864812936679, "r@1": 0.22573363431151242, "r@1_stderr": 0.014053085820407457, "r@2": 0.4221218961625282, "r@2_stderr": 0.016602191705517556}}, "versions": {"mutual": 0}} \ No newline at end of file diff --git a/tests/testdata/mutual-v1-loglikelihood b/tests/testdata/mutual-v1-loglikelihood index 8d93380a2a..0022f466d2 100644 --- a/tests/testdata/mutual-v1-loglikelihood +++ b/tests/testdata/mutual-v1-loglikelihood @@ -1 +1 @@ -f759213a28f0412510bf1a24c9cab0dae64bdee902d42a26225295445e7779db +f759213a28f0412510bf1a24c9cab0dae64bdee902d42a26225295445e7779db \ No newline at end of file diff --git a/tests/testdata/mutual-v1-res.json b/tests/testdata/mutual-v1-res.json index 4d680e5934..42e97c6f1a 100644 --- a/tests/testdata/mutual-v1-res.json +++ b/tests/testdata/mutual-v1-res.json @@ -1 +1 @@ -{"results": {"mutual": {"mrr": 0.5023513920240772, "mrr_stderr": 0.009501864812936679, "r@1": 0.22460496613995484, "r@1_stderr": 0.014028122493992806, "r@2": 0.4706546275395034, "r@2_stderr": 0.016778343895001414}}, "versions": {"mutual": 1}} +{"results": {"mutual": {"mrr": 0.5023513920240772, "mrr_stderr": 0.009501864812936679, "r@1": 0.22460496613995484, "r@1_stderr": 0.014028122493992806, "r@2": 0.4706546275395034, "r@2_stderr": 0.016778343895001414}}, "versions": {"mutual": 1}} \ No newline at end of file diff --git a/tests/testdata/mutual_plus-v0-loglikelihood b/tests/testdata/mutual_plus-v0-loglikelihood index 93c1f711fd..f4ba9d3731 100644 --- a/tests/testdata/mutual_plus-v0-loglikelihood +++ b/tests/testdata/mutual_plus-v0-loglikelihood @@ -1 +1 @@ -b846bb9db109535f59a93d1ce340cf09f68bdf4fed5b8decd168784220fe07fa +b846bb9db109535f59a93d1ce340cf09f68bdf4fed5b8decd168784220fe07fa \ No newline at end of file diff --git a/tests/testdata/mutual_plus-v0-res.json b/tests/testdata/mutual_plus-v0-res.json index 4de1605d42..9c03488263 100644 --- a/tests/testdata/mutual_plus-v0-res.json +++ b/tests/testdata/mutual_plus-v0-res.json @@ -1 +1 @@ -{"results": {"mutual_plus": {"mrr": 0.5275583145221953, "mrr_stderr": 0.009940894824430708, "r@1": 0.2595936794582393, "r@1_stderr": 0.014737047402750955, "r@2": 0.45372460496614, "r@2_stderr": 0.01673517854461967}}, "versions": {"mutual_plus": 0}} +{"results": {"mutual_plus": {"mrr": 0.5275583145221953, "mrr_stderr": 0.009940894824430708, "r@1": 0.2595936794582393, "r@1_stderr": 0.014737047402750955, "r@2": 0.45372460496614, "r@2_stderr": 0.01673517854461967}}, "versions": {"mutual_plus": 0}} \ No newline at end of file diff --git a/tests/testdata/mutual_plus-v1-loglikelihood b/tests/testdata/mutual_plus-v1-loglikelihood index 93c1f711fd..f4ba9d3731 100644 --- a/tests/testdata/mutual_plus-v1-loglikelihood +++ b/tests/testdata/mutual_plus-v1-loglikelihood @@ -1 +1 @@ -b846bb9db109535f59a93d1ce340cf09f68bdf4fed5b8decd168784220fe07fa +b846bb9db109535f59a93d1ce340cf09f68bdf4fed5b8decd168784220fe07fa \ No newline at end of file diff --git a/tests/testdata/mutual_plus-v1-res.json b/tests/testdata/mutual_plus-v1-res.json index c5261b8167..cdb6c85b65 100644 --- a/tests/testdata/mutual_plus-v1-res.json +++ b/tests/testdata/mutual_plus-v1-res.json @@ -1 +1 @@ -{"results": {"mutual_plus": {"mrr": 0.5275583145221953, "mrr_stderr": 0.009940894824430708, "r@1": 0.26297968397291194, "r@1_stderr": 0.01479889176605113, "r@2": 0.5, "r@2_stderr": 0.01680731613632036}}, "versions": {"mutual_plus": 1}} +{"results": {"mutual_plus": {"mrr": 0.5275583145221953, "mrr_stderr": 0.009940894824430708, "r@1": 0.26297968397291194, "r@1_stderr": 0.01479889176605113, "r@2": 0.5, "r@2_stderr": 0.01680731613632036}}, "versions": {"mutual_plus": 1}} \ No newline at end of file diff --git a/tests/testdata/openbookqa-v0-loglikelihood b/tests/testdata/openbookqa-v0-loglikelihood index bc70ae8322..b2cc5e9795 100644 --- a/tests/testdata/openbookqa-v0-loglikelihood +++ b/tests/testdata/openbookqa-v0-loglikelihood @@ -1 +1 @@ -78a49a0ca1a47373adb33463b1d092e6bc0d8f4b01bcb380ada48065037849d7 +78a49a0ca1a47373adb33463b1d092e6bc0d8f4b01bcb380ada48065037849d7 \ No newline at end of file diff --git a/tests/testdata/openbookqa-v0-res.json b/tests/testdata/openbookqa-v0-res.json index c7195c2448..04f4c25442 100644 --- a/tests/testdata/openbookqa-v0-res.json +++ b/tests/testdata/openbookqa-v0-res.json @@ -1 +1 @@ -{"results": {"openbookqa": {"acc": 0.214, "acc_norm": 0.276, "acc_norm_stderr": 0.020011219298073517, "acc_stderr": 0.018359797502387046}}, "versions": {"openbookqa": 0}} +{"results": {"openbookqa": {"acc": 0.214, "acc_norm": 0.276, "acc_norm_stderr": 0.020011219298073517, "acc_stderr": 0.018359797502387046}}, "versions": {"openbookqa": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_arxiv-v0-loglikelihood_rolling b/tests/testdata/pile_arxiv-v0-loglikelihood_rolling index 7f2cf1c523..3aa1d8c734 100644 --- a/tests/testdata/pile_arxiv-v0-loglikelihood_rolling +++ b/tests/testdata/pile_arxiv-v0-loglikelihood_rolling @@ -1 +1 @@ -814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec +814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec \ No newline at end of file diff --git a/tests/testdata/pile_arxiv-v0-res.json b/tests/testdata/pile_arxiv-v0-res.json index 5de44de446..d19d0c6fee 100644 --- a/tests/testdata/pile_arxiv-v0-res.json +++ b/tests/testdata/pile_arxiv-v0-res.json @@ -1 +1 @@ -{"results": {"pile_arxiv": {"bits_per_byte": 1.0750412350569374e-05, "byte_perplexity": 1.0000107504701365, "word_perplexity": 1.0000819333090385}}, "versions": {"pile_arxiv": 0}} +{"results": {"pile_arxiv": {"bits_per_byte": 1.0750412350569374e-05, "byte_perplexity": 1.0000107504701365, "word_perplexity": 1.0000819333090385}}, "versions": {"pile_arxiv": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_arxiv-v1-loglikelihood_rolling b/tests/testdata/pile_arxiv-v1-loglikelihood_rolling index 7f2cf1c523..3aa1d8c734 100644 --- a/tests/testdata/pile_arxiv-v1-loglikelihood_rolling +++ b/tests/testdata/pile_arxiv-v1-loglikelihood_rolling @@ -1 +1 @@ -814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec +814f9954e44368559602c00f7e85fa3971acdfd0315f508ec7df6318a79c55ec \ No newline at end of file diff --git a/tests/testdata/pile_arxiv-v1-res.json b/tests/testdata/pile_arxiv-v1-res.json index 649a9692b6..05cbab3873 100644 --- a/tests/testdata/pile_arxiv-v1-res.json +++ b/tests/testdata/pile_arxiv-v1-res.json @@ -1 +1 @@ -{"results": {"pile_arxiv": {"bits_per_byte": 1.55095665856779e-05, "byte_perplexity": 1.0000107504701365, "word_perplexity": 1.0000819333090385}}, "versions": {"pile_arxiv": 1}} +{"results": {"pile_arxiv": {"bits_per_byte": 1.55095665856779e-05, "byte_perplexity": 1.0000107504701365, "word_perplexity": 1.0000819333090385}}, "versions": {"pile_arxiv": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_bookcorpus2-v0-loglikelihood_rolling b/tests/testdata/pile_bookcorpus2-v0-loglikelihood_rolling index 5c93b16fdc..b37a91cc2d 100644 --- a/tests/testdata/pile_bookcorpus2-v0-loglikelihood_rolling +++ b/tests/testdata/pile_bookcorpus2-v0-loglikelihood_rolling @@ -1 +1 @@ -5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03 +5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03 \ No newline at end of file diff --git a/tests/testdata/pile_bookcorpus2-v0-res.json b/tests/testdata/pile_bookcorpus2-v0-res.json index f9791d82dc..698b03e8b3 100644 --- a/tests/testdata/pile_bookcorpus2-v0-res.json +++ b/tests/testdata/pile_bookcorpus2-v0-res.json @@ -1 +1 @@ -{"results": {"pile_bookcorpus2": {"bits_per_byte": 1.1631037706429144e-06, "byte_perplexity": 1.000001163104447, "word_perplexity": 1.0000066499426599}}, "versions": {"pile_bookcorpus2": 0}} +{"results": {"pile_bookcorpus2": {"bits_per_byte": 1.1631037706429144e-06, "byte_perplexity": 1.000001163104447, "word_perplexity": 1.0000066499426599}}, "versions": {"pile_bookcorpus2": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling b/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling index 5c93b16fdc..b37a91cc2d 100644 --- a/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling +++ b/tests/testdata/pile_bookcorpus2-v1-loglikelihood_rolling @@ -1 +1 @@ -5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03 +5c17ddfebeab8c41dabadb6fc216ceda91e3fe5dc95aaf1b2c843d7f11828b03 \ No newline at end of file diff --git a/tests/testdata/pile_bookcorpus2-v1-res.json b/tests/testdata/pile_bookcorpus2-v1-res.json index 58038eee33..967c14934b 100644 --- a/tests/testdata/pile_bookcorpus2-v1-res.json +++ b/tests/testdata/pile_bookcorpus2-v1-res.json @@ -1 +1 @@ -{"results": {"pile_bookcorpus2": {"bits_per_byte": 1.6780040419457868e-06, "byte_perplexity": 1.000001163104447, "word_perplexity": 1.0000066499426599}}, "versions": {"pile_bookcorpus2": 1}} +{"results": {"pile_bookcorpus2": {"bits_per_byte": 1.6780040419457868e-06, "byte_perplexity": 1.000001163104447, "word_perplexity": 1.0000066499426599}}, "versions": {"pile_bookcorpus2": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_books3-v0-loglikelihood_rolling b/tests/testdata/pile_books3-v0-loglikelihood_rolling index 91d876fce6..b483d3b45b 100644 --- a/tests/testdata/pile_books3-v0-loglikelihood_rolling +++ b/tests/testdata/pile_books3-v0-loglikelihood_rolling @@ -1 +1 @@ -0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410 +0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410 \ No newline at end of file diff --git a/tests/testdata/pile_books3-v0-res.json b/tests/testdata/pile_books3-v0-res.json index ea7459d072..df19cd0a18 100644 --- a/tests/testdata/pile_books3-v0-res.json +++ b/tests/testdata/pile_books3-v0-res.json @@ -1 +1 @@ -{"results": {"pile_books3": {"bits_per_byte": 8.942486206275221e-07, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 0}} +{"results": {"pile_books3": {"bits_per_byte": 8.942486206275221e-07, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_books3-v1-loglikelihood_rolling b/tests/testdata/pile_books3-v1-loglikelihood_rolling index 91d876fce6..b483d3b45b 100644 --- a/tests/testdata/pile_books3-v1-loglikelihood_rolling +++ b/tests/testdata/pile_books3-v1-loglikelihood_rolling @@ -1 +1 @@ -0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410 +0f8f36f705b999b6d55fa72ff89a82793dd1cb568ab1f8727a6a2086a12b9410 \ No newline at end of file diff --git a/tests/testdata/pile_books3-v1-res.json b/tests/testdata/pile_books3-v1-res.json index 75e90bf524..6ff7a51711 100644 --- a/tests/testdata/pile_books3-v1-res.json +++ b/tests/testdata/pile_books3-v1-res.json @@ -1 +1 @@ -{"results": {"pile_books3": {"bits_per_byte": 1.2901280503011222e-06, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 1}} +{"results": {"pile_books3": {"bits_per_byte": 1.2901280503011222e-06, "byte_perplexity": 1.0000008942490204, "word_perplexity": 1.0000052870063607}}, "versions": {"pile_books3": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_dm-mathematics-v0-loglikelihood_rolling b/tests/testdata/pile_dm-mathematics-v0-loglikelihood_rolling index 728aed27d3..2fb27786c5 100644 --- a/tests/testdata/pile_dm-mathematics-v0-loglikelihood_rolling +++ b/tests/testdata/pile_dm-mathematics-v0-loglikelihood_rolling @@ -1 +1 @@ -d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6 +d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6 \ No newline at end of file diff --git a/tests/testdata/pile_dm-mathematics-v0-res.json b/tests/testdata/pile_dm-mathematics-v0-res.json index 86fc412583..860aa06c97 100644 --- a/tests/testdata/pile_dm-mathematics-v0-res.json +++ b/tests/testdata/pile_dm-mathematics-v0-res.json @@ -1 +1 @@ -{"results": {"pile_dm-mathematics": {"bits_per_byte": 6.176600873627999e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 0}} +{"results": {"pile_dm-mathematics": {"bits_per_byte": 6.176600873627999e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling b/tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling index 728aed27d3..2fb27786c5 100644 --- a/tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling +++ b/tests/testdata/pile_dm-mathematics-v1-loglikelihood_rolling @@ -1 +1 @@ -d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6 +d5b7967c0ece8b816f3921a8bd0fad23365349e935b491595e2ad1135af42da6 \ No newline at end of file diff --git a/tests/testdata/pile_dm-mathematics-v1-res.json b/tests/testdata/pile_dm-mathematics-v1-res.json index e4c44507cf..192e9066a4 100644 --- a/tests/testdata/pile_dm-mathematics-v1-res.json +++ b/tests/testdata/pile_dm-mathematics-v1-res.json @@ -1 +1 @@ -{"results": {"pile_dm-mathematics": {"bits_per_byte": 8.910951449933553e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 1}} +{"results": {"pile_dm-mathematics": {"bits_per_byte": 8.910951449933553e-05, "byte_perplexity": 1.0000617679162955, "word_perplexity": 1.0002875035042451}}, "versions": {"pile_dm-mathematics": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_enron-v0-loglikelihood_rolling b/tests/testdata/pile_enron-v0-loglikelihood_rolling index 374580107d..57dbe76460 100644 --- a/tests/testdata/pile_enron-v0-loglikelihood_rolling +++ b/tests/testdata/pile_enron-v0-loglikelihood_rolling @@ -1 +1 @@ -4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a +4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a \ No newline at end of file diff --git a/tests/testdata/pile_enron-v0-res.json b/tests/testdata/pile_enron-v0-res.json index c26a9322a5..a4a49493d5 100644 --- a/tests/testdata/pile_enron-v0-res.json +++ b/tests/testdata/pile_enron-v0-res.json @@ -1 +1 @@ -{"results": {"pile_enron": {"bits_per_byte": 0.0003163902828673244, "byte_perplexity": 1.000316440339552, "word_perplexity": 1.00224668051869}}, "versions": {"pile_enron": 0}} +{"results": {"pile_enron": {"bits_per_byte": 0.0003163902828673244, "byte_perplexity": 1.000316440339552, "word_perplexity": 1.00224668051869}}, "versions": {"pile_enron": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_enron-v1-loglikelihood_rolling b/tests/testdata/pile_enron-v1-loglikelihood_rolling index 374580107d..57dbe76460 100644 --- a/tests/testdata/pile_enron-v1-loglikelihood_rolling +++ b/tests/testdata/pile_enron-v1-loglikelihood_rolling @@ -1 +1 @@ -4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a +4baa6ccdc9e3aa9921675ab4400d5e89d7b546b844a8ea28f6461d649066418a \ No newline at end of file diff --git a/tests/testdata/pile_enron-v1-res.json b/tests/testdata/pile_enron-v1-res.json index 257e3d0b06..abe7b45f9a 100644 --- a/tests/testdata/pile_enron-v1-res.json +++ b/tests/testdata/pile_enron-v1-res.json @@ -1 +1 @@ -{"results": {"pile_enron": {"bits_per_byte": 0.0004564546920781453, "byte_perplexity": 1.000316440339552, "word_perplexity": 1.00224668051869}}, "versions": {"pile_enron": 1}} +{"results": {"pile_enron": {"bits_per_byte": 0.0004564546920781453, "byte_perplexity": 1.000316440339552, "word_perplexity": 1.00224668051869}}, "versions": {"pile_enron": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_europarl-v0-loglikelihood_rolling b/tests/testdata/pile_europarl-v0-loglikelihood_rolling index beb1f6fbed..8027260755 100644 --- a/tests/testdata/pile_europarl-v0-loglikelihood_rolling +++ b/tests/testdata/pile_europarl-v0-loglikelihood_rolling @@ -1 +1 @@ -e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f +e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f \ No newline at end of file diff --git a/tests/testdata/pile_europarl-v0-res.json b/tests/testdata/pile_europarl-v0-res.json index d6fd7d406e..4c53edd2ce 100644 --- a/tests/testdata/pile_europarl-v0-res.json +++ b/tests/testdata/pile_europarl-v0-res.json @@ -1 +1 @@ -{"results": {"pile_europarl": {"bits_per_byte": 8.648858203555344e-06, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 0}} +{"results": {"pile_europarl": {"bits_per_byte": 8.648858203555344e-06, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_europarl-v1-loglikelihood_rolling b/tests/testdata/pile_europarl-v1-loglikelihood_rolling index beb1f6fbed..8027260755 100644 --- a/tests/testdata/pile_europarl-v1-loglikelihood_rolling +++ b/tests/testdata/pile_europarl-v1-loglikelihood_rolling @@ -1 +1 @@ -e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f +e67d3dbccd47d308bfc5b0e66b76d0dfc5e386ebfa94e056562c2281c395543f \ No newline at end of file diff --git a/tests/testdata/pile_europarl-v1-res.json b/tests/testdata/pile_europarl-v1-res.json index d9c45675a3..b948f0d369 100644 --- a/tests/testdata/pile_europarl-v1-res.json +++ b/tests/testdata/pile_europarl-v1-res.json @@ -1 +1 @@ -{"results": {"pile_europarl": {"bits_per_byte": 1.2477664839621123e-05, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 1}} +{"results": {"pile_europarl": {"bits_per_byte": 1.2477664839621123e-05, "byte_perplexity": 1.000008648895605, "word_perplexity": 1.000063506523818}}, "versions": {"pile_europarl": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_freelaw-v0-loglikelihood_rolling b/tests/testdata/pile_freelaw-v0-loglikelihood_rolling index c8e1cd2e38..7b5771f491 100644 --- a/tests/testdata/pile_freelaw-v0-loglikelihood_rolling +++ b/tests/testdata/pile_freelaw-v0-loglikelihood_rolling @@ -1 +1 @@ -d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1 +d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1 \ No newline at end of file diff --git a/tests/testdata/pile_freelaw-v0-res.json b/tests/testdata/pile_freelaw-v0-res.json index cff04889f9..0bda41ffb3 100644 --- a/tests/testdata/pile_freelaw-v0-res.json +++ b/tests/testdata/pile_freelaw-v0-res.json @@ -1 +1 @@ -{"results": {"pile_freelaw": {"bits_per_byte": 3.16238943008513e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 0}} +{"results": {"pile_freelaw": {"bits_per_byte": 3.16238943008513e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_freelaw-v1-loglikelihood_rolling b/tests/testdata/pile_freelaw-v1-loglikelihood_rolling index c8e1cd2e38..7b5771f491 100644 --- a/tests/testdata/pile_freelaw-v1-loglikelihood_rolling +++ b/tests/testdata/pile_freelaw-v1-loglikelihood_rolling @@ -1 +1 @@ -d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1 +d77f3f68aadd6cbf1290c2f6737b2ed5d5c2a60e4c81a65c280f207783caabe1 \ No newline at end of file diff --git a/tests/testdata/pile_freelaw-v1-res.json b/tests/testdata/pile_freelaw-v1-res.json index e4831db8ad..dd0e0bac36 100644 --- a/tests/testdata/pile_freelaw-v1-res.json +++ b/tests/testdata/pile_freelaw-v1-res.json @@ -1 +1 @@ -{"results": {"pile_freelaw": {"bits_per_byte": 4.5623635481434923e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 1}} +{"results": {"pile_freelaw": {"bits_per_byte": 4.5623635481434923e-05, "byte_perplexity": 1.0000316243943415, "word_perplexity": 1.000203169094218}}, "versions": {"pile_freelaw": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_github-v0-loglikelihood_rolling b/tests/testdata/pile_github-v0-loglikelihood_rolling index 98e197e1e6..cf8251e4f6 100644 --- a/tests/testdata/pile_github-v0-loglikelihood_rolling +++ b/tests/testdata/pile_github-v0-loglikelihood_rolling @@ -1 +1 @@ -df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639 +df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639 \ No newline at end of file diff --git a/tests/testdata/pile_github-v0-res.json b/tests/testdata/pile_github-v0-res.json index 8f999e5d28..bdabf39969 100644 --- a/tests/testdata/pile_github-v0-res.json +++ b/tests/testdata/pile_github-v0-res.json @@ -1 +1 @@ -{"results": {"pile_github": {"bits_per_byte": 9.540627613754646e-05, "byte_perplexity": 1.0000954108274611, "word_perplexity": 1.0009643183931227}}, "versions": {"pile_github": 0}} +{"results": {"pile_github": {"bits_per_byte": 9.540627613754646e-05, "byte_perplexity": 1.0000954108274611, "word_perplexity": 1.0009643183931227}}, "versions": {"pile_github": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_github-v1-loglikelihood_rolling b/tests/testdata/pile_github-v1-loglikelihood_rolling index 98e197e1e6..cf8251e4f6 100644 --- a/tests/testdata/pile_github-v1-loglikelihood_rolling +++ b/tests/testdata/pile_github-v1-loglikelihood_rolling @@ -1 +1 @@ -df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639 +df384c3df3d8f53273e97127c5bb84c17e638acad7d6bc9c91f6dee96d43b639 \ No newline at end of file diff --git a/tests/testdata/pile_github-v1-res.json b/tests/testdata/pile_github-v1-res.json index 4835ab2df7..cc06a45501 100644 --- a/tests/testdata/pile_github-v1-res.json +++ b/tests/testdata/pile_github-v1-res.json @@ -1 +1 @@ -{"results": {"pile_github": {"bits_per_byte": 0.00013764216145332133, "byte_perplexity": 1.0000954108274611, "word_perplexity": 1.0009643183931227}}, "versions": {"pile_github": 1}} +{"results": {"pile_github": {"bits_per_byte": 0.00013764216145332133, "byte_perplexity": 1.0000954108274611, "word_perplexity": 1.0009643183931227}}, "versions": {"pile_github": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_gutenberg-v0-loglikelihood_rolling b/tests/testdata/pile_gutenberg-v0-loglikelihood_rolling index 73017a7f73..bd7b15927f 100644 --- a/tests/testdata/pile_gutenberg-v0-loglikelihood_rolling +++ b/tests/testdata/pile_gutenberg-v0-loglikelihood_rolling @@ -1 +1 @@ -02a559f74a9105145e7d4d9c5ddea372b5b4938f5368dc8ffafc39cbe3b4c7ef +02a559f74a9105145e7d4d9c5ddea372b5b4938f5368dc8ffafc39cbe3b4c7ef \ No newline at end of file diff --git a/tests/testdata/pile_gutenberg-v0-res.json b/tests/testdata/pile_gutenberg-v0-res.json index f5b866ba76..757ef06f79 100644 --- a/tests/testdata/pile_gutenberg-v0-res.json +++ b/tests/testdata/pile_gutenberg-v0-res.json @@ -1 +1 @@ -{"results": {"pile_gutenberg": {"bits_per_byte": 1.2443606332351536e-06, "byte_perplexity": 1.0000012443614075, "word_perplexity": 1.0000072174665404}}, "versions": {"pile_gutenberg": 0}} +{"results": {"pile_gutenberg": {"bits_per_byte": 1.2443606332351536e-06, "byte_perplexity": 1.0000012443614075, "word_perplexity": 1.0000072174665404}}, "versions": {"pile_gutenberg": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_gutenberg-v1-loglikelihood_rolling b/tests/testdata/pile_gutenberg-v1-loglikelihood_rolling index 73017a7f73..bd7b15927f 100644 --- a/tests/testdata/pile_gutenberg-v1-loglikelihood_rolling +++ b/tests/testdata/pile_gutenberg-v1-loglikelihood_rolling @@ -1 +1 @@ -02a559f74a9105145e7d4d9c5ddea372b5b4938f5368dc8ffafc39cbe3b4c7ef +02a559f74a9105145e7d4d9c5ddea372b5b4938f5368dc8ffafc39cbe3b4c7ef \ No newline at end of file diff --git a/tests/testdata/pile_gutenberg-v1-res.json b/tests/testdata/pile_gutenberg-v1-res.json index 92add0a29b..6d22ed3ff5 100644 --- a/tests/testdata/pile_gutenberg-v1-res.json +++ b/tests/testdata/pile_gutenberg-v1-res.json @@ -1 +1 @@ -{"results": {"pile_gutenberg": {"bits_per_byte": 1.7952329146458065e-06, "byte_perplexity": 1.0000012443614075, "word_perplexity": 1.0000072174665404}}, "versions": {"pile_gutenberg": 1}} +{"results": {"pile_gutenberg": {"bits_per_byte": 1.7952329146458065e-06, "byte_perplexity": 1.0000012443614075, "word_perplexity": 1.0000072174665404}}, "versions": {"pile_gutenberg": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_hackernews-v0-loglikelihood_rolling b/tests/testdata/pile_hackernews-v0-loglikelihood_rolling index 9298a46425..48b767bfe7 100644 --- a/tests/testdata/pile_hackernews-v0-loglikelihood_rolling +++ b/tests/testdata/pile_hackernews-v0-loglikelihood_rolling @@ -1 +1 @@ -ec1082ee5a5326e0d57aa4e73b634937140c1de9af95f154e8ab57b05d9b422b +ec1082ee5a5326e0d57aa4e73b634937140c1de9af95f154e8ab57b05d9b422b \ No newline at end of file diff --git a/tests/testdata/pile_hackernews-v0-res.json b/tests/testdata/pile_hackernews-v0-res.json index e2deadbd78..68578fe4c9 100644 --- a/tests/testdata/pile_hackernews-v0-res.json +++ b/tests/testdata/pile_hackernews-v0-res.json @@ -1 +1 @@ -{"results": {"pile_hackernews": {"bits_per_byte": 0.00010170276359193358, "byte_perplexity": 1.0001017079354932, "word_perplexity": 1.0006273924348839}}, "versions": {"pile_hackernews": 0}} +{"results": {"pile_hackernews": {"bits_per_byte": 0.00010170276359193358, "byte_perplexity": 1.0001017079354932, "word_perplexity": 1.0006273924348839}}, "versions": {"pile_hackernews": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_hackernews-v1-loglikelihood_rolling b/tests/testdata/pile_hackernews-v1-loglikelihood_rolling index 9298a46425..48b767bfe7 100644 --- a/tests/testdata/pile_hackernews-v1-loglikelihood_rolling +++ b/tests/testdata/pile_hackernews-v1-loglikelihood_rolling @@ -1 +1 @@ -ec1082ee5a5326e0d57aa4e73b634937140c1de9af95f154e8ab57b05d9b422b +ec1082ee5a5326e0d57aa4e73b634937140c1de9af95f154e8ab57b05d9b422b \ No newline at end of file diff --git a/tests/testdata/pile_hackernews-v1-res.json b/tests/testdata/pile_hackernews-v1-res.json index 46aeb3a266..ea135278b7 100644 --- a/tests/testdata/pile_hackernews-v1-res.json +++ b/tests/testdata/pile_hackernews-v1-res.json @@ -1 +1 @@ -{"results": {"pile_hackernews": {"bits_per_byte": 0.00014672607267878518, "byte_perplexity": 1.0001017079354932, "word_perplexity": 1.0006273924348839}}, "versions": {"pile_hackernews": 1}} +{"results": {"pile_hackernews": {"bits_per_byte": 0.00014672607267878518, "byte_perplexity": 1.0001017079354932, "word_perplexity": 1.0006273924348839}}, "versions": {"pile_hackernews": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_nih-exporter-v0-loglikelihood_rolling b/tests/testdata/pile_nih-exporter-v0-loglikelihood_rolling index ffc7508b50..5f76588a81 100644 --- a/tests/testdata/pile_nih-exporter-v0-loglikelihood_rolling +++ b/tests/testdata/pile_nih-exporter-v0-loglikelihood_rolling @@ -1 +1 @@ -520ea6e04e8a39dc0b5f63a837429a78a40e63d39d109096101feb8c5b2cf8d8 +520ea6e04e8a39dc0b5f63a837429a78a40e63d39d109096101feb8c5b2cf8d8 \ No newline at end of file diff --git a/tests/testdata/pile_nih-exporter-v0-res.json b/tests/testdata/pile_nih-exporter-v0-res.json index 66312fe60b..1c7bb56c6d 100644 --- a/tests/testdata/pile_nih-exporter-v0-res.json +++ b/tests/testdata/pile_nih-exporter-v0-res.json @@ -1 +1 @@ -{"results": {"pile_nih-exporter": {"bits_per_byte": 0.00024394433346975716, "byte_perplexity": 1.0002439740903082, "word_perplexity": 1.0016712202288802}}, "versions": {"pile_nih-exporter": 0}} +{"results": {"pile_nih-exporter": {"bits_per_byte": 0.00024394433346975716, "byte_perplexity": 1.0002439740903082, "word_perplexity": 1.0016712202288802}}, "versions": {"pile_nih-exporter": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_nih-exporter-v1-loglikelihood_rolling b/tests/testdata/pile_nih-exporter-v1-loglikelihood_rolling index ffc7508b50..5f76588a81 100644 --- a/tests/testdata/pile_nih-exporter-v1-loglikelihood_rolling +++ b/tests/testdata/pile_nih-exporter-v1-loglikelihood_rolling @@ -1 +1 @@ -520ea6e04e8a39dc0b5f63a837429a78a40e63d39d109096101feb8c5b2cf8d8 +520ea6e04e8a39dc0b5f63a837429a78a40e63d39d109096101feb8c5b2cf8d8 \ No newline at end of file diff --git a/tests/testdata/pile_nih-exporter-v1-res.json b/tests/testdata/pile_nih-exporter-v1-res.json index f7135da8d7..0e40fc8268 100644 --- a/tests/testdata/pile_nih-exporter-v1-res.json +++ b/tests/testdata/pile_nih-exporter-v1-res.json @@ -1 +1 @@ -{"results": {"pile_nih-exporter": {"bits_per_byte": 0.00035193728014978225, "byte_perplexity": 1.0002439740903082, "word_perplexity": 1.0016712202288802}}, "versions": {"pile_nih-exporter": 1}} +{"results": {"pile_nih-exporter": {"bits_per_byte": 0.00035193728014978225, "byte_perplexity": 1.0002439740903082, "word_perplexity": 1.0016712202288802}}, "versions": {"pile_nih-exporter": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling b/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling index 059c1ee54a..47805d3b5f 100644 --- a/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling +++ b/tests/testdata/pile_opensubtitles-v0-loglikelihood_rolling @@ -1 +1 @@ -0f1c23a1f4ddec0c2b1ff34de8d1505b0eb9e2868d8edbcc1b6de13d02f32036 +0f1c23a1f4ddec0c2b1ff34de8d1505b0eb9e2868d8edbcc1b6de13d02f32036 \ No newline at end of file diff --git a/tests/testdata/pile_opensubtitles-v0-res.json b/tests/testdata/pile_opensubtitles-v0-res.json index 27d85cd66c..f718e515ba 100644 --- a/tests/testdata/pile_opensubtitles-v0-res.json +++ b/tests/testdata/pile_opensubtitles-v0-res.json @@ -1 +1 @@ -{"results": {"pile_opensubtitles": {"bits_per_byte": 1.5213441136639177e-05, "byte_perplexity": 1.0000152135568616, "word_perplexity": 1.0000856162053249}}, "versions": {"pile_opensubtitles": 0}} +{"results": {"pile_opensubtitles": {"bits_per_byte": 1.5213441136639177e-05, "byte_perplexity": 1.0000152135568616, "word_perplexity": 1.0000856162053249}}, "versions": {"pile_opensubtitles": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling b/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling index 059c1ee54a..47805d3b5f 100644 --- a/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling +++ b/tests/testdata/pile_opensubtitles-v1-loglikelihood_rolling @@ -1 +1 @@ -0f1c23a1f4ddec0c2b1ff34de8d1505b0eb9e2868d8edbcc1b6de13d02f32036 +0f1c23a1f4ddec0c2b1ff34de8d1505b0eb9e2868d8edbcc1b6de13d02f32036 \ No newline at end of file diff --git a/tests/testdata/pile_opensubtitles-v1-res.json b/tests/testdata/pile_opensubtitles-v1-res.json index 16f0881b8e..1468294732 100644 --- a/tests/testdata/pile_opensubtitles-v1-res.json +++ b/tests/testdata/pile_opensubtitles-v1-res.json @@ -1 +1 @@ -{"results": {"pile_opensubtitles": {"bits_per_byte": 2.1948356082685497e-05, "byte_perplexity": 1.0000152135568616, "word_perplexity": 1.0000856162053249}}, "versions": {"pile_opensubtitles": 1}} +{"results": {"pile_opensubtitles": {"bits_per_byte": 2.1948356082685497e-05, "byte_perplexity": 1.0000152135568616, "word_perplexity": 1.0000856162053249}}, "versions": {"pile_opensubtitles": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_openwebtext2-v0-loglikelihood_rolling b/tests/testdata/pile_openwebtext2-v0-loglikelihood_rolling index 8b66d7a70f..22046e4405 100644 --- a/tests/testdata/pile_openwebtext2-v0-loglikelihood_rolling +++ b/tests/testdata/pile_openwebtext2-v0-loglikelihood_rolling @@ -1 +1 @@ -5d6c19665f429ab1ccbe027da67f42bdaf219f819ab093673976eee55e015ff4 +5d6c19665f429ab1ccbe027da67f42bdaf219f819ab093673976eee55e015ff4 \ No newline at end of file diff --git a/tests/testdata/pile_openwebtext2-v0-res.json b/tests/testdata/pile_openwebtext2-v0-res.json index 187af3c076..ead8d0b0bf 100644 --- a/tests/testdata/pile_openwebtext2-v0-res.json +++ b/tests/testdata/pile_openwebtext2-v0-res.json @@ -1 +1 @@ -{"results": {"pile_openwebtext2": {"bits_per_byte": 0.00012809520662477846, "byte_perplexity": 1.000128103411166, "word_perplexity": 1.0007951516532847}}, "versions": {"pile_openwebtext2": 0}} +{"results": {"pile_openwebtext2": {"bits_per_byte": 0.00012809520662477846, "byte_perplexity": 1.000128103411166, "word_perplexity": 1.0007951516532847}}, "versions": {"pile_openwebtext2": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_openwebtext2-v1-loglikelihood_rolling b/tests/testdata/pile_openwebtext2-v1-loglikelihood_rolling index 8b66d7a70f..22046e4405 100644 --- a/tests/testdata/pile_openwebtext2-v1-loglikelihood_rolling +++ b/tests/testdata/pile_openwebtext2-v1-loglikelihood_rolling @@ -1 +1 @@ -5d6c19665f429ab1ccbe027da67f42bdaf219f819ab093673976eee55e015ff4 +5d6c19665f429ab1ccbe027da67f42bdaf219f819ab093673976eee55e015ff4 \ No newline at end of file diff --git a/tests/testdata/pile_openwebtext2-v1-res.json b/tests/testdata/pile_openwebtext2-v1-res.json index 5718273e0a..ca433e3c85 100644 --- a/tests/testdata/pile_openwebtext2-v1-res.json +++ b/tests/testdata/pile_openwebtext2-v1-res.json @@ -1 +1 @@ -{"results": {"pile_openwebtext2": {"bits_per_byte": 0.000184802319359215, "byte_perplexity": 1.000128103411166, "word_perplexity": 1.0007951516532847}}, "versions": {"pile_openwebtext2": 1}} +{"results": {"pile_openwebtext2": {"bits_per_byte": 0.000184802319359215, "byte_perplexity": 1.000128103411166, "word_perplexity": 1.0007951516532847}}, "versions": {"pile_openwebtext2": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_philpapers-v0-loglikelihood_rolling b/tests/testdata/pile_philpapers-v0-loglikelihood_rolling index 5719d858e2..4fbbc241ba 100644 --- a/tests/testdata/pile_philpapers-v0-loglikelihood_rolling +++ b/tests/testdata/pile_philpapers-v0-loglikelihood_rolling @@ -1 +1 @@ -339ba5d8c044c4a3ff9b9a8eaa24da1d6c01b72972074eb671a7da049eeb7047 +339ba5d8c044c4a3ff9b9a8eaa24da1d6c01b72972074eb671a7da049eeb7047 \ No newline at end of file diff --git a/tests/testdata/pile_philpapers-v0-res.json b/tests/testdata/pile_philpapers-v0-res.json index 31c6775002..be561fe2f8 100644 --- a/tests/testdata/pile_philpapers-v0-res.json +++ b/tests/testdata/pile_philpapers-v0-res.json @@ -1 +1 @@ -{"results": {"pile_philpapers": {"bits_per_byte": 6.241575895982095e-06, "byte_perplexity": 1.0000062415953748, "word_perplexity": 1.0000409888564146}}, "versions": {"pile_philpapers": 0}} +{"results": {"pile_philpapers": {"bits_per_byte": 6.241575895982095e-06, "byte_perplexity": 1.0000062415953748, "word_perplexity": 1.0000409888564146}}, "versions": {"pile_philpapers": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_philpapers-v1-loglikelihood_rolling b/tests/testdata/pile_philpapers-v1-loglikelihood_rolling index 5719d858e2..4fbbc241ba 100644 --- a/tests/testdata/pile_philpapers-v1-loglikelihood_rolling +++ b/tests/testdata/pile_philpapers-v1-loglikelihood_rolling @@ -1 +1 @@ -339ba5d8c044c4a3ff9b9a8eaa24da1d6c01b72972074eb671a7da049eeb7047 +339ba5d8c044c4a3ff9b9a8eaa24da1d6c01b72972074eb671a7da049eeb7047 \ No newline at end of file diff --git a/tests/testdata/pile_philpapers-v1-res.json b/tests/testdata/pile_philpapers-v1-res.json index c4ae2664ff..5a2f77678a 100644 --- a/tests/testdata/pile_philpapers-v1-res.json +++ b/tests/testdata/pile_philpapers-v1-res.json @@ -1 +1 @@ -{"results": {"pile_philpapers": {"bits_per_byte": 9.004690592465457e-06, "byte_perplexity": 1.0000062415953748, "word_perplexity": 1.0000409888564146}}, "versions": {"pile_philpapers": 1}} +{"results": {"pile_philpapers": {"bits_per_byte": 9.004690592465457e-06, "byte_perplexity": 1.0000062415953748, "word_perplexity": 1.0000409888564146}}, "versions": {"pile_philpapers": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling b/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling index 13ed12a480..d5369ed3c9 100644 --- a/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling +++ b/tests/testdata/pile_pile-cc-v0-loglikelihood_rolling @@ -1 +1 @@ -731fdef4a43949b179ba0c540148ebc2fa41583dd583ef580dd812076c66a451 +731fdef4a43949b179ba0c540148ebc2fa41583dd583ef580dd812076c66a451 \ No newline at end of file diff --git a/tests/testdata/pile_pile-cc-v0-res.json b/tests/testdata/pile_pile-cc-v0-res.json index b115ee6e40..383233f259 100644 --- a/tests/testdata/pile_pile-cc-v0-res.json +++ b/tests/testdata/pile_pile-cc-v0-res.json @@ -1 +1 @@ -{"results": {"pile_pile-cc": {"bits_per_byte": 0.00011234131907228174, "byte_perplexity": 1.0001123476295946, "word_perplexity": 1.0006738958554477}}, "versions": {"pile_pile-cc": 0}} +{"results": {"pile_pile-cc": {"bits_per_byte": 0.00011234131907228174, "byte_perplexity": 1.0001123476295946, "word_perplexity": 1.0006738958554477}}, "versions": {"pile_pile-cc": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_pile-cc-v1-loglikelihood_rolling b/tests/testdata/pile_pile-cc-v1-loglikelihood_rolling index 13ed12a480..d5369ed3c9 100644 --- a/tests/testdata/pile_pile-cc-v1-loglikelihood_rolling +++ b/tests/testdata/pile_pile-cc-v1-loglikelihood_rolling @@ -1 +1 @@ -731fdef4a43949b179ba0c540148ebc2fa41583dd583ef580dd812076c66a451 +731fdef4a43949b179ba0c540148ebc2fa41583dd583ef580dd812076c66a451 \ No newline at end of file diff --git a/tests/testdata/pile_pile-cc-v1-res.json b/tests/testdata/pile_pile-cc-v1-res.json index c14dcba1c2..bd2772e32a 100644 --- a/tests/testdata/pile_pile-cc-v1-res.json +++ b/tests/testdata/pile_pile-cc-v1-res.json @@ -1 +1 @@ -{"results": {"pile_pile-cc": {"bits_per_byte": 0.0001620742639125056, "byte_perplexity": 1.0001123476295946, "word_perplexity": 1.0006738958554477}}, "versions": {"pile_pile-cc": 1}} +{"results": {"pile_pile-cc": {"bits_per_byte": 0.0001620742639125056, "byte_perplexity": 1.0001123476295946, "word_perplexity": 1.0006738958554477}}, "versions": {"pile_pile-cc": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-abstracts-v0-loglikelihood_rolling b/tests/testdata/pile_pubmed-abstracts-v0-loglikelihood_rolling index 61e6ef3e2e..de5660d60a 100644 --- a/tests/testdata/pile_pubmed-abstracts-v0-loglikelihood_rolling +++ b/tests/testdata/pile_pubmed-abstracts-v0-loglikelihood_rolling @@ -1 +1 @@ -66436569a43163afb2caf422d32c5f329899e74c49865d4d13881fd465fd9976 +66436569a43163afb2caf422d32c5f329899e74c49865d4d13881fd465fd9976 \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-abstracts-v0-res.json b/tests/testdata/pile_pubmed-abstracts-v0-res.json index 9b13a860a4..333c2970fa 100644 --- a/tests/testdata/pile_pubmed-abstracts-v0-res.json +++ b/tests/testdata/pile_pubmed-abstracts-v0-res.json @@ -1 +1 @@ -{"results": {"pile_pubmed-abstracts": {"bits_per_byte": 0.00037553733051528816, "byte_perplexity": 1.0003756078534862, "word_perplexity": 1.0025884332779}}, "versions": {"pile_pubmed-abstracts": 0}} +{"results": {"pile_pubmed-abstracts": {"bits_per_byte": 0.00037553733051528816, "byte_perplexity": 1.0003756078534862, "word_perplexity": 1.0025884332779}}, "versions": {"pile_pubmed-abstracts": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-abstracts-v1-loglikelihood_rolling b/tests/testdata/pile_pubmed-abstracts-v1-loglikelihood_rolling index 61e6ef3e2e..de5660d60a 100644 --- a/tests/testdata/pile_pubmed-abstracts-v1-loglikelihood_rolling +++ b/tests/testdata/pile_pubmed-abstracts-v1-loglikelihood_rolling @@ -1 +1 @@ -66436569a43163afb2caf422d32c5f329899e74c49865d4d13881fd465fd9976 +66436569a43163afb2caf422d32c5f329899e74c49865d4d13881fd465fd9976 \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-abstracts-v1-res.json b/tests/testdata/pile_pubmed-abstracts-v1-res.json index 9a3736f685..21b6bb451f 100644 --- a/tests/testdata/pile_pubmed-abstracts-v1-res.json +++ b/tests/testdata/pile_pubmed-abstracts-v1-res.json @@ -1 +1 @@ -{"results": {"pile_pubmed-abstracts": {"bits_per_byte": 0.0005417858444030858, "byte_perplexity": 1.0003756078534862, "word_perplexity": 1.0025884332779}}, "versions": {"pile_pubmed-abstracts": 1}} +{"results": {"pile_pubmed-abstracts": {"bits_per_byte": 0.0005417858444030858, "byte_perplexity": 1.0003756078534862, "word_perplexity": 1.0025884332779}}, "versions": {"pile_pubmed-abstracts": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-central-v0-loglikelihood_rolling b/tests/testdata/pile_pubmed-central-v0-loglikelihood_rolling index d2bb7746ec..283109f32e 100644 --- a/tests/testdata/pile_pubmed-central-v0-loglikelihood_rolling +++ b/tests/testdata/pile_pubmed-central-v0-loglikelihood_rolling @@ -1 +1 @@ -40b39d120d99a145690444e86acc3e3e24d41e6e0538a75e26929ad84926e5e0 +40b39d120d99a145690444e86acc3e3e24d41e6e0538a75e26929ad84926e5e0 \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-central-v0-res.json b/tests/testdata/pile_pubmed-central-v0-res.json index 0e4cd3b916..6e5f1efe49 100644 --- a/tests/testdata/pile_pubmed-central-v0-res.json +++ b/tests/testdata/pile_pubmed-central-v0-res.json @@ -1 +1 @@ -{"results": {"pile_pubmed-central": {"bits_per_byte": 1.5812411832795375e-05, "byte_perplexity": 1.0000158125368497, "word_perplexity": 1.000123107107861}}, "versions": {"pile_pubmed-central": 0}} +{"results": {"pile_pubmed-central": {"bits_per_byte": 1.5812411832795375e-05, "byte_perplexity": 1.0000158125368497, "word_perplexity": 1.000123107107861}}, "versions": {"pile_pubmed-central": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-central-v1-loglikelihood_rolling b/tests/testdata/pile_pubmed-central-v1-loglikelihood_rolling index d2bb7746ec..283109f32e 100644 --- a/tests/testdata/pile_pubmed-central-v1-loglikelihood_rolling +++ b/tests/testdata/pile_pubmed-central-v1-loglikelihood_rolling @@ -1 +1 @@ -40b39d120d99a145690444e86acc3e3e24d41e6e0538a75e26929ad84926e5e0 +40b39d120d99a145690444e86acc3e3e24d41e6e0538a75e26929ad84926e5e0 \ No newline at end of file diff --git a/tests/testdata/pile_pubmed-central-v1-res.json b/tests/testdata/pile_pubmed-central-v1-res.json index be7fb6a056..4d4a241ace 100644 --- a/tests/testdata/pile_pubmed-central-v1-res.json +++ b/tests/testdata/pile_pubmed-central-v1-res.json @@ -1 +1 @@ -{"results": {"pile_pubmed-central": {"bits_per_byte": 2.2812488135667854e-05, "byte_perplexity": 1.0000158125368497, "word_perplexity": 1.000123107107861}}, "versions": {"pile_pubmed-central": 1}} +{"results": {"pile_pubmed-central": {"bits_per_byte": 2.2812488135667854e-05, "byte_perplexity": 1.0000158125368497, "word_perplexity": 1.000123107107861}}, "versions": {"pile_pubmed-central": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling b/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling index fba76fc9fb..dcf0e64cf0 100644 --- a/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling +++ b/tests/testdata/pile_stackexchange-v0-loglikelihood_rolling @@ -1 +1 @@ -e524bfb3e21cbdaddc117403a50df598520c7bf5b2c60ad8f2372cfa564e79be +e524bfb3e21cbdaddc117403a50df598520c7bf5b2c60ad8f2372cfa564e79be \ No newline at end of file diff --git a/tests/testdata/pile_stackexchange-v0-res.json b/tests/testdata/pile_stackexchange-v0-res.json index 8c64f66569..76fdd0a6dd 100644 --- a/tests/testdata/pile_stackexchange-v0-res.json +++ b/tests/testdata/pile_stackexchange-v0-res.json @@ -1 +1 @@ -{"results": {"pile_stackexchange": {"bits_per_byte": 0.0002288815898835956, "byte_perplexity": 1.0002289077852733, "word_perplexity": 1.0016993562258851}}, "versions": {"pile_stackexchange": 0}} +{"results": {"pile_stackexchange": {"bits_per_byte": 0.0002288815898835956, "byte_perplexity": 1.0002289077852733, "word_perplexity": 1.0016993562258851}}, "versions": {"pile_stackexchange": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling b/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling index fba76fc9fb..dcf0e64cf0 100644 --- a/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling +++ b/tests/testdata/pile_stackexchange-v1-loglikelihood_rolling @@ -1 +1 @@ -e524bfb3e21cbdaddc117403a50df598520c7bf5b2c60ad8f2372cfa564e79be +e524bfb3e21cbdaddc117403a50df598520c7bf5b2c60ad8f2372cfa564e79be \ No newline at end of file diff --git a/tests/testdata/pile_stackexchange-v1-res.json b/tests/testdata/pile_stackexchange-v1-res.json index aa550a119d..2773302990 100644 --- a/tests/testdata/pile_stackexchange-v1-res.json +++ b/tests/testdata/pile_stackexchange-v1-res.json @@ -1 +1 @@ -{"results": {"pile_stackexchange": {"bits_per_byte": 0.0003302063346758449, "byte_perplexity": 1.0002289077852733, "word_perplexity": 1.0016993562258851}}, "versions": {"pile_stackexchange": 1}} +{"results": {"pile_stackexchange": {"bits_per_byte": 0.0003302063346758449, "byte_perplexity": 1.0002289077852733, "word_perplexity": 1.0016993562258851}}, "versions": {"pile_stackexchange": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_ubuntu-irc-v0-loglikelihood_rolling b/tests/testdata/pile_ubuntu-irc-v0-loglikelihood_rolling index cdbcfd3fd4..ce04199863 100644 --- a/tests/testdata/pile_ubuntu-irc-v0-loglikelihood_rolling +++ b/tests/testdata/pile_ubuntu-irc-v0-loglikelihood_rolling @@ -1 +1 @@ -4eb69e314f0864ec8890e2323d7e76f8a8309692c4f090e2b41bf4be681a811d +4eb69e314f0864ec8890e2323d7e76f8a8309692c4f090e2b41bf4be681a811d \ No newline at end of file diff --git a/tests/testdata/pile_ubuntu-irc-v0-res.json b/tests/testdata/pile_ubuntu-irc-v0-res.json index 74cd951ae7..dff51cba76 100644 --- a/tests/testdata/pile_ubuntu-irc-v0-res.json +++ b/tests/testdata/pile_ubuntu-irc-v0-res.json @@ -1 +1 @@ -{"results": {"pile_ubuntu-irc": {"bits_per_byte": 1.6298315496830533e-06, "byte_perplexity": 1.0000016298328778, "word_perplexity": 1.0000108866656874}}, "versions": {"pile_ubuntu-irc": 0}} +{"results": {"pile_ubuntu-irc": {"bits_per_byte": 1.6298315496830533e-06, "byte_perplexity": 1.0000016298328778, "word_perplexity": 1.0000108866656874}}, "versions": {"pile_ubuntu-irc": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_ubuntu-irc-v1-loglikelihood_rolling b/tests/testdata/pile_ubuntu-irc-v1-loglikelihood_rolling index cdbcfd3fd4..ce04199863 100644 --- a/tests/testdata/pile_ubuntu-irc-v1-loglikelihood_rolling +++ b/tests/testdata/pile_ubuntu-irc-v1-loglikelihood_rolling @@ -1 +1 @@ -4eb69e314f0864ec8890e2323d7e76f8a8309692c4f090e2b41bf4be681a811d +4eb69e314f0864ec8890e2323d7e76f8a8309692c4f090e2b41bf4be681a811d \ No newline at end of file diff --git a/tests/testdata/pile_ubuntu-irc-v1-res.json b/tests/testdata/pile_ubuntu-irc-v1-res.json index d5b6788d25..0e3b1b2597 100644 --- a/tests/testdata/pile_ubuntu-irc-v1-res.json +++ b/tests/testdata/pile_ubuntu-irc-v1-res.json @@ -1 +1 @@ -{"results": {"pile_ubuntu-irc": {"bits_per_byte": 2.3513498942121155e-06, "byte_perplexity": 1.0000016298328778, "word_perplexity": 1.0000108866656874}}, "versions": {"pile_ubuntu-irc": 1}} +{"results": {"pile_ubuntu-irc": {"bits_per_byte": 2.3513498942121155e-06, "byte_perplexity": 1.0000016298328778, "word_perplexity": 1.0000108866656874}}, "versions": {"pile_ubuntu-irc": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_uspto-v0-loglikelihood_rolling b/tests/testdata/pile_uspto-v0-loglikelihood_rolling index 64e43b699b..4649d3b9b7 100644 --- a/tests/testdata/pile_uspto-v0-loglikelihood_rolling +++ b/tests/testdata/pile_uspto-v0-loglikelihood_rolling @@ -1 +1 @@ -789b2bdb31564d512b70f801316f49320a26c83ba361226bac0afb255341d477 +789b2bdb31564d512b70f801316f49320a26c83ba361226bac0afb255341d477 \ No newline at end of file diff --git a/tests/testdata/pile_uspto-v0-res.json b/tests/testdata/pile_uspto-v0-res.json index aefc74637e..c13dfc73f5 100644 --- a/tests/testdata/pile_uspto-v0-res.json +++ b/tests/testdata/pile_uspto-v0-res.json @@ -1 +1 @@ -{"results": {"pile_uspto": {"bits_per_byte": 0.00012062434384130924, "byte_perplexity": 1.00012063161925, "word_perplexity": 1.0007716198916954}}, "versions": {"pile_uspto": 0}} +{"results": {"pile_uspto": {"bits_per_byte": 0.00012062434384130924, "byte_perplexity": 1.00012063161925, "word_perplexity": 1.0007716198916954}}, "versions": {"pile_uspto": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_uspto-v1-loglikelihood_rolling b/tests/testdata/pile_uspto-v1-loglikelihood_rolling index 64e43b699b..4649d3b9b7 100644 --- a/tests/testdata/pile_uspto-v1-loglikelihood_rolling +++ b/tests/testdata/pile_uspto-v1-loglikelihood_rolling @@ -1 +1 @@ -789b2bdb31564d512b70f801316f49320a26c83ba361226bac0afb255341d477 +789b2bdb31564d512b70f801316f49320a26c83ba361226bac0afb255341d477 \ No newline at end of file diff --git a/tests/testdata/pile_uspto-v1-res.json b/tests/testdata/pile_uspto-v1-res.json index 48a5b98029..599ae44ef4 100644 --- a/tests/testdata/pile_uspto-v1-res.json +++ b/tests/testdata/pile_uspto-v1-res.json @@ -1 +1 @@ -{"results": {"pile_uspto": {"bits_per_byte": 0.000174024142670342, "byte_perplexity": 1.00012063161925, "word_perplexity": 1.0007716198916954}}, "versions": {"pile_uspto": 1}} +{"results": {"pile_uspto": {"bits_per_byte": 0.000174024142670342, "byte_perplexity": 1.00012063161925, "word_perplexity": 1.0007716198916954}}, "versions": {"pile_uspto": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling b/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling index 1174a961e7..e44bd27628 100644 --- a/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling +++ b/tests/testdata/pile_wikipedia-v0-loglikelihood_rolling @@ -1 +1 @@ -ef9ec0dd408316ca6537228a6812e839f14b30608973081d41efc47c138338da +ef9ec0dd408316ca6537228a6812e839f14b30608973081d41efc47c138338da \ No newline at end of file diff --git a/tests/testdata/pile_wikipedia-v0-res.json b/tests/testdata/pile_wikipedia-v0-res.json index d04bd589a3..bfffde9938 100644 --- a/tests/testdata/pile_wikipedia-v0-res.json +++ b/tests/testdata/pile_wikipedia-v0-res.json @@ -1 +1 @@ -{"results": {"pile_wikipedia": {"bits_per_byte": 0.00016834722287561703, "byte_perplexity": 1.0001683613940646, "word_perplexity": 1.001084677949439}}, "versions": {"pile_wikipedia": 0}} +{"results": {"pile_wikipedia": {"bits_per_byte": 0.00016834722287561703, "byte_perplexity": 1.0001683613940646, "word_perplexity": 1.001084677949439}}, "versions": {"pile_wikipedia": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling b/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling index 1174a961e7..e44bd27628 100644 --- a/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling +++ b/tests/testdata/pile_wikipedia-v1-loglikelihood_rolling @@ -1 +1 @@ -ef9ec0dd408316ca6537228a6812e839f14b30608973081d41efc47c138338da +ef9ec0dd408316ca6537228a6812e839f14b30608973081d41efc47c138338da \ No newline at end of file diff --git a/tests/testdata/pile_wikipedia-v1-res.json b/tests/testdata/pile_wikipedia-v1-res.json index 9985d55eb3..4f2314e66b 100644 --- a/tests/testdata/pile_wikipedia-v1-res.json +++ b/tests/testdata/pile_wikipedia-v1-res.json @@ -1 +1 @@ -{"results": {"pile_wikipedia": {"bits_per_byte": 0.00024287370359008176, "byte_perplexity": 1.0001683613940646, "word_perplexity": 1.001084677949439}}, "versions": {"pile_wikipedia": 1}} +{"results": {"pile_wikipedia": {"bits_per_byte": 0.00024287370359008176, "byte_perplexity": 1.0001683613940646, "word_perplexity": 1.001084677949439}}, "versions": {"pile_wikipedia": 1}} \ No newline at end of file diff --git a/tests/testdata/pile_youtubesubtitles-v0-loglikelihood_rolling b/tests/testdata/pile_youtubesubtitles-v0-loglikelihood_rolling index b5db202c2d..81c2e5ed06 100644 --- a/tests/testdata/pile_youtubesubtitles-v0-loglikelihood_rolling +++ b/tests/testdata/pile_youtubesubtitles-v0-loglikelihood_rolling @@ -1 +1 @@ -68263c52adc0086011e2220b619983935cabb1cc1f5f9f8ee1a74ab2a7457967 +68263c52adc0086011e2220b619983935cabb1cc1f5f9f8ee1a74ab2a7457967 \ No newline at end of file diff --git a/tests/testdata/pile_youtubesubtitles-v0-res.json b/tests/testdata/pile_youtubesubtitles-v0-res.json index 4a8a5fdf40..b58ce148f0 100644 --- a/tests/testdata/pile_youtubesubtitles-v0-res.json +++ b/tests/testdata/pile_youtubesubtitles-v0-res.json @@ -1 +1 @@ -{"results": {"pile_youtubesubtitles": {"bits_per_byte": 2.3447170928931888e-05, "byte_perplexity": 1.000023447445816, "word_perplexity": 1.0001529192262875}}, "versions": {"pile_youtubesubtitles": 0}} +{"results": {"pile_youtubesubtitles": {"bits_per_byte": 2.3447170928931888e-05, "byte_perplexity": 1.000023447445816, "word_perplexity": 1.0001529192262875}}, "versions": {"pile_youtubesubtitles": 0}} \ No newline at end of file diff --git a/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling b/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling index b5db202c2d..81c2e5ed06 100644 --- a/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling +++ b/tests/testdata/pile_youtubesubtitles-v1-loglikelihood_rolling @@ -1 +1 @@ -68263c52adc0086011e2220b619983935cabb1cc1f5f9f8ee1a74ab2a7457967 +68263c52adc0086011e2220b619983935cabb1cc1f5f9f8ee1a74ab2a7457967 \ No newline at end of file diff --git a/tests/testdata/pile_youtubesubtitles-v1-res.json b/tests/testdata/pile_youtubesubtitles-v1-res.json index f4c82da451..fcf2faa8bc 100644 --- a/tests/testdata/pile_youtubesubtitles-v1-res.json +++ b/tests/testdata/pile_youtubesubtitles-v1-res.json @@ -1 +1 @@ -{"results": {"pile_youtubesubtitles": {"bits_per_byte": 3.3827117222045906e-05, "byte_perplexity": 1.000023447445816, "word_perplexity": 1.0001529192262875}}, "versions": {"pile_youtubesubtitles": 1}} +{"results": {"pile_youtubesubtitles": {"bits_per_byte": 3.3827117222045906e-05, "byte_perplexity": 1.000023447445816, "word_perplexity": 1.0001529192262875}}, "versions": {"pile_youtubesubtitles": 1}} \ No newline at end of file diff --git a/tests/testdata/piqa-v0-loglikelihood b/tests/testdata/piqa-v0-loglikelihood index 27950464e4..b01b1fe5d8 100644 --- a/tests/testdata/piqa-v0-loglikelihood +++ b/tests/testdata/piqa-v0-loglikelihood @@ -1 +1 @@ -6048a3a2bb3ad1e6a3d98139618e06b4d7de766edd685bd38837596199c3f69f +6048a3a2bb3ad1e6a3d98139618e06b4d7de766edd685bd38837596199c3f69f \ No newline at end of file diff --git a/tests/testdata/piqa-v0-res.json b/tests/testdata/piqa-v0-res.json index 9c06db138d..bb6ebfb9a2 100644 --- a/tests/testdata/piqa-v0-res.json +++ b/tests/testdata/piqa-v0-res.json @@ -1 +1 @@ -{"results": {"piqa": {"acc": 0.514145810663765, "acc_norm": 0.5114254624591947, "acc_norm_stderr": 0.01166277802645167, "acc_stderr": 0.011661154475524836}}, "versions": {"piqa": 0}} +{"results": {"piqa": {"acc": 0.514145810663765, "acc_norm": 0.5114254624591947, "acc_norm_stderr": 0.01166277802645167, "acc_stderr": 0.011661154475524836}}, "versions": {"piqa": 0}} \ No newline at end of file diff --git a/tests/testdata/prost-v0-loglikelihood b/tests/testdata/prost-v0-loglikelihood index 57c5931f49..a94b8cdec9 100644 --- a/tests/testdata/prost-v0-loglikelihood +++ b/tests/testdata/prost-v0-loglikelihood @@ -1 +1 @@ -7c475f5b36a8b79f94c2be035441e7fd59dac021b0713b1fc72d256424c70b0b +7c475f5b36a8b79f94c2be035441e7fd59dac021b0713b1fc72d256424c70b0b \ No newline at end of file diff --git a/tests/testdata/prost-v0-res.json b/tests/testdata/prost-v0-res.json index 5c2ef8a3a2..ff99d83f40 100644 --- a/tests/testdata/prost-v0-res.json +++ b/tests/testdata/prost-v0-res.json @@ -1 +1 @@ -{"results": {"prost": {"acc": 0.24631725021349274, "acc_norm": 0.2581127241673783, "acc_norm_stderr": 0.00319703079646546, "acc_stderr": 0.003147855968061357}}, "versions": {"prost": 0}} +{"results": {"prost": {"acc": 0.24631725021349274, "acc_norm": 0.2581127241673783, "acc_norm_stderr": 0.00319703079646546, "acc_stderr": 0.003147855968061357}}, "versions": {"prost": 0}} \ No newline at end of file diff --git a/tests/testdata/pubmedqa-v0-loglikelihood b/tests/testdata/pubmedqa-v0-loglikelihood index 2c839c632b..97db87ce2b 100644 --- a/tests/testdata/pubmedqa-v0-loglikelihood +++ b/tests/testdata/pubmedqa-v0-loglikelihood @@ -1 +1 @@ -7a04a1fb1d2b19db84fd15c224015d6c0306a41195a4e71fe6abd48fb4d53b9f +7a04a1fb1d2b19db84fd15c224015d6c0306a41195a4e71fe6abd48fb4d53b9f \ No newline at end of file diff --git a/tests/testdata/pubmedqa-v0-res.json b/tests/testdata/pubmedqa-v0-res.json index 75acb8f095..bb39463a4a 100644 --- a/tests/testdata/pubmedqa-v0-res.json +++ b/tests/testdata/pubmedqa-v0-res.json @@ -1 +1 @@ -{"results": {"pubmedqa": {"acc": 0.324, "acc_stderr": 0.01480686473373886}}, "versions": {"pubmedqa": 0}} +{"results": {"pubmedqa": {"acc": 0.324, "acc_stderr": 0.01480686473373886}}, "versions": {"pubmedqa": 0}} \ No newline at end of file diff --git a/tests/testdata/qa4mre_2011-v0-loglikelihood b/tests/testdata/qa4mre_2011-v0-loglikelihood index d030906e56..049134c7a1 100644 --- a/tests/testdata/qa4mre_2011-v0-loglikelihood +++ b/tests/testdata/qa4mre_2011-v0-loglikelihood @@ -1 +1 @@ -0d09f17c65768e797633494d2d218e4e46a26f718cab8b0bf3d156b073a8c437 +0d09f17c65768e797633494d2d218e4e46a26f718cab8b0bf3d156b073a8c437 \ No newline at end of file diff --git a/tests/testdata/qa4mre_2011-v0-res.json b/tests/testdata/qa4mre_2011-v0-res.json index 44be8afc49..601c4eb763 100644 --- a/tests/testdata/qa4mre_2011-v0-res.json +++ b/tests/testdata/qa4mre_2011-v0-res.json @@ -1 +1 @@ -{"results": {"qa4mre_2011": {"acc": 0.225, "acc_norm": 0.23333333333333334, "acc_norm_stderr": 0.03877199986918664, "acc_stderr": 0.0382797091741014}}, "versions": {"qa4mre_2011": 0}} +{"results": {"qa4mre_2011": {"acc": 0.225, "acc_norm": 0.23333333333333334, "acc_norm_stderr": 0.03877199986918664, "acc_stderr": 0.0382797091741014}}, "versions": {"qa4mre_2011": 0}} \ No newline at end of file diff --git a/tests/testdata/qa4mre_2012-v0-loglikelihood b/tests/testdata/qa4mre_2012-v0-loglikelihood index dab729c1bb..0e67fac5f7 100644 --- a/tests/testdata/qa4mre_2012-v0-loglikelihood +++ b/tests/testdata/qa4mre_2012-v0-loglikelihood @@ -1 +1 @@ -7e17261820acb365966cb9431d93aec983b14393eaeefbc96e30a11cf58bc6df +7e17261820acb365966cb9431d93aec983b14393eaeefbc96e30a11cf58bc6df \ No newline at end of file diff --git a/tests/testdata/qa4mre_2012-v0-res.json b/tests/testdata/qa4mre_2012-v0-res.json index 8b6dc364ee..91d8f36604 100644 --- a/tests/testdata/qa4mre_2012-v0-res.json +++ b/tests/testdata/qa4mre_2012-v0-res.json @@ -1 +1 @@ -{"results": {"qa4mre_2012": {"acc": 0.15625, "acc_norm": 0.16875, "acc_norm_stderr": 0.029702236908328808, "acc_stderr": 0.02879508360159146}}, "versions": {"qa4mre_2012": 0}} +{"results": {"qa4mre_2012": {"acc": 0.15625, "acc_norm": 0.16875, "acc_norm_stderr": 0.029702236908328808, "acc_stderr": 0.02879508360159146}}, "versions": {"qa4mre_2012": 0}} \ No newline at end of file diff --git a/tests/testdata/qa4mre_2013-v0-loglikelihood b/tests/testdata/qa4mre_2013-v0-loglikelihood index 59c31926de..43243706d9 100644 --- a/tests/testdata/qa4mre_2013-v0-loglikelihood +++ b/tests/testdata/qa4mre_2013-v0-loglikelihood @@ -1 +1 @@ -52fc431e94c67f983e28ebc70cf45e6c14116b0ae77dc1bf22347c705a65d054 +52fc431e94c67f983e28ebc70cf45e6c14116b0ae77dc1bf22347c705a65d054 \ No newline at end of file diff --git a/tests/testdata/qa4mre_2013-v0-res.json b/tests/testdata/qa4mre_2013-v0-res.json index e0f65ff125..c87e487e9a 100644 --- a/tests/testdata/qa4mre_2013-v0-res.json +++ b/tests/testdata/qa4mre_2013-v0-res.json @@ -1 +1 @@ -{"results": {"qa4mre_2013": {"acc": 0.18309859154929578, "acc_norm": 0.22183098591549297, "acc_norm_stderr": 0.02469760575535269, "acc_stderr": 0.022989742475464973}}, "versions": {"qa4mre_2013": 0}} +{"results": {"qa4mre_2013": {"acc": 0.18309859154929578, "acc_norm": 0.22183098591549297, "acc_norm_stderr": 0.02469760575535269, "acc_stderr": 0.022989742475464973}}, "versions": {"qa4mre_2013": 0}} \ No newline at end of file diff --git a/tests/testdata/qnli-v0-loglikelihood b/tests/testdata/qnli-v0-loglikelihood index bb6554ad5c..883202c385 100644 --- a/tests/testdata/qnli-v0-loglikelihood +++ b/tests/testdata/qnli-v0-loglikelihood @@ -1 +1 @@ -4281d4ff5cf1244358b0ea0220c67863c69fbade850696b43e8ff05138e01e12 +4281d4ff5cf1244358b0ea0220c67863c69fbade850696b43e8ff05138e01e12 \ No newline at end of file diff --git a/tests/testdata/qnli-v0-res.json b/tests/testdata/qnli-v0-res.json index 64d731ccfe..31c3097605 100644 --- a/tests/testdata/qnli-v0-res.json +++ b/tests/testdata/qnli-v0-res.json @@ -1 +1 @@ -{"results": {"qnli": {"acc": 0.5108914515833791, "acc_stderr": 0.00676380528502966}}, "versions": {"qnli": 0}} +{"results": {"qnli": {"acc": 0.5108914515833791, "acc_stderr": 0.00676380528502966}}, "versions": {"qnli": 0}} \ No newline at end of file diff --git a/tests/testdata/qqp-v0-loglikelihood b/tests/testdata/qqp-v0-loglikelihood index 30e142b2a0..ecc86dc396 100644 --- a/tests/testdata/qqp-v0-loglikelihood +++ b/tests/testdata/qqp-v0-loglikelihood @@ -1 +1 @@ -97b551b0fc3d239aad4929a2e8e79c986891aefd9fcd19441fea0382d507889e +97b551b0fc3d239aad4929a2e8e79c986891aefd9fcd19441fea0382d507889e \ No newline at end of file diff --git a/tests/testdata/qqp-v0-res.json b/tests/testdata/qqp-v0-res.json index ebf8ada06f..b7b31355e6 100644 --- a/tests/testdata/qqp-v0-res.json +++ b/tests/testdata/qqp-v0-res.json @@ -1 +1 @@ -{"results": {"qqp": {"acc": 0.49782339846648527, "acc_stderr": 0.0024866770696239894, "f1": 0.42322661288031593, "f1_stderr": 0.002695903831328166}}, "versions": {"qqp": 0}} +{"results": {"qqp": {"acc": 0.49782339846648527, "acc_stderr": 0.0024866770696239894, "f1": 0.42322661288031593, "f1_stderr": 0.002695903831328166}}, "versions": {"qqp": 0}} \ No newline at end of file diff --git a/tests/testdata/race-v0-loglikelihood b/tests/testdata/race-v0-loglikelihood index ddc6e6e9e7..5fe1ce356b 100644 --- a/tests/testdata/race-v0-loglikelihood +++ b/tests/testdata/race-v0-loglikelihood @@ -1 +1 @@ -bdfdfab7fa1c7af0c1e161785e347b1b8071a15cbf971f6f2a9ae8c8e845199f +bdfdfab7fa1c7af0c1e161785e347b1b8071a15cbf971f6f2a9ae8c8e845199f \ No newline at end of file diff --git a/tests/testdata/race-v0-res.json b/tests/testdata/race-v0-res.json index e70be2cd10..017b00669b 100644 --- a/tests/testdata/race-v0-res.json +++ b/tests/testdata/race-v0-res.json @@ -1 +1 @@ -{"results": {"race": {"acc": 0.23253588516746412, "acc_stderr": 0.013074460615265295}}, "versions": {"race": 0}} +{"results": {"race": {"acc": 0.23253588516746412, "acc_stderr": 0.013074460615265295}}, "versions": {"race": 0}} \ No newline at end of file diff --git a/tests/testdata/random_insertion-v0-greedy_until b/tests/testdata/random_insertion-v0-greedy_until index 11a07276b4..4844e5393b 100644 --- a/tests/testdata/random_insertion-v0-greedy_until +++ b/tests/testdata/random_insertion-v0-greedy_until @@ -1 +1 @@ -6c48baa6924f3635120f33062251c4b571b3d4e9fe46b14d91f54ddd1c857997 +6c48baa6924f3635120f33062251c4b571b3d4e9fe46b14d91f54ddd1c857997 \ No newline at end of file diff --git a/tests/testdata/random_insertion-v0-res.json b/tests/testdata/random_insertion-v0-res.json index be1ac2fb3a..9b5f507f67 100644 --- a/tests/testdata/random_insertion-v0-res.json +++ b/tests/testdata/random_insertion-v0-res.json @@ -1 +1 @@ -{"results": {"random_insertion": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"random_insertion": 0}} +{"results": {"random_insertion": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"random_insertion": 0}} \ No newline at end of file diff --git a/tests/testdata/record-v0-loglikelihood b/tests/testdata/record-v0-loglikelihood index dbd898d2a8..a54fa05cd1 100644 --- a/tests/testdata/record-v0-loglikelihood +++ b/tests/testdata/record-v0-loglikelihood @@ -1 +1 @@ -a3e378fbde4e28f375cac1561bbfc7d7673c2af193628a774ad012d5192393aa +a3e378fbde4e28f375cac1561bbfc7d7673c2af193628a774ad012d5192393aa \ No newline at end of file diff --git a/tests/testdata/record-v0-res.json b/tests/testdata/record-v0-res.json index 0712b6a69d..006c381372 100644 --- a/tests/testdata/record-v0-res.json +++ b/tests/testdata/record-v0-res.json @@ -1 +1 @@ -{"results": {"record": {"em": 0.1521, "em_stderr": 0.0035913575128186616, "f1": 0.1581870634920636, "f1_stderr": 0.0036146895141474576}}, "versions": {"record": 0}} +{"results": {"record": {"em": 0.1521, "em_stderr": 0.0035913575128186616, "f1": 0.1581870634920636, "f1_stderr": 0.0036146895141474576}}, "versions": {"record": 0}} \ No newline at end of file diff --git a/tests/testdata/reversed_words-v0-greedy_until b/tests/testdata/reversed_words-v0-greedy_until index 633391b66b..3f28488a90 100644 --- a/tests/testdata/reversed_words-v0-greedy_until +++ b/tests/testdata/reversed_words-v0-greedy_until @@ -1 +1 @@ -1d79fc4f0177f9624a487b9973f4e0e1d3f8404993b419a7b807a690ebbbb290 +1d79fc4f0177f9624a487b9973f4e0e1d3f8404993b419a7b807a690ebbbb290 \ No newline at end of file diff --git a/tests/testdata/reversed_words-v0-res.json b/tests/testdata/reversed_words-v0-res.json index 1349728bdd..9285ff2694 100644 --- a/tests/testdata/reversed_words-v0-res.json +++ b/tests/testdata/reversed_words-v0-res.json @@ -1 +1 @@ -{"results": {"reversed_words": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"reversed_words": 0}} +{"results": {"reversed_words": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"reversed_words": 0}} \ No newline at end of file diff --git a/tests/testdata/rte-v0-loglikelihood b/tests/testdata/rte-v0-loglikelihood index 8bed1472c9..c239923e4f 100644 --- a/tests/testdata/rte-v0-loglikelihood +++ b/tests/testdata/rte-v0-loglikelihood @@ -1 +1 @@ -c80ce13c8c736087f1557f8736d5d318b540ff01e4bb7f55e568890dc8b0393e +c80ce13c8c736087f1557f8736d5d318b540ff01e4bb7f55e568890dc8b0393e \ No newline at end of file diff --git a/tests/testdata/rte-v0-res.json b/tests/testdata/rte-v0-res.json index be36edbcad..10314dd047 100644 --- a/tests/testdata/rte-v0-res.json +++ b/tests/testdata/rte-v0-res.json @@ -1 +1 @@ -{"results": {"rte": {"acc": 0.5379061371841155, "acc_stderr": 0.030009848912529117}}, "versions": {"rte": 0}} +{"results": {"rte": {"acc": 0.5379061371841155, "acc_stderr": 0.030009848912529117}}, "versions": {"rte": 0}} \ No newline at end of file diff --git a/tests/testdata/sciq-v0-loglikelihood b/tests/testdata/sciq-v0-loglikelihood index 577a753162..25ce988773 100644 --- a/tests/testdata/sciq-v0-loglikelihood +++ b/tests/testdata/sciq-v0-loglikelihood @@ -1 +1 @@ -71cbb6e2a7ac4512c3761ea801d420eb3fac49d158c7e4deaa3ab8727bea923c +71cbb6e2a7ac4512c3761ea801d420eb3fac49d158c7e4deaa3ab8727bea923c \ No newline at end of file diff --git a/tests/testdata/sciq-v0-res.json b/tests/testdata/sciq-v0-res.json index 334111fe02..7071515827 100644 --- a/tests/testdata/sciq-v0-res.json +++ b/tests/testdata/sciq-v0-res.json @@ -1 +1 @@ -{"results": {"sciq": {"acc": 0.234, "acc_norm": 0.239, "acc_norm_stderr": 0.01349300044693758, "acc_stderr": 0.01339490288966001}}, "versions": {"sciq": 0}} +{"results": {"sciq": {"acc": 0.234, "acc_norm": 0.239, "acc_norm_stderr": 0.01349300044693758, "acc_stderr": 0.01339490288966001}}, "versions": {"sciq": 0}} \ No newline at end of file diff --git a/tests/testdata/squad2-v0-greedy_until b/tests/testdata/squad2-v0-greedy_until index 8160d9e4f2..024652e0a3 100644 --- a/tests/testdata/squad2-v0-greedy_until +++ b/tests/testdata/squad2-v0-greedy_until @@ -1 +1 @@ -b261e8885c11750ce6911bb11e8693de03d53758297c26fb14cfc1ef508862cb +b261e8885c11750ce6911bb11e8693de03d53758297c26fb14cfc1ef508862cb \ No newline at end of file diff --git a/tests/testdata/squad2-v0-loglikelihood b/tests/testdata/squad2-v0-loglikelihood index 6b95d6c0c1..41300bc19f 100644 --- a/tests/testdata/squad2-v0-loglikelihood +++ b/tests/testdata/squad2-v0-loglikelihood @@ -1 +1 @@ -287e87cc6878debcc80d9b6df4e2d0a74ed29068e0e0a80906c8441843a17cee +287e87cc6878debcc80d9b6df4e2d0a74ed29068e0e0a80906c8441843a17cee \ No newline at end of file diff --git a/tests/testdata/squad2-v0-res.json b/tests/testdata/squad2-v0-res.json index a813f14bda..2b370553ac 100644 --- a/tests/testdata/squad2-v0-res.json +++ b/tests/testdata/squad2-v0-res.json @@ -1 +1 @@ -{"results": {"squad2": {"HasAns_exact": 0.0, "HasAns_f1": 0.0, "NoAns_exact": 0.0, "NoAns_f1": 0.0, "best_exact": 50.07159100480081, "best_f1": 50.07159100480081, "exact": 0.0, "f1": 0.0}}, "versions": {"squad2": 0}} +{"results": {"squad2": {"HasAns_exact": 0.0, "HasAns_f1": 0.0, "NoAns_exact": 0.0, "NoAns_f1": 0.0, "best_exact": 50.07159100480081, "best_f1": 50.07159100480081, "exact": 0.0, "f1": 0.0}}, "versions": {"squad2": 0}} \ No newline at end of file diff --git a/tests/testdata/squad2-v1-greedy_until b/tests/testdata/squad2-v1-greedy_until index caf8511cf1..70df2fd6ae 100644 --- a/tests/testdata/squad2-v1-greedy_until +++ b/tests/testdata/squad2-v1-greedy_until @@ -1 +1 @@ -e17e3d85c1d5adaf2d6b4b752c4babc2e0b3a6e144e6de70cb3b2287e85109b8 +e17e3d85c1d5adaf2d6b4b752c4babc2e0b3a6e144e6de70cb3b2287e85109b8 \ No newline at end of file diff --git a/tests/testdata/squad2-v1-loglikelihood b/tests/testdata/squad2-v1-loglikelihood index fa1d967cf8..2c970f7583 100644 --- a/tests/testdata/squad2-v1-loglikelihood +++ b/tests/testdata/squad2-v1-loglikelihood @@ -1 +1 @@ -f5da6173402b274dc89130755c222c6ca6b2a3bacaaa4e4ab07be9322b7bad65 +f5da6173402b274dc89130755c222c6ca6b2a3bacaaa4e4ab07be9322b7bad65 \ No newline at end of file diff --git a/tests/testdata/squad2-v1-res.json b/tests/testdata/squad2-v1-res.json index e095ea76bf..dd69f00abb 100644 --- a/tests/testdata/squad2-v1-res.json +++ b/tests/testdata/squad2-v1-res.json @@ -1 +1 @@ -{"results": {"squad2": {"HasAns_exact": 0.0, "HasAns_f1": 0.0, "NoAns_exact": 0.0, "NoAns_f1": 0.0, "best_exact": 50.07159100480081, "best_f1": 50.07159100480081, "exact": 0.0, "f1": 0.0}}, "versions": {"squad2": 1}} +{"results": {"squad2": {"HasAns_exact": 0.0, "HasAns_f1": 0.0, "NoAns_exact": 0.0, "NoAns_f1": 0.0, "best_exact": 50.07159100480081, "best_f1": 50.07159100480081, "exact": 0.0, "f1": 0.0}}, "versions": {"squad2": 1}} \ No newline at end of file diff --git a/tests/testdata/sst-v0-loglikelihood b/tests/testdata/sst-v0-loglikelihood index f45014645d..52050de16b 100644 --- a/tests/testdata/sst-v0-loglikelihood +++ b/tests/testdata/sst-v0-loglikelihood @@ -1 +1 @@ -d2ebe3a63517d1d481aa1513bebe124c57a0904554a1e95f566979cfe67b1a7f +d2ebe3a63517d1d481aa1513bebe124c57a0904554a1e95f566979cfe67b1a7f \ No newline at end of file diff --git a/tests/testdata/sst-v0-res.json b/tests/testdata/sst-v0-res.json index a21393bcde..5fe3c62a20 100644 --- a/tests/testdata/sst-v0-res.json +++ b/tests/testdata/sst-v0-res.json @@ -1 +1 @@ -{"results": {"sst": {"acc": 0.5172018348623854, "acc_stderr": 0.016931824425903734}}, "versions": {"sst": 0}} +{"results": {"sst": {"acc": 0.5172018348623854, "acc_stderr": 0.016931824425903734}}, "versions": {"sst": 0}} \ No newline at end of file diff --git a/tests/testdata/swag-v0-loglikelihood b/tests/testdata/swag-v0-loglikelihood index 861cb0d72a..c8152027dc 100644 --- a/tests/testdata/swag-v0-loglikelihood +++ b/tests/testdata/swag-v0-loglikelihood @@ -1 +1 @@ -be4fcbad876124c4ba3c71970538a97fec0e36a9cc677c70b6c9243a7bcee0ec +be4fcbad876124c4ba3c71970538a97fec0e36a9cc677c70b6c9243a7bcee0ec \ No newline at end of file diff --git a/tests/testdata/swag-v0-res.json b/tests/testdata/swag-v0-res.json index f31caf72ef..a1aeee972e 100644 --- a/tests/testdata/swag-v0-res.json +++ b/tests/testdata/swag-v0-res.json @@ -1 +1 @@ -{"results": {"swag": {"acc": 0.2482255323402979, "acc_norm": 0.24882535239428172, "acc_norm_stderr": 0.00305666959496067, "acc_stderr": 0.003054201832644171}}, "versions": {"swag": 0}} +{"results": {"swag": {"acc": 0.2482255323402979, "acc_norm": 0.24882535239428172, "acc_norm_stderr": 0.00305666959496067, "acc_stderr": 0.003054201832644171}}, "versions": {"swag": 0}} \ No newline at end of file diff --git a/tests/testdata/triviaqa-v0-loglikelihood b/tests/testdata/triviaqa-v0-loglikelihood index 0a28fcfa69..d576c4977f 100644 --- a/tests/testdata/triviaqa-v0-loglikelihood +++ b/tests/testdata/triviaqa-v0-loglikelihood @@ -1 +1 @@ -f8ec05b306b9f6187c0f8117cae441fb85a7a2e4670f4f9a1a3b632b1978421a +f8ec05b306b9f6187c0f8117cae441fb85a7a2e4670f4f9a1a3b632b1978421a \ No newline at end of file diff --git a/tests/testdata/triviaqa-v0-res.json b/tests/testdata/triviaqa-v0-res.json index dba83d2c40..ab98847da6 100644 --- a/tests/testdata/triviaqa-v0-res.json +++ b/tests/testdata/triviaqa-v0-res.json @@ -1 +1 @@ -{"results": {"triviaqa": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"triviaqa": 0}} +{"results": {"triviaqa": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"triviaqa": 0}} \ No newline at end of file diff --git a/tests/testdata/truthfulqa_gen-v0-greedy_until b/tests/testdata/truthfulqa_gen-v0-greedy_until index 08a6b49eee..52156c8507 100644 --- a/tests/testdata/truthfulqa_gen-v0-greedy_until +++ b/tests/testdata/truthfulqa_gen-v0-greedy_until @@ -1 +1 @@ -0d7c56e1aa71ffd8f94bde28f6e8dfdd35f7aaadffa0620bd2a27704253d6c14 +0d7c56e1aa71ffd8f94bde28f6e8dfdd35f7aaadffa0620bd2a27704253d6c14 \ No newline at end of file diff --git a/tests/testdata/truthfulqa_gen-v0-res.json b/tests/testdata/truthfulqa_gen-v0-res.json index 24cdb8de49..5e68fa8dc6 100644 --- a/tests/testdata/truthfulqa_gen-v0-res.json +++ b/tests/testdata/truthfulqa_gen-v0-res.json @@ -1 +1 @@ -{"results": {"truthfulqa_gen": {"bleu_acc": 0.0, "bleu_acc_stderr": 0.0, "bleu_diff": 0.0, "bleu_diff_stderr": 0.0, "bleu_max": 0.0, "bleu_max_stderr": 0.0, "bleurt_acc": 0.8372093023255814, "bleurt_acc_stderr": 0.012923696051772253, "bleurt_diff": 0.13967358205134603, "bleurt_diff_stderr": 0.00532907098769571, "bleurt_max": -1.4402793981454072, "bleurt_max_stderr": 0.0021884846359458963, "rouge1_acc": 0.0, "rouge1_acc_stderr": 0.0, "rouge1_diff": 0.0, "rouge1_diff_stderr": 0.0, "rouge1_max": 0.0, "rouge1_max_stderr": 0.0, "rouge2_acc": 0.0, "rouge2_acc_stderr": 0.0, "rouge2_diff": 0.0, "rouge2_diff_stderr": 0.0, "rouge2_max": 0.0, "rouge2_max_stderr": 0.0, "rougeL_acc": 0.0, "rougeL_acc_stderr": 0.0, "rougeL_diff": 0.0, "rougeL_diff_stderr": 0.0, "rougeL_max": 0.0, "rougeL_max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 0}} +{"results": {"truthfulqa_gen": {"bleu_acc": 0.0, "bleu_acc_stderr": 0.0, "bleu_diff": 0.0, "bleu_diff_stderr": 0.0, "bleu_max": 0.0, "bleu_max_stderr": 0.0, "bleurt_acc": 0.8372093023255814, "bleurt_acc_stderr": 0.012923696051772253, "bleurt_diff": 0.13967358205134603, "bleurt_diff_stderr": 0.00532907098769571, "bleurt_max": -1.4402793981454072, "bleurt_max_stderr": 0.0021884846359458963, "rouge1_acc": 0.0, "rouge1_acc_stderr": 0.0, "rouge1_diff": 0.0, "rouge1_diff_stderr": 0.0, "rouge1_max": 0.0, "rouge1_max_stderr": 0.0, "rouge2_acc": 0.0, "rouge2_acc_stderr": 0.0, "rouge2_diff": 0.0, "rouge2_diff_stderr": 0.0, "rouge2_max": 0.0, "rouge2_max_stderr": 0.0, "rougeL_acc": 0.0, "rougeL_acc_stderr": 0.0, "rougeL_diff": 0.0, "rougeL_diff_stderr": 0.0, "rougeL_max": 0.0, "rougeL_max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 0}} \ No newline at end of file diff --git a/tests/testdata/truthfulqa_gen-v1-greedy_until b/tests/testdata/truthfulqa_gen-v1-greedy_until index 08cb423cac..d5261f2213 100644 --- a/tests/testdata/truthfulqa_gen-v1-greedy_until +++ b/tests/testdata/truthfulqa_gen-v1-greedy_until @@ -1 +1 @@ -1a280973bbac2b7ac29dd64dddac474fb4749585f7de893483b4034814466c67 +1a280973bbac2b7ac29dd64dddac474fb4749585f7de893483b4034814466c67 \ No newline at end of file diff --git a/tests/testdata/truthfulqa_gen-v1-res.json b/tests/testdata/truthfulqa_gen-v1-res.json index b932ddc30b..30aa72f2ba 100644 --- a/tests/testdata/truthfulqa_gen-v1-res.json +++ b/tests/testdata/truthfulqa_gen-v1-res.json @@ -1 +1 @@ -{"results": {"truthfulqa_gen": {"bleu_acc": 0.0, "bleu_acc_stderr": 0.0, "bleu_diff": 0.0, "bleu_diff_stderr": 0.0, "bleu_max": 0.0, "bleu_max_stderr": 0.0, "bleurt_acc": 0.835985312117503, "bleurt_acc_stderr": 0.012962704327492454, "bleurt_diff": 0.14077322143090107, "bleurt_diff_stderr": 0.005459888909582694, "bleurt_max": -1.4399358725752065, "bleurt_max_stderr": 0.0022126992369197133, "rouge1_acc": 0.0, "rouge1_acc_stderr": 0.0, "rouge1_diff": 0.0, "rouge1_diff_stderr": 0.0, "rouge1_max": 0.0, "rouge1_max_stderr": 0.0, "rouge2_acc": 0.0, "rouge2_acc_stderr": 0.0, "rouge2_diff": 0.0, "rouge2_diff_stderr": 0.0, "rouge2_max": 0.0, "rouge2_max_stderr": 0.0, "rougeL_acc": 0.0, "rougeL_acc_stderr": 0.0, "rougeL_diff": 0.0, "rougeL_diff_stderr": 0.0, "rougeL_max": 0.0, "rougeL_max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 1}} +{"results": {"truthfulqa_gen": {"bleu_acc": 0.0, "bleu_acc_stderr": 0.0, "bleu_diff": 0.0, "bleu_diff_stderr": 0.0, "bleu_max": 0.0, "bleu_max_stderr": 0.0, "bleurt_acc": 0.835985312117503, "bleurt_acc_stderr": 0.012962704327492454, "bleurt_diff": 0.14077322143090107, "bleurt_diff_stderr": 0.005459888909582694, "bleurt_max": -1.4399358725752065, "bleurt_max_stderr": 0.0022126992369197133, "rouge1_acc": 0.0, "rouge1_acc_stderr": 0.0, "rouge1_diff": 0.0, "rouge1_diff_stderr": 0.0, "rouge1_max": 0.0, "rouge1_max_stderr": 0.0, "rouge2_acc": 0.0, "rouge2_acc_stderr": 0.0, "rouge2_diff": 0.0, "rouge2_diff_stderr": 0.0, "rouge2_max": 0.0, "rouge2_max_stderr": 0.0, "rougeL_acc": 0.0, "rougeL_acc_stderr": 0.0, "rougeL_diff": 0.0, "rougeL_diff_stderr": 0.0, "rougeL_max": 0.0, "rougeL_max_stderr": 0.0}}, "versions": {"truthfulqa_gen": 1}} \ No newline at end of file diff --git a/tests/testdata/truthfulqa_mc-v0-loglikelihood b/tests/testdata/truthfulqa_mc-v0-loglikelihood index 9aedee2285..51303977a9 100644 --- a/tests/testdata/truthfulqa_mc-v0-loglikelihood +++ b/tests/testdata/truthfulqa_mc-v0-loglikelihood @@ -1 +1 @@ -226a6783976177dc9ceda5688623ff37023242eff30ddf270b886bf7b9b32228 +226a6783976177dc9ceda5688623ff37023242eff30ddf270b886bf7b9b32228 \ No newline at end of file diff --git a/tests/testdata/truthfulqa_mc-v0-res.json b/tests/testdata/truthfulqa_mc-v0-res.json index 56247084c8..b12b4765cc 100644 --- a/tests/testdata/truthfulqa_mc-v0-res.json +++ b/tests/testdata/truthfulqa_mc-v0-res.json @@ -1 +1 @@ -{"results": {"truthfulqa_mc": {"mc1": 0.2141982864137087, "mc1_stderr": 0.01436214815569045, "mc2": 0.465436996173817, "mc2_stderr": 0.0048422530880316405}}, "versions": {"truthfulqa_mc": 0}} +{"results": {"truthfulqa_mc": {"mc1": 0.2141982864137087, "mc1_stderr": 0.01436214815569045, "mc2": 0.465436996173817, "mc2_stderr": 0.0048422530880316405}}, "versions": {"truthfulqa_mc": 0}} \ No newline at end of file diff --git a/tests/testdata/truthfulqa_mc-v1-loglikelihood b/tests/testdata/truthfulqa_mc-v1-loglikelihood index f43b62372c..4bab2d1f4d 100644 --- a/tests/testdata/truthfulqa_mc-v1-loglikelihood +++ b/tests/testdata/truthfulqa_mc-v1-loglikelihood @@ -1 +1 @@ -1e07020e9cf41d46ed65312eb39d2b8e6599673d4f0d6b67c0d0eba0efb493bb +1e07020e9cf41d46ed65312eb39d2b8e6599673d4f0d6b67c0d0eba0efb493bb \ No newline at end of file diff --git a/tests/testdata/truthfulqa_mc-v1-res.json b/tests/testdata/truthfulqa_mc-v1-res.json index bf29a1b958..c1b1854c2e 100644 --- a/tests/testdata/truthfulqa_mc-v1-res.json +++ b/tests/testdata/truthfulqa_mc-v1-res.json @@ -1 +1 @@ -{"results": {"truthfulqa_mc": {"mc1": 0.23255813953488372, "mc1_stderr": 0.01478915753108052, "mc2": 0.4462325560722362, "mc2_stderr": 0.004986523944692003}}, "versions": {"truthfulqa_mc": 1}} +{"results": {"truthfulqa_mc": {"mc1": 0.23255813953488372, "mc1_stderr": 0.01478915753108052, "mc2": 0.4462325560722362, "mc2_stderr": 0.004986523944692003}}, "versions": {"truthfulqa_mc": 1}} \ No newline at end of file diff --git a/tests/testdata/webqs-v0-loglikelihood b/tests/testdata/webqs-v0-loglikelihood index 201bf657dc..4d604d438d 100644 --- a/tests/testdata/webqs-v0-loglikelihood +++ b/tests/testdata/webqs-v0-loglikelihood @@ -1 +1 @@ -96b218173468cc94552a0b946193bda89faba51f1bfc3e7945531f9dff8d6fe9 +96b218173468cc94552a0b946193bda89faba51f1bfc3e7945531f9dff8d6fe9 \ No newline at end of file diff --git a/tests/testdata/webqs-v0-res.json b/tests/testdata/webqs-v0-res.json index a9778832f6..9f0fdc76ca 100644 --- a/tests/testdata/webqs-v0-res.json +++ b/tests/testdata/webqs-v0-res.json @@ -1 +1 @@ -{"results": {"webqs": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"webqs": 0}} +{"results": {"webqs": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"webqs": 0}} \ No newline at end of file diff --git a/tests/testdata/wic-v0-loglikelihood b/tests/testdata/wic-v0-loglikelihood index 3f63702ced..d27430a9a2 100644 --- a/tests/testdata/wic-v0-loglikelihood +++ b/tests/testdata/wic-v0-loglikelihood @@ -1 +1 @@ -403a08da05e4c44d7e3dd3358382a7ba489c41d223e24cd1a9ed82ef1a2d004b +403a08da05e4c44d7e3dd3358382a7ba489c41d223e24cd1a9ed82ef1a2d004b \ No newline at end of file diff --git a/tests/testdata/wic-v0-res.json b/tests/testdata/wic-v0-res.json index 224f3da0ec..eadc573ed3 100644 --- a/tests/testdata/wic-v0-res.json +++ b/tests/testdata/wic-v0-res.json @@ -1 +1 @@ -{"results": {"wic": {"acc": 0.49216300940438873, "acc_stderr": 0.01980828765781383}}, "versions": {"wic": 0}} +{"results": {"wic": {"acc": 0.49216300940438873, "acc_stderr": 0.01980828765781383}}, "versions": {"wic": 0}} \ No newline at end of file diff --git a/tests/testdata/wikitext-v0-loglikelihood_rolling b/tests/testdata/wikitext-v0-loglikelihood_rolling index ee3e5942dc..f09af45a38 100644 --- a/tests/testdata/wikitext-v0-loglikelihood_rolling +++ b/tests/testdata/wikitext-v0-loglikelihood_rolling @@ -1 +1 @@ -b6f83e6cf7535ee41b0057c3e2ec2cf7f2fa5a9119b305c479a83091d1142b2c +b6f83e6cf7535ee41b0057c3e2ec2cf7f2fa5a9119b305c479a83091d1142b2c \ No newline at end of file diff --git a/tests/testdata/wikitext-v0-res.json b/tests/testdata/wikitext-v0-res.json index 2c3aa13525..9ac0c37bb5 100644 --- a/tests/testdata/wikitext-v0-res.json +++ b/tests/testdata/wikitext-v0-res.json @@ -1 +1 @@ -{"results": {"wikitext": {"bits_per_byte": 2.219817611605802e-05, "byte_perplexity": 1.0000221984224973, "word_perplexity": 1.000118710696617}}, "versions": {"wikitext": 0}} +{"results": {"wikitext": {"bits_per_byte": 2.219817611605802e-05, "byte_perplexity": 1.0000221984224973, "word_perplexity": 1.000118710696617}}, "versions": {"wikitext": 0}} \ No newline at end of file diff --git a/tests/testdata/wikitext-v1-loglikelihood_rolling b/tests/testdata/wikitext-v1-loglikelihood_rolling index ee3e5942dc..f09af45a38 100644 --- a/tests/testdata/wikitext-v1-loglikelihood_rolling +++ b/tests/testdata/wikitext-v1-loglikelihood_rolling @@ -1 +1 @@ -b6f83e6cf7535ee41b0057c3e2ec2cf7f2fa5a9119b305c479a83091d1142b2c +b6f83e6cf7535ee41b0057c3e2ec2cf7f2fa5a9119b305c479a83091d1142b2c \ No newline at end of file diff --git a/tests/testdata/wikitext-v1-res.json b/tests/testdata/wikitext-v1-res.json index 8e8e1fbd7a..122098aec2 100644 --- a/tests/testdata/wikitext-v1-res.json +++ b/tests/testdata/wikitext-v1-res.json @@ -1 +1 @@ -{"results": {"wikitext": {"bits_per_byte": 3.202519859941674e-05, "byte_perplexity": 1.0000221984224973, "word_perplexity": 1.000118710696617}}, "versions": {"wikitext": 1}} +{"results": {"wikitext": {"bits_per_byte": 3.202519859941674e-05, "byte_perplexity": 1.0000221984224973, "word_perplexity": 1.000118710696617}}, "versions": {"wikitext": 1}} \ No newline at end of file diff --git a/tests/testdata/winogrande-v0-loglikelihood b/tests/testdata/winogrande-v0-loglikelihood index b9405d7a2f..97866f6ce4 100644 --- a/tests/testdata/winogrande-v0-loglikelihood +++ b/tests/testdata/winogrande-v0-loglikelihood @@ -1 +1 @@ -90a3eff49de9173964d46f5ed57bcf9a78a72dd1bfe0e5323b25cebb40b49ea9 +90a3eff49de9173964d46f5ed57bcf9a78a72dd1bfe0e5323b25cebb40b49ea9 \ No newline at end of file diff --git a/tests/testdata/winogrande-v0-res.json b/tests/testdata/winogrande-v0-res.json index cac4dc632a..9fa7903a56 100644 --- a/tests/testdata/winogrande-v0-res.json +++ b/tests/testdata/winogrande-v0-res.json @@ -1 +1 @@ -{"results": {"winogrande": {"acc": 0.516179952644041, "acc_stderr": 0.014045126130978606}}, "versions": {"winogrande": 0}} +{"results": {"winogrande": {"acc": 0.516179952644041, "acc_stderr": 0.014045126130978606}}, "versions": {"winogrande": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt14-en-fr-v0-greedy_until b/tests/testdata/wmt14-en-fr-v0-greedy_until index 73c0f39eaa..6d48d5579e 100644 --- a/tests/testdata/wmt14-en-fr-v0-greedy_until +++ b/tests/testdata/wmt14-en-fr-v0-greedy_until @@ -1 +1 @@ -368ae7eec0f902b5123f2d5197caa5109a23942011c53fe68d9eaeee20180e46 +368ae7eec0f902b5123f2d5197caa5109a23942011c53fe68d9eaeee20180e46 \ No newline at end of file diff --git a/tests/testdata/wmt14-en-fr-v0-res.json b/tests/testdata/wmt14-en-fr-v0-res.json index b175c5405c..1aa13f0285 100644 --- a/tests/testdata/wmt14-en-fr-v0-res.json +++ b/tests/testdata/wmt14-en-fr-v0-res.json @@ -1 +1 @@ -{"results": {"wmt14-en-fr": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.011284118461117099, "chrf_stderr": 7.340651275964445e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt14-en-fr": 0}} +{"results": {"wmt14-en-fr": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.011284118461117099, "chrf_stderr": 7.340651275964445e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt14-en-fr": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt14-fr-en-v0-greedy_until b/tests/testdata/wmt14-fr-en-v0-greedy_until index ac8f4b4361..7249d39990 100644 --- a/tests/testdata/wmt14-fr-en-v0-greedy_until +++ b/tests/testdata/wmt14-fr-en-v0-greedy_until @@ -1 +1 @@ -c1d9f7283755fbdd7ecd6cc4278b0ac25a80ac256b7071ea5f839ccd038e5974 +c1d9f7283755fbdd7ecd6cc4278b0ac25a80ac256b7071ea5f839ccd038e5974 \ No newline at end of file diff --git a/tests/testdata/wmt14-fr-en-v0-res.json b/tests/testdata/wmt14-fr-en-v0-res.json index f327e96164..5261876f55 100644 --- a/tests/testdata/wmt14-fr-en-v0-res.json +++ b/tests/testdata/wmt14-fr-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt14-fr-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01275083169440515, "chrf_stderr": 8.45474998563806e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt14-fr-en": 0}} +{"results": {"wmt14-fr-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01275083169440515, "chrf_stderr": 8.45474998563806e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt14-fr-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt16-de-en-v0-greedy_until b/tests/testdata/wmt16-de-en-v0-greedy_until index ecdb510f51..75f1072b6e 100644 --- a/tests/testdata/wmt16-de-en-v0-greedy_until +++ b/tests/testdata/wmt16-de-en-v0-greedy_until @@ -1 +1 @@ -d30e23e38d9a45b9c31e1dfd14b58d0b7020df4b9c8a1c697aa6bc5fba8ce08a +d30e23e38d9a45b9c31e1dfd14b58d0b7020df4b9c8a1c697aa6bc5fba8ce08a \ No newline at end of file diff --git a/tests/testdata/wmt16-de-en-v0-res.json b/tests/testdata/wmt16-de-en-v0-res.json index c2d02476ca..826e0382ab 100644 --- a/tests/testdata/wmt16-de-en-v0-res.json +++ b/tests/testdata/wmt16-de-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt16-de-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.013700416764482968, "chrf_stderr": 0.00016071651360909355, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-de-en": 0}} +{"results": {"wmt16-de-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.013700416764482968, "chrf_stderr": 0.00016071651360909355, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-de-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt16-en-de-v0-greedy_until b/tests/testdata/wmt16-en-de-v0-greedy_until index 0fdfd2f668..45eaaaca8c 100644 --- a/tests/testdata/wmt16-en-de-v0-greedy_until +++ b/tests/testdata/wmt16-en-de-v0-greedy_until @@ -1 +1 @@ -d71e2074af3770e9b29ac561caf2e1c29ad6b0dc50ec2e7bcc5501747b11f0da +d71e2074af3770e9b29ac561caf2e1c29ad6b0dc50ec2e7bcc5501747b11f0da \ No newline at end of file diff --git a/tests/testdata/wmt16-en-de-v0-res.json b/tests/testdata/wmt16-en-de-v0-res.json index 9facc33e24..88bee7ffa6 100644 --- a/tests/testdata/wmt16-en-de-v0-res.json +++ b/tests/testdata/wmt16-en-de-v0-res.json @@ -1 +1 @@ -{"results": {"wmt16-en-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.010909486120840577, "chrf_stderr": 0.000122611124711072, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-en-de": 0}} +{"results": {"wmt16-en-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.010909486120840577, "chrf_stderr": 0.000122611124711072, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-en-de": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt16-en-ro-v0-greedy_until b/tests/testdata/wmt16-en-ro-v0-greedy_until index 0dd42926c0..291492556e 100644 --- a/tests/testdata/wmt16-en-ro-v0-greedy_until +++ b/tests/testdata/wmt16-en-ro-v0-greedy_until @@ -1 +1 @@ -4be7fdda313394f19b5995b00ada1dfa3bb158ee1f020ef8d07ecea260fa60b2 +4be7fdda313394f19b5995b00ada1dfa3bb158ee1f020ef8d07ecea260fa60b2 \ No newline at end of file diff --git a/tests/testdata/wmt16-en-ro-v0-res.json b/tests/testdata/wmt16-en-ro-v0-res.json index 878c584f63..babb8d2d74 100644 --- a/tests/testdata/wmt16-en-ro-v0-res.json +++ b/tests/testdata/wmt16-en-ro-v0-res.json @@ -1 +1 @@ -{"results": {"wmt16-en-ro": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.012004814364156886, "chrf_stderr": 6.424423961332661e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-en-ro": 0}} +{"results": {"wmt16-en-ro": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.012004814364156886, "chrf_stderr": 6.424423961332661e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-en-ro": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt16-ro-en-v0-greedy_until b/tests/testdata/wmt16-ro-en-v0-greedy_until index 745a5fdd81..fbcac1b7e3 100644 --- a/tests/testdata/wmt16-ro-en-v0-greedy_until +++ b/tests/testdata/wmt16-ro-en-v0-greedy_until @@ -1 +1 @@ -d1b7c50751b0d5d7470b7f49f2bab9d09792c91460fc92cc34f06617013d7c65 +d1b7c50751b0d5d7470b7f49f2bab9d09792c91460fc92cc34f06617013d7c65 \ No newline at end of file diff --git a/tests/testdata/wmt16-ro-en-v0-res.json b/tests/testdata/wmt16-ro-en-v0-res.json index 415aece638..267763793d 100644 --- a/tests/testdata/wmt16-ro-en-v0-res.json +++ b/tests/testdata/wmt16-ro-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt16-ro-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01262029828861831, "chrf_stderr": 0.00014507496111350828, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-ro-en": 0}} +{"results": {"wmt16-ro-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01262029828861831, "chrf_stderr": 0.00014507496111350828, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt16-ro-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-cs-en-v0-greedy_until b/tests/testdata/wmt20-cs-en-v0-greedy_until index 4d0ddb177a..7bcf240b70 100644 --- a/tests/testdata/wmt20-cs-en-v0-greedy_until +++ b/tests/testdata/wmt20-cs-en-v0-greedy_until @@ -1 +1 @@ -bfead9efdb1b2402a414c55929c8d8f956585f938a35466931d44e81d89cfe00 +bfead9efdb1b2402a414c55929c8d8f956585f938a35466931d44e81d89cfe00 \ No newline at end of file diff --git a/tests/testdata/wmt20-cs-en-v0-res.json b/tests/testdata/wmt20-cs-en-v0-res.json index 27bcea3ed0..70c80afe5b 100644 --- a/tests/testdata/wmt20-cs-en-v0-res.json +++ b/tests/testdata/wmt20-cs-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-cs-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.006212086270964023, "chrf_stderr": 0.0001119165191795531, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-cs-en": 0}} +{"results": {"wmt20-cs-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.006212086270964023, "chrf_stderr": 0.0001119165191795531, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-cs-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-de-en-v0-greedy_until b/tests/testdata/wmt20-de-en-v0-greedy_until index e102ccfac1..c02fb9875d 100644 --- a/tests/testdata/wmt20-de-en-v0-greedy_until +++ b/tests/testdata/wmt20-de-en-v0-greedy_until @@ -1 +1 @@ -d13b5a6915ca86ac6c6ebc50d9be0d0be3dfca600c12e896df53190d875de74d +d13b5a6915ca86ac6c6ebc50d9be0d0be3dfca600c12e896df53190d875de74d \ No newline at end of file diff --git a/tests/testdata/wmt20-de-en-v0-res.json b/tests/testdata/wmt20-de-en-v0-res.json index 36246c0f33..790424fe4f 100644 --- a/tests/testdata/wmt20-de-en-v0-res.json +++ b/tests/testdata/wmt20-de-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-de-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.006703243310670055, "chrf_stderr": 0.0001292711927988445, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-de-en": 0}} +{"results": {"wmt20-de-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.006703243310670055, "chrf_stderr": 0.0001292711927988445, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-de-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-de-fr-v0-greedy_until b/tests/testdata/wmt20-de-fr-v0-greedy_until index 3b0e21daaf..7cb9424082 100644 --- a/tests/testdata/wmt20-de-fr-v0-greedy_until +++ b/tests/testdata/wmt20-de-fr-v0-greedy_until @@ -1 +1 @@ -7f197bc281d6dbf9425900ef0dee7175021c43e355050f149f43b161c52bf0b0 +7f197bc281d6dbf9425900ef0dee7175021c43e355050f149f43b161c52bf0b0 \ No newline at end of file diff --git a/tests/testdata/wmt20-de-fr-v0-res.json b/tests/testdata/wmt20-de-fr-v0-res.json index 820b75bcd1..79a0d12fe6 100644 --- a/tests/testdata/wmt20-de-fr-v0-res.json +++ b/tests/testdata/wmt20-de-fr-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-de-fr": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.011897164096796364, "chrf_stderr": 0.00010158164726118333, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-de-fr": 0}} +{"results": {"wmt20-de-fr": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.011897164096796364, "chrf_stderr": 0.00010158164726118333, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-de-fr": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-cs-v0-greedy_until b/tests/testdata/wmt20-en-cs-v0-greedy_until index fad767f930..d14fc4939a 100644 --- a/tests/testdata/wmt20-en-cs-v0-greedy_until +++ b/tests/testdata/wmt20-en-cs-v0-greedy_until @@ -1 +1 @@ -5a34e6863bf6965afd31653de50bac5fecf58db65dbaba46921504a2b7463786 +5a34e6863bf6965afd31653de50bac5fecf58db65dbaba46921504a2b7463786 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-cs-v0-res.json b/tests/testdata/wmt20-en-cs-v0-res.json index b9998954e3..2ba9db70d3 100644 --- a/tests/testdata/wmt20-en-cs-v0-res.json +++ b/tests/testdata/wmt20-en-cs-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-cs": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.009879653442394573, "chrf_stderr": 8.210293331159994e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-cs": 0}} +{"results": {"wmt20-en-cs": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.009879653442394573, "chrf_stderr": 8.210293331159994e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-cs": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-de-v0-greedy_until b/tests/testdata/wmt20-en-de-v0-greedy_until index 46a0dc5f51..c4078efd99 100644 --- a/tests/testdata/wmt20-en-de-v0-greedy_until +++ b/tests/testdata/wmt20-en-de-v0-greedy_until @@ -1 +1 @@ -b6e9c305766ea23ce1027309f83c6d4c2ce8948d70b63a7858586ca34050d7fb +b6e9c305766ea23ce1027309f83c6d4c2ce8948d70b63a7858586ca34050d7fb \ No newline at end of file diff --git a/tests/testdata/wmt20-en-de-v0-res.json b/tests/testdata/wmt20-en-de-v0-res.json index 78059f723f..183e66270a 100644 --- a/tests/testdata/wmt20-en-de-v0-res.json +++ b/tests/testdata/wmt20-en-de-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.007148103038872972, "chrf_stderr": 9.594096858911254e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-de": 0}} +{"results": {"wmt20-en-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.007148103038872972, "chrf_stderr": 9.594096858911254e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-de": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-iu-v0-greedy_until b/tests/testdata/wmt20-en-iu-v0-greedy_until index 3039491584..d26bb4f92a 100644 --- a/tests/testdata/wmt20-en-iu-v0-greedy_until +++ b/tests/testdata/wmt20-en-iu-v0-greedy_until @@ -1 +1 @@ -f5688199890a48f73f2cc04a2152e35190f0e0ddd40e629fa24ee39d423ea389 +f5688199890a48f73f2cc04a2152e35190f0e0ddd40e629fa24ee39d423ea389 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-iu-v0-res.json b/tests/testdata/wmt20-en-iu-v0-res.json index 72dcfbeea2..22f042eb4e 100644 --- a/tests/testdata/wmt20-en-iu-v0-res.json +++ b/tests/testdata/wmt20-en-iu-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-iu": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.00011803644548940443, "chrf_stderr": 2.175287038623409e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-iu": 0}} +{"results": {"wmt20-en-iu": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.00011803644548940443, "chrf_stderr": 2.175287038623409e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-iu": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ja-v0-greedy_until b/tests/testdata/wmt20-en-ja-v0-greedy_until index ba67a22399..9777002c79 100644 --- a/tests/testdata/wmt20-en-ja-v0-greedy_until +++ b/tests/testdata/wmt20-en-ja-v0-greedy_until @@ -1 +1 @@ -7fe61f5847a51e93e97c84b39f4420978727754e4b6cf636a27851c615857530 +7fe61f5847a51e93e97c84b39f4420978727754e4b6cf636a27851c615857530 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ja-v0-res.json b/tests/testdata/wmt20-en-ja-v0-res.json index 9b00c0fb16..57bad300d7 100644 --- a/tests/testdata/wmt20-en-ja-v0-res.json +++ b/tests/testdata/wmt20-en-ja-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-ja": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 4.1308658294778584e-05, "chrf_stderr": 2.0456539027807417e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ja": 0}} +{"results": {"wmt20-en-ja": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 4.1308658294778584e-05, "chrf_stderr": 2.0456539027807417e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ja": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ja-v1-greedy_until b/tests/testdata/wmt20-en-ja-v1-greedy_until index ba67a22399..9777002c79 100644 --- a/tests/testdata/wmt20-en-ja-v1-greedy_until +++ b/tests/testdata/wmt20-en-ja-v1-greedy_until @@ -1 +1 @@ -7fe61f5847a51e93e97c84b39f4420978727754e4b6cf636a27851c615857530 +7fe61f5847a51e93e97c84b39f4420978727754e4b6cf636a27851c615857530 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ja-v1-res.json b/tests/testdata/wmt20-en-ja-v1-res.json index 8eda5824b7..be5e56abcf 100644 --- a/tests/testdata/wmt20-en-ja-v1-res.json +++ b/tests/testdata/wmt20-en-ja-v1-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-ja": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 4.1305928226819116e-05, "chrf_stderr": 2.0455354158878388e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ja": 1}} +{"results": {"wmt20-en-ja": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 4.1305928226819116e-05, "chrf_stderr": 2.0455354158878388e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ja": 1}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-km-v0-greedy_until b/tests/testdata/wmt20-en-km-v0-greedy_until index a77668f1ed..ddce46a79f 100644 --- a/tests/testdata/wmt20-en-km-v0-greedy_until +++ b/tests/testdata/wmt20-en-km-v0-greedy_until @@ -1 +1 @@ -eb5365c46f22ffec9a157991627d6e1fd1117fccffaedfc73619e93bafb5a408 +eb5365c46f22ffec9a157991627d6e1fd1117fccffaedfc73619e93bafb5a408 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-km-v0-res.json b/tests/testdata/wmt20-en-km-v0-res.json index f9f0799431..e5ee2e9be9 100644 --- a/tests/testdata/wmt20-en-km-v0-res.json +++ b/tests/testdata/wmt20-en-km-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-km": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 1.9008351315007364e-05, "chrf_stderr": 7.136657625458525e-06, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-km": 0}} +{"results": {"wmt20-en-km": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 1.9008351315007364e-05, "chrf_stderr": 7.136657625458525e-06, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-km": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-pl-v0-greedy_until b/tests/testdata/wmt20-en-pl-v0-greedy_until index 17d5f4dade..bd431d61c4 100644 --- a/tests/testdata/wmt20-en-pl-v0-greedy_until +++ b/tests/testdata/wmt20-en-pl-v0-greedy_until @@ -1 +1 @@ -952f02575d4936d93c4d2808d86c4bf5f1f3a0901212acee6cbc1f9cbd30d39e +952f02575d4936d93c4d2808d86c4bf5f1f3a0901212acee6cbc1f9cbd30d39e \ No newline at end of file diff --git a/tests/testdata/wmt20-en-pl-v0-res.json b/tests/testdata/wmt20-en-pl-v0-res.json index 2a9f3b9621..13bfd5b552 100644 --- a/tests/testdata/wmt20-en-pl-v0-res.json +++ b/tests/testdata/wmt20-en-pl-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-pl": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.009006977773147825, "chrf_stderr": 0.00023387733367766675, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-pl": 0}} +{"results": {"wmt20-en-pl": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.009006977773147825, "chrf_stderr": 0.00023387733367766675, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-pl": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ps-v0-greedy_until b/tests/testdata/wmt20-en-ps-v0-greedy_until index 36570999ff..77b600c49a 100644 --- a/tests/testdata/wmt20-en-ps-v0-greedy_until +++ b/tests/testdata/wmt20-en-ps-v0-greedy_until @@ -1 +1 @@ -8411c2cb73114cbd0c6e0f17eab2625d486cc3a601105deb0ea1338a401df689 +8411c2cb73114cbd0c6e0f17eab2625d486cc3a601105deb0ea1338a401df689 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ps-v0-res.json b/tests/testdata/wmt20-en-ps-v0-res.json index 98cad37e31..fcfb51f053 100644 --- a/tests/testdata/wmt20-en-ps-v0-res.json +++ b/tests/testdata/wmt20-en-ps-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-ps": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 2.1193813610582323e-06, "chrf_stderr": 2.113911466119111e-06, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ps": 0}} +{"results": {"wmt20-en-ps": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 2.1193813610582323e-06, "chrf_stderr": 2.113911466119111e-06, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ps": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ru-v0-greedy_until b/tests/testdata/wmt20-en-ru-v0-greedy_until index ee0ab5524f..d21d39ac9f 100644 --- a/tests/testdata/wmt20-en-ru-v0-greedy_until +++ b/tests/testdata/wmt20-en-ru-v0-greedy_until @@ -1 +1 @@ -a1613831f69c1679a54670092af40ce76617b79d7cc837984803b0fc52bb8bde +a1613831f69c1679a54670092af40ce76617b79d7cc837984803b0fc52bb8bde \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ru-v0-res.json b/tests/testdata/wmt20-en-ru-v0-res.json index f8da87036c..af339eda5d 100644 --- a/tests/testdata/wmt20-en-ru-v0-res.json +++ b/tests/testdata/wmt20-en-ru-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-ru": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0007327811114614671, "chrf_stderr": 4.43155903515048e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ru": 0}} +{"results": {"wmt20-en-ru": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0007327811114614671, "chrf_stderr": 4.43155903515048e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ru": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ta-v0-greedy_until b/tests/testdata/wmt20-en-ta-v0-greedy_until index 47a7be0129..1b40263f15 100644 --- a/tests/testdata/wmt20-en-ta-v0-greedy_until +++ b/tests/testdata/wmt20-en-ta-v0-greedy_until @@ -1 +1 @@ -5fc556fa90bca7f1b1396e97e392eac8080b0ad53488358799b8fc0b21a94cb1 +5fc556fa90bca7f1b1396e97e392eac8080b0ad53488358799b8fc0b21a94cb1 \ No newline at end of file diff --git a/tests/testdata/wmt20-en-ta-v0-res.json b/tests/testdata/wmt20-en-ta-v0-res.json index 6b121c2e40..b04f968d76 100644 --- a/tests/testdata/wmt20-en-ta-v0-res.json +++ b/tests/testdata/wmt20-en-ta-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-ta": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0, "chrf_stderr": 0.0, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ta": 0}} +{"results": {"wmt20-en-ta": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.0, "chrf_stderr": 0.0, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-ta": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-zh-v0-greedy_until b/tests/testdata/wmt20-en-zh-v0-greedy_until index 5a2eebe6d8..db79b7f03f 100644 --- a/tests/testdata/wmt20-en-zh-v0-greedy_until +++ b/tests/testdata/wmt20-en-zh-v0-greedy_until @@ -1 +1 @@ -67f0333ddbcb07d7a9ac12919129a18fe4fea24e4826a11bbdde4fd5ed5ed83f +67f0333ddbcb07d7a9ac12919129a18fe4fea24e4826a11bbdde4fd5ed5ed83f \ No newline at end of file diff --git a/tests/testdata/wmt20-en-zh-v0-res.json b/tests/testdata/wmt20-en-zh-v0-res.json index 18b5ff551e..24db35e62f 100644 --- a/tests/testdata/wmt20-en-zh-v0-res.json +++ b/tests/testdata/wmt20-en-zh-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-zh": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.00014170297316825535, "chrf_stderr": 6.590669847391838e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-zh": 0}} +{"results": {"wmt20-en-zh": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.00014170297316825535, "chrf_stderr": 6.590669847391838e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-zh": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-en-zh-v1-greedy_until b/tests/testdata/wmt20-en-zh-v1-greedy_until index 5a2eebe6d8..db79b7f03f 100644 --- a/tests/testdata/wmt20-en-zh-v1-greedy_until +++ b/tests/testdata/wmt20-en-zh-v1-greedy_until @@ -1 +1 @@ -67f0333ddbcb07d7a9ac12919129a18fe4fea24e4826a11bbdde4fd5ed5ed83f +67f0333ddbcb07d7a9ac12919129a18fe4fea24e4826a11bbdde4fd5ed5ed83f \ No newline at end of file diff --git a/tests/testdata/wmt20-en-zh-v1-res.json b/tests/testdata/wmt20-en-zh-v1-res.json index e06c0dbe96..a7a56daf0e 100644 --- a/tests/testdata/wmt20-en-zh-v1-res.json +++ b/tests/testdata/wmt20-en-zh-v1-res.json @@ -1 +1 @@ -{"results": {"wmt20-en-zh": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.00014170297316825535, "chrf_stderr": 6.590669847391838e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-zh": 1}} +{"results": {"wmt20-en-zh": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.00014170297316825535, "chrf_stderr": 6.590669847391838e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-en-zh": 1}} \ No newline at end of file diff --git a/tests/testdata/wmt20-fr-de-v0-greedy_until b/tests/testdata/wmt20-fr-de-v0-greedy_until index 10b82f1b97..7353ad4475 100644 --- a/tests/testdata/wmt20-fr-de-v0-greedy_until +++ b/tests/testdata/wmt20-fr-de-v0-greedy_until @@ -1 +1 @@ -8a4b65c59dcac6591d46261909ee92ebcf41c19ee7442b12842302b2d8aeb36f +8a4b65c59dcac6591d46261909ee92ebcf41c19ee7442b12842302b2d8aeb36f \ No newline at end of file diff --git a/tests/testdata/wmt20-fr-de-v0-res.json b/tests/testdata/wmt20-fr-de-v0-res.json index 3b3c168ae2..d5d06a02a3 100644 --- a/tests/testdata/wmt20-fr-de-v0-res.json +++ b/tests/testdata/wmt20-fr-de-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-fr-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01143193767396364, "chrf_stderr": 0.00012555271954563658, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-fr-de": 0}} +{"results": {"wmt20-fr-de": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01143193767396364, "chrf_stderr": 0.00012555271954563658, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-fr-de": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-iu-en-v0-greedy_until b/tests/testdata/wmt20-iu-en-v0-greedy_until index 35df9319d4..87a1981e79 100644 --- a/tests/testdata/wmt20-iu-en-v0-greedy_until +++ b/tests/testdata/wmt20-iu-en-v0-greedy_until @@ -1 +1 @@ -97bf664a8efa54b5366b8341f77b418106dd0cb26169d5b2d0144e4d3d2bc5c9 +97bf664a8efa54b5366b8341f77b418106dd0cb26169d5b2d0144e4d3d2bc5c9 \ No newline at end of file diff --git a/tests/testdata/wmt20-iu-en-v0-res.json b/tests/testdata/wmt20-iu-en-v0-res.json index ce0cf39dfa..e94cac8876 100644 --- a/tests/testdata/wmt20-iu-en-v0-res.json +++ b/tests/testdata/wmt20-iu-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-iu-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.012204628007572778, "chrf_stderr": 8.944407532175802e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-iu-en": 0}} +{"results": {"wmt20-iu-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.012204628007572778, "chrf_stderr": 8.944407532175802e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-iu-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-ja-en-v0-greedy_until b/tests/testdata/wmt20-ja-en-v0-greedy_until index 7f2363fa46..3a89d7fcdf 100644 --- a/tests/testdata/wmt20-ja-en-v0-greedy_until +++ b/tests/testdata/wmt20-ja-en-v0-greedy_until @@ -1 +1 @@ -1fd846f3c0104e794eb380dae7f648592092ab8bf59234c26d0a671bbbc28df1 +1fd846f3c0104e794eb380dae7f648592092ab8bf59234c26d0a671bbbc28df1 \ No newline at end of file diff --git a/tests/testdata/wmt20-ja-en-v0-res.json b/tests/testdata/wmt20-ja-en-v0-res.json index 4e19eda5ef..4344b7cd8a 100644 --- a/tests/testdata/wmt20-ja-en-v0-res.json +++ b/tests/testdata/wmt20-ja-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-ja-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.010703148854351403, "chrf_stderr": 0.00022242113108130186, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ja-en": 0}} +{"results": {"wmt20-ja-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.010703148854351403, "chrf_stderr": 0.00022242113108130186, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ja-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-km-en-v0-greedy_until b/tests/testdata/wmt20-km-en-v0-greedy_until index 867aa63de7..a6f1486610 100644 --- a/tests/testdata/wmt20-km-en-v0-greedy_until +++ b/tests/testdata/wmt20-km-en-v0-greedy_until @@ -1 +1 @@ -fb4ec81bb89c70df7e21b43e0e882915b7b71a2a85bb8d4b59e0c7938baaa4c2 +fb4ec81bb89c70df7e21b43e0e882915b7b71a2a85bb8d4b59e0c7938baaa4c2 \ No newline at end of file diff --git a/tests/testdata/wmt20-km-en-v0-res.json b/tests/testdata/wmt20-km-en-v0-res.json index cf4e8dc7a6..4f6dc98604 100644 --- a/tests/testdata/wmt20-km-en-v0-res.json +++ b/tests/testdata/wmt20-km-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-km-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015142474534585969, "chrf_stderr": 0.0001518735048829897, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-km-en": 0}} +{"results": {"wmt20-km-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015142474534585969, "chrf_stderr": 0.0001518735048829897, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-km-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-pl-en-v0-greedy_until b/tests/testdata/wmt20-pl-en-v0-greedy_until index 5ab5b017b7..899ce01919 100644 --- a/tests/testdata/wmt20-pl-en-v0-greedy_until +++ b/tests/testdata/wmt20-pl-en-v0-greedy_until @@ -1 +1 @@ -89274499d84176b1ffe4eaec06f2c89ca807342384dc946c2e348d00116aaade +89274499d84176b1ffe4eaec06f2c89ca807342384dc946c2e348d00116aaade \ No newline at end of file diff --git a/tests/testdata/wmt20-pl-en-v0-res.json b/tests/testdata/wmt20-pl-en-v0-res.json index b1c165bdcb..a2f5cb31be 100644 --- a/tests/testdata/wmt20-pl-en-v0-res.json +++ b/tests/testdata/wmt20-pl-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-pl-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01353367757716276, "chrf_stderr": 0.00018386199249976465, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-pl-en": 0}} +{"results": {"wmt20-pl-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.01353367757716276, "chrf_stderr": 0.00018386199249976465, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-pl-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-ps-en-v0-greedy_until b/tests/testdata/wmt20-ps-en-v0-greedy_until index b77563a655..7776c59523 100644 --- a/tests/testdata/wmt20-ps-en-v0-greedy_until +++ b/tests/testdata/wmt20-ps-en-v0-greedy_until @@ -1 +1 @@ -c3976465e3709b4bc371175cc1494c69fe096ea4ba7d114da779d2baa0a47466 +c3976465e3709b4bc371175cc1494c69fe096ea4ba7d114da779d2baa0a47466 \ No newline at end of file diff --git a/tests/testdata/wmt20-ps-en-v0-res.json b/tests/testdata/wmt20-ps-en-v0-res.json index f01519d2dd..00c9c742e4 100644 --- a/tests/testdata/wmt20-ps-en-v0-res.json +++ b/tests/testdata/wmt20-ps-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-ps-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015192865365105723, "chrf_stderr": 0.00011334541381539086, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ps-en": 0}} +{"results": {"wmt20-ps-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.015192865365105723, "chrf_stderr": 0.00011334541381539086, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ps-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-ru-en-v0-greedy_until b/tests/testdata/wmt20-ru-en-v0-greedy_until index ca51f07216..27c60fb721 100644 --- a/tests/testdata/wmt20-ru-en-v0-greedy_until +++ b/tests/testdata/wmt20-ru-en-v0-greedy_until @@ -1 +1 @@ -1477ab6542c26bd0222cc1aded174f33bf8d04d1cf6a1c0959aeca4ff3779adc +1477ab6542c26bd0222cc1aded174f33bf8d04d1cf6a1c0959aeca4ff3779adc \ No newline at end of file diff --git a/tests/testdata/wmt20-ru-en-v0-res.json b/tests/testdata/wmt20-ru-en-v0-res.json index 1a5633a53d..b6d0c71ad7 100644 --- a/tests/testdata/wmt20-ru-en-v0-res.json +++ b/tests/testdata/wmt20-ru-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-ru-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.013344639906399232, "chrf_stderr": 7.583552652374546e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ru-en": 0}} +{"results": {"wmt20-ru-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.013344639906399232, "chrf_stderr": 7.583552652374546e-05, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ru-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-ta-en-v0-greedy_until b/tests/testdata/wmt20-ta-en-v0-greedy_until index 96e62e8bf7..f0f6597245 100644 --- a/tests/testdata/wmt20-ta-en-v0-greedy_until +++ b/tests/testdata/wmt20-ta-en-v0-greedy_until @@ -1 +1 @@ -111ea3efdc08f1cf536631b9426c3a20e482c575d009d2a8c71f59c027578eec +111ea3efdc08f1cf536631b9426c3a20e482c575d009d2a8c71f59c027578eec \ No newline at end of file diff --git a/tests/testdata/wmt20-ta-en-v0-res.json b/tests/testdata/wmt20-ta-en-v0-res.json index 2e0c1cadd3..a2ad506bf9 100644 --- a/tests/testdata/wmt20-ta-en-v0-res.json +++ b/tests/testdata/wmt20-ta-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-ta-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.013841110664859798, "chrf_stderr": 0.00018476696850880766, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ta-en": 0}} +{"results": {"wmt20-ta-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.013841110664859798, "chrf_stderr": 0.00018476696850880766, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-ta-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wmt20-zh-en-v0-greedy_until b/tests/testdata/wmt20-zh-en-v0-greedy_until index 8792a4c1fa..41a1e91515 100644 --- a/tests/testdata/wmt20-zh-en-v0-greedy_until +++ b/tests/testdata/wmt20-zh-en-v0-greedy_until @@ -1 +1 @@ -07dbadfd6f2b2b9462ab6187dbfaabae6e5192ab89a8e4ede9237834b9364dd1 +07dbadfd6f2b2b9462ab6187dbfaabae6e5192ab89a8e4ede9237834b9364dd1 \ No newline at end of file diff --git a/tests/testdata/wmt20-zh-en-v0-res.json b/tests/testdata/wmt20-zh-en-v0-res.json index 341812febe..11b8df7f87 100644 --- a/tests/testdata/wmt20-zh-en-v0-res.json +++ b/tests/testdata/wmt20-zh-en-v0-res.json @@ -1 +1 @@ -{"results": {"wmt20-zh-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.008438201290981157, "chrf_stderr": 0.0001109053964076822, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-zh-en": 0}} +{"results": {"wmt20-zh-en": {"bleu": 0.0, "bleu_stderr": 0.0, "chrf": 0.008438201290981157, "chrf_stderr": 0.0001109053964076822, "ter": 1.0, "ter_stderr": 0.0}}, "versions": {"wmt20-zh-en": 0}} \ No newline at end of file diff --git a/tests/testdata/wnli-v0-loglikelihood b/tests/testdata/wnli-v0-loglikelihood index 512acc556f..0c5c0b8ceb 100644 --- a/tests/testdata/wnli-v0-loglikelihood +++ b/tests/testdata/wnli-v0-loglikelihood @@ -1 +1 @@ -2ffd304d6096416eb29607e2e7642b1d6043163624967bcf4c4fc00fddc6c721 +2ffd304d6096416eb29607e2e7642b1d6043163624967bcf4c4fc00fddc6c721 \ No newline at end of file diff --git a/tests/testdata/wnli-v0-res.json b/tests/testdata/wnli-v0-res.json index 8be722cd63..8841cb74d1 100644 --- a/tests/testdata/wnli-v0-res.json +++ b/tests/testdata/wnli-v0-res.json @@ -1 +1 @@ -{"results": {"wnli": {"acc": 0.3380281690140845, "acc_stderr": 0.05653887739133514}}, "versions": {"wnli": 0}} +{"results": {"wnli": {"acc": 0.3380281690140845, "acc_stderr": 0.05653887739133514}}, "versions": {"wnli": 0}} \ No newline at end of file diff --git a/tests/testdata/wnli-v1-loglikelihood b/tests/testdata/wnli-v1-loglikelihood index e782f305e0..cbf4ad3777 100644 --- a/tests/testdata/wnli-v1-loglikelihood +++ b/tests/testdata/wnli-v1-loglikelihood @@ -1 +1 @@ -8a0f81661d2ab2334bbc8031fac31c0c8882f1d9271dd51599d21dfdbb726dea +8a0f81661d2ab2334bbc8031fac31c0c8882f1d9271dd51599d21dfdbb726dea \ No newline at end of file diff --git a/tests/testdata/wnli-v1-res.json b/tests/testdata/wnli-v1-res.json index c0b77b5469..d12348e0ae 100644 --- a/tests/testdata/wnli-v1-res.json +++ b/tests/testdata/wnli-v1-res.json @@ -1 +1 @@ -{"results": {"wnli": {"acc": 0.5633802816901409, "acc_stderr": 0.0592793555841297}}, "versions": {"wnli": 1}} +{"results": {"wnli": {"acc": 0.5633802816901409, "acc_stderr": 0.0592793555841297}}, "versions": {"wnli": 1}} \ No newline at end of file diff --git a/tests/testdata/wsc-v0-loglikelihood b/tests/testdata/wsc-v0-loglikelihood index a1fce6b236..d0d2963fe9 100644 --- a/tests/testdata/wsc-v0-loglikelihood +++ b/tests/testdata/wsc-v0-loglikelihood @@ -1 +1 @@ -45865468eff5ca31e6a050947a6b3310d9d5ed19d0f2e578a32ecaf1c768600f +45865468eff5ca31e6a050947a6b3310d9d5ed19d0f2e578a32ecaf1c768600f \ No newline at end of file diff --git a/tests/testdata/wsc-v0-res.json b/tests/testdata/wsc-v0-res.json index fbedf192d6..84be596241 100644 --- a/tests/testdata/wsc-v0-res.json +++ b/tests/testdata/wsc-v0-res.json @@ -1 +1 @@ -{"results": {"wsc": {"acc": 0.5480769230769231, "acc_stderr": 0.049038186969314335}}, "versions": {"wsc": 0}} +{"results": {"wsc": {"acc": 0.5480769230769231, "acc_stderr": 0.049038186969314335}}, "versions": {"wsc": 0}} \ No newline at end of file diff --git a/tests/testdata/wsc273-v0-loglikelihood b/tests/testdata/wsc273-v0-loglikelihood index a78c1ba104..9d592917bd 100644 --- a/tests/testdata/wsc273-v0-loglikelihood +++ b/tests/testdata/wsc273-v0-loglikelihood @@ -1 +1 @@ -26450d414c4581feb51a09882080e7a9b95882e7eab47b1751a4a6024b5a60ee +26450d414c4581feb51a09882080e7a9b95882e7eab47b1751a4a6024b5a60ee \ No newline at end of file diff --git a/tests/testdata/wsc273-v0-res.json b/tests/testdata/wsc273-v0-res.json index 726eb141d8..8f023b422a 100644 --- a/tests/testdata/wsc273-v0-res.json +++ b/tests/testdata/wsc273-v0-res.json @@ -1 +1 @@ -{"results": {"wsc273": {"acc": 0.5164835164835165, "acc_stderr": 0.0303004740355766}}, "versions": {"wsc273": 0}} +{"results": {"wsc273": {"acc": 0.5164835164835165, "acc_stderr": 0.0303004740355766}}, "versions": {"wsc273": 0}} \ No newline at end of file