-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path1_3_pruning_quantile_lengths.py
83 lines (67 loc) · 3.2 KB
/
1_3_pruning_quantile_lengths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/Users/cai/Applications/miniconda3/bin/python
"""
===========================
Edge lengths by quantile and graph size.
===========================
Dr. Cai Wingfield
---------------------------
Embodied Cognition Lab
Department of Psychology
University of Lancaster
caiwingfield.net
---------------------------
2018
---------------------------
"""
import argparse
import sys
from os import path
from pandas import DataFrame
from framework.cli.lookups import get_corpus_from_name, get_model_from_params
from framework.cognitive_model.ldm.corpus.indexing import FreqDist
from framework.cognitive_model.ldm.utils.maths import DistanceType
from framework.cognitive_model.utils.logging import logger
from framework.cognitive_model.graph import iter_edges_from_edgelist
from framework.cognitive_model.utils.maths import nearest_value_at_quantile
from framework.cognitive_model.preferences.preferences import Preferences
def main(n_words: int, length_factor: int, corpus_name: str, distance_type_name: str, model_name: str, radius: int):
logger.info("")
corpus = get_corpus_from_name(corpus_name)
freq_dist = FreqDist.load(corpus.freq_dist_path)
distance_type = DistanceType.from_name(distance_type_name)
distributional_model = get_model_from_params(corpus, freq_dist, model_name, radius)
graph_file_name = f"{distributional_model.name} {distance_type.name} {n_words} words length {length_factor}.edgelist"
quantile_file_name = f"{distributional_model.name} {distance_type.name} {n_words} words length {length_factor} edge length quantiles.csv"
data = []
# Prune by quantile
for i, top_quantile in enumerate([.0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0]):
pruning_length = nearest_value_at_quantile(
[length for _edge, length in iter_edges_from_edgelist(path.join(Preferences.graphs_dir, graph_file_name))],
top_quantile)
logger.info(f"Edges above the {int(100*top_quantile)}% percentile are those longer than {pruning_length}).")
data.append((top_quantile, pruning_length))
# Save the data
DataFrame(data, columns=["Top quantile", "Pruning length"]).to_csv(
path.join(Preferences.graphs_dir, quantile_file_name),
header=True,
index=False
)
if __name__ == '__main__':
logger.info("Running %s" % " ".join(sys.argv))
parser = argparse.ArgumentParser(description="Run temporal spreading activation on a graph.")
parser.add_argument("-l", "--length_factor", required=True, type=int)
parser.add_argument("-c", "--corpus_name", required=True, type=str)
parser.add_argument("-d", "--distance_type", required=True, type=str)
parser.add_argument("-m", "--model_name", required=True, type=str)
parser.add_argument("-r", "--radius", required=True, type=int)
parser.add_argument("-w", "--words", type=int, required=True,
help="The number of words to use from the corpus. (Top n words.)")
args = parser.parse_args()
main(length_factor=args.length_factor,
corpus_name=args.corpus_name,
model_name=args.model_name,
radius=args.radius,
distance_type_name=args.distance_type,
n_words=args.words)
logger.info("Done!")