-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsm_cosine_vs_minkowski3.py
86 lines (67 loc) · 3.38 KB
/
sm_cosine_vs_minkowski3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""
===========================
Compare cosine and Minkowski3 distances for modelling category production data.
===========================
Dr. Cai Wingfield
---------------------------
Embodied Cognition Lab
Department of Psychology
University of Lancaster
caiwingfield.net
---------------------------
2019
---------------------------
"""
from collections import defaultdict
from os import path
from typing import Dict, DefaultDict
from numpy import array, nan
from pandas import DataFrame
from framework.cognitive_model.ldm.utils.maths import DistanceType, distance
from framework.category_production.category_production import CategoryProduction
from framework.category_production.category_production import ColNames as CPColNames
from framework.cognitive_model.preferences.preferences import Preferences
from framework.cognitive_model.sensorimotor_norms.exceptions import WordNotInNormsError
from framework.cognitive_model.sensorimotor_norms.sensorimotor_norms import SensorimotorNorms
from framework.cognitive_model.utils.logging import logger
# Column headers under which main() stores the two distance measures in the output dataframe.
_COSINE_DISTANCE = "Cosine distance"
_MINKOWSKI_DISTANCE = "Minkowski-3 distance"
def main():
    """
    Add cosine and Minkowski-3 distance columns to the category production data and save the result as a CSV.

    For every sensorimotor category label, the distance between the category's sensorimotor vector and the vector
    of each of its single-word sensorimotor responses is computed under both distance types. Categories or
    responses which raise WordNotInNormsError are skipped, and the corresponding cells in the output are NaN.

    The output is written to
    `<Preferences.evaluation_dir>/Category production fit sensorimotor/item-level data (cosine vs Minkowski-3).csv`;
    the containing directory is created if it does not already exist.
    """
    # Function-scope import: only needed here, to prepare the output directory.
    from os import makedirs

    sensorimotor_norms = SensorimotorNorms()
    category_production = CategoryProduction()

    # Work on a copy so the data held by category_production is not modified.
    main_dataframe: DataFrame = category_production.data.copy()

    # category -> sm_response -> distance
    cosine_distances: Dict[str, DefaultDict[str, float]] = dict()
    minkowski_distances: Dict[str, DefaultDict[str, float]] = dict()
    for sm_category in category_production.category_labels_sensorimotor:
        # Register the category before attempting the vector lookup, so that categories which are skipped below
        # still have an entry, and every lookup within them falls back to NaN.
        cosine_distances[sm_category] = defaultdict(lambda: nan)
        minkowski_distances[sm_category] = defaultdict(lambda: nan)
        try:
            category_sm_vector = array(sensorimotor_norms.vector_for_word(sm_category))
        except WordNotInNormsError:
            continue

        logger.info(f"Category: {sm_category}")

        for sm_response in category_production.responses_for_category(sm_category,
                                                                       use_sensorimotor=True,
                                                                       single_word_only=True):
            try:
                response_sm_vector = array(sensorimotor_norms.vector_for_word(sm_response))
            except WordNotInNormsError:
                # No vector for this response: leave it absent so it reads back as NaN.
                continue

            cosine_distances[sm_category][sm_response] = distance(category_sm_vector, response_sm_vector,
                                                                  DistanceType.cosine)
            minkowski_distances[sm_category][sm_response] = distance(category_sm_vector, response_sm_vector,
                                                                     DistanceType.Minkowski3)

    def _row_lookup(distances: Dict[str, DefaultDict[str, float]]):
        """Build a row -> distance function which reads from the given category -> response -> distance table."""
        return lambda row: distances[row[CPColNames.CategorySensorimotor]][row[CPColNames.ResponseSensorimotor]]

    main_dataframe[_COSINE_DISTANCE] = main_dataframe.apply(_row_lookup(cosine_distances), axis=1)
    main_dataframe[_MINKOWSKI_DISTANCE] = main_dataframe.apply(_row_lookup(minkowski_distances), axis=1)

    # DataFrame.to_csv does not create missing parent directories, so make sure the target directory exists;
    # otherwise the whole computation above is lost at the very last step.
    output_dir = path.join(Preferences.evaluation_dir, "Category production fit sensorimotor")
    makedirs(output_dir, exist_ok=True)
    main_dataframe.to_csv(path.join(output_dir, "item-level data (cosine vs Minkowski-3).csv"),
                          index=False)
# Run the comparison only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()