Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreiNesterov committed Sep 12, 2023
1 parent cfef163 commit 3edb896
Show file tree
Hide file tree
Showing 64 changed files with 231 additions and 45,507 deletions.
91 changes: 17 additions & 74 deletions Wikidata/01_hits.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,59 +7,28 @@
"## Getting N of entities by term\n",
"* no filtering\n",
"* filtering with keywords\n",
"* filtering with keywords and statements"
"* filtering with keywords and statements"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import csv"
"import csv\n",
"from LODlit import wd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'bows'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-22d0861704d7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mwd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimportlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutil\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodule_from_spec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodules\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"LODlitParser.wd\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexec_module\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/opt/anaconda3/lib/python3.8/importlib/_bootstrap_external.py\u001b[0m in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n",
"\u001b[0;32m~/opt/anaconda3/lib/python3.8/importlib/_bootstrap.py\u001b[0m in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n",
"\u001b[0;32m~/reps/LODlit/LODlitParser/wd.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgzip\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwarnings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mbows\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'bows'"
]
}
],
"source": [
"# Importing LODlitParser Wikidata module \n",
"# this code is taken from\n",
"# https://stackoverflow.com/questions/67631/how-can-i-import-a-module-dynamically-given-the-full-path\n",
"import importlib.util\n",
"import sys\n",
"spec = importlib.util.spec_from_file_location(\"LODlitParser.wd\", \"/Users/anesterov/reps/LODlit/LODlitParser/wd.py\")\n",
"wd = importlib.util.module_from_spec(spec)\n",
"sys.modules[\"LODlitParser.wd\"] = wd\n",
"spec.loader.exec_module(wd)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# importing query terms\n",
"with open(\"/Users/anesterov/reps/LODlit/query_terms.json\",\"r\") as jf:\n",
"with open(\"/LODlit/query_terms.json\",\"r\") as jf:\n",
" query_terms = json.load(jf)"
]
},
Expand Down Expand Up @@ -161,7 +130,7 @@
"outputs": [],
"source": [
"# importing statements to filter out\n",
"with open(\"/Users/anesterov/reps/LODlit/Wikidata/statements_filter.json\",\"r\") as jf:\n",
"with open(\"statements_filter.json\",\"r\") as jf:\n",
" statements_filter = json.load(jf)"
]
},
Expand Down Expand Up @@ -226,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -236,18 +205,18 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('/Users/anesterov/reps/LODlit/Wikidata/n_entities_by_term.csv')\n",
"df = pd.read_csv('n_entities_by_term.csv')\n",
"en_df = df.loc[df['lang'] == 'en']\n",
"nl_df = df.loc[df['lang'] == 'nl']"
]
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -256,7 +225,7 @@
},
{
"cell_type": "code",
"execution_count": 64,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -265,24 +234,11 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/anesterov/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexing.py:1765: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" isetter(loc, value)\n"
]
}
],
"outputs": [],
"source": [
"# EN\n",
"for lemma, wordforms in query_terms['en'].items():\n",
Expand All @@ -293,22 +249,9 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/anesterov/opt/anaconda3/lib/python3.8/site-packages/pandas/core/indexing.py:1765: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" isetter(loc, value)\n"
]
}
],
"outputs": [],
"source": [
"# NL\n",
"for lemma, wordforms in query_terms['nl'].items():\n",
Expand All @@ -319,7 +262,7 @@
},
{
"cell_type": "code",
"execution_count": 67,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down
38 changes: 11 additions & 27 deletions Wikidata/02_literals.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,8 @@
"metadata": {},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Importing LODlitParser Wikidata module \n",
"# this code is taken from\n",
"# https://stackoverflow.com/questions/67631/how-can-i-import-a-module-dynamically-given-the-full-path\n",
"import importlib.util\n",
"import sys\n",
"spec = importlib.util.spec_from_file_location(\"LODlitParser.wd\", \"/Users/anesterov/reps/LODlit/LODlitParser/wd.py\")\n",
"wd = importlib.util.module_from_spec(spec)\n",
"sys.modules[\"LODlitParser.wd\"] = wd\n",
"spec.loader.exec_module(wd)"
"import json\n",
"from LODlit import wd"
]
},
{
Expand All @@ -49,7 +33,7 @@
"outputs": [],
"source": [
"# importing query terms\n",
"with open(\"/Users/anesterov/reps/LODlit/query_terms.json\",\"r\") as jf:\n",
"with open(\"/LODlit/query_terms.json\",\"r\") as jf:\n",
" query_terms = json.load(jf)"
]
},
Expand Down Expand Up @@ -82,7 +66,7 @@
"outputs": [],
"source": [
"# importing statements to filter out\n",
"with open(\"/Users/anesterov/reps/LODlit/Wikidata/statements_filter.json\",\"r\") as jf:\n",
"with open(\"statements_filter.json\",\"r\") as jf:\n",
" statements_filter = json.load(jf)"
]
},
Expand Down Expand Up @@ -119,7 +103,7 @@
"outputs": [],
"source": [
"# exporting EN search results\n",
"with open('/Users/anesterov/wd/jan31/search_results_en.json', 'w') as jf:\n",
"with open('search_results_en.json', 'w') as jf:\n",
" json.dump(literals_en, jf)"
]
},
Expand All @@ -145,7 +129,7 @@
"outputs": [],
"source": [
"# exporting NL search results\n",
"with open('/Users/anesterov/wd/jan31/search_results_nl.json', 'w') as jf:\n",
"with open('search_results_nl.json', 'w') as jf:\n",
" json.dump(literals_nl, jf)"
]
},
Expand All @@ -164,7 +148,7 @@
"outputs": [],
"source": [
"# EN\n",
"path_to_query_results_en = '/Users/anesterov/wd/jan31/search_results_en.json'\n",
"path_to_query_results_en = 'search_results_en.json'\n",
"terms_found_en = wd.find_where_query_term_appears(path_to_query_results_en,'en')"
]
},
Expand All @@ -175,7 +159,7 @@
"outputs": [],
"source": [
"# exporting EN file\n",
"with open('/Users/anesterov/wd/jan31/terms_found_en.json', 'w') as jf:\n",
"with open('terms_found_en.json', 'w') as jf:\n",
" json.dump(terms_found_en, jf)"
]
},
Expand All @@ -186,7 +170,7 @@
"outputs": [],
"source": [
"# NL\n",
"path_to_query_results_nl = '/Users/anesterov/wd/jan31/search_results_nl.json'\n",
"path_to_query_results_nl = 'search_results_nl.json'\n",
"terms_found_nl = wd.find_where_query_term_appears(path_to_query_results_nl,'nl')"
]
},
Expand All @@ -197,14 +181,14 @@
"outputs": [],
"source": [
"# exporting NL file\n",
"with open('/Users/anesterov/wd/jan31/terms_found_nl.json', 'w') as jf:\n",
"with open('terms_found_nl.json', 'w') as jf:\n",
" json.dump(terms_found_nl, jf)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand Down
36 changes: 4 additions & 32 deletions Wikidata/03_claims.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,7 @@
"outputs": [],
"source": [
"import json\n",
"import os\n",
"from os import listdir\n",
"from os.path import isfile,join"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Importing LODlitParser Wikidata module \n",
"# this code is taken from\n",
"# https://stackoverflow.com/questions/67631/how-can-i-import-a-module-dynamically-given-the-full-path\n",
"import importlib.util\n",
"import sys\n",
"spec = importlib.util.spec_from_file_location(\"LODlitParser.wd\", \"/Users/anesterov/reps/LODlit/LODlitParser/wd.py\")\n",
"wd = importlib.util.module_from_spec(spec)\n",
"sys.modules[\"LODlitParser.wd\"] = wd\n",
"spec.loader.exec_module(wd)"
"from LODlit import wd"
]
},
{
Expand All @@ -43,9 +24,9 @@
"outputs": [],
"source": [
"# importing the search results\n",
"with open(\"/Users/anesterov/wd/jan31/terms_found_en.json\",\"r\") as jf:\n",
"with open(\"terms_found_en.json\",\"r\") as jf:\n",
" entities_en = json.load(jf)\n",
"with open(\"/Users/anesterov/wd/jan31/terms_found_nl.json\",\"r\") as jf:\n",
"with open(\"terms_found_nl.json\",\"r\") as jf:\n",
" entities_nl = json.load(jf)"
]
},
Expand All @@ -72,15 +53,6 @@
"unique_qids = list(set(qids))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(unique_qids)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -253,7 +225,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand Down
8 changes: 3 additions & 5 deletions Wikidata/bar_charts.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,10 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"bar_chart = draw_bar_chart('/Users/anesterov/reps/LODlit/n_hits/subset_wd_nl_lemmas.csv','Figure 7. NL. Lemmas by properties, absolute',True)"
"bar_chart = draw_bar_chart('/n_hits/subset_wd_nl_lemmas.csv','Figure 7. NL. Lemmas by properties, absolute',True)"
]
},
{
Expand All @@ -153,7 +151,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand Down
Loading

0 comments on commit 3edb896

Please sign in to comment.