-
Notifications
You must be signed in to change notification settings - Fork 4
/
thesis.toc
77 lines (77 loc) · 6.96 KB
/
thesis.toc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
\contentsline {section}{\numberline {1}Introduction}{1}{section.1}
\contentsline {section}{\numberline {2}Low Resource Languages}{5}{section.2}
\contentsline {subsection}{\numberline {2.1}Definitions}{5}{subsection.2.1}
\contentsline {subsubsection}{\numberline {2.1.1}Endangered, revitalised, and extinct languages}{5}{subsubsection.2.1.1}
\contentsline {subsubsection}{\numberline {2.1.2}Official, \textit {de facto}, \textit {de jure}, majority, and minority languages}{10}{subsubsection.2.1.2}
\contentsline {subsubsection}{\numberline {2.1.3}Low resource, under resourced and incident languages}{12}{subsubsection.2.1.3}
\contentsline {subsubsection}{\numberline {2.1.4}Computer languages}{14}{subsubsection.2.1.4}
\contentsline {subsection}{\numberline {2.2}Metrics for language vitality}{14}{subsection.2.2}
\contentsline {subsubsection}{\numberline {2.2.1}The Graded Intergenerational Disruption Scale (GIDS)}{15}{subsubsection.2.2.1}
\contentsline {subsubsection}{\numberline {2.2.2}The UNESCO measurement scale}{15}{subsubsection.2.2.2}
\contentsline {subsubsection}{\numberline {2.2.3}The Extended GIDS (EGIDS)}{18}{subsubsection.2.2.3}
\contentsline {subsubsection}{\numberline {2.2.4}The Language Endangerment Index (LEI)}{23}{subsubsection.2.2.4}
\contentsline {subsubsection}{\numberline {2.2.5}A response to qualitative metrics}{24}{subsubsection.2.2.5}
\contentsline {subsection}{\numberline {2.3}Digital presence}{26}{subsection.2.3}
\contentsline {subsubsection}{\numberline {2.3.1}Finding resources on the web}{27}{subsubsection.2.3.1}
\contentsline {subsubsection}{\numberline {2.3.2}Metrics for digital presence}{29}{subsubsection.2.3.2}
\contentsline {paragraph}{\citepos {kornai2013digital} metric}{29}{section*.8}
\contentsline {paragraph}{\citepos {gibson2016assessing} extension}{31}{section*.10}
\contentsline {paragraph}{\citepos {soria2017digital} metric}{32}{section*.11}
\contentsline {subsection}{\numberline {2.4}Summary}{34}{subsection.2.4}
\contentsline {section}{\numberline {3}Language Resources}{36}{section.3}
\contentsline {subsection}{\numberline {3.1}Types of language resources}{36}{subsection.3.1}
\contentsline {subsubsection}{\numberline {3.1.1}Corpora}{36}{subsubsection.3.1.1}
\contentsline {subsubsection}{\numberline {3.1.2}Code}{39}{subsubsection.3.1.2}
\contentsline {subsection}{\numberline {3.2}Resource aggregators}{41}{subsection.3.2}
\contentsline {subsection}{\numberline {3.3}BLARK and LRE maps}{44}{subsection.3.3}
\contentsline {subsection}{\numberline {3.4}Who makes resources for languages?}{49}{subsection.3.4}
\contentsline {subsection}{\numberline {3.5}Non-English programming languages}{56}{subsection.3.5}
\contentsline {subsection}{\numberline {3.6}Summary}{57}{subsection.3.6}
\contentsline {section}{\numberline {4}Open Source Code}{58}{section.4}
\contentsline {subsection}{\numberline {4.1}Defining \textit {open source}}{58}{subsection.4.1}
\contentsline {subsection}{\numberline {4.2}Open source licenses}{62}{subsection.4.2}
\contentsline {subsection}{\numberline {4.3}Where is open source code?}{64}{subsection.4.3}
\contentsline {subsection}{\numberline {4.4}Digital permanence and storage}{67}{subsection.4.4}
\contentsline {subsection}{\numberline {4.5}Funding}{69}{subsection.4.5}
\contentsline {subsection}{\numberline {4.6}Summary}{71}{subsection.4.6}
\contentsline {section}{\numberline {5}Open Source Code for Low Resource Languages}{73}{section.5}
\contentsline {subsection}{\numberline {5.1}Case study: Mapping linguistic co\"ordinates}{73}{subsection.5.1}
\contentsline {subsection}{\numberline {5.2}LRL NLP available through data providers}{79}{subsection.5.2}
\contentsline {subsubsection}{\numberline {5.2.1}Few to no computational resources}{79}{subsubsection.5.2.1}
\contentsline {subsubsection}{\numberline {5.2.2}Some scoped computational resources}{80}{subsubsection.5.2.2}
\contentsline {subsubsection}{\numberline {5.2.3}Many computational resources}{82}{subsubsection.5.2.3}
\contentsline {subsubsection}{\numberline {5.2.4}Summary of computational resources}{82}{subsubsection.5.2.4}
\contentsline {subsection}{\numberline {5.3}Linked open data}{83}{subsection.5.3}
\contentsline {subsection}{\numberline {5.4}Multilingual NLP libraries}{85}{subsection.5.4}
\contentsline {subsection}{\numberline {5.5}A GitHub database for open source code}{88}{subsection.5.5}
\contentsline {subsection}{\numberline {5.6}Summary}{92}{subsection.5.6}
\contentsline {section}{\numberline {6}Case Studies}{94}{section.6}
\contentsline {subsection}{\numberline {6.1}Scottish Gaelic}{94}{subsection.6.1}
\contentsline {subsubsection}{\numberline {6.1.1}Language vitality status}{95}{subsubsection.6.1.1}
\contentsline {subsubsection}{\numberline {6.1.2}Language resources}{96}{subsubsection.6.1.2}
\contentsline {subsection}{\numberline {6.2}Naskapi}{101}{subsection.6.2}
\contentsline {subsubsection}{\numberline {6.2.1}Language background}{102}{subsubsection.6.2.1}
\contentsline {subsubsection}{\numberline {6.2.2}Language vitality status}{103}{subsubsection.6.2.2}
\contentsline {subsubsection}{\numberline {6.2.3}Orthography}{104}{subsubsection.6.2.3}
\contentsline {subsubsection}{\numberline {6.2.4}Corpora creation}{107}{subsubsection.6.2.4}
\contentsline {subsection}{\numberline {6.3}Summary}{109}{subsection.6.3}
\contentsline {section}{\numberline {7}Best Practice Recommendations}{110}{section.7}
\contentsline {subsection}{\numberline {7.1}Choosing a license}{110}{subsection.7.1}
\contentsline {subsection}{\numberline {7.2}Choosing repositories}{112}{subsection.7.2}
\contentsline {subsection}{\numberline {7.3}Sharing code without a platform}{114}{subsection.7.3}
\contentsline {subsection}{\numberline {7.4}Summary}{117}{subsection.7.4}
\contentsline {section}{\numberline {8}Discussion}{118}{section.8}
\contentsline {subsection}{\numberline {8.1}Is digital presence necessary?}{118}{subsection.8.1}
\contentsline {subsection}{\numberline {8.2}Ethics and open source}{119}{subsection.8.2}
\contentsline {subsection}{\numberline {8.3}Data and privacy}{120}{subsection.8.3}
\contentsline {subsection}{\numberline {8.4}Open Source as a tool for saving languages}{123}{subsection.8.4}
\contentsline {section}{\numberline {9}Future Work}{125}{section.9}
\contentsline {subsection}{\numberline {9.1}Extending databases of FLOSS code for LRLs}{125}{subsection.9.1}
\contentsline {subsection}{\numberline {9.2}Rethinking metrics for digital presence}{125}{subsection.9.2}
\contentsline {subsection}{\numberline {9.3}Rethinking language diversity and typological relation}{126}{subsection.9.3}
\contentsline {subsection}{\numberline {9.4}Metrics for code usage in LREC or ACL papers}{127}{subsection.9.4}
\contentsline {subsection}{\numberline {9.5}Development of a p2p storage system for linguistics code}{127}{subsection.9.5}
\contentsline {subsection}{\numberline {9.6}Extending Gaelic and Naskapi resources}{128}{subsection.9.6}
\contentsline {subsection}{\numberline {9.7}Beyond Ethnologue}{128}{subsection.9.7}
\contentsline {section}{\numberline {10}Conclusion}{130}{section.10}
\contentsline {section}{References}{132}{section*.20}