-
Notifications
You must be signed in to change notification settings - Fork 0
/
misspelling.py
150 lines (130 loc) · 6.42 KB
/
misspelling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# -*- coding: utf-8 -*-
"""
This script works similarly to solve_disambiguation.py. It is supposed to fix
links that contain common spelling mistakes. This is only possible on wikis
that have a template for these misspellings.
Command line options:
-always:XY instead of asking the user what to do, always perform the same
action. For example, XY can be "r0", "u" or "2". Be careful with
this option, and check the changes made by the bot. Note that
some choices for XY don't make sense and will result in a loop,
e.g. "l" or "m".
-start:XY goes through all misspellings in the category on your wiki
that is defined (to the bot) as the category containing
misspelling pages, starting at XY. If the -start argument is not
given, it starts at the beginning.
-main only check pages in the main namespace, not in the talk,
wikipedia, user, etc. namespaces.
"""
__version__ = '$Id$'
# (C) Daniel Herding, 2007
#
# Distributed under the terms of the MIT license.
import wikipedia as pywikibot
import catlib, pagegenerators
import solve_disambiguation
class MisspellingRobot(solve_disambiguation.DisambiguationRobot):
    """Disambiguation-style robot that fixes links to misspelled titles.

    A "misspelling page" is either a plain redirect to the correct title or
    a page that transcludes the wiki's misspelling template.  Pages are
    taken from the wiki's misspelling category when one is configured,
    otherwise from the pages that transclude the misspelling template.

    The attributes ``alternatives``, ``linkR`` and ``mysite`` used below are
    not defined in this class; they are presumably provided by
    DisambiguationRobot -- verify against solve_disambiguation.py.
    """

    # Name of the misspelling template (without the "Template:" prefix),
    # keyed by wiki language code.  None means no template is configured.
    misspellingTemplate = {
        'da': None,                     # uses simple redirects
        'de': u'Falschschreibung',
        # 'en': u'Template:Misspelling', # rarely used on en:
        'en': None,                     # uses simple redirects
        'hu': None,                     # uses simple redirects
        'nl': None,
        # 'pt': u'Pseudo-redirect',      # replaced by another system on pt:
    }

    # Optional: if there is a category, one can use the -start
    # parameter.
    misspellingCategory = {
        'da': u'Omdirigeringer af fejlstavninger',  # only contains date redirects at the moment
        'de': u'Kategorie:Wikipedia:Falschschreibung',
        'en': u'Redirects from misspellings',
        'hu': u'Átirányítások hibás névről',
        'nl': u'Categorie:Wikipedia:Redirect voor spelfout',
        # 'pt': u'Categoria:!Pseudo-redirects',
    }

    # Edit summaries, keyed by language code.  Every entry must contain
    # exactly one %s placeholder: setSummaryMessage() fills it with the
    # title of the misspelling page.
    msg = {
        'ar': u'روبوت: إصلاح وصلة خاطئة إلى %s',
        # The previous 'da' entry was a copy of the Danish category name and
        # had no %s placeholder, so formatting it raised TypeError.
        'da': u'Bot: Retter fejlstavet link til %s',
        'de': u'Bot: korrigiere Link auf Falschschreibung: %s',
        'en': u'Robot: Fixing misspelled link to %s',
        'he': u'בוט: מתקן קישור עם שגיאה לדף %s',
        'nds': u'Bot: rut mit verkehrt schreven Lenk op %s',
        'nl': u'Bot: verkeerd gespelde verwijzing naar %s gecorrigeerd',
        'pl': u'Robot poprawia literówkę w linku do %s',
        'pt': u'Bot: Corrigindo link com erro ortográfico para %s'
    }

    def __init__(self, always, firstPageTitle, main_only):
        """Set up the robot.

        always         -- choice to apply automatically to every link, or
                          None to prompt the user (see the -always option).
        firstPageTitle -- title at which to start in the misspelling
                          category, or None to start at the beginning.
        main_only      -- passed through to DisambiguationRobot; set by the
                          -main command line option.
        """
        # The remaining positional arguments are fixed values whose meaning
        # is defined by DisambiguationRobot.__init__ (not visible here).
        solve_disambiguation.DisambiguationRobot.__init__(
            self, always, [], True, False,
            self.createPageGenerator(firstPageTitle),
            False, main_only)

    def createPageGenerator(self, firstPageTitle):
        """Return a preloading generator over the wiki's misspelling pages.

        Uses the misspelling category when one is configured for the
        current site (honouring firstPageTitle as the start title);
        otherwise falls back to the pages transcluding the misspelling
        template, in which case a start title cannot be honoured.

        Raises KeyError if neither a category nor a template is configured
        for the current site's language.
        """
        site = pywikibot.getSite()
        lang = site.lang
        if lang in self.misspellingCategory:
            category = catlib.Category(site, self.misspellingCategory[lang])
            generator = pagegenerators.CategorizedPageGenerator(
                category, recurse=True, start=firstPageTitle)
        else:
            templateName = self.misspellingTemplate.get(lang)
            if templateName is None:
                # Previously this was either a bare KeyError or a lookup of
                # the non-existent page "Template:None".
                raise KeyError(
                    u'No misspelling category or template is configured '
                    u'for wiki language %r' % lang)
            template = pywikibot.Page(site, 'Template:%s' % templateName)
            generator = pagegenerators.ReferringPageGenerator(
                template, onlyTemplateInclusion=True)
            if firstPageTitle:
                pywikibot.output(
                    u'-start parameter unsupported on this wiki because there is no category for misspellings.')
        return pagegenerators.PreloadingGenerator(generator)

    # Overrides the DisambiguationRobot method.
    def findAlternatives(self, disambPage):
        """Collect the correct spelling(s) for disambPage.

        Appends the candidate titles to self.alternatives.  Returns True
        when the page was recognised as a misspelling page (a redirect, or
        a page using the misspelling template with at least one parameter),
        False otherwise.
        """
        if disambPage.isRedirectPage():
            self.alternatives.append(disambPage.getRedirectTarget().title())
            return True

        # .get() so that wikis without an entry are treated like wikis
        # without a template instead of raising KeyError.  The page's own
        # site is used for the lookup throughout.
        expectedTemplate = self.misspellingTemplate.get(
            disambPage.site().lang)
        if expectedTemplate is None:
            return False

        for templateName, params in disambPage.templatesWithParams():
            # Exact comparison: the former "in" test was a substring match
            # against the configured name and accepted unrelated templates.
            if templateName != expectedTemplate:
                continue
            if not params:
                # Template used without parameters: no spelling to take.
                continue
            # The correct spelling is in the last parameter.
            correctSpelling = params[-1]
            # On de.wikipedia, there are some cases where the
            # misspelling is ambiguous, see for example:
            # http://de.wikipedia.org/wiki/Buthan
            for match in self.linkR.finditer(correctSpelling):
                self.alternatives.append(match.group('title'))
            if not self.alternatives:
                # There were no links in the parameter, so there is
                # only one correct spelling.
                self.alternatives.append(correctSpelling)
            return True
        return False

    # Overrides the DisambiguationRobot method.
    def setSummaryMessage(self, disambPage, new_targets=[], unlink=False, dn=False):
        """Set the edit summary for fixes of links to disambPage.

        new_targets, unlink and dn are accepted for compatibility with the
        overridden method but are currently ignored (new_targets is never
        mutated, so its shared default list is harmless).
        """
        # TODO: setSummaryMessage() in solve_disambiguation now has parameters
        # new_targets and unlink. Make use of these here.
        summaryTemplate = pywikibot.translate(self.mysite, self.msg)
        pywikibot.setAction(summaryTemplate % disambPage.title())
def main():
    """Parse the command line and run a MisspellingRobot.

    Recognised arguments (everything else is ignored here):
      -always:XY  choice applied to every link instead of prompting
      -start[:XY] title to start from; a bare -start prompts for it
      -main       forwarded to the robot as main_only=True
    """
    # None means the user is prompted for every link (default behaviour).
    always_choice = None
    start_title = None
    only_main = False

    for option in pywikibot.handleArgs():
        if option.startswith('-always:'):
            always_choice = option[len('-always:'):]
            continue
        if option.startswith('-start'):
            if len(option) == len('-start'):
                # No title given on the command line: ask for one.
                start_title = pywikibot.input(
                    u'At which page do you want to start?')
            else:
                start_title = option[len('-start:'):]
            continue
        if option == '-main':
            only_main = True

    MisspellingRobot(always_choice, start_title, only_main).run()
if __name__ == '__main__':
    # Script entry point: stopme() is called whether main() returns
    # normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()