-
Notifications
You must be signed in to change notification settings - Fork 18
/
pinyin-comp
executable file
·263 lines (203 loc) · 7.29 KB
/
pinyin-comp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
complete path by acronym of pinyin initials
a fork of chsdir by [email protected]
(http://code.google.com/p/easyscripts/wiki/chsdir)
用拼音补全命令行中的中文名称和路径
实验目录:$ ls ./
SVN培训 全球眼 浙江建行 浙江农信
使用: (输完后按 TAB 键自动补全)
cd S <tab> 进入[SVN培训]
cd q <tab> 进入[全球眼]
cd z <tab> 自动补全[浙江]
cd zj <tab><tab> 提示[浙江建行 浙江农信]备选
cd zj1 <tab> 进入[浙江建行]
cd zj2 <tab> 进入[浙江农信]
cd 浙江j <tab> 进入[浙江建行]
cd zjj <tab> 进入[浙江建行]
如需模糊拼音支持,定义环境变量FUZZY
例如,对 n/l 进行模糊匹配:export FUZZY="{ 'n':'l' }"
多个匹配对之间用逗号隔开: export FUZZY="{ 'n':'l','f':'p' }"
Changelog
2009-05-08修订 取不到拼音的汉字匹配任何字符
2009-05-07新增 名称中有中文全角,可以用英文符号补全
2009-05-06修订 解决数字序号定位时借位的问题
2009-08-30新增 增加多音字支持,比如“音乐”
((遇到有未被收入的多音字,请邮件联系我))
2009-08-31新增 增加模糊拼音支持,比如'n'->'l'
((需手工增加环境变量,配置方法见后面说明))
2010-11-17 fork :D
2010-11-18 use a pre-generated pinyin table to look up pinyin initials;
it makes the logic cleaner and easier to maintain,
though it introduces a small extra delay that users can hardly notice.
"""
import ast
import locale
import os
import sys

import pinyin
# Encoding used by unicodelize()/stringlize() to decode input text and to
# encode the reply.  getdefaultlocale() reports (None, None) when no locale is
# configured (e.g. the plain "C" locale) and has been seen to raise ValueError
# for locale names it cannot parse; fall back to UTF-8 in both cases so the
# codec calls always receive a usable encoding name.
try:
    _, default_encoding = locale.getdefaultlocale()
except ValueError:
    _, default_encoding = None, None
if not default_encoding:
    default_encoding = "utf-8"
# Full-width (double-width) punctuation -> single-width ASCII equivalent.
# Keys are written as \u escapes so they survive any tool that folds
# full-width forms to ASCII; with ASCII keys every entry would map a character
# to itself and the table would do nothing.
DOUBLE_WIDTH = {
    u"\uff5e": u"~",   # FULLWIDTH TILDE
    u"\uff01": u"!",   # FULLWIDTH EXCLAMATION MARK
    u"\uff20": u"@",   # FULLWIDTH COMMERCIAL AT
    u"\uff03": u"#",   # FULLWIDTH NUMBER SIGN
    u"\uff04": u"$",   # FULLWIDTH DOLLAR SIGN
    u"\uff05": u"%",   # FULLWIDTH PERCENT SIGN
    u"\uff06": u"&",   # FULLWIDTH AMPERSAND
    u"\uff0a": u"*",   # FULLWIDTH ASTERISK
    u"\uff08": u"(",   # FULLWIDTH LEFT PARENTHESIS
    u"\uff09": u")",   # FULLWIDTH RIGHT PARENTHESIS
    u"\uff3f": u"_",   # FULLWIDTH LOW LINE
    u"\uff0d": u"-",   # FULLWIDTH HYPHEN-MINUS
    u"\uff0b": u"+",   # FULLWIDTH PLUS SIGN
    u"\uff3b": u"[",   # FULLWIDTH LEFT SQUARE BRACKET
    u"\uff3d": u"]",   # FULLWIDTH RIGHT SQUARE BRACKET
    u"\uff1c": u"<",   # FULLWIDTH LESS-THAN SIGN
    u"\uff1e": u">",   # FULLWIDTH GREATER-THAN SIGN
    u"\uff1f": u"?",   # FULLWIDTH QUESTION MARK
    u"\uff0c": u",",   # FULLWIDTH COMMA
    u"\u3002": u".",   # IDEOGRAPHIC FULL STOP
    u"\uff0f": u"/",   # FULLWIDTH SOLIDUS
    # NOTE(review): the value here used to be the plain string "u", which
    # looks like a slip for a u"..." literal.  The ideographic comma is mapped
    # to the ASCII comma -- confirm this is the key users are expected to type.
    u"\u3001": u",",   # IDEOGRAPHIC COMMA
}
# fuzzy pinyin
FUZZY = {}
try:
FUZZY = eval( os.getenv("FUZZY") )
FUZZY.keys()
except StandardError:
FUZZY = {}
def transform_double_width(uni_char):
    """Return the single-width equivalent of a double-width character.

    Characters that have no entry in DOUBLE_WIDTH are returned unchanged.
    """
    return DOUBLE_WIDTH.get(uni_char, uni_char)
def fuzzynize(pinyin):
    """Return the fuzzy substitute configured for *pinyin*.

    Looks *pinyin* up in FUZZY; values with no entry are returned unchanged.
    """
    return FUZZY.get(pinyin, pinyin)
def get_pinyin_initials(uni_char):
    """Return the pinyin initial(s) of *uni_char* joined into one string.

    The initials come from the ``pinyin.pinyin_initial`` table.  A character
    that is not in the table is returned as-is.
    """
    table = pinyin.pinyin_initial
    try:
        initials = table[uni_char]
    except KeyError:
        return uni_char
    else:
        return "".join(initials)
def acronymize(uni_char):
    """Return the acronym for one unicode character.

    - Double-width punctuation is first replaced by its single-width form.
    - ASCII characters are returned unchanged.
    - A character with several initials (polyphone) yields all of them
      wrapped in backticks, e.g. u"`ly`".
    - Any other character yields its single initial.

    The FUZZY substitution is applied to every pinyin initial, including each
    initial of a polyphone.  Previously polyphone initials were left
    un-substituted, so they could never match typed text that had already
    been run through fuzzynize().
    """
    # replace double-width chars with their single-width equivalents
    uni_char = transform_double_width(uni_char)

    # if ascii, return immediately
    if uni_char < u"\x80":
        return uni_char

    pinyin_initial = get_pinyin_initials(uni_char)

    # special case for chars having polyphone
    if len(pinyin_initial) > 1:
        fuzzed = u"".join([fuzzynize(initial) for initial in pinyin_initial])
        return u"`%s`" % fuzzed

    # for most cases.
    return fuzzynize(pinyin_initial)
def get_acronym(text):
    """Return the acronym of *text*.

    The text is converted to unicode with unicodelize(), each character is
    passed through acronymize(), and the pieces are concatenated in order.
    """
    pieces = [acronymize(char) for char in unicodelize(text)]
    return u"".join(pieces)
def unicodelize(text):
    """Try to convert *text* into a unicode (text) string.

    Text that is already unicode is returned unchanged.  Anything else is
    decoded with ``default_encoding``; if decoding is not possible the input
    is returned unchanged (best effort).

    Besides UnicodeDecodeError this also tolerates LookupError (unknown
    encoding name) and TypeError (e.g. ``default_encoding`` is None), which
    previously escaped as a crash.  The text type is resolved at call time so
    the function does not depend on the Python-2-only ``unicode`` builtin.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: str is the unicode type
    if isinstance(text, text_type):
        return text
    try:
        return text_type(text, default_encoding)
    except (UnicodeDecodeError, LookupError, TypeError):
        # best effort: hand the original object back
        return text
def stringlize(text):
    """Try to convert a unicode string back into a native string.

    On Python 2 a unicode object is encoded with ``default_encoding``; if
    encoding is not possible the input is returned unchanged (best effort).
    Non-unicode input is returned unchanged.

    Where the ``unicode`` builtin does not exist (Python 3) the native string
    type is already unicode, so *text* is returned as-is.  LookupError and
    TypeError (unknown or None encoding) are tolerated in addition to
    UnicodeEncodeError.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        # Python 3: nothing to encode, str is the native string type
        return text
    if isinstance(text, text_type):
        try:
            return text.encode(default_encoding)
        except (UnicodeEncodeError, LookupError, TypeError):
            pass
    return text
def expand_leading_tilda(path):
    """Return *path* with a leading ``~/`` or ``~user/`` expanded.

    Thin wrapper around os.path.expanduser().
    """
    expanded = os.path.expanduser(path)
    return expanded
if __name__ == '__main__':
# chsdir <dirattr> <already_input_part>
if len(sys.argv) != 3 :
sys.exit(1)
dironly = sys.argv[1]
path = sys.argv[2].replace("\\","")
path = expand_leading_tilda(path)
path = unicodelize(path)
# support fuzzy pinyin
path = "".join( [ fuzzynize(x) for x in path] )
index = None
effective_path = path
# deal with special form such as 'xxx/zj1'
if len(path) > 1 and '0' < path[-1] <= '9':
index = int(path[-1])
effective_path = path[:-1]
dirname = os.path.dirname(path)
basename = os.path.basename(path)
effective_basename = os.path.basename(effective_path)
if not dirname :
dirname = u"./"
# get all top-level subentries(non-recursive)
try:
entries = os.listdir(dirname)
except OSError:
sys.exit(0)
# if an entry with the exact basename already exist, do nothing
if basename in entries or effective_basename in entries :
sys.exit(0)
basename_acronym = get_acronym(effective_basename).replace("\\","")
reply = []
for entry in entries:
entry_acronym = get_acronym(entry).replace("\\","")
# ignore entry which does not contain Chinese character.
if entry_acronym == entry :
continue
i = j = 0
while i < len(basename_acronym) and j < len(entry_acronym) :
# dealing with polyphone
if entry_acronym[j] == "`":
end = entry_acronym.index("`", j+1)
if entry_acronym.find( basename_acronym[i], j, end ) > 0 :
i += 1
j = end + 1
continue
else:
if ( basename_acronym[i] == entry_acronym[j] or
basename_acronym[i] == "?" ):
i += 1
j += 1
continue
if basename_acronym[i] != entry[i] :
break
# one match is found
if i == len(basename_acronym) :
candicate = os.path.join(dirname, entry).replace("./", "")
# if the caller is only interested with folders
if dironly == "x-d" and not os.path.isdir(candicate):
continue
reply.append( candicate )
try:
locale.setlocale(locale.LC_ALL, "")
except StandardError:
pass
# when dealing with outer world, always use native encoding
reply = [ stringlize(x) for x in reply]
reply.sort( key=locale.strxfrm )
if index :
try:
print ( reply[index - 1] )
except IndexError:
# return the last candidate when index is out of range
print ( reply[:-1])
else:
print ( "\n".join(reply) )