Skip to content

Commit

Permalink
split chinese character range into smaller ones that dont exceed Vim'…
Browse files Browse the repository at this point in the history
…s limit (#130) :|
  • Loading branch information
SidOfc committed Sep 28, 2020
1 parent d6cd5e2 commit cf8a451
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion autoload/mkdx.vim
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,27 @@ fun! s:util.ToggleLineType(line, type)
return substitute(a:line, repl[0], repl[1], repl[2])
endfun

" work around vim limitation of max char range size of 256 for chinese:
" http://vim.1045645.n5.nabble.com/how-to-match-all-Chinese-chars-td5708582.html
" solution by Christian Brabandt, modified by me for use in a single character
" group
function! s:util.split_into_ranges(start, end)
  " Build the contents of a character collection that covers every code
  " point from a:start to a:end (both inclusive), split into consecutive
  " sub-ranges that each span at most 256 code points beyond their first
  " one, so that no single range exceeds Vim's range size limit.
  "
  " a:start, a:end  code points given as hexadecimal strings without a
  "                 prefix, e.g. '4e00' and '9fbb'
  " returns         a string such as '\u4E00-\u4F00\u4F01-\u5001...' that is
  "                 meant to be placed between '[' and ']' in a pattern;
  "                 an empty string when a:end is smaller than a:start
  let start = str2nr(a:start, 16)
  let end   = str2nr(a:end, 16)

  " '\u' reads at most 4 hex digits and '\U' at most 8. Zero-pad to the full
  " width so a following hex-like character in the collection can never be
  " swallowed as part of the code point, and switch to '\U' when the range
  " reaches beyond U+FFFF.
  let fmt  = end > 0xFFFF ? '\U%08X' : '\u%04X'
  let patt = ''

  while (end - start) > 256
    let temp  = start + 256
    let patt .= printf(fmt . '-' . fmt, start, temp)
    let start = temp + 1
  endwhile

  if end > start
    let patt .= printf(fmt . '-' . fmt, start, end)
  elseif end == start
    " exactly one code point is left over: emit it on its own instead of
    " silently dropping it
    let patt .= printf(fmt, start)
  endif

  return patt
endfunction

let s:util.transformations = {
\ 'trailing-space': [[' \+$', '', 'g']],
\ 'escape-tags': [['>', '\&gt;', 'g'], ['<', '\&lt;', 'g']],
Expand All @@ -881,7 +902,7 @@ let s:util.transformations = {
\ 'clean-header': [['^[ {{tokens.header}}]\+\| \+$', '', 'g'], ['\[!\[\([^\]]\+\)\](\([^\)]\+\))\](\([^\)]\+\))', '', 'g'],
\ ['<a.*>\(.*\)</a>', '\1', 'g'], ['!\?\[\([^\]]\+\)]([^)]\+)', '\1', 'g']],
\ 'header-to-hash': [['`<kbd>\(.*\)<\/kbd>`', 'kbd\1kbd', 'g'], ['<kbd>\(.*\)<\/kbd>', '\1', 'g'],
\ ['\%#=2[^0-9[:lower:]\u4e00-\u9fff_\- ]\+', '', 'g'], ['[.,!@#$%^&*()=+"]', '', 'g'], [' ', '-', 'g']],
\ ['\%#=2[^0-9[:lower:]' . s:util.split_into_ranges('4e00', '9fbb') . '_\- ]\+', '', 'g'], ['[.,!@#$%^&*()=+"]', '', 'g'], [' ', '-', 'g']],
\ 'toggle-quote': [['^\(> \)\?', '\=(submatch(1) == "> " ? "" : "> ")', '']]
\ }

Expand Down

0 comments on commit cf8a451

Please sign in to comment.