summary | refs | log | tree | commit | diff
path: root/maintenance/language/zhtable/Makefile.py
diff options
context:
space:
mode:
author: Luke Shumaker <lukeshu@sbcglobal.net> 2016-05-01 15:32:59 -0400
committer: Luke Shumaker <lukeshu@sbcglobal.net> 2016-05-01 15:32:59 -0400
commit: 6dc1997577fab2c366781fd7048144935afa0012 (patch)
tree: 8918d28c7ab4342f0738985e37af1dfc42d0e93a /maintenance/language/zhtable/Makefile.py
parent: 150f94f051128f367bc89f6b7e5f57eb2a69fc62 (diff)
parent: fa89acd685cb09cdbe1c64cbb721ec64975bbbc1 (diff)
Merge commit 'fa89acd'
# Conflicts:
#   .gitignore
#   extensions/ArchInterWiki.sql
Diffstat (limited to 'maintenance/language/zhtable/Makefile.py')
-rw-r--r-- maintenance/language/zhtable/Makefile.py | 13
1 files changed, 8 insertions, 5 deletions
diff --git a/maintenance/language/zhtable/Makefile.py b/maintenance/language/zhtable/Makefile.py
index 71641ef1..4ab57d40 100644
--- a/maintenance/language/zhtable/Makefile.py
+++ b/maintenance/language/zhtable/Makefile.py
@@ -203,13 +203,16 @@ def customRules( path ):
fp = open( path, 'r', encoding = 'U8' )
ret = dict()
for line in fp:
- elems = line.split( '#' )[0].split()
+ line = line.rstrip( '\r\n' )
+ if '#' in line:
+ line = line.split( '#' )[0].rstrip()
+ elems = line.split( '\t' )
if len( elems ) > 1:
ret[elems[0]] = elems[1]
return ret
def dictToSortedList( src_table, pos ):
- return sorted( src_table.items(), key = lambda m: m[pos] )
+ return sorted( src_table.items(), key = lambda m: ( m[pos], m[1 - pos] ) )
def translate( text, conv_table ):
i = 0
@@ -229,7 +232,7 @@ def manualWordsTable( path, conv_table, reconv_table ):
reconv_table = {}
wordlist = [line.split( '#' )[0].strip() for line in fp]
wordlist = list( set( wordlist ) )
- wordlist.sort( key = len, reverse = True )
+ wordlist.sort( key = lambda w: ( len(w), w ), reverse = True )
while wordlist:
word = wordlist.pop()
new_word = translate( word, conv_table )
@@ -241,7 +244,7 @@ def manualWordsTable( path, conv_table, reconv_table ):
def defaultWordsTable( src_wordlist, src_tomany, char_conv_table, char_reconv_table ):
wordlist = list( src_wordlist )
- wordlist.sort( key = len, reverse = True )
+ wordlist.sort( key = lambda w: ( len(w), w ), reverse = True )
word_conv_table = {}
word_reconv_table = {}
conv_table = char_conv_table.copy()
@@ -276,7 +279,7 @@ def PHPArray( table ):
def main():
#Get Unihan.zip:
url = 'http://www.unicode.org/Public/%s/ucd/Unihan.zip' % UNIHAN_VER
- han_dest = 'Unihan.zip'
+ han_dest = 'Unihan-%s.zip' % UNIHAN_VER
download( url, han_dest )
# Get scim-tables-$(SCIM_TABLES_VER).tar.gz: