金山词霸新旧版生词本转换程序
Pubdate:2009-09-20 14:03:50 Categories: python 3206 ViewsTags: google, python
老爸用 Visual FoxPro 6.0 做了个让外甥背单词的软件,要用到老版金山词霸2001的生词本。但是金山词霸2001和XP有些冲突,我就强迫老爸换用了新版的谷歌金山词霸。老爸老是抱怨生词本的音标不一样了。我看了一下,新版导出的生词本是UTF-16编码的,音标也用了单独的Unicode字符,而老版的音标是通过专用字体解决的。而且,VFP似乎不支持Unicode。于是用Python给老爸做了个转换程序,把新版的生词本转换成老版格式的。
老是劝老爸用Linux、用Python,他就是坚决不听啊。把程序贴出来,不知道对别人有没有用处?
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Conversion program for Kingsoft PowerWord (Jinshan Ciba) vocabulary books.

Rewrites a vocabulary book exported by the newer PowerWord release into the
text layout of the older release.

@version: $Id$
@author: U{Liu Qing}
"""
# Module metadata.
__author__ = '刘清'
__version__= '1.0'
# NOTE(review): not referenced anywhere in the visible code.
__nonsense__ = ''
import os, sys, getopt, codecs, string
# Maps each phonetic character found in the new (Unicode) vocabulary-book
# export to the single ASCII character that represents it in the old export.
trans_dict = {
    u'æ': u'A',
    u'ɑ': u'B',
    u'ə': u'E',
    u'ʃ': u'F',
    u'ŋ': u'N',
    u'ʌ': u'Q',
    u'ɔ': u'R',
    u'ð': u'T',
    u'ʒ': u'V',
    u'θ': u'W',
    u'ɛ': u'Z',
    u'ˈ': u"5",
    u'ˌ': u'7',
    u'ː': u':',
}

# The same mapping keyed by code point, which is the form translate() expects.
# Built once at import time so every call is a single pass over the text.
_PHONETIC_TABLE = dict((ord(new_char), old_char)
                       for new_char, old_char in trans_dict.items())


def convert_phonetic(new_phonetic):
    """Return ``new_phonetic`` with new-style phonetic symbols replaced.

    Every character that is a key of ``trans_dict`` is replaced by its mapped
    value; all other characters are passed through unchanged.

    @param new_phonetic: Unicode text taken from the new-format export.
    @return: a new Unicode string of the same length using the old notation.
    """
    # No replacement value is itself a key of trans_dict, so one translate()
    # pass yields the same result as replacing the symbols one after another.
    return new_phonetic.translate(_PHONETIC_TABLE)
def _build_entries(lines):
    """Yield one old-format record for each entry found in ``lines``.

    The first character of every input line is a marker that decides how the
    rest of the line is used:

      - ``+`` starts a new record (presumably the word itself) and is
        followed by a ``#`` separator;
      - ``#`` text is appended to the current record as-is (presumably the
        definition);
      - ``&`` text is the phonetic transcription; it is converted with
        convert_phonetic() and appended after another ``#`` separator;
      - ``$`` ends the record, which is yielded terminated by CRLF.

    Lines starting with any other character (or empty lines) are ignored.

    @param lines: iterable of Unicode lines, line endings included.
    @return: generator of Unicode records, each ending in ``\\r\\n``.
    """
    entry = u''
    for line in lines:
        marker = line[0:1]
        # Strip the actual line terminator rather than blindly dropping the
        # last two characters, so a final line without CRLF keeps its text.
        body = line[1:].rstrip(u'\r\n')
        if marker == u'+':
            entry = body + u'#'
        elif marker == u'#':
            entry += body
        elif marker == u'&':
            entry += u'#' + convert_phonetic(body)
        elif marker == u'$':
            # Skip a terminator that has no record in front of it.
            if entry:
                yield entry + u'\r\n'
            entry = u''


if __name__ == '__main__':
    # Prompts are GBK-encoded so they display on a Chinese Windows console.
    raw_input(u'按回车键开始'.encode('gbk'))
    inputfile = codecs.open(u'c:/Documents and Settings/lhj.WWW-61E1397B48C/桌面/我的生词本.txt', 'r', 'utf-16')
    try:
        # Backslashes are escaped explicitly; the resulting path is the same
        # as before but no longer relies on unrecognised escape sequences.
        outputfile = codecs.open(u'H:\\gsq\\追加.txt', 'w', 'gbk')
        #outputfile = codecs.open(u'c:/Documents and Settings/lhj.WWW-61E1397B48C/桌面/我的生词本old.txt','w','gbk')
        try:
            for wordline in _build_entries(inputfile.readlines()):
                try:
                    outputfile.write(wordline)
                except UnicodeEncodeError:
                    # The record has a character GBK cannot represent: show
                    # the offending record (unencodable characters replaced
                    # so the report itself cannot fail) and carry on.
                    print(wordline.encode('gbk', 'replace'))
        finally:
            # Always flush and release the output file, even on error.
            outputfile.close()
    finally:
        inputfile.close()
    raw_input(u'按回车键退出'.encode('gbk'))
Comments(0)