安装 PyEnchant:
sudo pip install pyenchant
初步使用:
>>> import enchant
>>> d = enchant.Dict("en_US")
>>> d.check("Hello")
True
>>> d.check("Helo")
False
>>> d = enchant.Dict()
>>> d.tag
'en_AU'
>>> print d.tag
en_AU
>>> enchant.dict_exists("fake")
False
>>> enchant.dict_exists("en_US")
True
>>> d = enchant.request_dict("en_US")
>>> d
<enchant.Dict object at 0x2aaaabdffa50>
>>> enchant.list_languages()
['en', 'en_CA', 'en_GB', 'en_US', 'eo', 'fr', 'fr_CH', 'fr_FR']
# 拼写检查,给出所有可能的情况
>>> d.suggest("Helo")
['He lo', 'He-lo', 'Hello', 'Helot', 'Help', 'Halo', 'Hell', 'Held', 'Helm', 'Hero', "He'll"]
# 自定义字典,字典内容直接就是一行一个单词
>>> pwl = enchant.request_pwl_dict("mywords.txt")
# 将自定义字典和系统内置字典结合起来
>>> d2 = enchant.DictWithPWL("en_US","mywords.txt")
>>> d2.check("Hello")
True
我写的一段支持拼音的纠错功能:
from collections import defaultdict
from xpinyin import Pinyin
import enchant
class SpellCheck:
    """Spelling suggester for Chinese words that also matches by pinyin.

    All state is read from a single shared instance obtained through
    ``SpellCheck.get_instance()``.

    NOTE(review): ``get_instance`` is not defined in this excerpt; it is
    presumably what populates the attributes below -- confirm.
    """

    instance = None  # shared singleton instance
    pwl = None  # enchant object for the Chinese words
    pwl_pinyin = None  # enchant object for the pinyin strings
    pinyin_map = None  # maps a pinyin string to its Chinese words
    all_words = []  # every known Chinese word
    all_pinyins = []  # every known pinyin string

    @classmethod
    def sugguest(cls, word):
        """Return spelling suggestions for ``word``.

        Candidates are gathered in this order: known words containing
        ``word`` (shortest first), enchant suggestions for ``word``, then
        the words mapped from every pinyin candidate (known pinyins
        containing the pinyin of ``word``, shortest first, followed by
        enchant suggestions for that pinyin).

        :param word: the word to correct, written in Chinese or as pinyin
        :return: list of unique candidate words in the order described
            above; an empty list when ``word`` is empty
        """
        # An empty query is a substring of every known entry, so there is
        # nothing meaningful to suggest for it.
        if not word:
            return []

        spell_check = SpellCheck.get_instance()

        # Words that contain the query verbatim, shortest first.
        candidates = sorted(
            (w for w in spell_check.all_words if word in w), key=len
        )
        # Spelling corrections proposed by enchant.
        candidates.extend(spell_check.pwl.suggest(word))

        # Build a real list: on Python 3 ``filter`` returns a lazy iterator
        # that has no ``extend`` method.
        # NOTE(review): ``not in`` is a substring test, so candidates that
        # are substrings of ``word`` are dropped too, not only ``word``
        # itself -- confirm this is intended rather than ``!=``.
        result = [c for c in candidates if c not in word]

        py = Pinyin().get_pinyin(word)
        # Pinyins that contain the query's pinyin verbatim, shortest first.
        py_candidates = sorted(
            (w for w in spell_check.all_pinyins if py in w), key=len
        )
        # Spelling corrections proposed by enchant for the pinyin.
        py_candidates.extend(spell_check.pwl_pinyin.suggest(py))

        for py_candidate in py_candidates:
            # ``get`` skips pinyins missing from the map instead of raising
            # KeyError (or inserting an empty entry into a defaultdict).
            result.extend(spell_check.pinyin_map.get(py_candidate, ()))

        # De-duplicate while keeping the ranking built above.
        seen = set()
        unique = []
        for item in result:
            if item not in seen:
                seen.add(item)
                unique.append(item)
        return unique
官方教程:http://pythonhosted.org/pyenchant/tutorial.html
其它类似的库有 Hunspell、ASpell、ISpell 等。其中 Hunspell 被 Chrome、Firefox、OpenOffice 等用作拼写检查器。