安装 PyEnchant:
sudo pip install pyenchant
初步使用:
>>> import enchant
>>> d = enchant.Dict("en_US")
>>> d.check("Hello")
True
>>> d.check("Helo")
False
>>> d = enchant.Dict()
>>> d.tag
'en_AU'
>>> print d.tag
en_AU
>>> enchant.dict_exists("fake")
False
>>> enchant.dict_exists("en_US")
True
>>> d = enchant.request_dict("en_US")
>>> d
<enchant.Dict object at 0x2aaaabdffa50>
>>> enchant.list_languages()
['en', 'en_CA', 'en_GB', 'en_US', 'eo', 'fr', 'fr_CH', 'fr_FR']
# 拼写检查,给出所有可能的情况
>>> d.suggest("Helo")
['He lo', 'He-lo', 'Hello', 'Helot', 'Help', 'Halo', 'Hell', 'Held', 'Helm', 'Hero', "He'll"]
# 自定义字典,字典内容直接就是一行一个单词
>>> pwl = enchant.request_pwl_dict("mywords.txt")
# 将自定义字典和系统内置字典结合起来
>>> d2 = enchant.DictWithPWL("en_US","mywords.txt")
>>> d2.check("Hello")
True
我写的一段支持拼音的纠错功能:
from collections import defaultdict

from xpinyin import Pinyin
import enchant


class SpellCheck:
    """Spelling correction for Chinese words, with pinyin-based matching.

    All state lives in class-level attributes that are read through the
    object returned by ``get_instance()``.

    NOTE(review): ``get_instance()`` is called by ``sugguest`` but is not
    defined in this excerpt; it is presumably the singleton accessor that
    populates the attributes below -- confirm against the full source.
    """

    instance = None  # singleton instance
    pwl = None  # enchant dictionary object for Chinese words
    pwl_pinyin = None  # enchant dictionary object for pinyin strings
    pinyin_map = None  # mapping: pinyin string -> Chinese words
    all_words = []  # every known Chinese word
    all_pinyins = []  # every known pinyin string

    @classmethod
    def sugguest(cls, word):
        """Return candidate corrections for ``word``.

        Candidates are gathered from four sources, in this order:

        1. known Chinese words containing ``word`` (shortest first),
        2. enchant suggestions for ``word`` from ``pwl``,
        3. known pinyin strings containing the pinyin of ``word``
           (shortest first), mapped back to words via ``pinyin_map``,
        4. enchant suggestions for that pinyin from ``pwl_pinyin``,
           mapped back to words via ``pinyin_map``.

        :param word: the word to correct, Chinese characters or pinyin
        :return: list of candidate words without duplicates, in the order
            they were first found
        """
        spell_check = cls.get_instance()

        # Direct candidates: substring matches first, then spell-check hits.
        zh_sugguests = sorted(
            (w for w in spell_check.all_words if word in w), key=len
        )
        zh_sugguests.extend(spell_check.pwl.suggest(word))

        # Drop candidates that are just a substring of the input itself.
        # Built as a real list: on Python 3 ``filter`` returns an iterator,
        # which has no ``extend`` and would make the code below fail.
        result = [s for s in zh_sugguests if s not in word]

        # Pinyin candidates: same two-step lookup on the romanised form.
        py = Pinyin().get_pinyin(word)
        py_sugguests = sorted(
            (w for w in spell_check.all_pinyins if py in w), key=len
        )
        py_sugguests.extend(spell_check.pwl_pinyin.suggest(py))

        for py_sugguest in py_sugguests:
            # ``get`` avoids a KeyError on a plain dict and avoids inserting
            # empty entries if ``pinyin_map`` is a defaultdict.
            result.extend(spell_check.pinyin_map.get(py_sugguest, ()))

        # De-duplicate while keeping first-seen order, so the ranking built
        # above is preserved and the output is deterministic.
        seen = set()
        unique = []
        for candidate in result:
            if candidate not in seen:
                seen.add(candidate)
                unique.append(candidate)
        return unique
官方教程:http://pythonhosted.org/pyenchant/tutorial.html
其它类似的库有 Hunspell、ASpell、ISpell 等。其中 Hunspell 被 Chrome、Firefox、OpenOffice 等用作拼写检查器。