用PyEnchant做拼写检查

安装 PyEnchant:

sudo pip install pyenchant

初步使用:

>>> import enchant
>>> d = enchant.Dict("en_US")
>>> d.check("Hello")
True
>>> d.check("Helo")
False

>>> d = enchant.Dict()
>>> d.tag
'en_AU'
>>> print d.tag
en_AU

>>> enchant.dict_exists("fake")
False
>>> enchant.dict_exists("en_US")
True
>>> d = enchant.request_dict("en_US")
>>> d
<enchant.Dict object at 0x2aaaabdffa50>
>>> enchant.list_languages()
['en', 'en_CA', 'en_GB', 'en_US', 'eo', 'fr', 'fr_CH', 'fr_FR']

# 拼写检查,给出所有可能的情况
>>> d.suggest("Helo")
['He lo', 'He-lo', 'Hello', 'Helot', 'Help', 'Halo', 'Hell', 'Held', 'Helm', 'Hero', "He'll"]

# 自定义字典,字典内容直接就是一行一个单词
>>> pwl = enchant.request_pwl_dict("mywords.txt")
# 将自定义字典和系统内置字典结合起来
>>> d2 = enchant.DictWithPWL("en_US","mywords.txt")
>>> d2.check("Hello")
True

我写的一段支持拼音的纠错功能:

from collections import defaultdict
from xpinyin import Pinyin
import enchant

class SpellCheck:
    """Spelling correction over a custom vocabulary, for Chinese text or pinyin.

    NOTE(review): ``sugguest`` calls ``get_instance()``, which is not defined
    in this excerpt; it is presumably a singleton accessor that populates the
    attributes below -- confirm against the full source.
    """

    instance = None     # singleton instance
    pwl = None          # enchant object for Chinese words
    pwl_pinyin = None   # enchant object for pinyin
    pinyin_map = None   # mapping: pinyin -> Chinese words
    all_words = []      # every known Chinese word
    all_pinyins = []    # every known pinyin string

    @staticmethod
    def _candidates(query, vocabulary, checker):
        """Return vocabulary entries containing ``query`` (shortest first),
        followed by whatever ``checker.suggest(query)`` proposes."""
        found = sorted((entry for entry in vocabulary if query in entry), key=len)
        found.extend(checker.suggest(query))
        return found

    @classmethod
    def sugguest(cls, word):
        """Suggest corrections for ``word``.

        :param word: the word to correct, given as Chinese text or as pinyin
        :return: list of candidate words without duplicates, in the order they
                 were found (direct word matches first, then words reached
                 through their pinyin)
        """
        # An empty query is a substring of every entry, so it would "match"
        # the whole vocabulary; there is nothing meaningful to suggest.
        if not word:
            return []

        spell_check = cls.get_instance()

        # Candidates from the word list itself. Entries that are equal to, or
        # a substring of, the input are dropped. A list comprehension is used
        # instead of ``filter`` because the result is extended below and
        # ``filter`` returns a non-list iterator on Python 3.
        result = [
            candidate
            for candidate in cls._candidates(
                word, spell_check.all_words, spell_check.pwl)
            if candidate not in word
        ]

        # Candidates reached through pinyin: convert the input, collect the
        # matching pinyin strings, then map each one back to its words.
        py = Pinyin().get_pinyin(word)
        py_candidates = cls._candidates(
            py, spell_check.all_pinyins, spell_check.pwl_pinyin)
        for py_candidate in py_candidates:
            result.extend(spell_check.pinyin_map[py_candidate])

        # De-duplicate while keeping first-seen order, so the ranking built
        # above survives (``list(set(...))`` would return an arbitrary order).
        seen = set()
        unique = []
        for candidate in result:
            if candidate not in seen:
                seen.add(candidate)
                unique.append(candidate)
        return unique

官方教程:http://pythonhosted.org/pyenchant/tutorial.html

其它类似的库还有 Hunspell、ASpell、ISpell 等。其中 Hunspell 被 Chrome、Firefox、OpenOffice 等用作拼写检查器。