Merge pull request #248 from wangbin/master

exclude word fragments from FREQ in posseg.cut
pull/252/head
Sun Junyi 10 years ago
commit 753c1be49c

@@ -189,7 +189,7 @@ def __cut_DAG(sentence):
if buf: if buf:
if len(buf) == 1: if len(buf) == 1:
yield pair(buf, word_tag_tab.get(buf, 'x')) yield pair(buf, word_tag_tab.get(buf, 'x'))
elif buf not in jieba.FREQ: elif not jieba.FREQ.get(buf):
recognized = __cut_detail(buf) recognized = __cut_detail(buf)
for t in recognized: for t in recognized:
yield t yield t
@@ -203,7 +203,7 @@ def __cut_DAG(sentence):
if buf: if buf:
if len(buf) == 1: if len(buf) == 1:
yield pair(buf, word_tag_tab.get(buf, 'x')) yield pair(buf, word_tag_tab.get(buf, 'x'))
elif (buf not in jieba.FREQ): elif not jieba.FREQ.get(buf):
recognized = __cut_detail(buf) recognized = __cut_detail(buf)
for t in recognized: for t in recognized:
yield t yield t

Loading…
Cancel
Save