merge change from chao78787

pull/72/merge
Sun Junyi 12 years ago
commit 9ea14a8a54

@ -144,7 +144,7 @@ Output:
* 实验结果在4核3.4GHz Linux机器上对金庸全集进行精确分词获得了1MB/s的速度是单进程版的3.3倍。 * 实验结果在4核3.4GHz Linux机器上对金庸全集进行精确分词获得了1MB/s的速度是单进程版的3.3倍。
功能 5) : Tokenize返回词语在原文的起始位置 功能 6) : Tokenize返回词语在原文的起始位置
============================================ ============================================
* 注意输入参数只接受unicode * 注意输入参数只接受unicode
* 默认模式 * 默认模式
@ -181,7 +181,7 @@ word 有限公司 start: 6 end:10
``` ```
功能 6) : ChineseAnalyzer for Whoosh搜索引擎 功能 7) : ChineseAnalyzer for Whoosh搜索引擎
============================================ ============================================
* 引用: `from jieba.analyse import ChineseAnalyzer ` * 引用: `from jieba.analyse import ChineseAnalyzer `
* 用法示例:https://github.com/fxsjy/jieba/blob/master/test/test_whoosh.py * 用法示例:https://github.com/fxsjy/jieba/blob/master/test/test_whoosh.py

@ -264,15 +264,22 @@ def load_userdict(f):
if line_no==1: if line_no==1:
word = word.replace(u'\ufeff',u"") #remove bom flag if it exists word = word.replace(u'\ufeff',u"") #remove bom flag if it exists
if len(tup)==3: if len(tup)==3:
user_word_tag_tab[word]=tup[2].strip() add_word(word, freq, tup[2])
freq = float(freq) else:
FREQ[word] = log(freq / total) add_word(word, freq)
p = trie
for c in word: def add_word(word, freq, tag=None):
if not c in p: global FREQ, trie, total, user_word_tag_tab
p[c] ={} freq = float(freq)
p = p[c] FREQ[word] = log(freq / total)
p['']='' #ending flag if tag is not None:
user_word_tag_tab[word] = tag.strip()
p = trie
for c in word:
if not c in p:
p[c] = {}
p = p[c]
p[''] = '' # ending flag
__ref_cut = cut __ref_cut = cut
__ref_cut_for_search = cut_for_search __ref_cut_for_search = cut_for_search

@ -1,6 +1,6 @@
from distutils.core import setup from distutils.core import setup
setup(name='jieba', setup(name='jieba',
version='0.30', version='0.31.alpha',
description='Chinese Words Segementation Utilities', description='Chinese Words Segementation Utilities',
author='Sun, Junyi', author='Sun, Junyi',
author_email='ccnusjy@gmail.com', author_email='ccnusjy@gmail.com',

Loading…
Cancel
Save