use only one dictionary

pull/13/head
Sun Junyi 12 years ago
parent 6c0bcf6557
commit 193bfee1d4

@ -78,6 +78,22 @@ Output:
https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
功能 4) : 词性标注
================
* 标注句子分词后每个词的词性，采用和ictclas兼容的标记法
* 用法示例
>>> import jieba.posseg as pseg
>>> words =pseg.cut("我爱北京天安门")
>>> for w in words:
... print w.word,w.flag
...
我 r
爱 v
北京 ns
天安门 ns
分词速度
=========

@ -16,7 +16,7 @@ def gen_trie(f_name):
ltotal = 0.0
content = open(f_name,'rb').read().decode('utf-8')
for line in content.split("\n"):
word,freq = line.split(" ")
word,freq,_ = line.split(" ")
freq = float(freq)
lfreq[word] = freq
ltotal+=freq

File diff suppressed because it is too large Load Diff

@ -15,7 +15,7 @@ def load_model(f_name):
for line in open(prob_p_path,"rb"):
line = line.strip()
if line=="":continue
word, tag = line.split(' ')
word, _, tag = line.split(' ')
result[word.decode('utf-8')]=tag
return result
@ -24,7 +24,7 @@ prob_start = load_model("prob_start.py")
prob_trans = load_model("prob_trans.py")
prob_emit = load_model("prob_emit.py")
char_state_tab = load_model("char_state_tab.py")
word_tag_tab = load_model("tags.txt")
word_tag_tab = load_model("../dict.txt")
class pair(object):
def __init__(self,word,flag):

Loading…
Cancel
Save