From 3246236133d0354a60a6147df819ba043349a0ba Mon Sep 17 00:00:00 2001
From: Richard Wong <chao787@gmail.com>
Date: Wed, 3 Jul 2013 15:03:45 +0800
Subject: [PATCH] Separate cal and IO process.

---
 jieba/__init__.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/jieba/__init__.py b/jieba/__init__.py
index aacfba2..7eb8efe 100644
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -264,15 +264,21 @@ def load_userdict(f):
         if line_no==1:
             word = word.replace(u'\ufeff',u"") #remove bom flag if it exists
         if len(tup)==3:
-            user_word_tag_tab[word]=tup[2].strip()
-        freq = float(freq)
-        FREQ[word] = log(freq / total)
-        p = trie
-        for c in word:
-            if not c in p:
-                p[c] ={}
-            p = p[c]
-        p['']='' #ending flag
+            add_word(word, freq, tup[2])
+        add_word(word, freq)
+
+def add_word(word, freq, tag=None):
+    global FREQ, trie, total, user_word_tag_tab
+    freq = float(freq)
+    FREQ[word] = log(freq / total)
+    if tag is not None:
+        user_word_tag_tab[word] = tag.strip()
+    p = trie
+    for c in word:
+        if not c in p:
+            p[c] = {}
+        p = p[c]
+    p[''] = '' # ending flag
 
 __ref_cut = cut
 __ref_cut_for_search = cut_for_search