From 6a3f228c72e658c96af26dd7e852a14ffda24c72 Mon Sep 17 00:00:00 2001 From: Dingyuan Wang Date: Sun, 7 Sep 2014 18:50:10 +0800 Subject: [PATCH] fix python3 stuff --- jieba/__init__.py | 4 ++-- jieba/analyse/__init__.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index 1ff0a3c..6e7c3c2 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -91,7 +91,7 @@ def initialize(*args): if load_from_cache_fail: trie,FREQ,total = gen_trie(abs_path) - FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.iteritems()]) #normalize + FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize min_freq = min(FREQ.itervalues()) logger.debug("dumping model to file cache %s" % cache_file) try: @@ -131,7 +131,7 @@ def require_initialized(fn): def __cut_all(sentence): dag = get_DAG(sentence) old_j = -1 - for k,L in dag.iteritems(): + for k,L in dag.items(): if len(L)==1 and k>old_j: yield sentence[k:L[0]+1] old_j = L[0] diff --git a/jieba/analyse/__init__.py b/jieba/analyse/__init__.py index 0a91f6f..d28f85a 100644 --- a/jieba/analyse/__init__.py +++ b/jieba/analyse/__init__.py @@ -29,7 +29,7 @@ def get_idf(abs_path): for line in lines: word,freq = line.split(' ') idf_freq[word] = float(freq) - median_idf = sorted(idf_freq.values())[len(idf_freq)/2] + median_idf = sorted(idf_freq.values())[len(idf_freq)//2] return idf_freq, median_idf def set_stop_words(stop_words_path): @@ -56,7 +56,7 @@ def extract_tags(sentence,topK=20): if w.lower() in STOP_WORDS: continue freq[w]=freq.get(w,0.0)+1.0 total = sum(freq.values()) - freq = [(k,v/total) for k,v in freq.iteritems()] + freq = [(k,v/total) for k,v in freq.items()] tf_idf_list = [(v * idf_freq.get(k,median_idf),k) for k,v in freq] st_list = sorted(tf_idf_list,reverse=True)