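Fix Python 3 compatibility: replace dict.iteritems() with dict.items() and use floor division (//) for the median index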

pull/183/head
Dingyuan Wang 11 years ago
parent b16cf0d63f
commit 6a3f228c72

@@ -91,7 +91,7 @@ def initialize(*args):
 if load_from_cache_fail:
 trie,FREQ,total = gen_trie(abs_path)
-FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.iteritems()]) #normalize
+FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize
 min_freq = min(FREQ.itervalues())
 logger.debug("dumping model to file cache %s" % cache_file)
 try:
@@ -131,7 +131,7 @@ def require_initialized(fn):
 def __cut_all(sentence):
 dag = get_DAG(sentence)
 old_j = -1
-for k,L in dag.iteritems():
+for k,L in dag.items():
 if len(L)==1 and k>old_j:
 yield sentence[k:L[0]+1]
 old_j = L[0]

@@ -29,7 +29,7 @@ def get_idf(abs_path):
 for line in lines:
 word,freq = line.split(' ')
 idf_freq[word] = float(freq)
-median_idf = sorted(idf_freq.values())[len(idf_freq)/2]
+median_idf = sorted(idf_freq.values())[len(idf_freq)//2]
 return idf_freq, median_idf
 def set_stop_words(stop_words_path):
@@ -56,7 +56,7 @@ def extract_tags(sentence,topK=20):
 if w.lower() in STOP_WORDS: continue
 freq[w]=freq.get(w,0.0)+1.0
 total = sum(freq.values())
-freq = [(k,v/total) for k,v in freq.iteritems()]
+freq = [(k,v/total) for k,v in freq.items()]
 tf_idf_list = [(v * idf_freq.get(k,median_idf),k) for k,v in freq]
 st_list = sorted(tf_idf_list,reverse=True)

Loading…
Cancel
Save