mirror of https://github.com/fxsjy/jieba.git
讓 jieba 可以切換 idf 語料庫
1. 新增繁體中文 idf 語料庫 2. 為了讓 jieba 可以切換 idf 語料庫,新增 get_idf, set_idf_path 方法,並改寫 extract_tags 3. test 增加 extract_tags_idfpath (pull/174/head)
parent
473ac1df75
commit
7198d562f1
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,32 @@
|
||||
import sys
|
||||
sys.path.append('../')
|
||||
|
||||
import jieba
|
||||
import jieba.analyse
|
||||
from optparse import OptionParser
|
||||
|
||||
# Command-line demo: extract the top-K keywords from a text file after
# pointing jieba.analyse at an alternative IDF file.

# Usage banner handed to OptionParser and printed when no file is given.
USAGE = "usage: python extract_tags_idfpath.py [file name] -k [top k]"

parser = OptionParser(USAGE)
parser.add_option("-k", dest="topK")
opt, args = parser.parse_args()

# The input file name is the only required positional argument.
if not args:
    print(USAGE)
    sys.exit(1)

file_name = args[0]

# Fall back to 10 keywords when -k is not supplied on the command line.
topK = 10 if opt.topK is None else int(opt.topK)

# Read the raw bytes; the context manager guarantees the handle is closed
# even if read() raises.
with open(file_name, 'rb') as source_file:
    content = source_file.read()

# Select the IDF file before extracting keywords.
# NOTE(review): the path is relative to the current working directory, so the
# script presumably has to be run from its own directory -- confirm.
jieba.analyse.set_idf_path("../extra_dict/idf.txt.big")

tags = jieba.analyse.extract_tags(content, topK=topK)

# Single-argument parenthesized print behaves the same on Python 2 and 3.
print(",".join(tags))
|
Loading…
Reference in New Issue