You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
jieba/test/jiebacmd.py

29 lines
419 B
Python

'''
usage example (find top 100 words in abc.txt):
cat abc.txt | python jiebacmd.py | sort | uniq -c | sort -nr -k1 | head -100
'''
import sys
sys.path.append("../")
import jieba
# Encoding used for the words written to stdout. An optional first
# command-line argument overrides the UTF-8 default.
default_encoding = 'utf-8'
if len(sys.argv) > 1:
    default_encoding = sys.argv[1]

# Use the byte-oriented streams on every interpreter: Python 3 exposes them
# as ``.buffer``, while the Python 2 standard streams already carry bytes.
# This replaces the Python-2-only ``print`` statement without changing what
# is written.
_stdin = getattr(sys.stdin, 'buffer', sys.stdin)
_stdout = getattr(sys.stdout, 'buffer', sys.stdout)

while True:
    line = _stdin.readline()
    # An empty read means EOF (a blank input line still carries its
    # newline). Truthiness works for both bytes and text, unlike `== ""`.
    if not line:
        break
    # NOTE(review): jieba.cut receives the raw line exactly as it did under
    # Python 2; presumably it decodes byte input itself -- confirm against
    # the bundled jieba version.
    for word in jieba.cut(line.strip()):
        # One segmented word per output line, encoded as requested.
        _stdout.write(word.encode(default_encoding))
        _stdout.write(b'\n')