mirror of https://github.com/fxsjy/jieba.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
29 lines
419 B
Python
29 lines
419 B
Python
"""Segment text read from stdin with jieba and emit one token per line.

Usage example (find the top 100 words in abc.txt):

    cat abc.txt | python jiebacmd.py | sort | uniq -c | sort -nr -k1 | head -100

An optional first command-line argument names the encoding applied to each
token before it is written to stdout (default: utf-8).
"""

import sys

# Adds the parent of the current working directory to the import path --
# presumably so a jieba checkout one level up can be imported; confirm
# against the repository layout.
sys.path.append("../")

import jieba

default_encoding = 'utf-8'

# argv[1], when given, overrides the encoding of the emitted tokens.
if len(sys.argv) > 1:
    default_encoding = sys.argv[1]

# Encoded tokens are bytes. Python 3 only accepts bytes on the binary layer
# of stdout (sys.stdout.buffer); Python 2's sys.stdout has no such attribute
# and takes byte strings directly, so fall back to it there.
output = getattr(sys.stdout, "buffer", sys.stdout)

# Two-argument iter() calls readline() repeatedly until it returns "" (EOF),
# so each line is handled as soon as it is read.
for line in iter(sys.stdin.readline, ""):
    for word in jieba.cut(line.strip()):
        # One token per line, matching what `print` produced on Python 2.
        output.write(word.encode(default_encoding) + b"\n")