mirror of https://github.com/fxsjy/jieba.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
29 lines
419 B
Python
29 lines
419 B
Python
13 years ago
|
'''
Segment text read from standard input with jieba, one word per output line.

usage example (find top 100 words in abc.txt):

cat abc.txt | python jiebacmd.py | sort | uniq -c | sort -nr -k1 | head -100

An optional first command-line argument selects the encoding used for the
output words (default: utf-8).
'''

import sys

# Also search the parent of the current working directory for modules, so a
# jieba checkout one level up can be imported without being installed.
sys.path.append("../")

default_encoding = 'utf-8'


def encode_words(line, cut, encoding=default_encoding):
    '''Return the words of one input line as newline-terminated byte strings.

    line     -- one line of input; surrounding whitespace is stripped before
                it is handed to ``cut``.
    cut      -- callable that splits the stripped line into an iterable of
                unicode words (``jieba.cut`` when run as a script).
    encoding -- codec name used to encode each word.

    An empty or whitespace-only line yields whatever ``cut`` produces for an
    empty input (normally nothing).
    '''
    return [word.encode(encoding) + b"\n" for word in cut(line.strip())]


def main(argv=None):
    '''Read lines from stdin until EOF and write their words to stdout.

    argv -- argument list (defaults to ``sys.argv``); ``argv[1]``, when
            present, overrides the output encoding.
    '''
    # Imported here so it runs after the sys.path tweak above and so that
    # importing this module does not itself require jieba.
    import jieba

    if argv is None:
        argv = sys.argv
    encoding = argv[1] if len(argv) > 1 else default_encoding

    # Use the binary streams where they exist (Python 3) and the plain
    # streams otherwise (Python 2, where they already carry bytes).  Input is
    # handed to jieba undecoded, as the Python 2 version of this script did;
    # per the jieba README, cut() accepts unicode, UTF-8 or GBK strings.
    # Output must be written as bytes: print() of an encoded word on
    # Python 3 would emit its b'...' repr instead of the word.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)

    # Raw writes bypass the text layer's line buffering, so flush manually
    # when a person is watching the terminal; stay fully buffered in pipes.
    flush_each_line = sys.stdout.isatty()

    while True:
        line = stdin.readline()
        # readline() returns an empty (bytes or text) string only at EOF;
        # a blank input line still contains its newline.
        if not line:
            break
        stdout.writelines(encode_words(line, jieba.cut, encoding))
        if flush_each_line:
            stdout.flush()


if __name__ == "__main__":
    main()