diff --git a/test/tt.py b/test/demo.py
similarity index 100%
rename from test/tt.py
rename to test/demo.py
diff --git a/test/jiebacmd.py b/test/jiebacmd.py
new file mode 100644
index 0000000..f80f1e9
--- /dev/null
+++ b/test/jiebacmd.py
@@ -0,0 +1,41 @@
+"""Segment text read from stdin with jieba, printing one word per line.
+
+Usage example (find the top 100 words in abc.txt):
+
+    cat abc.txt | python jiebacmd.py | sort | uniq -c | sort -nr -k1 | head -100
+
+An optional first argument names the output encoding (default: utf-8).
+"""
+
+import os
+import sys
+
+# Make the parent of this script's directory importable regardless of the
+# working directory -- presumably that is where the in-tree jieba package
+# lives (TODO confirm).
+_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.join(_SCRIPT_DIR, os.pardir))
+
+import jieba
+
+default_encoding = 'utf-8'
+
+
+def main(argv):
+    """Cut each stdin line into words and write one encoded word per line.
+
+    argv[1], when present, overrides the output encoding.
+    """
+    encoding = argv[1] if len(argv) > 1 else default_encoding
+    # Use the byte streams so the script runs on Python 2 and 3 alike;
+    # jieba.cut receives raw bytes and is expected to decode them itself.
+    stdin = getattr(sys.stdin, 'buffer', sys.stdin)
+    stdout = getattr(sys.stdout, 'buffer', sys.stdout)
+    # readline() yields an empty string only at end of input.
+    for line in iter(stdin.readline, b''):
+        for word in jieba.cut(line.strip()):
+            stdout.write(word.encode(encoding) + b'\n')
+
+
+if __name__ == '__main__':
+    main(sys.argv)