From 64b3c0d0e0af0f65d4a29eb79df0e9c4fe29557d Mon Sep 17 00:00:00 2001
From: fxsjy
Date: Sat, 6 Oct 2012 14:50:10 +0800
Subject: [PATCH] add one more example

---
Notes (reviewer commentary; text between the "---" line above and the
first "diff --git" header is not part of the commit message or the patch):

* test/tt.py is renamed to test/demo.py with no content change
  (similarity index 100%).
* test/jiebacmd.py is a new command-line example written for Python 2
  (it uses the print statement). Reading it top to bottom, it:
    - appends "../" to sys.path and then imports jieba.
      NOTE(review): "../" is a relative path, so the import presumably
      only works when the script is started from the test/ directory, as
      the usage example in its docstring does (unless jieba is installed
      elsewhere on the path) - confirm.
    - takes the output encoding from sys.argv[1] when an argument is
      given, otherwise uses 'utf-8'.
    - reads standard input one line at a time with readline() and stops
      when readline() returns the empty string.
    - strips leading/trailing whitespace from each line and passes the
      result to jieba.cut().
    - prints each word yielded by jieba.cut(), one per print statement,
      encoded with the selected encoding.
  The command-line argument only affects the encoding of the output; the
  script itself does not decode the input line before calling jieba.cut().

 test/{tt.py => demo.py} |  0
 test/jiebacmd.py        | 28 ++++++++++++++++++++++++++++
 2 files changed, 28 insertions(+)
 rename test/{tt.py => demo.py} (100%)
 create mode 100644 test/jiebacmd.py

diff --git a/test/tt.py b/test/demo.py
similarity index 100%
rename from test/tt.py
rename to test/demo.py
diff --git a/test/jiebacmd.py b/test/jiebacmd.py
new file mode 100644
index 0000000..f80f1e9
--- /dev/null
+++ b/test/jiebacmd.py
@@ -0,0 +1,28 @@
+'''
+usage example (find top 100 words in abc.txt):
+
+cat abc.txt | python jiebacmd.py | sort | uniq -c | sort -nr -k1 | head -100
+
+
+'''
+
+
+import sys
+sys.path.append("../")
+
+import jieba
+
+default_encoding='utf-8'
+
+if len(sys.argv)>1:
+    default_encoding = sys.argv[1]
+
+while True:
+    line = sys.stdin.readline()
+    if line=="":
+        break
+    line = line.strip()
+    for word in jieba.cut(line):
+        print word.encode(default_encoding)
+
+