From d3339633d556b1ae6ed5a1e5a9446d7d2772f54e Mon Sep 17 00:00:00 2001 From: Sun Junyi Date: Fri, 26 Apr 2013 14:51:58 +0800 Subject: [PATCH] speed tests: call jieba.initialize() before timing so that dictionary loading is excluded from the measurement; write the log as UTF-8 instead of GBK --- test/test_file.py | 3 ++- test/test_pos_file.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_file.py b/test/test_file.py index fe2d93a..adfb0bb 100644 --- a/test/test_file.py +++ b/test/test_file.py @@ -3,6 +3,7 @@ import sys,time import sys sys.path.append("../") import jieba +jieba.initialize() url = sys.argv[1] content = open(url,"rb").read() @@ -14,7 +15,7 @@ tm_cost = t2-t1 log_f = open("1.log","wb") for w in words: - print >> log_f, w.encode("gbk"), "/" , + print >> log_f, w.encode("utf-8"), "/" , print 'cost',tm_cost print 'speed' , len(content)/tm_cost, " bytes/second" diff --git a/test/test_pos_file.py b/test/test_pos_file.py index fd14a2d..ab70156 100644 --- a/test/test_pos_file.py +++ b/test/test_pos_file.py @@ -2,6 +2,8 @@ import urllib2 import sys,time import sys sys.path.append("../") +import jieba +jieba.initialize() import jieba.posseg as pseg url = sys.argv[1] @@ -14,7 +16,7 @@ tm_cost = t2-t1 log_f = open("1.log","wb") for w in words: - print >> log_f, w.encode("gbk"), "/" , + print >> log_f, w.encode("utf-8"), "/" , print 'speed' , len(content)/tm_cost, " bytes/second"