From 012fddf13f69a1546747e5c5f345a7bd29150841 Mon Sep 17 00:00:00 2001 From: Sun Junyi Date: Fri, 12 Apr 2013 22:37:53 +0800 Subject: [PATCH] ignore white space --- jieba/__init__.py | 2 +- jieba/posseg/__init__.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index faf52a4..5a17d5b 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -169,7 +169,7 @@ def cut(sentence,cut_all=False): tmp = re_skip.split(blk) for x in tmp: if re_skip.match(x): - if x!=' ': + if x.strip(' ')!='': yield x else: for xx in x: diff --git a/jieba/posseg/__init__.py b/jieba/posseg/__init__.py index 48b0488..fbc791e 100644 --- a/jieba/posseg/__init__.py +++ b/jieba/posseg/__init__.py @@ -136,7 +136,8 @@ def cut(sentence): tmp = re_skip.split(blk) for x in tmp: if re_skip.match(x): - yield pair(x,'') + if x.strip(' ')!='': + yield pair(x,'') else: for xx in x: if re_num.match(xx):