From a9f53e9c8501bb8775250494da2f2b4140978907 Mon Sep 17 00:00:00 2001 From: fxsjy Date: Sat, 22 Jun 2013 21:56:39 +0800 Subject: [PATCH] don't separate CRLF --- jieba/__init__.py | 4 +++- jieba/posseg/__init__.py | 17 ++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index f77c705..dd43f46 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -221,7 +221,9 @@ def cut(sentence,cut_all=False): else: tmp = re_skip.split(blk) for x in tmp: - if not cut_all: + if re_skip.match(x): + yield x + elif not cut_all: for xx in x: yield xx else: diff --git a/jieba/posseg/__init__.py b/jieba/posseg/__init__.py index 55a5b20..233ffc1 100644 --- a/jieba/posseg/__init__.py +++ b/jieba/posseg/__init__.py @@ -141,13 +141,16 @@ def __cut_internal(sentence): else: tmp = re_skip.split(blk) for x in tmp: - for xx in x: - if re_num.match(xx): - yield pair(xx,'m') - elif re_eng.match(x): - yield pair(xx,'eng') - else: - yield pair(xx,'x') + if re_skip.match(x): + yield pair(x,'') + else: + for xx in x: + if re_num.match(xx): + yield pair(xx,'m') + elif re_eng.match(x): + yield pair(xx,'eng') + else: + yield pair(xx,'x') def __lcut_internal(sentence): return list(__cut_internal(sentence))