Add the re.U (Unicode) flag to the compiled regular expressions re_han and re_skip.

12 years ago · 99d2492d67
parent fbfaac2eaa
commit 99d2492d67
1 changed file with 2 additions and 2 deletions
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -212,9 +212,9 @@ def cut(sentence,cut_all=False):
            sentence = sentence.decode('utf-8')
        except UnicodeDecodeError:
            sentence = sentence.decode('gbk','ignore')
-    re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(ur"(\s+)")
+    re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)", re.U), re.compile(ur"(\s+)", re.U)
    if cut_all:
-        re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)"), re.compile(ur"[^a-zA-Z0-9+#\n]")
+        re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)", re.U), re.compile(ur"[^a-zA-Z0-9+#\n]", re.U)
    blocks = re_han.split(sentence)
    cut_block = __cut_DAG
    if cut_all: