From 06ebc6f71c2a843778a26f64294a3f0c685f5256 Mon Sep 17 00:00:00 2001 From: Sun Junyi Date: Wed, 12 Dec 2012 14:24:44 +0800 Subject: [PATCH] en-chn mix words in POS --- jieba/posseg/__init__.py | 29 +++++++++++++++++++++++++---- test/test_pos.py | 3 ++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/jieba/posseg/__init__.py b/jieba/posseg/__init__.py index 0104bec..bdbdb3e 100644 --- a/jieba/posseg/__init__.py +++ b/jieba/posseg/__init__.py @@ -3,6 +3,7 @@ import os import viterbi import jieba import sys + default_encoding = sys.getfilesystemencoding() def load_model(f_name): @@ -60,10 +61,31 @@ def __cut(sentence): if next