From 0087a4e7e35b2bdf30bfa0b27c5418dc6323f272 Mon Sep 17 00:00:00 2001 From: fxsjy Date: Fri, 7 Jun 2013 13:59:36 +0800 Subject: [PATCH] adjust prob_trans for better support of name entity; fix some bad cases --- jieba/dict.txt | 2 +- jieba/finalseg/prob_trans.py | 2 +- test/test.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/jieba/dict.txt b/jieba/dict.txt index b8dcaa7..e5348f7 100644 --- a/jieba/dict.txt +++ b/jieba/dict.txt @@ -9169,6 +9169,7 @@ T恤 4 n 不弱 3 a 不强 3 v 不强不弱 3 l +不归路 3 i 不归零制 3 i 不当 657 d 不当不正 3 i @@ -138436,7 +138437,6 @@ T恤 4 n 张丁华 4 nr 张万仙 2 nr 张万年 64 nr -张三 288 nr 张三丰 436 nr 张三之 2 nr 张三影 2 nr diff --git a/jieba/finalseg/prob_trans.py b/jieba/finalseg/prob_trans.py index 3918dd5..6e3f37f 100644 --- a/jieba/finalseg/prob_trans.py +++ b/jieba/finalseg/prob_trans.py @@ -1,4 +1,4 @@ -{'B': {'E': -0.16037786260859094, 'M': -1.9093400568760384}, +{'B': {'E': -0.510825623765990, 'M': -0.916290731874155}, 'E': {'B': -0.5897149736854513, 'S': -0.8085250474669937}, 'M': {'E': -0.33344856811948514, 'M': -1.2603623820268226}, 'S': {'B': -0.7211965654669841, 'S': -0.6658631448798212}} diff --git a/test/test.py b/test/test.py index c5052f0..f1d3241 100644 --- a/test/test.py +++ b/test/test.py @@ -95,3 +95,4 @@ if __name__ == "__main__": cuttest('C++和c#是什么关系?11+122=133,是吗?PI=3.14159') cuttest('你认识那个和主席握手的的哥吗?他开一辆黑色的士。') cuttest('枪杆子中出政权') + cuttest('张三风同学走上了不归路')