From d703bce30236f278818d7f346b2d746256871380 Mon Sep 17 00:00:00 2001 From: JesseyXujin Date: Fri, 10 Jan 2020 16:30:46 +0800 Subject: [PATCH] paddle coredump exception fix (#807) * paddle_null_point_fix * add core exception note * delete yield * modify test paddle for supporting enable_paddle() --- jieba/__init__.py | 3 ++- jieba/posseg/__init__.py | 3 ++- test/test_paddle.py | 2 +- test/test_paddle_postag.py | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) mode change 100644 => 100755 jieba/__init__.py mode change 100644 => 100755 test/test_paddle.py mode change 100644 => 100755 test/test_paddle_postag.py diff --git a/jieba/__init__.py b/jieba/__init__.py old mode 100644 new mode 100755 index 992039e..04f53b1 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -298,8 +298,9 @@ class Tokenizer(object): is_paddle_installed = check_paddle_install['is_paddle_installed'] sentence = strdecode(sentence) if use_paddle and is_paddle_installed: + # if sentence is null, it will raise core exception in paddle. if sentence is None or sentence == "" or sentence == u"": - yield sentence + return import jieba.lac_small.predict as predict results = predict.get_sent(sentence) for sent in results: diff --git a/jieba/posseg/__init__.py b/jieba/posseg/__init__.py index 05d7c01..df47c99 100755 --- a/jieba/posseg/__init__.py +++ b/jieba/posseg/__init__.py @@ -279,8 +279,9 @@ def cut(sentence, HMM=True, use_paddle=False): """ is_paddle_installed = check_paddle_install['is_paddle_installed'] if use_paddle and is_paddle_installed: + # if sentence is null, it will raise core exception in paddle. 
if sentence is None or sentence == "" or sentence == u"": - yield pair(None, None) + return import jieba.lac_small.predict as predict sents, tags = predict.get_result(strdecode(sentence)) for i, sent in enumerate(sents): diff --git a/test/test_paddle.py b/test/test_paddle.py old mode 100644 new mode 100755 index a964533..6003dea --- a/test/test_paddle.py +++ b/test/test_paddle.py @@ -2,7 +2,7 @@ import sys sys.path.append("../") import jieba - +jieba.enable_paddle() def cuttest(test_sent): result = jieba.cut(test_sent, use_paddle=True) diff --git a/test/test_paddle_postag.py b/test/test_paddle_postag.py old mode 100644 new mode 100755 index b8c29c1..c7d083c --- a/test/test_paddle_postag.py +++ b/test/test_paddle_postag.py @@ -2,7 +2,8 @@ import sys sys.path.append("../") import jieba.posseg as pseg - +import jieba +jieba.enable_paddle() def cuttest(test_sent): result = pseg.cut(test_sent, use_paddle=True)