From 2eb11c802804a533c65252ac9d4b0394613fcfd6 Mon Sep 17 00:00:00 2001
From: fxsjy <ccnusjy@gmail.com>
Date: Mon, 13 Jan 2020 20:53:43 +0800
Subject: [PATCH] fix issue #810: buffer the whole word, not only sentence[k], when scanning English

---
 jieba/__init__.py   | 9 +++++----
 test/test_cutall.py | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/jieba/__init__.py b/jieba/__init__.py
index 04f53b1..e66aa49 100755
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -205,14 +205,15 @@ class Tokenizer(object):
                 eng_scan = 0
                 yield eng_buf
             if len(L) == 1 and k > old_j:
-                if re_eng.match(sentence[k]):
+                word = sentence[k:L[0] + 1]
+                if re_eng.match(word):
                     if eng_scan == 0:
                         eng_scan = 1
-                        eng_buf = sentence[k]
+                        eng_buf = word
                     else:
-                        eng_buf += sentence[k]
+                        eng_buf += word
                 if eng_scan == 0:
-                    yield sentence[k:L[0] + 1]
+                    yield word
                 old_j = L[0]
             else:
                 for j in L:
diff --git a/test/test_cutall.py b/test/test_cutall.py
index bb9acf6..28499d8 100644
--- a/test/test_cutall.py
+++ b/test/test_cutall.py
@@ -98,3 +98,4 @@ if __name__ == "__main__":
     cuttest('你认识那个和主席握手的的哥吗？他开一辆黑色的士。')
     jieba.add_word('超敏C反应蛋白')
     cuttest('超敏C反应蛋白是什么, java好学吗?,小潘老板都学Python')
+    cuttest('steel健身爆发力运动兴奋补充剂')