Merge pull request #238 from gumblex/master

use str.splitlines to avoid losing line breaks
pull/240/head
Sun Junyi 10 years ago
commit 9ca5b69907

@@ -43,7 +43,7 @@ def gen_pfdict(f_name):
ltotal = 0
with open(f_name, 'rb') as f:
lineno = 0
-for line in f.read().rstrip().decode('utf-8').split('\n'):
+for line in f.read().rstrip().decode('utf-8').splitlines():
lineno += 1
try:
word, freq = line.split(' ')[:2]
@@ -313,7 +313,7 @@ def load_userdict(f):
f = open(f, 'rb')
content = f.read().decode('utf-8').lstrip('\ufeff')
line_no = 0
-for line in content.split("\n"):
+for line in content.splitlines():
line_no += 1
if not line.rstrip():
continue
@@ -366,7 +366,7 @@ def enable_parallel(processnum=None):
pool = Pool(processnum)
def pcut(sentence, cut_all=False, HMM=True):
-parts = strdecode(sentence).split('\n')
+parts = strdecode(sentence).splitlines(True)
if cut_all:
result = pool.map(__lcut_all, parts)
elif HMM:
@@ -378,7 +378,7 @@ def enable_parallel(processnum=None):
yield w
def pcut_for_search(sentence):
-parts = strdecode(sentence).split('\n')
+parts = strdecode(sentence).splitlines(True)
result = pool.map(__lcut_for_search, parts)
for r in result:
for w in r:

@@ -253,7 +253,7 @@ def cut(sentence, HMM=True):
for w in __cut_internal(sentence, HMM=HMM):
yield w
else:
-parts = strdecode(sentence).split('\n')
+parts = strdecode(sentence).splitlines(True)
if HMM:
result = jieba.pool.map(__lcut_internal, parts)
else:

Loading…
Cancel
Save