Keep backward compatibility; let Jython use a special loading workflow

pull/84/merge
Sun Junyi 12 years ago
parent 8757148d51
commit d4ede0fee6

2
.gitignore vendored

@ -164,3 +164,5 @@ pip-log.txt
*.log
test/tmp/*
#jython
*.class

@ -1,6 +1,7 @@
import re
import os
import marshal
import sys
MIN_FLOAT=-3.14e100
@ -39,7 +40,11 @@ def load_model():
return start_p, trans_p, emit_p
start_P, trans_P, emit_P = load_model()
if sys.platform.startswith("java"):
start_P, trans_P, emit_P = load_model()
else:
import prob_start,prob_trans,prob_emit
start_P, trans_P, emit_P = prob_start.P, prob_trans.P, prob_emit.P
def viterbi(obs, states, start_p, trans_p, emit_p):
V = [{}] #tabular

File diff suppressed because it is too large Load Diff

@ -0,0 +1,4 @@
# Per-state score table keyed by the four state labels 'B', 'E', 'M', 'S'.
# The two finite values exponentiate to roughly 0.769 and 0.231 (sum ~1), so
# the entries appear to be natural-log probabilities.
# -3.14e+100 is a "practically minus infinity" sentinel: 'E' and 'M' carry it,
# i.e. they are effectively ruled out by this table.
# NOTE(review): presumably this is the start-probability table exposed as
# prob_start.P, with B/E/M/S meaning Begin/End/Middle/Single - confirm against
# the importing module.
P={'B': -0.26268660809250016,
'E': -3.14e+100,
'M': -3.14e+100,
'S': -1.4652633398537678}

@ -0,0 +1,4 @@
# Nested score table: outer key is a state label, inner key is another state
# label, value is a float. Each inner dict's values exponentiate to a sum of
# ~1 (e.g. 0.6 + 0.4 for 'B'), so they appear to be natural-log probabilities.
# Only some pairs are present (B->E/M, E->B/S, M->E/M, S->B/S); the remaining
# pairs have no entry at all rather than a sentinel value.
# NOTE(review): presumably this is the transition table exposed as
# prob_trans.P (outer = from-state, inner = to-state) - confirm the direction
# and how the consumer treats missing pairs.
P={'B': {'E': -0.510825623765990, 'M': -0.916290731874155},
'E': {'B': -0.5897149736854513, 'S': -0.8085250474669937},
'M': {'E': -0.33344856811948514, 'M': -1.2603623820268226},
'S': {'B': -0.7211965654669841, 'S': -0.6658631448798212}}

@ -12,7 +12,7 @@ PROB_TRANS_P = "prob_trans.p"
PROB_EMIT_P = "prob_emit.p"
CHAR_STATE_TAB_P = "char_state_tab.p"
def load_model(f_name):
def load_model(f_name,isJython=True):
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
result = {}
@ -23,6 +23,8 @@ def load_model(f_name):
word, _, tag = line.split(' ')
result[word.decode('utf-8')]=tag
f.closed
if not isJython:
return result
start_p = {}
abs_path = os.path.join(_curpath, PROB_START_P)
@ -50,7 +52,12 @@ def load_model(f_name):
return state, start_p, trans_p, emit_p, result
char_state_tab_P, start_P, trans_P, emit_P, word_tag_tab = load_model(jieba.get_abs_path_dict())
if sys.platform.startswith("java"):
char_state_tab_P, start_P, trans_P, emit_P, word_tag_tab = load_model(jieba.get_abs_path_dict())
else:
import char_state_tab, prob_start, prob_trans, prob_emit
char_state_tab_P, start_P, trans_P, emit_P = char_state_tab.P, prob_start.P, prob_trans.P, prob_emit.P
word_tag_tab = load_model(jieba.get_abs_path_dict(),isJython=False)
if jieba.user_word_tag_tab:
word_tag_tab.update(jieba.user_word_tag_tab)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,256 @@
# Score table keyed by (state, tag) tuples: state is one of 'B', 'E', 'M',
# 'S' and tag is a short lowercase string ('a', 'ad', ..., 'zg'); 64 tags per
# state, 256 entries in total.
# -3.14e+100 is a "practically minus infinity" sentinel. Every ('E', *) and
# ('M', *) entry holds it, so only 'B' and 'S' pairs carry finite scores.
# NOTE(review): the finite values look like natural-log probabilities and the
# tags look like part-of-speech labels; presumably this is the start table
# exposed as prob_start.P for the tagging module - confirm against the
# importing code.
P={('B', 'a'): -4.762305214596967,
('B', 'ad'): -6.680066036784177,
('B', 'ag'): -3.14e+100,
('B', 'an'): -8.697083223018778,
('B', 'b'): -5.018374362109218,
('B', 'bg'): -3.14e+100,
('B', 'c'): -3.423880184954888,
('B', 'd'): -3.9750475297585357,
('B', 'df'): -8.888974230828882,
('B', 'dg'): -3.14e+100,
('B', 'e'): -8.563551830394255,
('B', 'en'): -3.14e+100,
('B', 'f'): -5.491630418482717,
('B', 'g'): -3.14e+100,
('B', 'h'): -13.533365129970255,
('B', 'i'): -6.1157847275557105,
('B', 'in'): -3.14e+100,
('B', 'j'): -5.0576191284681915,
('B', 'jn'): -3.14e+100,
('B', 'k'): -3.14e+100,
('B', 'l'): -4.905883584659895,
('B', 'ln'): -3.14e+100,
('B', 'm'): -3.6524299819046386,
('B', 'mg'): -3.14e+100,
('B', 'mq'): -6.78695300139688,
('B', 'n'): -1.6966257797548328,
('B', 'ng'): -3.14e+100,
('B', 'nr'): -2.2310495913769506,
('B', 'nrfg'): -5.873722175405573,
('B', 'nrt'): -4.985642733519195,
('B', 'ns'): -2.8228438314969213,
('B', 'nt'): -4.846091668182416,
('B', 'nz'): -3.94698846057672,
('B', 'o'): -8.433498702146057,
('B', 'p'): -4.200984132085048,
('B', 'q'): -6.998123858956596,
('B', 'qe'): -3.14e+100,
('B', 'qg'): -3.14e+100,
('B', 'r'): -3.4098187790818413,
('B', 'rg'): -3.14e+100,
('B', 'rr'): -12.434752841302146,
('B', 'rz'): -7.946116471570005,
('B', 's'): -5.522673590839954,
('B', 't'): -3.3647479094528574,
('B', 'tg'): -3.14e+100,
('B', 'u'): -9.163917277503234,
('B', 'ud'): -3.14e+100,
('B', 'ug'): -3.14e+100,
('B', 'uj'): -3.14e+100,
('B', 'ul'): -3.14e+100,
('B', 'uv'): -3.14e+100,
('B', 'uz'): -3.14e+100,
('B', 'v'): -2.6740584874265685,
('B', 'vd'): -9.044728760238115,
('B', 'vg'): -3.14e+100,
('B', 'vi'): -12.434752841302146,
('B', 'vn'): -4.3315610890163585,
('B', 'vq'): -12.147070768850364,
('B', 'w'): -3.14e+100,
('B', 'x'): -3.14e+100,
('B', 'y'): -9.844485675856319,
('B', 'yg'): -3.14e+100,
('B', 'z'): -7.045681111485645,
('B', 'zg'): -3.14e+100,
('E', 'a'): -3.14e+100,
('E', 'ad'): -3.14e+100,
('E', 'ag'): -3.14e+100,
('E', 'an'): -3.14e+100,
('E', 'b'): -3.14e+100,
('E', 'bg'): -3.14e+100,
('E', 'c'): -3.14e+100,
('E', 'd'): -3.14e+100,
('E', 'df'): -3.14e+100,
('E', 'dg'): -3.14e+100,
('E', 'e'): -3.14e+100,
('E', 'en'): -3.14e+100,
('E', 'f'): -3.14e+100,
('E', 'g'): -3.14e+100,
('E', 'h'): -3.14e+100,
('E', 'i'): -3.14e+100,
('E', 'in'): -3.14e+100,
('E', 'j'): -3.14e+100,
('E', 'jn'): -3.14e+100,
('E', 'k'): -3.14e+100,
('E', 'l'): -3.14e+100,
('E', 'ln'): -3.14e+100,
('E', 'm'): -3.14e+100,
('E', 'mg'): -3.14e+100,
('E', 'mq'): -3.14e+100,
('E', 'n'): -3.14e+100,
('E', 'ng'): -3.14e+100,
('E', 'nr'): -3.14e+100,
('E', 'nrfg'): -3.14e+100,
('E', 'nrt'): -3.14e+100,
('E', 'ns'): -3.14e+100,
('E', 'nt'): -3.14e+100,
('E', 'nz'): -3.14e+100,
('E', 'o'): -3.14e+100,
('E', 'p'): -3.14e+100,
('E', 'q'): -3.14e+100,
('E', 'qe'): -3.14e+100,
('E', 'qg'): -3.14e+100,
('E', 'r'): -3.14e+100,
('E', 'rg'): -3.14e+100,
('E', 'rr'): -3.14e+100,
('E', 'rz'): -3.14e+100,
('E', 's'): -3.14e+100,
('E', 't'): -3.14e+100,
('E', 'tg'): -3.14e+100,
('E', 'u'): -3.14e+100,
('E', 'ud'): -3.14e+100,
('E', 'ug'): -3.14e+100,
('E', 'uj'): -3.14e+100,
('E', 'ul'): -3.14e+100,
('E', 'uv'): -3.14e+100,
('E', 'uz'): -3.14e+100,
('E', 'v'): -3.14e+100,
('E', 'vd'): -3.14e+100,
('E', 'vg'): -3.14e+100,
('E', 'vi'): -3.14e+100,
('E', 'vn'): -3.14e+100,
('E', 'vq'): -3.14e+100,
('E', 'w'): -3.14e+100,
('E', 'x'): -3.14e+100,
('E', 'y'): -3.14e+100,
('E', 'yg'): -3.14e+100,
('E', 'z'): -3.14e+100,
('E', 'zg'): -3.14e+100,
('M', 'a'): -3.14e+100,
('M', 'ad'): -3.14e+100,
('M', 'ag'): -3.14e+100,
('M', 'an'): -3.14e+100,
('M', 'b'): -3.14e+100,
('M', 'bg'): -3.14e+100,
('M', 'c'): -3.14e+100,
('M', 'd'): -3.14e+100,
('M', 'df'): -3.14e+100,
('M', 'dg'): -3.14e+100,
('M', 'e'): -3.14e+100,
('M', 'en'): -3.14e+100,
('M', 'f'): -3.14e+100,
('M', 'g'): -3.14e+100,
('M', 'h'): -3.14e+100,
('M', 'i'): -3.14e+100,
('M', 'in'): -3.14e+100,
('M', 'j'): -3.14e+100,
('M', 'jn'): -3.14e+100,
('M', 'k'): -3.14e+100,
('M', 'l'): -3.14e+100,
('M', 'ln'): -3.14e+100,
('M', 'm'): -3.14e+100,
('M', 'mg'): -3.14e+100,
('M', 'mq'): -3.14e+100,
('M', 'n'): -3.14e+100,
('M', 'ng'): -3.14e+100,
('M', 'nr'): -3.14e+100,
('M', 'nrfg'): -3.14e+100,
('M', 'nrt'): -3.14e+100,
('M', 'ns'): -3.14e+100,
('M', 'nt'): -3.14e+100,
('M', 'nz'): -3.14e+100,
('M', 'o'): -3.14e+100,
('M', 'p'): -3.14e+100,
('M', 'q'): -3.14e+100,
('M', 'qe'): -3.14e+100,
('M', 'qg'): -3.14e+100,
('M', 'r'): -3.14e+100,
('M', 'rg'): -3.14e+100,
('M', 'rr'): -3.14e+100,
('M', 'rz'): -3.14e+100,
('M', 's'): -3.14e+100,
('M', 't'): -3.14e+100,
('M', 'tg'): -3.14e+100,
('M', 'u'): -3.14e+100,
('M', 'ud'): -3.14e+100,
('M', 'ug'): -3.14e+100,
('M', 'uj'): -3.14e+100,
('M', 'ul'): -3.14e+100,
('M', 'uv'): -3.14e+100,
('M', 'uz'): -3.14e+100,
('M', 'v'): -3.14e+100,
('M', 'vd'): -3.14e+100,
('M', 'vg'): -3.14e+100,
('M', 'vi'): -3.14e+100,
('M', 'vn'): -3.14e+100,
('M', 'vq'): -3.14e+100,
('M', 'w'): -3.14e+100,
('M', 'x'): -3.14e+100,
('M', 'y'): -3.14e+100,
('M', 'yg'): -3.14e+100,
('M', 'z'): -3.14e+100,
('M', 'zg'): -3.14e+100,
('S', 'a'): -3.9025396831295227,
('S', 'ad'): -11.048458480182255,
('S', 'ag'): -6.954113917960154,
('S', 'an'): -12.84021794941031,
('S', 'b'): -6.472888763970454,
('S', 'bg'): -3.14e+100,
('S', 'c'): -4.786966795861212,
('S', 'd'): -3.903919764181873,
('S', 'df'): -3.14e+100,
('S', 'dg'): -8.948397651299683,
('S', 'e'): -5.942513006281674,
('S', 'en'): -3.14e+100,
('S', 'f'): -5.194820249981676,
('S', 'g'): -6.507826815331734,
('S', 'h'): -8.650563207383884,
('S', 'i'): -3.14e+100,
('S', 'in'): -3.14e+100,
('S', 'j'): -4.911992119644354,
('S', 'jn'): -3.14e+100,
('S', 'k'): -6.940320595827818,
('S', 'l'): -3.14e+100,
('S', 'ln'): -3.14e+100,
('S', 'm'): -3.269200652116097,
('S', 'mg'): -10.825314928868044,
('S', 'mq'): -3.14e+100,
('S', 'n'): -3.8551483897645107,
('S', 'ng'): -4.913434861102905,
('S', 'nr'): -4.483663103956885,
('S', 'nrfg'): -3.14e+100,
('S', 'nrt'): -3.14e+100,
('S', 'ns'): -3.14e+100,
('S', 'nt'): -12.147070768850364,
('S', 'nz'): -3.14e+100,
('S', 'o'): -8.464460927750023,
('S', 'p'): -2.9868401813596317,
('S', 'q'): -4.888658618255058,
('S', 'qe'): -3.14e+100,
('S', 'qg'): -3.14e+100,
('S', 'r'): -2.7635336784127853,
('S', 'rg'): -10.275268591948773,
('S', 'rr'): -3.14e+100,
('S', 'rz'): -3.14e+100,
('S', 's'): -3.14e+100,
('S', 't'): -3.14e+100,
('S', 'tg'): -6.272842531880403,
('S', 'u'): -6.940320595827818,
('S', 'ud'): -7.728230161053767,
('S', 'ug'): -7.5394037026636855,
('S', 'uj'): -6.85251045118004,
('S', 'ul'): -8.4153713175535,
('S', 'uv'): -8.15808672228609,
('S', 'uz'): -9.299258625372996,
('S', 'v'): -3.053292303412302,
('S', 'vd'): -3.14e+100,
('S', 'vg'): -5.9430181843676895,
('S', 'vi'): -3.14e+100,
('S', 'vn'): -11.453923588290419,
('S', 'vq'): -3.14e+100,
('S', 'w'): -3.14e+100,
('S', 'x'): -8.427419656069674,
('S', 'y'): -6.1970794699489575,
('S', 'yg'): -13.533365129970255,
('S', 'z'): -3.14e+100,
('S', 'zg'): -3.14e+100}

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save