From 62433a3205593ddbd82f0b0fc9f59419d7afe4e1 Mon Sep 17 00:00:00 2001 From: Yuan-Yi Chang Date: Fri, 27 Feb 2015 17:14:12 +0800 Subject: [PATCH] =?UTF-8?q?=E8=AE=93=20jieba=20=E5=8F=AF=E4=BB=A5=E8=87=AA?= =?UTF-8?q?=E8=A1=8C=E6=8C=87=E5=AE=9A=20cache=5Ffile=20=E7=94=A2=E7=94=9F?= =?UTF-8?q?=E7=9A=84=E7=9B=AE=E9=8C=84=E4=BD=8D=E7=BD=AE=EF=BC=8C=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=20jieba=20=E5=9C=A8=20Read-only=20file=20system=20?= =?UTF-8?q?=E7=92=B0=E5=A2=83=E4=B8=AD=E9=81=8B=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.在呼叫 jieba.cut() 等相關動作前,先透過 jieba.tmp_dir 指定目錄位置 2.當應用環境為 Read-Only File System,可透過預先產生 cache_file 的機制,讓 jieba 正常運行 3.實際案例為 Google App Engine 和 Heroku,其中前者免費版僅 128MB 記憶體空間無法運行,後者免費環境有 512MB 可正常運行。發佈前,先在本地端產生 cache_file 後,連同 cache_file 一併發佈至 Google App Engine 或 Heroku 環境上即可使用。 --- jieba/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index 4129554..2188a8c 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -23,6 +23,7 @@ total = 0 user_word_tag_tab = {} initialized = False pool = None +tmp_dir = None _curpath = os.path.normpath( os.path.join(os.getcwd(), os.path.dirname(__file__))) @@ -61,7 +62,7 @@ def gen_pfdict(f_name): def initialize(dictionary=None): - global FREQ, total, initialized, DICTIONARY, DICT_LOCK + global FREQ, total, initialized, DICTIONARY, DICT_LOCK, tmp_dir if not dictionary: dictionary = DICTIONARY with DICT_LOCK: @@ -73,9 +74,9 @@ def initialize(dictionary=None): t1 = time.time() # default dictionary if abs_path == os.path.join(_curpath, "dict.txt"): - cache_file = os.path.join(tempfile.gettempdir(), "jieba.cache") + cache_file = os.path.join(tmp_dir if tmp_dir else tempfile.gettempdir(),"jieba.cache") else: # custom dictionary - cache_file = os.path.join(tempfile.gettempdir(), "jieba.u%s.cache" % md5( + cache_file = os.path.join(tmp_dir if tmp_dir else tempfile.gettempdir(),"jieba.u%s.cache" % md5( abs_path.encode('utf-8', 'replace')).hexdigest()) load_from_cache_fail = True