From 66fe17517d8efacac3003dc4ca4303551074c544 Mon Sep 17 00:00:00 2001 From: Dingyuan Wang Date: Fri, 26 Jun 2015 22:12:39 +0800 Subject: [PATCH] prevent moving across different filesystems at tempfile.mkstemp --- jieba/__init__.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index 2ec7548..75cceec 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -15,7 +15,10 @@ from hashlib import md5 from ._compat import * from . import finalseg -from shutil import move as _replace_file +if os.name == 'nt': + from shutil import move as _replace_file +else: + _replace_file = os.rename _get_module_path = lambda path: os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__), path)) @@ -107,11 +110,14 @@ class Tokenizer(object): # default dictionary elif abs_path == DEFAULT_DICT: cache_file = "jieba.cache" - else: # custom dictionary + # custom dictionary + else: cache_file = "jieba.u%s.cache" % md5( abs_path.encode('utf-8', 'replace')).hexdigest() cache_file = os.path.join( self.tmp_dir or tempfile.gettempdir(), cache_file) + # prevent absolute path in self.cache_file + tmpdir = os.path.dirname(cache_file) load_from_cache_fail = True if os.path.isfile(cache_file) and os.path.getmtime(cache_file) > os.path.getmtime(abs_path): @@ -132,7 +138,8 @@ class Tokenizer(object): default_logger.debug( "Dumping model to file cache %s" % cache_file) try: - fd, fpath = tempfile.mkstemp() + # prevent moving across different filesystems + fd, fpath = tempfile.mkstemp(dir=tmpdir) with os.fdopen(fd, 'wb') as temp_cache_file: marshal.dump( (self.FREQ, self.total), temp_cache_file)