From d65d72af9aac5effc24ba7a4c83a3902926adbaa Mon Sep 17 00:00:00 2001
From: Stanislas0
Date: Tue, 21 Feb 2023 19:23:38 +0800
Subject: [PATCH] refactor

---
 codegeex/tokenizer/tokenizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codegeex/tokenizer/tokenizer.py b/codegeex/tokenizer/tokenizer.py
index 587c83f..1b9fa28 100644
--- a/codegeex/tokenizer/tokenizer.py
+++ b/codegeex/tokenizer/tokenizer.py
@@ -4,7 +4,7 @@ from transformers import AutoTokenizer
 from transformers.models.gpt2 import GPT2TokenizerFast
 
 
-def encode_whitespaces(text, start_extra_id: int, max_len: int):
+def encode_whitespaces(text: str, start_extra_id: int, max_len: int):
     """ Encode whitespaces to extra tokens in GPT-J.
 
     >>> encode_whitespaces('a\\n b\\n c', 10, 10)
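
Note: the hunk above only changes the signature of encode_whitespaces; its body lies outside the diff context. For review context, here is a minimal sketch of how a whitespace-to-extra-token encoder of this shape can be implemented. The <|extratoken_N|> token names and the run-length-to-token mapping are assumptions for illustration, not taken from this patch:

    def encode_whitespaces(text: str, start_extra_id: int, max_len: int) -> str:
        # Sketch (assumed behavior): a run of n consecutive spaces,
        # 2 <= n <= max_len, is replaced by the single extra token
        # <|extratoken_{start_extra_id + n - 2}|>, reusing the unused
        # extra slots in the GPT-J vocabulary. Longest runs are
        # substituted first so shorter patterns cannot split them.
        for n in range(max_len, 1, -1):
            token = f"<|extratoken_{start_extra_id + n - 2}|>"
            text = text.replace(" " * n, token)
        return text

Under this reading, encode_whitespaces('a\n  b', 10, 10) would yield 'a\n<|extratoken_10|>b', and a matching decode step would invert the substitution after generation.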