pull/74/head
Stanislas0 2 years ago
parent 14493cbd1a
commit d65d72af9a

@ -4,7 +4,7 @@ from transformers import AutoTokenizer
from transformers.models.gpt2 import GPT2TokenizerFast from transformers.models.gpt2 import GPT2TokenizerFast
def encode_whitespaces(text, start_extra_id: int, max_len: int): def encode_whitespaces(text: str, start_extra_id: int, max_len: int):
""" Encode whitespaces to extra tokens in GPT-J. """ Encode whitespaces to extra tokens in GPT-J.
>>> encode_whitespaces('a\\n b\\n c', 10, 10) >>> encode_whitespaces('a\\n b\\n c', 10, 10)

Loading…
Cancel
Save