| { | |
| "do_lower_case":true, | |
| "remove_space":true, | |
| "keep_accents":true, | |
| "bos_token": "[CLS]", | |
| "eos_token": "[SEP]", | |
| "unk_token": "<unk>", | |
| "sep_token": "[SEP]", | |
| "pad_token": "<pad>", | |
| "cls_token": "[CLS]", | |
| "mask_token":{ | |
| "content":"[MASK]", | |
| "single_word":false, | |
| "lstrip":true, | |
| "rstrip":false, | |
| "normalized":false, | |
| "__type":"AddedToken" | |
| }, | |
| "tokenize_chinese_chars":false, | |
| "tokenizer_class": "BertJapaneseTokenizer", | |
| "word_tokenizer_type": "mecab", | |
| "subword_tokenizer_type": "sentencepiece", | |
| "mecab_kwargs": { | |
| "mecab_dic": "unidic_lite" | |
| }, | |
| "auto_map": { | |
| "AutoTokenizer": [ | |
| "distilbert_japanese_tokenizer.DistilBertJapaneseTokenizer", | |
| null | |
| ] | |
| } | |
| } |