| { | |
| "accumulate_gradients": 4, | |
| "ae_steps": [], | |
| "amp": 2, | |
| "architectures": [ | |
| "XLMWithLMHeadModel" | |
| ], | |
| "asm": false, | |
| "attention_dropout": 0.1, | |
| "batch_size": 16, | |
| "beam_size": 1, | |
| "bos_index": 0, | |
| "bos_token_id": 0, | |
| "bptt": 256, | |
| "bt_src_langs": [], | |
| "bt_steps": [], | |
| "causal": false, | |
| "clip_grad_norm": 1.0, | |
| "clm_steps": [], | |
| "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/17/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,fr,es,de,it,pt,nl,sv,pl,ru,ar,tr,zh,ja,ko,hi,vi' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656237 --master_port 14148 --exp_id \"16656237\"", | |
| "context_size": 0, | |
| "data_path": "/private/home/aconneau/projects/XLM/data/wiki/17/175k", | |
| "debug": false, | |
| "debug_slurm": false, | |
| "debug_train": false, | |
| "dropout": 0.1, | |
| "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237", | |
| "emb_dim": 1280, | |
| "embed_init_std": 0.02209708691207961, | |
| "encoder_only": true, | |
| "end_n_top": 5, | |
| "eos_index": 1, | |
| "epoch_size": 200000, | |
| "eval_bleu": false, | |
| "eval_only": false, | |
| "exp_id": "16656237", | |
| "exp_name": "xlm_17_100_big.3", | |
| "fp16": true, | |
| "gelu_activation": true, | |
| "global_rank": 0, | |
| "group_by_size": true, | |
| "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656237/hypotheses", | |
| "id2lang": { | |
| "0": "ar", | |
| "1": "de", | |
| "10": "pl", | |
| "11": "pt", | |
| "12": "ru", | |
| "13": "sv", | |
| "14": "tr", | |
| "15": "vi", | |
| "16": "zh", | |
| "2": "en", | |
| "3": "es", | |
| "4": "fr", | |
| "5": "hi", | |
| "6": "it", | |
| "7": "ja", | |
| "8": "ko", | |
| "9": "nl" | |
| }, | |
| "init_std": 0.02, | |
| "is_encoder": true, | |
| "is_master": true, | |
| "is_slurm_job": true, | |
| "lambda_ae": 1.0, | |
| "lambda_ae_config": null, | |
| "lambda_bt": 1.0, | |
| "lambda_bt_config": null, | |
| "lambda_clm": 1.0, | |
| "lambda_clm_config": null, | |
| "lambda_mlm": 1.0, | |
| "lambda_mlm_config": null, | |
| "lambda_mt": 1.0, | |
| "lambda_mt_config": null, | |
| "lambda_pc": 1.0, | |
| "lambda_pc_config": null, | |
| "lang2id": { | |
| "ar": 0, | |
| "de": 1, | |
| "en": 2, | |
| "es": 3, | |
| "fr": 4, | |
| "hi": 5, | |
| "it": 6, | |
| "ja": 7, | |
| "ko": 8, | |
| "nl": 9, | |
| "pl": 10, | |
| "pt": 11, | |
| "ru": 12, | |
| "sv": 13, | |
| "tr": 14, | |
| "vi": 15, | |
| "zh": 16 | |
| }, | |
| "lang_id": 0, | |
| "langs": [ | |
| "en", | |
| "fr", | |
| "es", | |
| "de", | |
| "it", | |
| "pt", | |
| "nl", | |
| "sv", | |
| "pl", | |
| "ru", | |
| "ar", | |
| "tr", | |
| "zh", | |
| "ja", | |
| "ko", | |
| "hi", | |
| "vi" | |
| ], | |
| "layer_norm_eps": 1e-12, | |
| "lg_sampling_factor": 0.7, | |
| "lgs": "en-fr-es-de-it-pt-nl-sv-pl-ru-ar-tr-zh-ja-ko-hi-vi", | |
| "local_rank": 0, | |
| "mask_index": 5, | |
| "mask_token_id": 0, | |
| "master_addr": "learnfair1605", | |
| "master_port": 14148, | |
| "max_batch_size": 0, | |
| "max_epoch": 100000, | |
| "max_len": 200, | |
| "max_position_embeddings": 512, | |
| "max_vocab": 200000, | |
| "min_count": 0, | |
| "mlm_steps": [ | |
| [ | |
| "en", | |
| null | |
| ], | |
| [ | |
| "fr", | |
| null | |
| ], | |
| [ | |
| "es", | |
| null | |
| ], | |
| [ | |
| "de", | |
| null | |
| ], | |
| [ | |
| "it", | |
| null | |
| ], | |
| [ | |
| "pt", | |
| null | |
| ], | |
| [ | |
| "nl", | |
| null | |
| ], | |
| [ | |
| "sv", | |
| null | |
| ], | |
| [ | |
| "pl", | |
| null | |
| ], | |
| [ | |
| "ru", | |
| null | |
| ], | |
| [ | |
| "ar", | |
| null | |
| ], | |
| [ | |
| "tr", | |
| null | |
| ], | |
| [ | |
| "zh", | |
| null | |
| ], | |
| [ | |
| "ja", | |
| null | |
| ], | |
| [ | |
| "ko", | |
| null | |
| ], | |
| [ | |
| "hi", | |
| null | |
| ], | |
| [ | |
| "vi", | |
| null | |
| ] | |
| ], | |
| "model_type": "xlm", | |
| "mono_dataset": { | |
| "ar": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ar.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ar.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ar.pth" | |
| }, | |
| "de": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.de.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.de.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.de.pth" | |
| }, | |
| "en": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.en.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.en.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.en.pth" | |
| }, | |
| "es": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.es.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.es.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.es.pth" | |
| }, | |
| "fr": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.fr.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.fr.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.fr.pth" | |
| }, | |
| "hi": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.hi.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.hi.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.hi.pth" | |
| }, | |
| "it": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.it.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.it.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.it.pth" | |
| }, | |
| "ja": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ja.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ja.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ja.pth" | |
| }, | |
| "ko": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ko.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ko.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ko.pth" | |
| }, | |
| "nl": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.nl.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.nl.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.nl.pth" | |
| }, | |
| "pl": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pl.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pl.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pl.pth" | |
| }, | |
| "pt": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.pt.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.pt.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.pt.pth" | |
| }, | |
| "ru": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.ru.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.ru.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.ru.pth" | |
| }, | |
| "sv": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.sv.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.sv.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.sv.pth" | |
| }, | |
| "tr": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.tr.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.tr.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.tr.pth" | |
| }, | |
| "vi": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.vi.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.vi.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.vi.pth" | |
| }, | |
| "zh": { | |
| "test": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/test.zh.pth", | |
| "train": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/train.zh.pth", | |
| "valid": "/private/home/aconneau/projects/XLM/data/wiki/17/175k/valid.zh.pth" | |
| } | |
| }, | |
| "mono_list": [ | |
| "en", | |
| "fr", | |
| "es", | |
| "de", | |
| "it", | |
| "pt", | |
| "nl", | |
| "sv", | |
| "pl", | |
| "ru", | |
| "ar", | |
| "tr", | |
| "zh", | |
| "ja", | |
| "ko", | |
| "hi", | |
| "vi" | |
| ], | |
| "mt_steps": [], | |
| "multi_gpu": true, | |
| "multi_node": true, | |
| "n_gpu_per_node": 8, | |
| "n_heads": 16, | |
| "n_langs": 17, | |
| "n_layers": 16, | |
| "n_nodes": 4, | |
| "node_id": 0, | |
| "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001", | |
| "pad_index": 2, | |
| "pad_token_id": 2, | |
| "para_dataset": {}, | |
| "para_list": [], | |
| "pc_steps": [], | |
| "ref_paths": {}, | |
| "reload_checkpoint": "", | |
| "reload_emb": "", | |
| "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884510/best-valid_zh_mlm_ppl.pth", | |
| "sample_alpha": 0.5, | |
| "save_periodic": 0, | |
| "share_inout_emb": true, | |
| "sinusoidal_embeddings": false, | |
| "split_data": true, | |
| "start_n_top": 5, | |
| "stopping_criterion": "_valid_zh_mlm_ppl,25", | |
| "summary_activation": null, | |
| "summary_first_dropout": 0.1, | |
| "summary_proj_to_labels": true, | |
| "summary_type": "first", | |
| "summary_use_proj": true, | |
| "tokens_per_batch": -1, | |
| "unk_index": 3, | |
| "use_lang_emb": false, | |
| "use_memory": false, | |
| "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl", | |
| "vocab_size": 200000, | |
| "word_blank": 0.0, | |
| "word_dropout": 0.0, | |
| "word_keep": 0.1, | |
| "word_mask": 0.8, | |
| "word_mask_keep_rand": "0.8,0.1,0.1", | |
| "word_pred": 0.15, | |
| "word_rand": 0.1, | |
| "word_shuffle": 0.0, | |
| "world_size": 32 | |
| } | |