| wandb_version: 1 | |
| _n_gpu: | |
| desc: null | |
| value: 1 | |
| _name_or_path: | |
| desc: null | |
| value: facebook/wav2vec2-xls-r-300m | |
| _wandb: | |
| desc: null | |
| value: | |
| cli_version: 0.12.9 | |
| framework: huggingface | |
| huggingface_version: 4.17.0.dev0 | |
| is_jupyter_run: false | |
| is_kaggle_kernel: false | |
| m: | |
| - 1: train/global_step | |
| 6: | |
| - 3 | |
| - 1: train/loss | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: train/learning_rate | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: train/epoch | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: eval/loss | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: eval/wer | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: eval/runtime | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: eval/samples_per_second | |
| 5: 1 | |
| 6: | |
| - 1 | |
| - 1: eval/steps_per_second | |
| 5: 1 | |
| 6: | |
| - 1 | |
| python_version: 3.8.8 | |
| start_time: 1643565257 | |
| t: | |
| 1: | |
| - 1 | |
| - 5 | |
| - 11 | |
| 2: | |
| - 1 | |
| - 5 | |
| - 11 | |
| 3: | |
| - 1 | |
| - 7 | |
| - 13 | |
| 4: 3.8.8 | |
| 5: 0.12.9 | |
| 6: 4.17.0.dev0 | |
| 8: | |
| - 5 | |
| activation_dropout: | |
| desc: null | |
| value: 0.05 | |
| adafactor: | |
| desc: null | |
| value: false | |
| adam_beta1: | |
| desc: null | |
| value: 0.9 | |
| adam_beta2: | |
| desc: null | |
| value: 0.999 | |
| adam_epsilon: | |
| desc: null | |
| value: 1.0e-08 | |
| adapter_kernel_size: | |
| desc: null | |
| value: 3 | |
| adapter_stride: | |
| desc: null | |
| value: 2 | |
| add_adapter: | |
| desc: null | |
| value: false | |
| add_cross_attention: | |
| desc: null | |
| value: false | |
| apply_spec_augment: | |
| desc: null | |
| value: true | |
| architectures: | |
| desc: null | |
| value: | |
| - Wav2Vec2ForPreTraining | |
| attention_dropout: | |
| desc: null | |
| value: 0.0 | |
| bad_words_ids: | |
| desc: null | |
| value: null | |
| bf16: | |
| desc: null | |
| value: false | |
| bf16_full_eval: | |
| desc: null | |
| value: false | |
| bos_token_id: | |
| desc: null | |
| value: 1 | |
| chunk_size_feed_forward: | |
| desc: null | |
| value: 0 | |
| classifier_proj_size: | |
| desc: null | |
| value: 256 | |
| codevector_dim: | |
| desc: null | |
| value: 768 | |
| contrastive_logits_temperature: | |
| desc: null | |
| value: 0.1 | |
| conv_bias: | |
| desc: null | |
| value: true | |
| conv_dim: | |
| desc: null | |
| value: | |
| - 512 | |
| - 512 | |
| - 512 | |
| - 512 | |
| - 512 | |
| - 512 | |
| - 512 | |
| conv_kernel: | |
| desc: null | |
| value: | |
| - 10 | |
| - 3 | |
| - 3 | |
| - 3 | |
| - 3 | |
| - 2 | |
| - 2 | |
| conv_stride: | |
| desc: null | |
| value: | |
| - 5 | |
| - 2 | |
| - 2 | |
| - 2 | |
| - 2 | |
| - 2 | |
| - 2 | |
| cross_attention_hidden_size: | |
| desc: null | |
| value: null | |
| ctc_loss_reduction: | |
| desc: null | |
| value: mean | |
| ctc_zero_infinity: | |
| desc: null | |
| value: false | |
| dataloader_drop_last: | |
| desc: null | |
| value: false | |
| dataloader_num_workers: | |
| desc: null | |
| value: 0 | |
| dataloader_pin_memory: | |
| desc: null | |
| value: true | |
| ddp_bucket_cap_mb: | |
| desc: null | |
| value: None | |
| ddp_find_unused_parameters: | |
| desc: null | |
| value: None | |
| debug: | |
| desc: null | |
| value: '[]' | |
| decoder_start_token_id: | |
| desc: null | |
| value: null | |
| deepspeed: | |
| desc: null | |
| value: None | |
| disable_tqdm: | |
| desc: null | |
| value: false | |
| diversity_loss_weight: | |
| desc: null | |
| value: 0.1 | |
| diversity_penalty: | |
| desc: null | |
| value: 0.0 | |
| do_eval: | |
| desc: null | |
| value: true | |
| do_predict: | |
| desc: null | |
| value: false | |
| do_sample: | |
| desc: null | |
| value: false | |
| do_stable_layer_norm: | |
| desc: null | |
| value: true | |
| do_train: | |
| desc: null | |
| value: true | |
| early_stopping: | |
| desc: null | |
| value: false | |
| encoder_no_repeat_ngram_size: | |
| desc: null | |
| value: 0 | |
| eos_token_id: | |
| desc: null | |
| value: 2 | |
| eval_accumulation_steps: | |
| desc: null | |
| value: None | |
| eval_batch_size: | |
| desc: null | |
| value: 64 | |
| eval_steps: | |
| desc: null | |
| value: 5 | |
| evaluation_strategy: | |
| desc: null | |
| value: steps | |
| feat_extract_activation: | |
| desc: null | |
| value: gelu | |
| feat_extract_dropout: | |
| desc: null | |
| value: 0.0 | |
| feat_extract_norm: | |
| desc: null | |
| value: layer | |
| feat_proj_dropout: | |
| desc: null | |
| value: 0.0 | |
| feat_quantizer_dropout: | |
| desc: null | |
| value: 0.0 | |
| final_dropout: | |
| desc: null | |
| value: 0.0 | |
| finetuning_task: | |
| desc: null | |
| value: null | |
| forced_bos_token_id: | |
| desc: null | |
| value: null | |
| forced_eos_token_id: | |
| desc: null | |
| value: null | |
| fp16: | |
| desc: null | |
| value: true | |
| fp16_backend: | |
| desc: null | |
| value: auto | |
| fp16_full_eval: | |
| desc: null | |
| value: false | |
| fp16_opt_level: | |
| desc: null | |
| value: O1 | |
| gradient_accumulation_steps: | |
| desc: null | |
| value: 1 | |
| gradient_checkpointing: | |
| desc: null | |
| value: true | |
| greater_is_better: | |
| desc: null | |
| value: false | |
| group_by_length: | |
| desc: null | |
| value: true | |
| half_precision_backend: | |
| desc: null | |
| value: amp | |
| hidden_act: | |
| desc: null | |
| value: gelu | |
| hidden_dropout: | |
| desc: null | |
| value: 0.0 | |
| hidden_size: | |
| desc: null | |
| value: 1024 | |
| hub_model_id: | |
| desc: null | |
| value: None | |
| hub_strategy: | |
| desc: null | |
| value: every_save | |
| hub_token: | |
| desc: null | |
| value: <HUB_TOKEN> | |
| id2label: | |
| desc: null | |
| value: | |
| '0': LABEL_0 | |
| '1': LABEL_1 | |
| ignore_data_skip: | |
| desc: null | |
| value: false | |
| initializer_range: | |
| desc: null | |
| value: 0.02 | |
| intermediate_size: | |
| desc: null | |
| value: 4096 | |
| is_decoder: | |
| desc: null | |
| value: false | |
| is_encoder_decoder: | |
| desc: null | |
| value: false | |
| label2id: | |
| desc: null | |
| value: | |
| LABEL_0: 0 | |
| LABEL_1: 1 | |
| label_names: | |
| desc: null | |
| value: None | |
| label_smoothing_factor: | |
| desc: null | |
| value: 0.0 | |
| layer_norm_eps: | |
| desc: null | |
| value: 1.0e-05 | |
| layerdrop: | |
| desc: null | |
| value: 0.0 | |
| learning_rate: | |
| desc: null | |
| value: 0.0002 | |
| length_column_name: | |
| desc: null | |
| value: input_length | |
| length_penalty: | |
| desc: null | |
| value: 1.0 | |
| load_best_model_at_end: | |
| desc: null | |
| value: true | |
| local_rank: | |
| desc: null | |
| value: -1 | |
| log_level: | |
| desc: null | |
| value: -1 | |
| log_level_replica: | |
| desc: null | |
| value: -1 | |
| log_on_each_node: | |
| desc: null | |
| value: true | |
| logging_dir: | |
| desc: null | |
| value: ./runs/Jan30_17-53-36_job-1abccd0a-3293-4ffe-8274-9e8f841f653f | |
| logging_first_step: | |
| desc: null | |
| value: false | |
| logging_nan_inf_filter: | |
| desc: null | |
| value: true | |
| logging_steps: | |
| desc: null | |
| value: 5 | |
| logging_strategy: | |
| desc: null | |
| value: steps | |
| lr_scheduler_type: | |
| desc: null | |
| value: linear | |
| mask_feature_length: | |
| desc: null | |
| value: 10 | |
| mask_feature_min_masks: | |
| desc: null | |
| value: 0 | |
| mask_feature_prob: | |
| desc: null | |
| value: 0.25 | |
| mask_time_length: | |
| desc: null | |
| value: 10 | |
| mask_time_min_masks: | |
| desc: null | |
| value: 2 | |
| mask_time_prob: | |
| desc: null | |
| value: 0.6 | |
| max_grad_norm: | |
| desc: null | |
| value: 1.0 | |
| max_length: | |
| desc: null | |
| value: 20 | |
| max_steps: | |
| desc: null | |
| value: -1 | |
| metric_for_best_model: | |
| desc: null | |
| value: loss | |
| min_length: | |
| desc: null | |
| value: 0 | |
| model_type: | |
| desc: null | |
| value: wav2vec2 | |
| mp_parameters: | |
| desc: null | |
| value: '' | |
| no_cuda: | |
| desc: null | |
| value: false | |
| no_repeat_ngram_size: | |
| desc: null | |
| value: 0 | |
| num_adapter_layers: | |
| desc: null | |
| value: 3 | |
| num_attention_heads: | |
| desc: null | |
| value: 16 | |
| num_beam_groups: | |
| desc: null | |
| value: 1 | |
| num_beams: | |
| desc: null | |
| value: 1 | |
| num_codevector_groups: | |
| desc: null | |
| value: 2 | |
| num_codevectors_per_group: | |
| desc: null | |
| value: 320 | |
| num_conv_pos_embedding_groups: | |
| desc: null | |
| value: 16 | |
| num_conv_pos_embeddings: | |
| desc: null | |
| value: 128 | |
| num_feat_extract_layers: | |
| desc: null | |
| value: 7 | |
| num_hidden_layers: | |
| desc: null | |
| value: 24 | |
| num_negatives: | |
| desc: null | |
| value: 100 | |
| num_return_sequences: | |
| desc: null | |
| value: 1 | |
| num_train_epochs: | |
| desc: null | |
| value: 4.0 | |
| optim: | |
| desc: null | |
| value: adamw_hf | |
| output_attentions: | |
| desc: null | |
| value: false | |
| output_dir: | |
| desc: null | |
| value: ./ | |
| output_hidden_size: | |
| desc: null | |
| value: 1024 | |
| output_hidden_states: | |
| desc: null | |
| value: false | |
| output_scores: | |
| desc: null | |
| value: false | |
| overwrite_output_dir: | |
| desc: null | |
| value: true | |
| pad_token_id: | |
| desc: null | |
| value: 218 | |
| past_index: | |
| desc: null | |
| value: -1 | |
| per_device_eval_batch_size: | |
| desc: null | |
| value: 64 | |
| per_device_train_batch_size: | |
| desc: null | |
| value: 64 | |
| per_gpu_eval_batch_size: | |
| desc: null | |
| value: None | |
| per_gpu_train_batch_size: | |
| desc: null | |
| value: None | |
| prediction_loss_only: | |
| desc: null | |
| value: false | |
| prefix: | |
| desc: null | |
| value: null | |
| problem_type: | |
| desc: null | |
| value: null | |
| proj_codevector_dim: | |
| desc: null | |
| value: 768 | |
| pruned_heads: | |
| desc: null | |
| value: {} | |
| push_to_hub: | |
| desc: null | |
| value: true | |
| push_to_hub_model_id: | |
| desc: null | |
| value: None | |
| push_to_hub_organization: | |
| desc: null | |
| value: None | |
| push_to_hub_token: | |
| desc: null | |
| value: <PUSH_TO_HUB_TOKEN> | |
| remove_invalid_values: | |
| desc: null | |
| value: false | |
| remove_unused_columns: | |
| desc: null | |
| value: true | |
| repetition_penalty: | |
| desc: null | |
| value: 1.0 | |
| report_to: | |
| desc: null | |
| value: '[''wandb'']' | |
| resume_from_checkpoint: | |
| desc: null | |
| value: None | |
| return_dict: | |
| desc: null | |
| value: true | |
| return_dict_in_generate: | |
| desc: null | |
| value: false | |
| run_name: | |
| desc: null | |
| value: xls-r-300m-fr | |
| save_on_each_node: | |
| desc: null | |
| value: false | |
| save_steps: | |
| desc: null | |
| value: 500 | |
| save_strategy: | |
| desc: null | |
| value: steps | |
| save_total_limit: | |
| desc: null | |
| value: 20 | |
| seed: | |
| desc: null | |
| value: 42 | |
| sep_token_id: | |
| desc: null | |
| value: null | |
| sharded_ddp: | |
| desc: null | |
| value: '[]' | |
| skip_memory_metrics: | |
| desc: null | |
| value: true | |
| task_specific_params: | |
| desc: null | |
| value: null | |
| tdnn_dilation: | |
| desc: null | |
| value: | |
| - 1 | |
| - 2 | |
| - 3 | |
| - 1 | |
| - 1 | |
| tdnn_dim: | |
| desc: null | |
| value: | |
| - 512 | |
| - 512 | |
| - 512 | |
| - 512 | |
| - 1500 | |
| tdnn_kernel: | |
| desc: null | |
| value: | |
| - 5 | |
| - 3 | |
| - 3 | |
| - 1 | |
| - 1 | |
| temperature: | |
| desc: null | |
| value: 1.0 | |
| tf32: | |
| desc: null | |
| value: None | |
| tie_encoder_decoder: | |
| desc: null | |
| value: false | |
| tie_word_embeddings: | |
| desc: null | |
| value: true | |
| tokenizer_class: | |
| desc: null | |
| value: null | |
| top_k: | |
| desc: null | |
| value: 50 | |
| top_p: | |
| desc: null | |
| value: 1.0 | |
| torch_dtype: | |
| desc: null | |
| value: float32 | |
| torchscript: | |
| desc: null | |
| value: false | |
| tpu_metrics_debug: | |
| desc: null | |
| value: false | |
| tpu_num_cores: | |
| desc: null | |
| value: None | |
| train_batch_size: | |
| desc: null | |
| value: 64 | |
| transformers_version: | |
| desc: null | |
| value: 4.17.0.dev0 | |
| use_bfloat16: | |
| desc: null | |
| value: false | |
| use_legacy_prediction_loop: | |
| desc: null | |
| value: false | |
| use_weighted_layer_sum: | |
| desc: null | |
| value: false | |
| vocab_size: | |
| desc: null | |
| value: 219 | |
| warmup_ratio: | |
| desc: null | |
| value: 0.0 | |
| warmup_steps: | |
| desc: null | |
| value: 2500 | |
| weight_decay: | |
| desc: null | |
| value: 0.0 | |
| xpu_backend: | |
| desc: null | |
| value: None | |
| xvector_output_dim: | |
| desc: null | |
| value: 512 | |