{
"pretrained_model_name_or_path": "Qwen/Qwen3-0.6B",
"output_dir": "./outputs",
"seed": 42,
"tf32": true,
"model_type": "causal",
"vocab_size": 4096,
"hidden_size": 256,
"num_hidden_layers": 8,
"num_attention_heads": 8,
"context_length": 256,
"prediction_length": 64,
"tokenizer_class": "NonUniformBins",
"binning_power": 2.0,
"exponential_base": 1.01,
"n_special_tokens": 2,
"pad_token_id": 0,
"eos_token_id": 1,
"use_eos_token": true,
"min_past": 64,
"drop_prob": 0.1,
"shuffle_buffer_length": 20000,
"per_device_train_batch_size": 32,
"learning_rate": 0.0001,
"max_steps": 40000,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine",
"optim": "adamw_torch",
"gradient_accumulation_steps": 4,
"log_steps": 20,
"save_steps": 100,
"dataloader_num_workers": 8,
"torch_compile": true,
"logger": "<Logger __main__ (INFO)>",
"total_train_batch_size": 128,
"short_model_name": "Qwen3-0.6B",
"run_name": "Qwen3-0.6B-h256-l8-a8-ctx256-pred64-vocab4096-NonUniform-lr1.0e-04-bs128-steps40000"
}