|
|
[2025-10-10 13:10:41,462] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:24741] baseline 0.000GB (+0.000GB allocated, +0.002GB reserved) |
|
|
[2025-10-10 13:10:41,462] [INFO] [axolotl.cli.config.load_cfg:248] [PID:24741] config: |
|
|
{ |
|
|
"activation_offloading": false, |
|
|
"adapter": "qlora", |
|
|
"axolotl_config_path": "config.yaml", |
|
|
"base_model": "nferruz/ProtGPT2", |
|
|
"base_model_config": "nferruz/ProtGPT2", |
|
|
"batch_size": 2, |
|
|
"bf16": false, |
|
|
"capabilities": { |
|
|
"bf16": true, |
|
|
"compute_capability": "sm_75", |
|
|
"fp8": false, |
|
|
"n_gpu": 1, |
|
|
"n_node": 1 |
|
|
}, |
|
|
"context_parallel_size": 1, |
|
|
"dataloader_num_workers": 1, |
|
|
"dataloader_pin_memory": true, |
|
|
"dataloader_prefetch_factor": 256, |
|
|
"dataset_processes": 2, |
|
|
"datasets": [ |
|
|
{ |
|
|
"ds_type": "json", |
|
|
"message_property_mappings": { |
|
|
"content": "content", |
|
|
"role": "role" |
|
|
}, |
|
|
"path": "/content/sequences_tokenized.jsonl", |
|
|
"trust_remote_code": false |
|
|
} |
|
|
], |
|
|
"ddp": false, |
|
|
"device": "cuda:0", |
|
|
"dion_rank_fraction": 1.0, |
|
|
"dion_rank_multiple_of": 1, |
|
|
"env_capabilities": { |
|
|
"torch_version": "2.8.0" |
|
|
}, |
|
|
"eval_batch_size": 2, |
|
|
"eval_causal_lm_metrics": [ |
|
|
"sacrebleu", |
|
|
"comet", |
|
|
"ter", |
|
|
"chrf" |
|
|
], |
|
|
"eval_max_new_tokens": 128, |
|
|
"eval_sample_packing": true, |
|
|
"eval_steps": 0.01, |
|
|
"eval_table_size": 0, |
|
|
"experimental_skip_move_to_device": true, |
|
|
"fp16": true, |
|
|
"gradient_accumulation_steps": 1, |
|
|
"gradient_checkpointing": true, |
|
|
"gradient_checkpointing_kwargs": { |
|
|
"use_reentrant": true |
|
|
}, |
|
|
"group_by_length": false, |
|
|
"hub_model_id": "ProtGPT2-Oxido", |
|
|
"include_tkps": true, |
|
|
"is_falcon_derived_model": false, |
|
|
"is_llama_derived_model": false, |
|
|
"is_mistral_derived_model": false, |
|
|
"learning_rate": 0.002, |
|
|
"lisa_layers_attribute": "model.layers", |
|
|
"load_best_model_at_end": false, |
|
|
"load_in_4bit": true, |
|
|
"load_in_8bit": false, |
|
|
"local_rank": 0, |
|
|
"logging_steps": 100, |
|
|
"lora_alpha": 16, |
|
|
"lora_dropout": 0.05, |
|
|
"lora_r": 32, |
|
|
"lora_target_linear": true, |
|
|
"loraplus_lr_embedding": 1e-06, |
|
|
"lr_scheduler": "cosine", |
|
|
"mean_resizing_embeddings": false, |
|
|
"micro_batch_size": 2, |
|
|
"model_config_type": "gpt2", |
|
|
"num_epochs": 3.0, |
|
|
"optimizer": "paged_adamw_32bit", |
|
|
"output_dir": "./qlora-out", |
|
|
"pad_to_sequence_len": true, |
|
|
"pretrain_multipack_attn": true, |
|
|
"profiler_steps_start": 0, |
|
|
"qlora_sharded_model_loading": false, |
|
|
"ray_num_workers": 1, |
|
|
"resources_per_worker": { |
|
|
"GPU": 1 |
|
|
}, |
|
|
"sample_packing": true, |
|
|
"sample_packing_bin_size": 200, |
|
|
"sample_packing_group_size": 100000, |
|
|
"save_only_model": false, |
|
|
"save_safetensors": true, |
|
|
"save_strategy": "epoch", |
|
|
"sequence_len": 2048, |
|
|
"shuffle_before_merging_datasets": false, |
|
|
"shuffle_merged_datasets": true, |
|
|
"skip_prepare_dataset": false, |
|
|
"special_tokens": { |
|
|
"eos_token": "<|endoftext|>", |
|
|
"pad_token": "<|endoftext|>" |
|
|
}, |
|
|
"streaming_multipack_buffer_size": 10000, |
|
|
"strict": false, |
|
|
"tensor_parallel_size": 1, |
|
|
"tf32": false, |
|
|
"tiled_mlp_use_original_mlp": true, |
|
|
"tokenizer_config": "nferruz/ProtGPT2", |
|
|
"tokenizer_save_jinja_files": true, |
|
|
"tokenizer_type": "AutoTokenizer", |
|
|
"torch_dtype": "torch.float16", |
|
|
"train_on_inputs": false, |
|
|
"trl": { |
|
|
"log_completions": false, |
|
|
"mask_truncated_completions": false, |
|
|
"ref_model_mixup_alpha": 0.9, |
|
|
"ref_model_sync_steps": 64, |
|
|
"scale_rewards": true, |
|
|
"sync_ref_model": false, |
|
|
"use_vllm": false, |
|
|
"vllm_server_host": "0.0.0.0", |
|
|
"vllm_server_port": 8000 |
|
|
}, |
|
|
"type_of_model": "AutoModelForCausalLM", |
|
|
"use_ray": false, |
|
|
"val_set_size": 0.02, |
|
|
"vllm": { |
|
|
"device": "auto", |
|
|
"dtype": "auto", |
|
|
"gpu_memory_utilization": 0.9, |
|
|
"host": "0.0.0.0", |
|
|
"port": 8000 |
|
|
}, |
|
|
"warmup_steps": 100, |
|
|
"weight_decay": 0.0, |
|
|
"world_size": 1 |
|
|
} |
|
|
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:24741] EOS: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:24741] BOS: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:24741] PAD: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:24741] UNK: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:42,465] [INFO] [axolotl.loaders.tokenizer.load_tokenizer:295] [PID:24741] No Chat template selected. Consider adding a chat template for easier inference. |
|
|
[2025-10-10 13:10:42,466] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:476] [PID:24741] Unable to find prepared dataset in last_run_prepared/120d8e2ed44f3c537dc9a20773f86561 |
|
|
[2025-10-10 13:10:42,466] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:24741] Loading raw datasets... |
|
|
[2025-10-10 13:10:42,466] [WARNING] [axolotl.utils.data.sft._load_raw_datasets:322] [PID:24741] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset using `axolotl preprocess path/to/config.yml`. |
|
|
[2025-10-10 13:10:42,802] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:24741] Loading dataset: /content/sequences_tokenized.jsonl with base_type: None and prompt_style: None |
|
|
[2025-10-10 13:10:42,820] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:218] [PID:24741] min_input_len: 6 |
|
|
[2025-10-10 13:10:42,821] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:220] [PID:24741] max_input_len: 512 |
|
|
Dropping Long Sequences (>2048) (num_proc=2): 0% 0/6304 [00:00<?, ? examples/s]
Dropping Long Sequences (>2048) (num_proc=2): 16% 1000/6304 [00:00<00:01, 2728.50 examples/s]
Dropping Long Sequences (>2048) (num_proc=2): 48% 3000/6304 [00:00<00:00, 5756.81 examples/s]
Dropping Long Sequences (>2048) (num_proc=2): 79% 5000/6304 [00:00<00:00, 7979.32 examples/s]
Dropping Long Sequences (>2048) (num_proc=2): 100% 6304/6304 [00:00<00:00, 6859.52 examples/s] |
|
|
Drop Samples with Zero Trainable Tokens (num_proc=2): 0% 0/6304 [00:00<?, ? examples/s]
Drop Samples with Zero Trainable Tokens (num_proc=2): 16% 1000/6304 [00:00<00:01, 3093.94 examples/s]
Drop Samples with Zero Trainable Tokens (num_proc=2): 48% 3000/6304 [00:00<00:00, 7754.71 examples/s]
Drop Samples with Zero Trainable Tokens (num_proc=2): 79% 5000/6304 [00:00<00:00, 11055.90 examples/s]
Drop Samples with Zero Trainable Tokens (num_proc=2): 100% 6304/6304 [00:00<00:00, 8853.66 examples/s] |
|
|
Add position_id column (Sample Packing) (num_proc=2): 0% 0/6304 [00:00<?, ? examples/s]
Add position_id column (Sample Packing) (num_proc=2): 16% 1000/6304 [00:00<00:01, 3374.43 examples/s]
Add position_id column (Sample Packing) (num_proc=2): 48% 3000/6304 [00:00<00:00, 6482.08 examples/s]
Add position_id column (Sample Packing) (num_proc=2): 79% 5000/6304 [00:00<00:00, 7737.71 examples/s]
Add position_id column (Sample Packing) (num_proc=2): 100% 6304/6304 [00:00<00:00, 6958.78 examples/s] |
|
|
Saving the dataset (0/2 shards): 0% 0/6304 [00:00<?, ? examples/s]
Saving the dataset (0/2 shards): 50% 3152/6304 [00:00<00:00, 29151.65 examples/s]
Saving the dataset (1/2 shards): 100% 6304/6304 [00:00<00:00, 29151.65 examples/s]
Saving the dataset (2/2 shards): 100% 6304/6304 [00:00<00:00, 29151.65 examples/s]
Saving the dataset (2/2 shards): 100% 6304/6304 [00:00<00:00, 33750.47 examples/s] |
|
|
[2025-10-10 13:10:45,620] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:406] [PID:24741] total_num_tokens: 16_570 |
|
|
[2025-10-10 13:10:45,622] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:424] [PID:24741] `total_supervised_tokens: 16_570` |
|
|
[2025-10-10 13:10:48,083] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9218025207519531 |
|
|
[2025-10-10 13:10:49,020] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9363107681274414 |
|
|
[2025-10-10 13:10:49,934] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9140019416809082 |
|
|
[2025-10-10 13:10:50,862] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9271283149719238 |
|
|
[2025-10-10 13:10:50,882] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
[2025-10-10 13:10:50,882] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:483] [PID:24741] data_loader_len: 4 |
|
|
[2025-10-10 13:10:50,883] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:499] [PID:24741] sample_packing_eff_est across ranks: [0.8989800347222222] |
|
|
[2025-10-10 13:10:50,883] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:511] [PID:24741] sample_packing_eff_est: None |
|
|
[2025-10-10 13:10:50,883] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:522] [PID:24741] total_num_steps: 12 |
|
|
[2025-10-10 13:10:50,893] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:406] [PID:24741] total_num_tokens: 746_874 |
|
|
[2025-10-10 13:10:50,932] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:424] [PID:24741] `total_supervised_tokens: 746_874` |
|
|
[2025-10-10 13:10:52,871] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9358129501342773 |
|
|
[2025-10-10 13:10:53,781] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9106135368347168 |
|
|
[2025-10-10 13:10:55,014] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2319858074188232 |
|
|
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2722358703613281 |
|
|
[2025-10-10 13:10:56,287] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [183] |
|
|
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:483] [PID:24741] data_loader_len: 183 |
|
|
[2025-10-10 13:10:56,287] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:499] [PID:24741] sample_packing_eff_est across ranks: [0.9936909272820164] |
|
|
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:511] [PID:24741] sample_packing_eff_est: 1.0 |
|
|
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:522] [PID:24741] total_num_steps: 549 |
|
|
[2025-10-10 13:10:56,287] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:24741] Maximum number of steps set at 549 |
|
|
[2025-10-10 13:10:56,297] [DEBUG] [axolotl.train.setup_model_and_tokenizer:65] [PID:24741] Loading tokenizer... nferruz/ProtGPT2 |
|
|
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:24741] EOS: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:24741] BOS: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:24741] PAD: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:24741] UNK: 0 / <|endoftext|> |
|
|
[2025-10-10 13:10:57,214] [INFO] [axolotl.loaders.tokenizer.load_tokenizer:295] [PID:24741] No Chat template selected. Consider adding a chat template for easier inference. |
|
|
[2025-10-10 13:10:57,215] [DEBUG] [axolotl.train.setup_model_and_tokenizer:74] [PID:24741] Loading model |
|
|
[2025-10-10 13:10:57,333] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:24741] Patched Trainer.evaluation_loop with nanmean loss calculation |
|
|
[2025-10-10 13:10:57,334] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:24741] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation |
|
|
[2025-10-10 13:10:57,335] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:301] [PID:24741] Applying multipack dataloader patch for sample packing... |
|
|
[2025-10-10 13:11:27,461] [WARNING] [axolotl.loaders.model._adjust_model_config:273] [PID:24741] increasing model.config.max_position_embeddings from 1024 to 2048 |
|
|
[2025-10-10 13:11:27,467] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:863] [PID:24741] converting PEFT model w/ prepare_model_for_kbit_training |
|
|
[2025-10-10 13:11:27,479] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:24741] Converting modules to torch.float16 |
|
|
[2025-10-10 13:11:27,481] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:24741] Memory usage after model load 0.849GB (+0.849GB allocated, +0.918GB reserved) |
|
|
[2025-10-10 13:11:27,482] [INFO] [axolotl.loaders.adapter.load_lora:80] [PID:24741] found linear modules: ['c_attn', 'c_fc', 'c_proj'] |
|
|
trainable params: 23,592,960 || all params: 797,623,040 || trainable%: 2.9579 |
|
|
[2025-10-10 13:11:27,888] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:24741] after adapters 0.618GB (+0.618GB allocated, +1.012GB reserved) |
|
|
[2025-10-10 13:11:39,738] [INFO] [axolotl.train.save_initial_configs:398] [PID:24741] Pre-saving adapter config to ./qlora-out... |
|
|
[2025-10-10 13:11:39,738] [INFO] [axolotl.train.save_initial_configs:402] [PID:24741] Pre-saving tokenizer to ./qlora-out... |
|
|
[2025-10-10 13:11:39,828] [INFO] [axolotl.train.save_initial_configs:407] [PID:24741] Pre-saving model config to ./qlora-out... |
|
|
[2025-10-10 13:11:39,836] [INFO] [axolotl.train.execute_training:196] [PID:24741] Starting trainer... |
|
|
[2025-10-10 13:11:45,415] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8461699485778809 |
|
|
[2025-10-10 13:11:47,771] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 2.3554582595825195 |
|
|
[2025-10-10 13:11:49,329] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5573019981384277 |
|
|
[2025-10-10 13:11:51,006] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6764421463012695 |
|
|
[2025-10-10 13:11:51,006] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [183] |
|
|
0% 0/549 [00:00<?, ?it/s][2025-10-10 13:11:51,161] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:11:54,146] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2840440273284912 |
|
|
[2025-10-10 13:11:55,365] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.218794584274292 |
|
|
[2025-10-10 13:11:56,599] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2343621253967285 |
|
|
[2025-10-10 13:11:58,099] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4991233348846436 |
|
|
[2025-10-10 13:11:58,099] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:00<00:00, 3.33it/s][A |
|
|
75% 3/4 [00:01<00:00, 1.68it/s][A |
|
|
100% 4/4 [00:02<00:00, 1.30it/s][A
|
|
|
[A{'eval_loss': 8.67106819152832, 'eval_runtime': 7.1874, 'eval_samples_per_second': 17.67, 'eval_steps_per_second': 8.904, 'memory/max_active (GiB)': 2.61, 'memory/max_allocated (GiB)': 2.61, 'memory/device_reserved (GiB)': 3.21, 'epoch': 0} |
|
|
0% 0/549 [00:14<?, ?it/s] |
|
|
100% 4/4 [00:03<00:00, 1.30it/s][A |
|
|
[A
0% 1/549 [00:21<3:16:49, 21.55s/it]
0% 2/549 [00:25<1:40:35, 11.03s/it]
1% 3/549 [00:28<1:09:56, 7.69s/it]
1% 4/549 [00:32<55:34, 6.12s/it]
1% 5/549 [00:36<47:37, 5.25s/it]
1% 6/549 [00:40<42:50, 4.73s/it][2025-10-10 13:12:31,245] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:12:33,921] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245626449584961 |
|
|
[2025-10-10 13:12:35,290] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3684089183807373 |
|
|
[2025-10-10 13:12:37,029] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.738837718963623 |
|
|
[2025-10-10 13:12:38,274] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2442030906677246 |
|
|
[2025-10-10 13:12:38,274] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.90it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.36it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.16it/s][A
|
|
|
[A{'eval_loss': 8.665609359741211, 'eval_runtime': 5.6196, 'eval_samples_per_second': 22.599, 'eval_steps_per_second': 11.389, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.03} |
|
|
1% 6/549 [00:52<42:50, 4.73s/it] |
|
|
100% 4/4 [00:03<00:00, 1.16it/s][A |
|
|
[A
1% 7/549 [00:56<1:17:17, 8.56s/it]
1% 8/549 [01:00<1:03:22, 7.03s/it]
2% 9/549 [01:04<54:04, 6.01s/it]
2% 10/549 [01:07<47:47, 5.32s/it]
2% 11/549 [01:11<43:30, 4.85s/it]
2% 12/549 [01:15<40:33, 4.53s/it][2025-10-10 13:13:06,562] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:13:09,050] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2255823612213135 |
|
|
[2025-10-10 13:13:10,291] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.240588665008545 |
|
|
[2025-10-10 13:13:11,581] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2900962829589844 |
|
|
[2025-10-10 13:13:13,335] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.753103256225586 |
|
|
[2025-10-10 13:13:13,335] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.86it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.34it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.14it/s][A
|
|
|
[A{'eval_loss': 8.614709854125977, 'eval_runtime': 5.8541, 'eval_samples_per_second': 21.694, 'eval_steps_per_second': 10.932, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.07} |
|
|
2% 12/549 [01:28<40:33, 4.53s/it] |
|
|
100% 4/4 [00:03<00:00, 1.14it/s][A |
|
|
[A
2% 13/549 [01:31<1:12:45, 8.14s/it]
3% 14/549 [01:35<1:00:59, 6.84s/it]
3% 15/549 [01:39<52:49, 5.93s/it]
3% 16/549 [01:43<47:08, 5.31s/it]
3% 17/549 [01:47<43:11, 4.87s/it]
3% 18/549 [01:51<40:28, 4.57s/it][2025-10-10 13:13:42,268] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:13:44,777] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.233976125717163 |
|
|
[2025-10-10 13:13:45,991] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2140934467315674 |
|
|
[2025-10-10 13:13:47,230] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2387192249298096 |
|
|
[2025-10-10 13:13:48,517] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2868869304656982 |
|
|
[2025-10-10 13:13:48,517] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.83it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 8.517555236816406, 'eval_runtime': 6.4388, 'eval_samples_per_second': 19.724, 'eval_steps_per_second': 9.94, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.1} |
|
|
3% 18/549 [02:03<40:28, 4.57s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
3% 19/549 [02:07<1:12:15, 8.18s/it]
4% 20/549 [02:11<1:00:54, 6.91s/it]
4% 21/549 [02:15<52:54, 6.01s/it]
4% 22/549 [02:19<47:19, 5.39s/it]
4% 23/549 [02:23<43:26, 4.96s/it]
4% 24/549 [02:27<40:38, 4.64s/it][2025-10-10 13:14:18,514] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:14:20,985] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2237637042999268 |
|
|
[2025-10-10 13:14:22,224] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2387983798980713 |
|
|
[2025-10-10 13:14:23,447] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2227163314819336 |
|
|
[2025-10-10 13:14:24,682] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2346465587615967 |
|
|
[2025-10-10 13:14:24,682] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 8.410308837890625, 'eval_runtime': 6.0594, 'eval_samples_per_second': 20.959, 'eval_steps_per_second': 10.562, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.13} |
|
|
4% 24/549 [02:39<40:38, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
5% 25/549 [02:43<1:10:34, 8.08s/it]
5% 26/549 [02:47<59:22, 6.81s/it]
5% 27/549 [02:51<51:30, 5.92s/it]
5% 28/549 [02:54<45:59, 5.30s/it]
5% 29/549 [02:58<42:11, 4.87s/it]
5% 30/549 [03:02<39:29, 4.57s/it][2025-10-10 13:14:53,875] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:14:56,364] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.229734182357788 |
|
|
[2025-10-10 13:14:57,586] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2214250564575195 |
|
|
[2025-10-10 13:14:58,806] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.219433307647705 |
|
|
[2025-10-10 13:15:00,136] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.329803705215454 |
|
|
[2025-10-10 13:15:00,136] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 8.288145065307617, 'eval_runtime': 5.7346, 'eval_samples_per_second': 22.146, 'eval_steps_per_second': 11.16, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.16} |
|
|
5% 30/549 [03:14<39:29, 4.57s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
6% 31/549 [03:18<1:08:40, 7.95s/it]
6% 32/549 [03:22<57:59, 6.73s/it]
6% 33/549 [03:26<50:32, 5.88s/it]
6% 34/549 [03:30<45:20, 5.28s/it]
6% 35/549 [03:34<41:45, 4.87s/it]
7% 36/549 [03:38<39:13, 4.59s/it][2025-10-10 13:15:29,235] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:15:31,722] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.212110996246338 |
|
|
[2025-10-10 13:15:32,954] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.231447458267212 |
|
|
[2025-10-10 13:15:34,205] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2508065700531006 |
|
|
[2025-10-10 13:15:35,438] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2330925464630127 |
|
|
[2025-10-10 13:15:35,439] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 8.149022102355957, 'eval_runtime': 5.7853, 'eval_samples_per_second': 21.952, 'eval_steps_per_second': 11.062, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.2} |
|
|
7% 36/549 [03:50<39:13, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
7% 37/549 [03:53<1:08:00, 7.97s/it]
7% 38/549 [03:57<57:23, 6.74s/it]
7% 39/549 [04:01<50:02, 5.89s/it]
7% 40/549 [04:05<44:51, 5.29s/it]
7% 41/549 [04:09<41:10, 4.86s/it]
8% 42/549 [04:13<38:40, 4.58s/it][2025-10-10 13:16:04,533] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:16:07,240] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2179176807403564 |
|
|
[2025-10-10 13:16:08,479] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.239612340927124 |
|
|
[2025-10-10 13:16:09,694] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2147700786590576 |
|
|
[2025-10-10 13:16:10,944] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2497029304504395 |
|
|
[2025-10-10 13:16:10,945] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 7.923487663269043, 'eval_runtime': 5.7808, 'eval_samples_per_second': 21.969, 'eval_steps_per_second': 11.071, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.23} |
|
|
8% 42/549 [04:25<38:40, 4.58s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
8% 43/549 [04:29<1:07:37, 8.02s/it]
8% 44/549 [04:33<56:57, 6.77s/it]
8% 45/549 [04:37<49:35, 5.90s/it]
8% 46/549 [04:41<44:25, 5.30s/it]
9% 47/549 [04:44<40:49, 4.88s/it]
9% 48/549 [04:48<38:22, 4.60s/it][2025-10-10 13:16:40,037] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:16:43,053] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2188963890075684 |
|
|
[2025-10-10 13:16:44,293] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2395522594451904 |
|
|
[2025-10-10 13:16:45,513] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.21950364112854 |
|
|
[2025-10-10 13:16:46,763] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2497367858886719 |
|
|
[2025-10-10 13:16:46,763] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 7.707249641418457, 'eval_runtime': 5.7224, 'eval_samples_per_second': 22.194, 'eval_steps_per_second': 11.184, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.26} |
|
|
9% 48/549 [05:01<38:22, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
9% 49/549 [05:05<1:07:36, 8.11s/it]
9% 50/549 [05:09<56:51, 6.84s/it]
9% 51/549 [05:12<49:28, 5.96s/it]
9% 52/549 [05:16<44:15, 5.34s/it]
10% 53/549 [05:20<40:31, 4.90s/it]
10% 54/549 [05:24<36:22, 4.41s/it][2025-10-10 13:17:15,168] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:17:18,275] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8323063850402832 |
|
|
[2025-10-10 13:17:19,609] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3336091041564941 |
|
|
[2025-10-10 13:17:20,827] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2181472778320312 |
|
|
[2025-10-10 13:17:22,034] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2064990997314453 |
|
|
[2025-10-10 13:17:22,034] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 7.54141092300415, 'eval_runtime': 5.7423, 'eval_samples_per_second': 22.117, 'eval_steps_per_second': 11.145, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.3} |
|
|
10% 54/549 [05:36<36:22, 4.41s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
10% 55/549 [05:40<1:06:03, 8.02s/it]
10% 56/549 [05:44<55:34, 6.76s/it]
10% 57/549 [05:48<48:18, 5.89s/it]
11% 58/549 [05:52<43:15, 5.29s/it]
11% 59/549 [05:55<39:46, 4.87s/it]
11% 60/549 [05:59<37:18, 4.58s/it][2025-10-10 13:17:50,975] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:17:53,534] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2939870357513428 |
|
|
[2025-10-10 13:17:55,286] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7515311241149902 |
|
|
[2025-10-10 13:17:56,595] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3087666034698486 |
|
|
[2025-10-10 13:17:58,392] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7966506481170654 |
|
|
[2025-10-10 13:17:58,392] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 7.278426647186279, 'eval_runtime': 6.4276, 'eval_samples_per_second': 19.758, 'eval_steps_per_second': 9.957, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.33} |
|
|
11% 60/549 [06:13<37:18, 4.58s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
11% 61/549 [06:17<1:09:20, 8.52s/it]
11% 62/549 [06:21<57:52, 7.13s/it]
11% 63/549 [06:25<49:53, 6.16s/it]
12% 64/549 [06:29<44:24, 5.49s/it]
12% 65/549 [06:33<40:32, 5.03s/it]
12% 66/549 [06:37<37:50, 4.70s/it][2025-10-10 13:18:28,296] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:18:33,461] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 2.976652145385742 |
|
|
[2025-10-10 13:18:35,243] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7817871570587158 |
|
|
[2025-10-10 13:18:36,478] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2347698211669922 |
|
|
[2025-10-10 13:18:37,729] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2507171630859375 |
|
|
[2025-10-10 13:18:37,729] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.80it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.31it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 7.130417823791504, 'eval_runtime': 6.5516, 'eval_samples_per_second': 19.385, 'eval_steps_per_second': 9.769, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.36} |
|
|
12% 66/549 [06:53<37:50, 4.70s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
12% 67/549 [06:57<1:14:42, 9.30s/it]
12% 68/549 [07:01<1:01:33, 7.68s/it]
13% 69/549 [07:04<52:22, 6.55s/it]
13% 70/549 [07:08<45:55, 5.75s/it]
13% 71/549 [07:12<41:24, 5.20s/it]
13% 72/549 [07:16<38:15, 4.81s/it][2025-10-10 13:19:07,848] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:19:10,905] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.743417501449585 |
|
|
[2025-10-10 13:19:12,274] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3681507110595703 |
|
|
[2025-10-10 13:19:13,507] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.232816457748413 |
|
|
[2025-10-10 13:19:14,734] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2264103889465332 |
|
|
[2025-10-10 13:19:14,734] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 6.794009685516357, 'eval_runtime': 5.7504, 'eval_samples_per_second': 22.086, 'eval_steps_per_second': 11.13, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.39} |
|
|
13% 72/549 [07:29<38:15, 4.81s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
13% 73/549 [07:33<1:05:59, 8.32s/it]
13% 74/549 [07:36<53:50, 6.80s/it]
14% 75/549 [07:40<46:47, 5.92s/it]
14% 76/549 [07:44<41:57, 5.32s/it]
14% 77/549 [07:48<38:32, 4.90s/it]
14% 78/549 [07:52<36:06, 4.60s/it][2025-10-10 13:19:43,212] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:19:45,690] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2276828289031982 |
|
|
[2025-10-10 13:19:47,405] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7151846885681152 |
|
|
[2025-10-10 13:19:48,875] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4696624279022217 |
|
|
[2025-10-10 13:19:50,118] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.24239182472229 |
|
|
[2025-10-10 13:19:50,118] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 6.708348751068115, 'eval_runtime': 5.7043, 'eval_samples_per_second': 22.264, 'eval_steps_per_second': 11.22, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.43} |
|
|
14% 78/549 [08:04<36:06, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
14% 79/549 [08:08<1:03:59, 8.17s/it]
15% 80/549 [08:12<53:44, 6.88s/it]
15% 81/549 [08:16<46:34, 5.97s/it]
15% 82/549 [08:20<41:37, 5.35s/it]
15% 83/549 [08:24<38:10, 4.91s/it]
15% 84/549 [08:27<35:43, 4.61s/it][2025-10-10 13:20:19,124] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:20:21,612] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2227427959442139 |
|
|
[2025-10-10 13:20:22,917] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3052830696105957 |
|
|
[2025-10-10 13:20:24,696] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7787699699401855 |
|
|
[2025-10-10 13:20:25,947] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2503879070281982 |
|
|
[2025-10-10 13:20:25,947] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 6.766955852508545, 'eval_runtime': 5.7386, 'eval_samples_per_second': 22.131, 'eval_steps_per_second': 11.152, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.46} |
|
|
15% 84/549 [08:40<35:43, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
15% 85/549 [08:44<1:03:06, 8.16s/it]
16% 86/549 [08:48<53:01, 6.87s/it]
16% 87/549 [08:52<45:57, 5.97s/it]
16% 88/549 [08:56<41:08, 5.35s/it]
16% 89/549 [08:59<37:43, 4.92s/it]
16% 90/549 [09:03<35:17, 4.61s/it][2025-10-10 13:20:55,024] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:20:57,527] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2476911544799805 |
|
|
[2025-10-10 13:20:58,765] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2383363246917725 |
|
|
[2025-10-10 13:21:00,176] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4102411270141602 |
|
|
[2025-10-10 13:21:01,923] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7466182708740234 |
|
|
[2025-10-10 13:21:01,923] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 6.3829665184021, 'eval_runtime': 5.7206, 'eval_samples_per_second': 22.2, 'eval_steps_per_second': 11.188, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.49} |
|
|
16% 90/549 [09:16<35:17, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
17% 91/549 [09:20<1:02:22, 8.17s/it]
17% 92/549 [09:24<52:22, 6.88s/it]
17% 93/549 [09:28<45:23, 5.97s/it]
17% 94/549 [09:31<40:32, 5.35s/it]
17% 95/549 [09:35<37:09, 4.91s/it]
17% 96/549 [09:39<34:46, 4.61s/it][2025-10-10 13:21:30,890] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:21:33,421] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2245755195617676 |
|
|
[2025-10-10 13:21:34,670] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2479896545410156 |
|
|
[2025-10-10 13:21:35,919] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2494275569915771 |
|
|
[2025-10-10 13:21:37,555] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6355178356170654 |
|
|
[2025-10-10 13:21:37,555] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 6.172480583190918, 'eval_runtime': 6.1695, 'eval_samples_per_second': 20.585, 'eval_steps_per_second': 10.374, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.52} |
|
|
17% 96/549 [09:52<34:46, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
18% 97/549 [09:56<1:02:02, 8.24s/it]
18% 98/549 [10:00<52:01, 6.92s/it]
18% 99/549 [10:04<45:02, 6.00s/it]
18% 100/549 [10:08<40:10, 5.37s/it]
{'loss': 7.8585, 'grad_norm': 1.3858423233032227, 'learning_rate': 0.00192, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'tokens_per_second_per_gpu': 42.0, 'epoch': 0.55} |
|
|
18% 100/549 [10:08<40:10, 5.37s/it]
18% 101/549 [10:12<36:58, 4.95s/it]
19% 102/549 [10:15<34:32, 4.64s/it][2025-10-10 13:22:07,077] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:22:09,610] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245088815689087 |
|
|
[2025-10-10 13:22:10,855] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2448465824127197 |
|
|
[2025-10-10 13:22:12,106] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2508001327514648 |
|
|
[2025-10-10 13:22:13,381] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2749567031860352 |
|
|
[2025-10-10 13:22:13,382] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.937534809112549, 'eval_runtime': 6.3177, 'eval_samples_per_second': 20.102, 'eval_steps_per_second': 10.13, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.56} |
|
|
19% 102/549 [10:28<34:32, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
19% 103/549 [10:32<1:00:52, 8.19s/it]
19% 104/549 [10:36<51:10, 6.90s/it]
19% 105/549 [10:40<44:23, 6.00s/it]
19% 106/549 [10:44<39:36, 5.36s/it]
19% 107/549 [10:48<36:21, 4.94s/it]
20% 108/549 [10:51<34:00, 4.63s/it][2025-10-10 13:22:43,069] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:22:45,569] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.246150016784668 |
|
|
[2025-10-10 13:22:46,822] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2529652118682861 |
|
|
[2025-10-10 13:22:48,105] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2827684879302979 |
|
|
[2025-10-10 13:22:49,347] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2417457103729248 |
|
|
[2025-10-10 13:22:49,347] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.884361267089844, 'eval_runtime': 5.9745, 'eval_samples_per_second': 21.257, 'eval_steps_per_second': 10.712, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.59} |
|
|
20% 108/549 [11:04<34:00, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
20% 109/549 [11:08<59:10, 8.07s/it]
20% 110/549 [11:11<49:49, 6.81s/it]
20% 111/549 [11:15<43:16, 5.93s/it]
20% 112/549 [11:19<38:43, 5.32s/it]
21% 113/549 [11:23<35:29, 4.89s/it]
21% 114/549 [11:27<33:16, 4.59s/it][2025-10-10 13:23:18,582] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:23:21,111] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2601120471954346 |
|
|
[2025-10-10 13:23:22,364] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2526865005493164 |
|
|
[2025-10-10 13:23:23,588] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.223961591720581 |
|
|
[2025-10-10 13:23:24,824] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2359259128570557 |
|
|
[2025-10-10 13:23:24,824] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 6.100166320800781, 'eval_runtime': 5.769, 'eval_samples_per_second': 22.014, 'eval_steps_per_second': 11.094, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.62} |
|
|
21% 114/549 [11:39<33:16, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
21% 115/549 [11:43<57:40, 7.97s/it]
21% 116/549 [11:47<48:37, 6.74s/it]
21% 117/549 [11:51<42:19, 5.88s/it]
21% 118/549 [11:54<37:57, 5.28s/it]
22% 119/549 [11:58<34:52, 4.87s/it]
22% 120/549 [12:02<32:40, 4.57s/it][2025-10-10 13:23:53,849] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:23:56,384] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2743051052093506 |
|
|
[2025-10-10 13:23:57,627] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2425775527954102 |
|
|
[2025-10-10 13:23:58,872] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2450978755950928 |
|
|
[2025-10-10 13:24:00,110] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2371137142181396 |
|
|
[2025-10-10 13:24:00,110] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 5.836405277252197, 'eval_runtime': 5.7822, 'eval_samples_per_second': 21.964, 'eval_steps_per_second': 11.069, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.66} |
|
|
22% 120/549 [12:14<32:40, 4.57s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
22% 121/549 [12:18<56:50, 7.97s/it]
22% 122/549 [12:22<47:53, 6.73s/it]
22% 123/549 [12:26<41:44, 5.88s/it]
23% 124/549 [12:30<37:24, 5.28s/it]
23% 125/549 [12:34<34:22, 4.86s/it]
23% 126/549 [12:38<32:18, 4.58s/it][2025-10-10 13:24:29,185] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:24:32,019] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2589247226715088 |
|
|
[2025-10-10 13:24:33,287] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2684495449066162 |
|
|
[2025-10-10 13:24:34,521] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2337257862091064 |
|
|
[2025-10-10 13:24:35,751] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2298574447631836 |
|
|
[2025-10-10 13:24:35,752] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 5.894244194030762, 'eval_runtime': 5.7903, 'eval_samples_per_second': 21.933, 'eval_steps_per_second': 11.053, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.69} |
|
|
23% 126/549 [12:50<32:18, 4.58s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
23% 127/549 [12:54<56:47, 8.08s/it]
23% 128/549 [12:58<47:47, 6.81s/it]
23% 129/549 [13:02<41:35, 5.94s/it]
24% 130/549 [13:05<37:15, 5.33s/it]
24% 131/549 [13:09<34:11, 4.91s/it]
24% 132/549 [13:13<32:04, 4.61s/it][2025-10-10 13:25:04,943] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:25:08,050] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2962446212768555 |
|
|
[2025-10-10 13:25:09,299] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2489659786224365 |
|
|
[2025-10-10 13:25:10,529] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2295067310333252 |
|
|
[2025-10-10 13:25:11,775] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2457163333892822 |
|
|
[2025-10-10 13:25:11,775] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.814137935638428, 'eval_runtime': 5.7262, 'eval_samples_per_second': 22.179, 'eval_steps_per_second': 11.177, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.72} |
|
|
24% 132/549 [13:26<32:04, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
24% 133/549 [13:30<56:33, 8.16s/it]
24% 134/549 [13:34<47:29, 6.87s/it]
25% 135/549 [13:37<41:08, 5.96s/it]
25% 136/549 [13:41<36:43, 5.34s/it]
25% 137/549 [13:45<33:38, 4.90s/it]
25% 138/549 [13:49<31:27, 4.59s/it][2025-10-10 13:25:40,704] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:25:43,878] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.726452112197876 |
|
|
[2025-10-10 13:25:45,124] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245727777481079 |
|
|
[2025-10-10 13:25:46,372] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2481646537780762 |
|
|
[2025-10-10 13:25:47,600] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2278366088867188 |
|
|
[2025-10-10 13:25:47,601] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.674722671508789, 'eval_runtime': 5.7279, 'eval_samples_per_second': 22.172, 'eval_steps_per_second': 11.173, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.75} |
|
|
25% 138/549 [14:02<31:27, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
25% 139/549 [14:06<55:44, 8.16s/it]
26% 140/549 [14:09<46:48, 6.87s/it]
26% 141/549 [14:13<40:35, 5.97s/it]
26% 142/549 [14:17<36:14, 5.34s/it]
26% 143/549 [14:21<33:13, 4.91s/it]
26% 144/549 [14:25<31:06, 4.61s/it][2025-10-10 13:26:16,597] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:26:19,460] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5904531478881836 |
|
|
[2025-10-10 13:26:21,049] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5887093544006348 |
|
|
[2025-10-10 13:26:22,299] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2505967617034912 |
|
|
[2025-10-10 13:26:23,529] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.229478120803833 |
|
|
[2025-10-10 13:26:23,529] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.685309410095215, 'eval_runtime': 5.7564, 'eval_samples_per_second': 22.062, 'eval_steps_per_second': 11.118, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.79} |
|
|
26% 144/549 [14:38<31:06, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
26% 145/549 [14:41<55:08, 8.19s/it]
27% 146/549 [14:45<46:16, 6.89s/it]
27% 147/549 [14:49<40:05, 5.98s/it]
27% 148/549 [14:53<35:50, 5.36s/it]
27% 149/549 [14:57<32:50, 4.93s/it]
27% 150/549 [15:01<30:40, 4.61s/it][2025-10-10 13:26:52,575] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:26:55,133] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.290666103363037 |
|
|
[2025-10-10 13:26:56,896] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7622182369232178 |
|
|
[2025-10-10 13:26:58,238] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3421812057495117 |
|
|
[2025-10-10 13:26:59,467] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2286624908447266 |
|
|
[2025-10-10 13:26:59,467] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.722118377685547, 'eval_runtime': 5.7291, 'eval_samples_per_second': 22.167, 'eval_steps_per_second': 11.171, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.82} |
|
|
27% 150/549 [15:14<30:40, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
28% 151/549 [15:17<54:17, 8.18s/it]
28% 152/549 [15:21<45:33, 6.89s/it]
28% 153/549 [15:25<39:27, 5.98s/it]
28% 154/549 [15:29<35:14, 5.35s/it]
28% 155/549 [15:33<32:17, 4.92s/it]
28% 156/549 [15:36<28:59, 4.43s/it][2025-10-10 13:27:27,880] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:27:30,445] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2810289859771729 |
|
|
[2025-10-10 13:27:31,672] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2266299724578857 |
|
|
[2025-10-10 13:27:33,392] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7197673320770264 |
|
|
[2025-10-10 13:27:34,889] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4970765113830566 |
|
|
[2025-10-10 13:27:34,889] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.537834167480469, 'eval_runtime': 5.7439, 'eval_samples_per_second': 22.11, 'eval_steps_per_second': 11.142, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.85} |
|
|
28% 156/549 [15:49<28:59, 4.43s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
29% 157/549 [15:53<52:50, 8.09s/it]
29% 158/549 [15:57<44:26, 6.82s/it]
29% 159/549 [16:01<38:37, 5.94s/it]
29% 160/549 [16:04<34:31, 5.33s/it]
29% 161/549 [16:08<31:41, 4.90s/it]
30% 162/549 [16:12<29:40, 4.60s/it][2025-10-10 13:28:03,961] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:28:06,582] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2377257347106934 |
|
|
[2025-10-10 13:28:07,822] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2394213676452637 |
|
|
[2025-10-10 13:28:09,276] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4542319774627686 |
|
|
[2025-10-10 13:28:10,984] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7078406810760498 |
|
|
[2025-10-10 13:28:10,984] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.492016315460205, 'eval_runtime': 5.8041, 'eval_samples_per_second': 21.881, 'eval_steps_per_second': 11.027, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.89} |
|
|
30% 162/549 [16:25<29:40, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
30% 163/549 [16:29<52:53, 8.22s/it]
30% 164/549 [16:33<44:20, 6.91s/it]
30% 165/549 [16:37<38:21, 5.99s/it]
30% 166/549 [16:41<34:12, 5.36s/it]
30% 167/549 [16:44<31:21, 4.92s/it]
31% 168/549 [16:48<29:18, 4.62s/it][2025-10-10 13:28:40,029] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:28:42,579] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2564804553985596 |
|
|
[2025-10-10 13:28:43,831] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2514879703521729 |
|
|
[2025-10-10 13:28:45,090] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2588951587677002 |
|
|
[2025-10-10 13:28:46,762] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6718604564666748 |
|
|
[2025-10-10 13:28:46,762] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.551150321960449, 'eval_runtime': 6.0153, 'eval_samples_per_second': 21.113, 'eval_steps_per_second': 10.639, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.92} |
|
|
31% 168/549 [17:01<29:18, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
31% 169/549 [17:05<52:01, 8.21s/it]
31% 170/549 [17:09<43:37, 6.91s/it]
31% 171/549 [17:13<37:46, 6.00s/it]
31% 172/549 [17:17<33:43, 5.37s/it]
32% 173/549 [17:21<30:52, 4.93s/it]
32% 174/549 [17:24<28:52, 4.62s/it][2025-10-10 13:29:16,068] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:29:18,702] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3219468593597412 |
|
|
[2025-10-10 13:29:20,003] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.301271915435791 |
|
|
[2025-10-10 13:29:21,280] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2761008739471436 |
|
|
[2025-10-10 13:29:22,700] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4196476936340332 |
|
|
[2025-10-10 13:29:22,700] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.445136070251465, 'eval_runtime': 6.3434, 'eval_samples_per_second': 20.021, 'eval_steps_per_second': 10.089, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.95} |
|
|
32% 174/549 [17:37<28:52, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
32% 175/549 [17:41<51:37, 8.28s/it]
32% 176/549 [17:45<43:14, 6.96s/it]
32% 177/549 [17:49<37:23, 6.03s/it]
32% 178/549 [17:53<33:19, 5.39s/it]
33% 179/549 [17:57<30:29, 4.94s/it]
33% 180/549 [18:01<28:28, 4.63s/it][2025-10-10 13:29:52,329] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:29:54,842] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2502317428588867 |
|
|
[2025-10-10 13:29:56,106] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2638275623321533 |
|
|
[2025-10-10 13:29:57,362] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.255711317062378 |
|
|
[2025-10-10 13:29:58,622] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.260202169418335 |
|
|
[2025-10-10 13:29:58,622] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.406078338623047, 'eval_runtime': 6.1725, 'eval_samples_per_second': 20.575, 'eval_steps_per_second': 10.369, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 0.98} |
|
|
33% 180/549 [18:13<28:28, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
33% 181/549 [18:17<49:55, 8.14s/it]
33% 182/549 [18:21<41:59, 6.86s/it]
33% 183/549 [18:25<36:37, 6.00s/it][2025-10-10 13:30:16,541] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-183 |
|
|
34% 184/549 [18:35<43:14, 7.11s/it]
34% 185/549 [18:38<37:16, 6.14s/it]
34% 186/549 [18:42<33:03, 5.46s/it][2025-10-10 13:30:33,999] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:30:37,120] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5404832363128662 |
|
|
[2025-10-10 13:30:38,958] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8376410007476807 |
|
|
[2025-10-10 13:30:40,250] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2913427352905273 |
|
|
[2025-10-10 13:30:41,534] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2837259769439697 |
|
|
[2025-10-10 13:30:41,534] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.442931175231934, 'eval_runtime': 5.8182, 'eval_samples_per_second': 21.828, 'eval_steps_per_second': 11.0, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.02} |
|
|
34% 186/549 [18:56<33:03, 5.46s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
34% 187/549 [19:00<54:17, 9.00s/it]
34% 188/549 [19:03<44:58, 7.47s/it]
34% 189/549 [19:07<38:27, 6.41s/it]
35% 190/549 [19:11<33:56, 5.67s/it]
35% 191/549 [19:15<30:43, 5.15s/it]
35% 192/549 [19:19<28:25, 4.78s/it][2025-10-10 13:31:10,871] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:31:13,498] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.309769630432129 |
|
|
[2025-10-10 13:31:15,350] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8515050411224365 |
|
|
[2025-10-10 13:31:16,722] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3719418048858643 |
|
|
[2025-10-10 13:31:18,013] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2908899784088135 |
|
|
[2025-10-10 13:31:18,013] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.410170555114746, 'eval_runtime': 5.8179, 'eval_samples_per_second': 21.829, 'eval_steps_per_second': 11.001, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.05} |
|
|
35% 192/549 [19:32<28:25, 4.78s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
35% 193/549 [19:36<49:47, 8.39s/it]
35% 194/549 [19:40<41:34, 7.03s/it]
36% 195/549 [19:44<35:50, 6.07s/it]
36% 196/549 [19:48<31:51, 5.42s/it]
36% 197/549 [19:51<29:03, 4.95s/it]
36% 198/549 [19:55<27:04, 4.63s/it][2025-10-10 13:31:47,022] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:31:49,605] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2902817726135254 |
|
|
[2025-10-10 13:31:51,079] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4735219478607178 |
|
|
[2025-10-10 13:31:52,847] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7679553031921387 |
|
|
[2025-10-10 13:31:54,139] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2912085056304932 |
|
|
[2025-10-10 13:31:54,139] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.381246566772461, 'eval_runtime': 5.8109, 'eval_samples_per_second': 21.856, 'eval_steps_per_second': 11.014, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.08} |
|
|
36% 198/549 [20:08<27:04, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
36% 199/549 [20:12<48:18, 8.28s/it]
36% 200/549 [20:16<40:27, 6.96s/it]
{'loss': 5.9664, 'grad_norm': 0.3477332293987274, 'learning_rate': 0.001787100490808991, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'tokens_per_second_per_gpu': 21.15, 'epoch': 1.09} |
|
|
36% 200/549 [20:16<40:27, 6.96s/it]
37% 201/549 [20:20<34:59, 6.03s/it]
37% 202/549 [20:24<31:12, 5.40s/it]
37% 203/549 [20:28<28:34, 4.95s/it]
37% 204/549 [20:32<26:41, 4.64s/it][2025-10-10 13:32:23,315] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:32:25,913] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2995872497558594 |
|
|
[2025-10-10 13:32:27,238] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3243978023529053 |
|
|
[2025-10-10 13:32:29,078] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.840087652206421 |
|
|
[2025-10-10 13:32:30,448] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.369553804397583 |
|
|
[2025-10-10 13:32:30,448] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.376354217529297, 'eval_runtime': 5.788, 'eval_samples_per_second': 21.942, 'eval_steps_per_second': 11.057, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.11} |
|
|
37% 204/549 [20:45<26:41, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
37% 205/549 [20:48<47:32, 8.29s/it]
38% 206/549 [20:52<39:47, 6.96s/it]
38% 207/549 [20:56<34:23, 6.03s/it]
38% 208/549 [21:00<30:40, 5.40s/it]
38% 209/549 [21:04<28:02, 4.95s/it]
38% 210/549 [21:08<26:09, 4.63s/it][2025-10-10 13:32:59,550] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:33:02,198] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3173751831054688 |
|
|
[2025-10-10 13:33:03,523] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3249952793121338 |
|
|
[2025-10-10 13:33:05,165] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.641808271408081 |
|
|
[2025-10-10 13:33:06,800] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6340107917785645 |
|
|
[2025-10-10 13:33:06,800] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.320211410522461, 'eval_runtime': 5.8867, 'eval_samples_per_second': 21.574, 'eval_steps_per_second': 10.872, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.15} |
|
|
38% 210/549 [21:21<26:09, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
38% 211/549 [21:25<47:01, 8.35s/it]
39% 212/549 [21:29<39:19, 7.00s/it]
39% 213/549 [21:33<33:58, 6.07s/it]
39% 214/549 [21:37<30:13, 5.41s/it]
39% 215/549 [21:40<27:37, 4.96s/it]
39% 216/549 [21:44<25:46, 4.65s/it][2025-10-10 13:33:36,019] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:33:38,696] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3145294189453125 |
|
|
[2025-10-10 13:33:40,014] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3176522254943848 |
|
|
[2025-10-10 13:33:41,479] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.464585781097412 |
|
|
[2025-10-10 13:33:43,345] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.865448236465454 |
|
|
[2025-10-10 13:33:43,345] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.3533477783203125, 'eval_runtime': 5.8744, 'eval_samples_per_second': 21.619, 'eval_steps_per_second': 10.895, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.18} |
|
|
39% 216/549 [21:58<25:46, 4.65s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
40% 217/549 [22:01<46:22, 8.38s/it]
40% 218/549 [22:05<38:46, 7.03s/it]
40% 219/549 [22:09<33:27, 6.08s/it]
40% 220/549 [22:13<29:44, 5.43s/it]
40% 221/549 [22:17<27:09, 4.97s/it]
40% 222/549 [22:21<25:19, 4.65s/it][2025-10-10 13:34:12,552] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:34:15,141] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2889900207519531 |
|
|
[2025-10-10 13:34:16,438] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2969541549682617 |
|
|
[2025-10-10 13:34:17,729] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2903382778167725 |
|
|
[2025-10-10 13:34:19,533] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8035340309143066 |
|
|
[2025-10-10 13:34:19,533] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.282848358154297, 'eval_runtime': 6.023, 'eval_samples_per_second': 21.086, 'eval_steps_per_second': 10.626, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.21} |
|
|
40% 222/549 [22:34<25:19, 4.65s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
41% 223/549 [22:38<45:08, 8.31s/it]
41% 224/549 [22:42<37:45, 6.97s/it]
41% 225/549 [22:45<32:36, 6.04s/it]
41% 226/549 [22:49<29:01, 5.39s/it]
41% 227/549 [22:53<26:31, 4.94s/it]
42% 228/549 [22:57<24:46, 4.63s/it][2025-10-10 13:34:48,798] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:34:51,398] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.286377191543579 |
|
|
[2025-10-10 13:34:52,681] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.282608985900879 |
|
|
[2025-10-10 13:34:53,962] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2809438705444336 |
|
|
[2025-10-10 13:34:55,426] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4635910987854004 |
|
|
[2025-10-10 13:34:55,427] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.275903701782227, 'eval_runtime': 6.2819, 'eval_samples_per_second': 20.217, 'eval_steps_per_second': 10.188, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.25} |
|
|
42% 228/549 [23:10<24:46, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
42% 229/549 [23:14<44:07, 8.27s/it]
42% 230/549 [23:18<36:56, 6.95s/it]
42% 231/549 [23:22<31:57, 6.03s/it]
42% 232/549 [23:26<28:28, 5.39s/it]
42% 233/549 [23:30<26:07, 4.96s/it]
43% 234/549 [23:33<24:24, 4.65s/it][2025-10-10 13:35:25,089] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:35:27,705] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2982923984527588 |
|
|
[2025-10-10 13:35:28,989] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2843728065490723 |
|
|
[2025-10-10 13:35:30,318] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3283510208129883 |
|
|
[2025-10-10 13:35:31,608] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2893931865692139 |
|
|
[2025-10-10 13:35:31,608] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.25028133392334, 'eval_runtime': 6.4496, 'eval_samples_per_second': 19.691, 'eval_steps_per_second': 9.923, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.28} |
|
|
43% 234/549 [23:46<24:24, 4.65s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
43% 235/549 [23:50<43:27, 8.30s/it]
43% 236/549 [23:54<36:23, 6.98s/it]
43% 237/549 [23:58<31:26, 6.05s/it]
43% 238/549 [24:02<27:57, 5.40s/it]
44% 239/549 [24:06<25:35, 4.95s/it]
44% 240/549 [24:10<23:53, 4.64s/it][2025-10-10 13:36:01,376] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:36:03,949] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2798094749450684 |
|
|
[2025-10-10 13:36:05,235] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2851686477661133 |
|
|
[2025-10-10 13:36:06,521] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2858819961547852 |
|
|
[2025-10-10 13:36:07,798] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2766339778900146 |
|
|
[2025-10-10 13:36:07,798] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.200336456298828, 'eval_runtime': 6.3809, 'eval_samples_per_second': 19.903, 'eval_steps_per_second': 10.03, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.31} |
|
|
44% 240/549 [24:23<23:53, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
44% 241/549 [24:26<41:23, 8.06s/it]
44% 242/549 [24:30<34:49, 6.81s/it]
44% 243/549 [24:34<30:13, 5.93s/it]
44% 244/549 [24:37<27:00, 5.31s/it]
45% 245/549 [24:41<24:46, 4.89s/it]
45% 246/549 [24:45<23:12, 4.59s/it][2025-10-10 13:36:36,871] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:36:39,586] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.389134168624878 |
|
|
[2025-10-10 13:36:40,884] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2981505393981934 |
|
|
[2025-10-10 13:36:42,175] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2911362648010254 |
|
|
[2025-10-10 13:36:43,457] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2809994220733643 |
|
|
[2025-10-10 13:36:43,457] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.178483009338379, 'eval_runtime': 5.8445, 'eval_samples_per_second': 21.73, 'eval_steps_per_second': 10.95, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.34} |
|
|
45% 246/549 [24:58<23:12, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
45% 247/549 [25:01<40:46, 8.10s/it]
45% 248/549 [25:05<34:15, 6.83s/it]
45% 249/549 [25:09<29:42, 5.94s/it]
46% 250/549 [25:13<26:32, 5.33s/it]
46% 251/549 [25:17<24:19, 4.90s/it]
46% 252/549 [25:21<22:45, 4.60s/it][2025-10-10 13:37:12,570] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:37:15,140] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2792997360229492 |
|
|
[2025-10-10 13:37:16,426] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2853057384490967 |
|
|
[2025-10-10 13:37:17,683] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2565968036651611 |
|
|
[2025-10-10 13:37:18,950] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2670745849609375 |
|
|
[2025-10-10 13:37:18,950] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A
|
|
|
[A{'eval_loss': 5.2125749588012695, 'eval_runtime': 5.8149, 'eval_samples_per_second': 21.84, 'eval_steps_per_second': 11.006, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.38} |
|
|
46% 252/549 [25:33<22:45, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A |
|
|
[A
46% 253/549 [25:37<39:38, 8.03s/it]
46% 254/549 [25:41<33:19, 6.78s/it]
46% 255/549 [25:45<28:58, 5.91s/it]
47% 256/549 [25:49<25:54, 5.31s/it]
47% 257/549 [25:52<23:46, 4.88s/it]
47% 258/549 [25:56<22:17, 4.59s/it][2025-10-10 13:37:48,074] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:37:51,049] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2944858074188232 |
|
|
[2025-10-10 13:37:52,344] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2948596477508545 |
|
|
[2025-10-10 13:37:53,649] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3044750690460205 |
|
|
[2025-10-10 13:37:54,926] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2766392230987549 |
|
|
[2025-10-10 13:37:54,926] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 5.1886444091796875, 'eval_runtime': 5.8293, 'eval_samples_per_second': 21.787, 'eval_steps_per_second': 10.979, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.41} |
|
|
47% 258/549 [26:09<22:17, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
47% 259/549 [26:13<39:32, 8.18s/it]
47% 260/549 [26:17<33:09, 6.88s/it]
48% 261/549 [26:21<28:46, 5.99s/it]
48% 262/549 [26:25<25:40, 5.37s/it]
48% 263/549 [26:29<23:29, 4.93s/it]
48% 264/549 [26:32<21:59, 4.63s/it][2025-10-10 13:38:24,141] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:38:27,322] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2961266040802002 |
|
|
[2025-10-10 13:38:28,634] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.311255693435669 |
|
|
[2025-10-10 13:38:29,937] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3029818534851074 |
|
|
[2025-10-10 13:38:31,246] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3082985877990723 |
|
|
[2025-10-10 13:38:31,246] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 5.133144378662109, 'eval_runtime': 5.8367, 'eval_samples_per_second': 21.759, 'eval_steps_per_second': 10.965, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.44} |
|
|
48% 264/549 [26:45<21:59, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
48% 265/549 [26:49<39:11, 8.28s/it]
48% 266/549 [26:53<32:49, 6.96s/it]
49% 267/549 [26:57<28:22, 6.04s/it]
49% 268/549 [27:00<24:24, 5.21s/it]
49% 269/549 [27:04<22:28, 4.82s/it]
49% 270/549 [27:08<21:06, 4.54s/it][2025-10-10 13:38:59,782] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:39:03,012] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7229037284851074 |
|
|
[2025-10-10 13:39:04,323] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3110601902008057 |
|
|
[2025-10-10 13:39:05,606] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2823243141174316 |
|
|
[2025-10-10 13:39:06,901] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2952723503112793 |
|
|
[2025-10-10 13:39:06,902] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.137051582336426, 'eval_runtime': 5.7827, 'eval_samples_per_second': 21.962, 'eval_steps_per_second': 11.068, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.48} |
|
|
49% 270/549 [27:21<21:06, 4.54s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
49% 271/549 [27:25<38:02, 8.21s/it]
50% 272/549 [27:29<31:53, 6.91s/it]
50% 273/549 [27:33<27:36, 6.00s/it]
50% 274/549 [27:37<24:36, 5.37s/it]
50% 275/549 [27:40<22:31, 4.93s/it]
50% 276/549 [27:44<21:02, 4.62s/it][2025-10-10 13:39:36,016] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:39:39,209] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8441014289855957 |
|
|
[2025-10-10 13:39:40,652] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4425406455993652 |
|
|
[2025-10-10 13:39:41,958] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.305793285369873 |
|
|
[2025-10-10 13:39:43,277] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3181705474853516 |
|
|
[2025-10-10 13:39:43,277] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.066106796264648, 'eval_runtime': 5.8019, 'eval_samples_per_second': 21.89, 'eval_steps_per_second': 11.031, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.51} |
|
|
50% 276/549 [27:57<21:02, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
50% 277/549 [28:01<37:40, 8.31s/it]
51% 278/549 [28:05<31:29, 6.97s/it]
51% 279/549 [28:09<27:11, 6.04s/it]
51% 280/549 [28:13<24:11, 5.39s/it]
51% 281/549 [28:16<21:14, 4.76s/it]
51% 282/549 [28:20<20:01, 4.50s/it][2025-10-10 13:40:11,711] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:40:14,334] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3228764533996582 |
|
|
[2025-10-10 13:40:16,191] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.856102705001831 |
|
|
[2025-10-10 13:40:17,517] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3263425827026367 |
|
|
[2025-10-10 13:40:18,801] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2833828926086426 |
|
|
[2025-10-10 13:40:18,801] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.1207499504089355, 'eval_runtime': 5.7763, 'eval_samples_per_second': 21.987, 'eval_steps_per_second': 11.08, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.54} |
|
|
51% 282/549 [28:33<20:01, 4.50s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
52% 283/549 [28:37<36:15, 8.18s/it]
52% 284/549 [28:41<30:24, 6.88s/it]
52% 285/549 [28:45<26:18, 5.98s/it]
52% 286/549 [28:48<22:40, 5.17s/it]
52% 287/549 [28:52<20:56, 4.80s/it]
52% 288/549 [28:56<19:41, 4.53s/it][2025-10-10 13:40:47,311] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:40:49,884] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2716014385223389 |
|
|
[2025-10-10 13:40:51,187] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.302952527999878 |
|
|
[2025-10-10 13:40:52,997] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8093366622924805 |
|
|
[2025-10-10 13:40:54,396] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.398949146270752 |
|
|
[2025-10-10 13:40:54,396] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 5.056352615356445, 'eval_runtime': 5.778, 'eval_samples_per_second': 21.98, 'eval_steps_per_second': 11.077, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.57} |
|
|
52% 288/549 [29:09<19:41, 4.53s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
53% 289/549 [29:12<35:29, 8.19s/it]
53% 290/549 [29:16<29:45, 6.89s/it]
53% 291/549 [29:20<25:44, 5.99s/it]
53% 292/549 [29:24<22:59, 5.37s/it]
53% 293/549 [29:28<21:02, 4.93s/it]
54% 294/549 [29:32<19:37, 4.62s/it][2025-10-10 13:41:23,512] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:41:26,092] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2780861854553223 |
|
|
[2025-10-10 13:41:27,347] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2545959949493408 |
|
|
[2025-10-10 13:41:28,868] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.520756721496582 |
|
|
[2025-10-10 13:41:30,544] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6756396293640137 |
|
|
[2025-10-10 13:41:30,544] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.123226165771484, 'eval_runtime': 5.8064, 'eval_samples_per_second': 21.873, 'eval_steps_per_second': 11.022, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.61} |
|
|
54% 294/549 [29:45<19:37, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
54% 295/549 [29:49<34:56, 8.25s/it]
54% 296/549 [29:52<29:14, 6.94s/it]
54% 297/549 [29:56<25:16, 6.02s/it]
54% 298/549 [30:00<22:30, 5.38s/it]
54% 299/549 [30:04<20:34, 4.94s/it]
55% 300/549 [30:08<19:11, 4.62s/it]
{'loss': 5.3381, 'grad_norm': 1.279155969619751, 'learning_rate': 0.0012049450205472586, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'tokens_per_second_per_gpu': 62.82, 'epoch': 1.64} |
|
|
55% 300/549 [30:08<19:11, 4.62s/it][2025-10-10 13:41:59,671] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:42:02,245] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2725634574890137 |
|
|
[2025-10-10 13:42:03,508] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2624311447143555 |
|
|
[2025-10-10 13:42:04,784] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2758598327636719 |
|
|
[2025-10-10 13:42:06,602] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8172187805175781 |
|
|
[2025-10-10 13:42:06,602] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.03715705871582, 'eval_runtime': 5.8695, 'eval_samples_per_second': 21.637, 'eval_steps_per_second': 10.904, 'memory/max_active (GiB)': 2.67, 'memory/max_allocated (GiB)': 2.67, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.64} |
|
|
55% 300/549 [30:21<19:11, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
55% 301/549 [30:25<34:02, 8.23s/it]
55% 302/549 [30:29<28:29, 6.92s/it]
55% 303/549 [30:32<24:36, 6.00s/it]
55% 304/549 [30:36<21:55, 5.37s/it]
56% 305/549 [30:40<20:01, 4.93s/it]
56% 306/549 [30:44<18:41, 4.62s/it][2025-10-10 13:42:35,718] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:42:38,317] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2914865016937256 |
|
|
[2025-10-10 13:42:39,593] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2758712768554688 |
|
|
[2025-10-10 13:42:40,864] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2706935405731201 |
|
|
[2025-10-10 13:42:42,433] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5694029331207275 |
|
|
[2025-10-10 13:42:42,434] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.112299919128418, 'eval_runtime': 6.2942, 'eval_samples_per_second': 20.177, 'eval_steps_per_second': 10.168, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.67} |
|
|
56% 306/549 [30:57<18:41, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
56% 307/549 [31:01<33:26, 8.29s/it]
56% 308/549 [31:04<27:15, 6.79s/it]
56% 309/549 [31:08<23:38, 5.91s/it]
56% 310/549 [31:12<21:07, 5.30s/it]
57% 311/549 [31:16<19:24, 4.89s/it]
57% 312/549 [31:20<18:10, 4.60s/it][2025-10-10 13:43:11,471] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:43:14,124] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3028910160064697 |
|
|
[2025-10-10 13:43:15,424] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2999858856201172 |
|
|
[2025-10-10 13:43:16,730] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3056349754333496 |
|
|
[2025-10-10 13:43:18,045] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.314713954925537 |
|
|
[2025-10-10 13:43:18,045] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.025712013244629, 'eval_runtime': 6.3277, 'eval_samples_per_second': 20.071, 'eval_steps_per_second': 10.114, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.7} |
|
|
57% 312/549 [31:33<18:10, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
57% 313/549 [31:37<32:26, 8.25s/it]
57% 314/549 [31:40<27:11, 6.94s/it]
57% 315/549 [31:44<23:29, 6.02s/it]
58% 316/549 [31:48<20:54, 5.38s/it]
58% 317/549 [31:52<19:06, 4.94s/it]
58% 318/549 [31:56<17:49, 4.63s/it][2025-10-10 13:43:47,716] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:43:50,271] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2804148197174072 |
|
|
[2025-10-10 13:43:51,584] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.312223196029663 |
|
|
[2025-10-10 13:43:52,861] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2768139839172363 |
|
|
[2025-10-10 13:43:54,115] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.253570556640625 |
|
|
[2025-10-10 13:43:54,115] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.83it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 5.036094665527344, 'eval_runtime': 5.9136, 'eval_samples_per_second': 21.476, 'eval_steps_per_second': 10.822, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.74} |
|
|
58% 318/549 [32:08<17:49, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
58% 319/549 [32:12<31:00, 8.09s/it]
58% 320/549 [32:16<26:03, 6.83s/it]
58% 321/549 [32:20<22:34, 5.94s/it]
59% 322/549 [32:24<20:09, 5.33s/it]
59% 323/549 [32:28<18:27, 4.90s/it]
59% 324/549 [32:32<17:14, 4.60s/it][2025-10-10 13:44:23,324] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:44:25,881] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2637131214141846 |
|
|
[2025-10-10 13:44:27,160] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2784416675567627 |
|
|
[2025-10-10 13:44:28,425] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2644331455230713 |
|
|
[2025-10-10 13:44:29,691] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2658593654632568 |
|
|
[2025-10-10 13:44:29,691] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.993957996368408, 'eval_runtime': 5.7744, 'eval_samples_per_second': 21.994, 'eval_steps_per_second': 11.083, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.77} |
|
|
59% 324/549 [32:44<17:14, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
59% 325/549 [32:48<29:56, 8.02s/it]
59% 326/549 [32:52<25:09, 6.77s/it]
60% 327/549 [32:55<21:50, 5.90s/it]
60% 328/549 [32:59<19:30, 5.30s/it]
60% 329/549 [33:03<17:53, 4.88s/it]
60% 330/549 [33:07<16:44, 4.59s/it][2025-10-10 13:44:58,744] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:45:01,309] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2841627597808838 |
|
|
[2025-10-10 13:45:02,598] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2886757850646973 |
|
|
[2025-10-10 13:45:03,860] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2614099979400635 |
|
|
[2025-10-10 13:45:05,121] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2612431049346924 |
|
|
[2025-10-10 13:45:05,121] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A
|
|
|
[A{'eval_loss': 4.980707168579102, 'eval_runtime': 5.825, 'eval_samples_per_second': 21.802, 'eval_steps_per_second': 10.987, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.8} |
|
|
60% 330/549 [33:19<16:44, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A |
|
|
[A
60% 331/549 [33:23<29:09, 8.03s/it]
60% 332/549 [33:27<24:29, 6.77s/it]
61% 333/549 [33:31<21:16, 5.91s/it]
61% 334/549 [33:35<19:00, 5.30s/it]
61% 335/549 [33:39<17:24, 4.88s/it]
61% 336/549 [33:43<16:17, 4.59s/it][2025-10-10 13:45:34,234] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:45:37,199] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.272235631942749 |
|
|
[2025-10-10 13:45:38,489] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2894883155822754 |
|
|
[2025-10-10 13:45:39,750] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2607002258300781 |
|
|
[2025-10-10 13:45:41,007] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.256944179534912 |
|
|
[2025-10-10 13:45:41,008] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 4.9550395011901855, 'eval_runtime': 5.8454, 'eval_samples_per_second': 21.727, 'eval_steps_per_second': 10.949, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.84} |
|
|
61% 336/549 [33:55<16:17, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
61% 337/549 [33:59<28:48, 8.16s/it]
62% 338/549 [34:02<23:30, 6.69s/it]
62% 339/549 [34:06<20:27, 5.85s/it]
62% 340/549 [34:09<17:40, 5.08s/it]
62% 341/549 [34:13<16:22, 4.72s/it]
62% 342/549 [34:17<15:27, 4.48s/it][2025-10-10 13:46:08,957] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:46:12,140] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.782719373703003 |
|
|
[2025-10-10 13:46:13,456] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3164212703704834 |
|
|
[2025-10-10 13:46:14,727] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2703287601470947 |
|
|
[2025-10-10 13:46:15,997] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2702345848083496 |
|
|
[2025-10-10 13:46:15,998] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.987721920013428, 'eval_runtime': 5.7754, 'eval_samples_per_second': 21.99, 'eval_steps_per_second': 11.082, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.87} |
|
|
62% 342/549 [34:30<15:27, 4.48s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
62% 343/549 [34:34<27:56, 8.14s/it]
63% 344/549 [34:38<23:25, 6.86s/it]
63% 345/549 [34:42<20:16, 5.96s/it]
63% 346/549 [34:46<18:04, 5.34s/it]
63% 347/549 [34:50<16:31, 4.91s/it]
63% 348/549 [34:53<15:24, 4.60s/it][2025-10-10 13:46:45,044] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:46:48,119] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7470786571502686 |
|
|
[2025-10-10 13:46:49,670] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5505836009979248 |
|
|
[2025-10-10 13:46:50,995] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3248536586761475 |
|
|
[2025-10-10 13:46:52,300] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3044347763061523 |
|
|
[2025-10-10 13:46:52,300] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.86it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.946439743041992, 'eval_runtime': 5.7767, 'eval_samples_per_second': 21.985, 'eval_steps_per_second': 11.079, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.9} |
|
|
63% 348/549 [35:06<15:24, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
64% 349/549 [35:10<27:36, 8.28s/it]
64% 350/549 [35:14<23:02, 6.95s/it]
64% 351/549 [35:18<19:51, 6.02s/it]
64% 352/549 [35:22<17:39, 5.38s/it]
64% 353/549 [35:26<16:06, 4.93s/it]
64% 354/549 [35:30<15:01, 4.62s/it][2025-10-10 13:47:21,279] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:47:23,907] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3633801937103271 |
|
|
[2025-10-10 13:47:25,699] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7914912700653076 |
|
|
[2025-10-10 13:47:26,959] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.260002613067627 |
|
|
[2025-10-10 13:47:28,226] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2668824195861816 |
|
|
[2025-10-10 13:47:28,226] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.9122467041015625, 'eval_runtime': 5.7915, 'eval_samples_per_second': 21.929, 'eval_steps_per_second': 11.051, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.93} |
|
|
64% 354/549 [35:42<15:01, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
65% 355/549 [35:46<26:34, 8.22s/it]
65% 356/549 [35:50<22:14, 6.91s/it]
65% 357/549 [35:54<19:12, 6.00s/it]
65% 358/549 [35:58<17:07, 5.38s/it]
65% 359/549 [36:02<15:39, 4.94s/it]
66% 360/549 [36:06<14:35, 4.63s/it][2025-10-10 13:47:57,394] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:47:59,913] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2456605434417725 |
|
|
[2025-10-10 13:48:01,644] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7309658527374268 |
|
|
[2025-10-10 13:48:03,128] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.48356032371521 |
|
|
[2025-10-10 13:48:04,398] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.270266056060791 |
|
|
[2025-10-10 13:48:04,398] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.941189289093018, 'eval_runtime': 5.7826, 'eval_samples_per_second': 21.962, 'eval_steps_per_second': 11.068, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 1.97} |
|
|
66% 360/549 [36:19<14:35, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
66% 361/549 [36:22<25:49, 8.24s/it]
66% 362/549 [36:26<21:35, 6.93s/it]
66% 363/549 [36:30<18:36, 6.00s/it]
66% 364/549 [36:34<16:33, 5.37s/it]
66% 365/549 [36:38<15:06, 4.93s/it]
67% 366/549 [36:42<14:09, 4.64s/it][2025-10-10 13:48:33,538] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:48:36,115] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2747948169708252 |
|
|
[2025-10-10 13:48:37,542] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4260179996490479 |
|
|
[2025-10-10 13:48:39,314] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7726826667785645 |
|
|
[2025-10-10 13:48:40,603] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2883005142211914 |
|
|
[2025-10-10 13:48:40,603] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.909399509429932, 'eval_runtime': 5.7674, 'eval_samples_per_second': 22.021, 'eval_steps_per_second': 11.097, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.0} |
|
|
67% 366/549 [36:55<14:09, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A[2025-10-10 13:48:46,376] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-366 |
|
|
67% 367/549 [37:06<31:54, 10.52s/it]
67% 368/549 [37:10<25:42, 8.52s/it]
67% 369/549 [37:13<20:50, 6.95s/it]
67% 370/549 [37:17<17:59, 6.03s/it]
68% 371/549 [37:21<16:00, 5.39s/it]
68% 372/549 [37:25<14:37, 4.96s/it][2025-10-10 13:49:16,639] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:49:19,397] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2493188381195068 |
|
|
[2025-10-10 13:49:20,670] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2731273174285889 |
|
|
[2025-10-10 13:49:21,916] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2452585697174072 |
|
|
[2025-10-10 13:49:23,190] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2739589214324951 |
|
|
[2025-10-10 13:49:23,190] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.83it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.31it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A
|
|
|
[A{'eval_loss': 4.937836647033691, 'eval_runtime': 5.8269, 'eval_samples_per_second': 21.795, 'eval_steps_per_second': 10.984, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.03} |
|
|
68% 372/549 [37:37<14:37, 4.96s/it] |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A |
|
|
[A
68% 373/549 [37:41<24:29, 8.35s/it]
68% 374/549 [37:45<20:27, 7.01s/it]
68% 375/549 [37:49<17:38, 6.08s/it]
68% 376/549 [37:53<15:38, 5.43s/it]
69% 377/549 [37:57<14:13, 4.96s/it]
69% 378/549 [38:01<13:13, 4.64s/it][2025-10-10 13:49:52,383] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:49:55,178] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2608587741851807 |
|
|
[2025-10-10 13:49:56,450] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2715425491333008 |
|
|
[2025-10-10 13:49:57,715] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2646293640136719 |
|
|
[2025-10-10 13:49:59,010] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2951922416687012 |
|
|
[2025-10-10 13:49:59,010] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A
|
|
|
[A{'eval_loss': 4.906888484954834, 'eval_runtime': 5.8215, 'eval_samples_per_second': 21.816, 'eval_steps_per_second': 10.994, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.07} |
|
|
69% 378/549 [38:13<13:13, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.11it/s][A |
|
|
[A
69% 379/549 [38:17<23:03, 8.14s/it]
69% 380/549 [38:21<19:17, 6.85s/it]
69% 381/549 [38:25<16:40, 5.95s/it]
70% 382/549 [38:29<14:50, 5.33s/it]
70% 383/549 [38:32<13:32, 4.90s/it]
70% 384/549 [38:36<12:39, 4.60s/it][2025-10-10 13:50:28,063] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:50:31,217] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2841885089874268 |
|
|
[2025-10-10 13:50:32,493] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2753474712371826 |
|
|
[2025-10-10 13:50:33,766] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2733006477355957 |
|
|
[2025-10-10 13:50:35,051] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2847979068756104 |
|
|
[2025-10-10 13:50:35,051] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.906486988067627, 'eval_runtime': 5.7879, 'eval_samples_per_second': 21.942, 'eval_steps_per_second': 11.057, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.1} |
|
|
70% 384/549 [38:49<12:39, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
70% 385/549 [38:53<22:27, 8.22s/it]
70% 386/549 [38:57<18:46, 6.91s/it]
70% 387/549 [39:01<16:14, 6.01s/it]
71% 388/549 [39:05<14:26, 5.38s/it]
71% 389/549 [39:09<13:10, 4.94s/it]
71% 390/549 [39:13<12:16, 4.63s/it][2025-10-10 13:51:04,231] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:51:07,499] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5531108379364014 |
|
|
[2025-10-10 13:51:08,778] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2783293724060059 |
|
|
[2025-10-10 13:51:10,059] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2812681198120117 |
|
|
[2025-10-10 13:51:11,349] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.28898286819458 |
|
|
[2025-10-10 13:51:11,349] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.916737079620361, 'eval_runtime': 5.7701, 'eval_samples_per_second': 22.01, 'eval_steps_per_second': 11.092, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.13} |
|
|
71% 390/549 [39:25<12:16, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
71% 391/549 [39:29<21:45, 8.27s/it]
71% 392/549 [39:33<18:09, 6.94s/it]
72% 393/549 [39:37<15:38, 6.01s/it]
72% 394/549 [39:41<13:52, 5.37s/it]
72% 395/549 [39:45<12:37, 4.92s/it]
72% 396/549 [39:49<11:45, 4.61s/it][2025-10-10 13:51:40,295] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:51:43,495] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8327646255493164 |
|
|
[2025-10-10 13:51:44,795] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2993097305297852 |
|
|
[2025-10-10 13:51:46,167] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.372373342514038 |
|
|
[2025-10-10 13:51:47,442] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2747223377227783 |
|
|
[2025-10-10 13:51:47,442] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.890364170074463, 'eval_runtime': 5.7714, 'eval_samples_per_second': 22.005, 'eval_steps_per_second': 11.089, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.16} |
|
|
72% 396/549 [40:02<11:45, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
72% 397/549 [40:05<20:55, 8.26s/it]
72% 398/549 [40:09<17:27, 6.94s/it]
73% 399/549 [40:13<15:02, 6.02s/it]
73% 400/549 [40:17<13:22, 5.38s/it]
{'loss': 4.9866, 'grad_norm': 0.22382044792175293, 'learning_rate': 0.0005264834558836156, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'tokens_per_second_per_gpu': 41.85, 'epoch': 2.19} |
|
|
73% 400/549 [40:17<13:22, 5.38s/it]
73% 401/549 [40:21<12:11, 4.94s/it]
73% 402/549 [40:25<11:21, 4.63s/it][2025-10-10 13:52:16,526] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:52:19,410] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5968072414398193 |
|
|
[2025-10-10 13:52:21,081] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.670398473739624 |
|
|
[2025-10-10 13:52:22,353] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.272465705871582 |
|
|
[2025-10-10 13:52:23,640] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.285794973373413 |
|
|
[2025-10-10 13:52:23,640] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.866282939910889, 'eval_runtime': 5.7799, 'eval_samples_per_second': 21.973, 'eval_steps_per_second': 11.073, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.2} |
|
|
73% 402/549 [40:38<11:21, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
73% 403/549 [40:42<20:07, 8.27s/it]
74% 404/549 [40:45<16:47, 6.95s/it]
74% 405/549 [40:49<14:28, 6.03s/it]
74% 406/549 [40:53<12:52, 5.40s/it]
74% 407/549 [40:57<11:43, 4.95s/it]
74% 408/549 [41:01<10:53, 4.63s/it][2025-10-10 13:52:52,766] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:52:55,394] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.33892822265625 |
|
|
[2025-10-10 13:52:57,235] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.840940237045288 |
|
|
[2025-10-10 13:52:58,502] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2660002708435059 |
|
|
[2025-10-10 13:52:59,779] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2767868041992188 |
|
|
[2025-10-10 13:52:59,779] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.8776326179504395, 'eval_runtime': 5.754, 'eval_samples_per_second': 22.071, 'eval_steps_per_second': 11.123, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.23} |
|
|
74% 408/549 [41:14<10:53, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
74% 409/549 [41:18<19:13, 8.24s/it]
75% 410/549 [41:22<16:02, 6.93s/it]
75% 411/549 [41:25<13:48, 6.01s/it]
75% 412/549 [41:29<12:16, 5.38s/it]
75% 413/549 [41:33<11:11, 4.94s/it]
75% 414/549 [41:37<10:24, 4.63s/it][2025-10-10 13:53:28,861] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:53:31,446] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2704637050628662 |
|
|
[2025-10-10 13:53:32,999] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.552428960800171 |
|
|
[2025-10-10 13:53:34,672] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6721296310424805 |
|
|
[2025-10-10 13:53:35,947] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.274794101715088 |
|
|
[2025-10-10 13:53:35,947] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.875138282775879, 'eval_runtime': 5.7864, 'eval_samples_per_second': 21.948, 'eval_steps_per_second': 11.06, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.26} |
|
|
75% 414/549 [41:50<10:24, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
76% 415/549 [41:54<18:28, 8.27s/it]
76% 416/549 [41:58<15:24, 6.95s/it]
76% 417/549 [42:02<13:15, 6.03s/it]
76% 418/549 [42:06<11:46, 5.39s/it]
76% 419/549 [42:10<10:43, 4.95s/it]
77% 420/549 [42:13<09:58, 4.64s/it][2025-10-10 13:54:05,114] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:54:07,798] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3199918270111084 |
|
|
[2025-10-10 13:54:09,209] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4115185737609863 |
|
|
[2025-10-10 13:54:11,055] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8450191020965576 |
|
|
[2025-10-10 13:54:12,332] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2769029140472412 |
|
|
[2025-10-10 13:54:12,332] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.852082252502441, 'eval_runtime': 5.8261, 'eval_samples_per_second': 21.799, 'eval_steps_per_second': 10.985, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.3} |
|
|
77% 420/549 [42:27<09:58, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
77% 421/549 [42:30<17:45, 8.32s/it]
77% 422/549 [42:34<14:46, 6.98s/it]
77% 423/549 [42:38<12:41, 6.04s/it]
77% 424/549 [42:42<11:15, 5.40s/it]
77% 425/549 [42:46<10:13, 4.95s/it]
78% 426/549 [42:50<09:30, 4.63s/it][2025-10-10 13:54:41,441] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:54:44,036] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2831828594207764 |
|
|
[2025-10-10 13:54:45,310] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2733635902404785 |
|
|
[2025-10-10 13:54:47,008] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.697882890701294 |
|
|
[2025-10-10 13:54:48,636] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6273066997528076 |
|
|
[2025-10-10 13:54:48,636] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.834587574005127, 'eval_runtime': 5.7994, 'eval_samples_per_second': 21.899, 'eval_steps_per_second': 11.036, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.33} |
|
|
78% 426/549 [43:03<09:30, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
78% 427/549 [43:07<16:53, 8.31s/it]
78% 428/549 [43:11<14:04, 6.98s/it]
78% 429/549 [43:14<12:05, 6.05s/it]
78% 430/549 [43:18<10:43, 5.40s/it]
79% 431/549 [43:22<09:44, 4.96s/it]
79% 432/549 [43:26<09:02, 4.64s/it][2025-10-10 13:55:17,787] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:55:20,430] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3111767768859863 |
|
|
[2025-10-10 13:55:21,756] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3263280391693115 |
|
|
[2025-10-10 13:55:23,205] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4486384391784668 |
|
|
[2025-10-10 13:55:25,025] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8193638324737549 |
|
|
[2025-10-10 13:55:25,025] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.831284046173096, 'eval_runtime': 5.8293, 'eval_samples_per_second': 21.787, 'eval_steps_per_second': 10.979, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.36} |
|
|
79% 432/549 [43:39<09:02, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
79% 433/549 [43:43<16:05, 8.32s/it]
79% 434/549 [43:47<13:22, 6.98s/it]
79% 435/549 [43:51<11:28, 6.04s/it]
79% 436/549 [43:55<10:09, 5.39s/it]
80% 437/549 [43:59<09:13, 4.94s/it]
80% 438/549 [44:02<08:33, 4.63s/it][2025-10-10 13:55:54,066] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:55:56,645] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.282938003540039 |
|
|
[2025-10-10 13:55:57,916] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2711057662963867 |
|
|
[2025-10-10 13:55:59,205] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2884700298309326 |
|
|
[2025-10-10 13:56:00,852] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6469426155090332 |
|
|
[2025-10-10 13:56:00,852] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.831367492675781, 'eval_runtime': 6.1266, 'eval_samples_per_second': 20.729, 'eval_steps_per_second': 10.446, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.39} |
|
|
80% 438/549 [44:15<08:33, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
80% 439/549 [44:19<15:09, 8.27s/it]
80% 440/549 [44:23<12:36, 6.94s/it]
80% 441/549 [44:27<10:50, 6.03s/it]
81% 442/549 [44:31<09:36, 5.39s/it]
81% 443/549 [44:35<08:44, 4.95s/it]
81% 444/549 [44:39<08:06, 4.63s/it][2025-10-10 13:56:30,285] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:56:32,867] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.277099609375 |
|
|
[2025-10-10 13:56:34,157] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2903683185577393 |
|
|
[2025-10-10 13:56:35,441] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2831330299377441 |
|
|
[2025-10-10 13:56:36,811] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3701646327972412 |
|
|
[2025-10-10 13:56:36,811] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.824395656585693, 'eval_runtime': 6.3408, 'eval_samples_per_second': 20.029, 'eval_steps_per_second': 10.093, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.43} |
|
|
81% 444/549 [44:51<08:06, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
81% 445/549 [44:55<14:18, 8.26s/it]
81% 446/549 [44:59<11:55, 6.95s/it]
81% 447/549 [45:03<10:14, 6.03s/it]
82% 448/549 [45:07<09:03, 5.38s/it]
82% 449/549 [45:11<08:14, 4.94s/it]
82% 450/549 [45:15<07:38, 4.63s/it][2025-10-10 13:57:06,458] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:57:09,048] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2799561023712158 |
|
|
[2025-10-10 13:57:10,351] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3030431270599365 |
|
|
[2025-10-10 13:57:11,608] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2559781074523926 |
|
|
[2025-10-10 13:57:12,875] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2669663429260254 |
|
|
[2025-10-10 13:57:12,875] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.833980560302734, 'eval_runtime': 6.203, 'eval_samples_per_second': 20.474, 'eval_steps_per_second': 10.318, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.46} |
|
|
82% 450/549 [45:27<07:38, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
82% 451/549 [45:31<13:21, 8.18s/it]
82% 452/549 [45:35<11:08, 6.89s/it]
83% 453/549 [45:39<09:34, 5.98s/it]
83% 454/549 [45:43<08:28, 5.35s/it]
83% 455/549 [45:47<07:43, 4.93s/it]
83% 456/549 [45:51<07:10, 4.63s/it][2025-10-10 13:57:42,418] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:57:45,040] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2982087135314941 |
|
|
[2025-10-10 13:57:46,346] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3049485683441162 |
|
|
[2025-10-10 13:57:47,647] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3008086681365967 |
|
|
[2025-10-10 13:57:48,951] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3040728569030762 |
|
|
[2025-10-10 13:57:48,951] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.833824157714844, 'eval_runtime': 5.9057, 'eval_samples_per_second': 21.505, 'eval_steps_per_second': 10.837, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.49} |
|
|
83% 456/549 [46:03<07:10, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
83% 457/549 [46:07<12:27, 8.13s/it]
83% 458/549 [46:11<10:23, 6.86s/it]
84% 459/549 [46:15<08:56, 5.96s/it]
84% 460/549 [46:19<07:55, 5.34s/it]
84% 461/549 [46:23<07:11, 4.91s/it]
84% 462/549 [46:26<06:40, 4.60s/it][2025-10-10 13:58:18,149] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:58:20,763] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2804484367370605 |
|
|
[2025-10-10 13:58:22,047] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2839665412902832 |
|
|
[2025-10-10 13:58:23,316] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2688114643096924 |
|
|
[2025-10-10 13:58:24,612] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2959990501403809 |
|
|
[2025-10-10 13:58:24,613] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.826416015625, 'eval_runtime': 5.8235, 'eval_samples_per_second': 21.808, 'eval_steps_per_second': 10.99, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.52} |
|
|
84% 462/549 [46:39<06:40, 4.60s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
84% 463/549 [46:43<11:33, 8.06s/it]
85% 464/549 [46:46<09:37, 6.80s/it]
85% 465/549 [46:50<08:16, 5.91s/it]
85% 466/549 [46:54<07:19, 5.30s/it]
85% 467/549 [46:58<06:39, 4.87s/it]
85% 468/549 [47:02<06:11, 4.59s/it][2025-10-10 13:58:53,663] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:58:56,265] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2601747512817383 |
|
|
[2025-10-10 13:58:57,546] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2804811000823975 |
|
|
[2025-10-10 13:58:58,818] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2715575695037842 |
|
|
[2025-10-10 13:59:00,085] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2669270038604736 |
|
|
[2025-10-10 13:59:00,085] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A
|
|
|
[A{'eval_loss': 4.823799133300781, 'eval_runtime': 5.8422, 'eval_samples_per_second': 21.738, 'eval_steps_per_second': 10.955, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.56} |
|
|
85% 468/549 [47:14<06:11, 4.59s/it] |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A |
|
|
[A
85% 469/549 [47:18<10:44, 8.05s/it]
86% 470/549 [47:22<08:56, 6.80s/it]
86% 471/549 [47:26<07:43, 5.94s/it]
86% 472/549 [47:30<06:50, 5.33s/it]
86% 473/549 [47:34<06:12, 4.90s/it]
86% 474/549 [47:38<05:45, 4.61s/it][2025-10-10 13:59:29,330] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 13:59:32,429] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2759573459625244 |
|
|
[2025-10-10 13:59:33,742] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3128514289855957 |
|
|
[2025-10-10 13:59:35,045] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3031983375549316 |
|
|
[2025-10-10 13:59:36,349] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3030741214752197 |
|
|
[2025-10-10 13:59:36,349] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A
|
|
|
[A{'eval_loss': 4.822895526885986, 'eval_runtime': 5.8969, 'eval_samples_per_second': 21.537, 'eval_steps_per_second': 10.853, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.59} |
|
|
86% 474/549 [47:51<05:45, 4.61s/it] |
|
|
100% 4/4 [00:03<00:00, 1.10it/s][A |
|
|
[A
87% 475/549 [47:54<10:11, 8.26s/it]
87% 476/549 [47:58<08:26, 6.94s/it]
87% 477/549 [48:02<07:13, 6.02s/it]
87% 478/549 [48:06<06:21, 5.38s/it]
87% 479/549 [48:10<05:45, 4.93s/it]
87% 480/549 [48:14<05:19, 4.63s/it][2025-10-10 14:00:05,526] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:00:08,809] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4298784732818604 |
|
|
[2025-10-10 14:00:10,095] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.285125732421875 |
|
|
[2025-10-10 14:00:11,356] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2615094184875488 |
|
|
[2025-10-10 14:00:12,652] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.294884443283081 |
|
|
[2025-10-10 14:00:12,652] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.817491054534912, 'eval_runtime': 5.8133, 'eval_samples_per_second': 21.847, 'eval_steps_per_second': 11.009, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.62} |
|
|
87% 480/549 [48:27<05:19, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
88% 481/549 [48:31<09:23, 8.28s/it]
88% 482/549 [48:35<07:45, 6.95s/it]
88% 483/549 [48:38<06:37, 6.03s/it]
88% 484/549 [48:42<05:50, 5.39s/it]
88% 485/549 [48:46<05:16, 4.95s/it]
89% 486/549 [48:50<04:52, 4.64s/it][2025-10-10 14:00:41,802] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:00:45,094] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8106629848480225 |
|
|
[2025-10-10 14:00:46,391] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2969591617584229 |
|
|
[2025-10-10 14:00:47,689] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.297926425933838 |
|
|
[2025-10-10 14:00:48,990] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3006622791290283 |
|
|
[2025-10-10 14:00:48,991] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.809765815734863, 'eval_runtime': 5.8183, 'eval_samples_per_second': 21.828, 'eval_steps_per_second': 11.0, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.66} |
|
|
89% 486/549 [49:03<04:52, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
89% 487/549 [49:07<08:35, 8.31s/it]
89% 488/549 [49:11<07:05, 6.97s/it]
89% 489/549 [49:15<06:02, 6.04s/it]
89% 490/549 [49:19<05:18, 5.39s/it]
89% 491/549 [49:23<04:46, 4.94s/it]
90% 492/549 [49:26<04:23, 4.63s/it][2025-10-10 14:01:18,064] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:01:21,207] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8038432598114014 |
|
|
[2025-10-10 14:01:22,618] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4110236167907715 |
|
|
[2025-10-10 14:01:23,883] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.264404296875 |
|
|
[2025-10-10 14:01:25,172] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.288550615310669 |
|
|
[2025-10-10 14:01:25,172] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.804036617279053, 'eval_runtime': 5.7598, 'eval_samples_per_second': 22.049, 'eval_steps_per_second': 11.112, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.69} |
|
|
90% 492/549 [49:39<04:23, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
90% 493/549 [49:43<07:42, 8.26s/it]
90% 494/549 [49:47<06:21, 6.94s/it]
90% 495/549 [49:51<05:24, 6.01s/it]
90% 496/549 [49:55<04:44, 5.37s/it]
91% 497/549 [49:59<04:16, 4.93s/it]
91% 498/549 [50:03<03:55, 4.62s/it][2025-10-10 14:01:54,168] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:01:56,896] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4492151737213135 |
|
|
[2025-10-10 14:01:58,644] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7475955486297607 |
|
|
[2025-10-10 14:01:59,912] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2675278186798096 |
|
|
[2025-10-10 14:02:01,187] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2756171226501465 |
|
|
[2025-10-10 14:02:01,188] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.802491664886475, 'eval_runtime': 5.7541, 'eval_samples_per_second': 22.071, 'eval_steps_per_second': 11.122, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.72} |
|
|
91% 498/549 [50:15<03:55, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
91% 499/549 [50:19<06:51, 8.22s/it]
91% 500/549 [50:23<05:38, 6.91s/it]
{'loss': 4.7637, 'grad_norm': 0.1683763712644577, 'learning_rate': 7.053292796908628e-05, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'tokens_per_second_per_gpu': 21.22, 'epoch': 2.73} |
|
|
91% 500/549 [50:23<05:38, 6.91s/it]
91% 501/549 [50:27<04:47, 6.00s/it]
91% 502/549 [50:31<04:12, 5.37s/it]
92% 503/549 [50:35<03:46, 4.93s/it]
92% 504/549 [50:39<03:28, 4.63s/it][2025-10-10 14:02:30,241] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:02:32,802] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2650606632232666 |
|
|
[2025-10-10 14:02:34,502] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6998417377471924 |
|
|
[2025-10-10 14:02:36,052] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5492708683013916 |
|
|
[2025-10-10 14:02:37,338] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2862026691436768 |
|
|
[2025-10-10 14:02:37,338] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.802091121673584, 'eval_runtime': 5.8288, 'eval_samples_per_second': 21.789, 'eval_steps_per_second': 10.98, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.75} |
|
|
92% 504/549 [50:52<03:28, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
92% 505/549 [50:55<06:04, 8.28s/it]
92% 506/549 [50:59<04:59, 6.96s/it]
92% 507/549 [51:03<04:13, 6.04s/it]
93% 508/549 [51:07<03:41, 5.40s/it]
93% 509/549 [51:11<03:17, 4.95s/it]
93% 510/549 [51:15<03:00, 4.63s/it][2025-10-10 14:03:06,511] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:03:09,143] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3052573204040527 |
|
|
[2025-10-10 14:03:10,598] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4548754692077637 |
|
|
[2025-10-10 14:03:12,399] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.800527811050415 |
|
|
[2025-10-10 14:03:13,696] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2964775562286377 |
|
|
[2025-10-10 14:03:13,696] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.803107261657715, 'eval_runtime': 5.8267, 'eval_samples_per_second': 21.796, 'eval_steps_per_second': 10.984, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.79} |
|
|
93% 510/549 [51:28<03:00, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
93% 511/549 [51:32<05:15, 8.31s/it]
93% 512/549 [51:36<04:18, 6.98s/it]
93% 513/549 [51:39<03:37, 6.04s/it]
94% 514/549 [51:43<03:09, 5.40s/it]
94% 515/549 [51:47<02:48, 4.95s/it]
94% 516/549 [51:51<02:32, 4.64s/it][2025-10-10 14:03:42,833] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:03:45,384] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2671685218811035 |
|
|
[2025-10-10 14:03:46,651] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2670958042144775 |
|
|
[2025-10-10 14:03:48,344] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6930501461029053 |
|
|
[2025-10-10 14:03:49,888] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5432307720184326 |
|
|
[2025-10-10 14:03:49,888] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.801834583282471, 'eval_runtime': 5.9383, 'eval_samples_per_second': 21.387, 'eval_steps_per_second': 10.778, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.82} |
|
|
94% 516/549 [52:04<02:32, 4.64s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
94% 517/549 [52:08<04:25, 8.31s/it]
94% 518/549 [52:12<03:36, 6.97s/it]
95% 519/549 [52:16<03:01, 6.04s/it]
95% 520/549 [52:20<02:36, 5.40s/it]
95% 521/549 [52:24<02:18, 4.95s/it]
95% 522/549 [52:27<02:04, 4.63s/it][2025-10-10 14:04:19,117] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:04:21,739] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3012058734893799 |
|
|
[2025-10-10 14:04:23,021] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2816236019134521 |
|
|
[2025-10-10 14:04:24,451] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4290544986724854 |
|
|
[2025-10-10 14:04:26,225] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7740297317504883 |
|
|
[2025-10-10 14:04:26,225] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.80092716217041, 'eval_runtime': 5.7804, 'eval_samples_per_second': 21.971, 'eval_steps_per_second': 11.072, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.85} |
|
|
95% 522/549 [52:40<02:04, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
95% 523/549 [52:44<03:34, 8.26s/it]
95% 524/549 [52:48<02:53, 6.94s/it]
96% 525/549 [52:52<02:24, 6.01s/it]
96% 526/549 [52:56<02:03, 5.37s/it]
96% 527/549 [53:00<01:48, 4.93s/it]
96% 528/549 [53:04<01:37, 4.62s/it][2025-10-10 14:04:55,239] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:04:57,810] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2706298828125 |
|
|
[2025-10-10 14:04:59,088] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.276904582977295 |
|
|
[2025-10-10 14:05:00,384] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2962520122528076 |
|
|
[2025-10-10 14:05:02,135] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.750046730041504 |
|
|
[2025-10-10 14:05:02,135] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.84it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.32it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A
|
|
|
[A{'eval_loss': 4.8013153076171875, 'eval_runtime': 6.0043, 'eval_samples_per_second': 21.152, 'eval_steps_per_second': 10.659, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.89} |
|
|
96% 528/549 [53:16<01:37, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.12it/s][A |
|
|
[A
96% 529/549 [53:20<02:45, 8.26s/it]
97% 530/549 [53:24<02:11, 6.95s/it]
97% 531/549 [53:28<01:48, 6.03s/it]
97% 532/549 [53:32<01:31, 5.39s/it]
97% 533/549 [53:36<01:19, 4.95s/it]
97% 534/549 [53:40<01:09, 4.63s/it][2025-10-10 14:05:31,473] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:05:34,055] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.281491994857788 |
|
|
[2025-10-10 14:05:35,343] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2869987487792969 |
|
|
[2025-10-10 14:05:36,619] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.275632381439209 |
|
|
[2025-10-10 14:05:38,079] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.460437297821045 |
|
|
[2025-10-10 14:05:38,080] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.801328182220459, 'eval_runtime': 6.2546, 'eval_samples_per_second': 20.305, 'eval_steps_per_second': 10.232, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.92} |
|
|
97% 534/549 [53:53<01:09, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
97% 535/549 [53:57<01:55, 8.26s/it]
98% 536/549 [54:00<01:30, 6.93s/it]
98% 537/549 [54:04<01:12, 6.01s/it]
98% 538/549 [54:08<00:59, 5.37s/it]
98% 539/549 [54:12<00:49, 4.93s/it]
98% 540/549 [54:16<00:41, 4.62s/it][2025-10-10 14:06:07,574] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:06:10,143] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2767753601074219 |
|
|
[2025-10-10 14:06:11,428] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2852528095245361 |
|
|
[2025-10-10 14:06:12,701] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2729084491729736 |
|
|
[2025-10-10 14:06:13,962] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2607090473175049 |
|
|
[2025-10-10 14:06:13,963] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.801631927490234, 'eval_runtime': 6.294, 'eval_samples_per_second': 20.178, 'eval_steps_per_second': 10.168, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.95} |
|
|
98% 540/549 [54:29<00:41, 4.62s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
99% 541/549 [54:32<01:05, 8.20s/it]
99% 542/549 [54:36<00:48, 6.91s/it]
99% 543/549 [54:40<00:36, 6.00s/it]
99% 544/549 [54:44<00:26, 5.37s/it]
99% 545/549 [54:48<00:19, 4.94s/it]
99% 546/549 [54:52<00:13, 4.63s/it][2025-10-10 14:06:43,637] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step... |
|
|
[2025-10-10 14:06:46,208] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2853760719299316 |
|
|
[2025-10-10 14:06:47,466] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2572674751281738 |
|
|
[2025-10-10 14:06:48,724] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2585771083831787 |
|
|
[2025-10-10 14:06:50,005] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2800981998443604 |
|
|
[2025-10-10 14:06:50,005] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4] |
|
|
|
|
|
0% 0/4 [00:00<?, ?it/s][A |
|
|
50% 2/4 [00:01<00:01, 1.85it/s][A |
|
|
75% 3/4 [00:02<00:00, 1.33it/s][A |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A
|
|
|
[A{'eval_loss': 4.80186653137207, 'eval_runtime': 6.011, 'eval_samples_per_second': 21.128, 'eval_steps_per_second': 10.647, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 2.98} |
|
|
99% 546/549 [55:04<00:13, 4.63s/it] |
|
|
100% 4/4 [00:03<00:00, 1.13it/s][A |
|
|
[A
100% 547/549 [55:08<00:16, 8.11s/it]
100% 548/549 [55:12<00:06, 6.84s/it]
100% 549/549 [55:16<00:00, 5.99s/it][2025-10-10 14:07:07,765] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-549 |
|
|
{'train_runtime': 3323.7208, 'train_samples_per_second': 0.33, 'train_steps_per_second': 0.165, 'train_loss': 5.692639126803706, 'memory/max_active (GiB)': 3.74, 'memory/max_allocated (GiB)': 3.74, 'memory/device_reserved (GiB)': 4.63, 'epoch': 3.0} |
|
|
100% 549/549 [55:23<00:00, 5.99s/it]
100% 549/549 [55:23<00:00, 6.05s/it] |