{
  "adapter_path": "adapters",
  "batch_size": 2,
  "config": "granite_config.yaml",
  "data": "mlx-community/WikiSQL",
  "fine_tune_type": "dora",
  "grad_accumulation_steps": 1,
  "grad_checkpoint": true,
  "hf_dataset": [
    {
      "path": "ethicalabs/Kurtis-E1-SFT",
      "train_split": "train[:40000]",
      "valid_split": "validation[:16]",
      "prompt_feature": "question",
      "completion_feature": "answer",
      "mask_prompt": false
    },
    {
      "path": "ethicalabs/kurtis-v2-sft-mix-tiny",
      "train_split": "train[:10000]",
      "valid_split": "validation[:16]",
      "prompt_feature": "prompt",
      "completion_feature": "completion",
      "mask_prompt": false
    }
  ],
  "iters": 1200,
  "learning_rate": 5e-05,
  "lora_parameters": {
    "rank": 16,
    "alpha": 64,
    "dropout": 0.1,
    "scale": 8.0
  },
  "lr_schedule": {
    "name": "cosine_decay",
    "warmup": 120,
    "warmup_init": 0,
    "arguments": [
      5e-05,
      1200,
      5e-06
    ]
  },
  "mask_prompt": false,
  "max_seq_length": 2048,
  "merge_adapter": true,
  "model": "ethicalabs/granite-4.0-1b-MLX",
  "num_layers": 3,
  "optimizer": "adamw",
  "optimizer_config": {
    "adamw": {
      "betas": [
        0.9,
        0.98
      ],
      "eps": 1e-07,
      "weight_decay": 0.05,
      "bias_correction": true
    }
  },
  "output_path": "ethicalabs/kurtis-e1.2-granite-4.0-1b-MLX",
  "project_name": null,
  "report_to": "wandb",
  "resume_adapter_file": null,
  "save_every": 300,
  "seed": 42,
  "steps_per_eval": 200,
  "steps_per_report": 10,
  "test": false,
  "test_batches": 100,
  "train": true,
  "val_batches": 50,
  "wandb": "kurtis-mlx-granite-1b"
}