{
    "adapter_path": "adapters",
    "batch_size": 2,
    "config": "granite_config.yaml",
    "data": "mlx-community/WikiSQL",
    "fine_tune_type": "dora",
    "grad_accumulation_steps": 1,
    "grad_checkpoint": true,
    "hf_dataset": [
        {
            "path": "ethicalabs/Kurtis-E1-SFT",
            "train_split": "train[:40000]",
            "valid_split": "validation[:16]",
            "prompt_feature": "question",
            "completion_feature": "answer",
            "mask_prompt": false
        },
        {
            "path": "ethicalabs/kurtis-v2-sft-mix-tiny",
            "train_split": "train[:10000]",
            "valid_split": "validation[:16]",
            "prompt_feature": "prompt",
            "completion_feature": "completion",
            "mask_prompt": false
        }
    ],
    "iters": 1200,
    "learning_rate": 5e-05,
    "lora_parameters": {
        "rank": 16,
        "alpha": 64,
        "dropout": 0.1,
        "scale": 8.0
    },
    "lr_schedule": {
        "name": "cosine_decay",
        "warmup": 120,
        "warmup_init": 0,
        "arguments": [
            5e-05,
            1200,
            5e-06
        ]
    },
    "mask_prompt": false,
    "max_seq_length": 2048,
    "merge_adapter": true,
    "model": "ethicalabs/granite-4.0-1b-MLX",
    "num_layers": 3,
    "optimizer": "adamw",
    "optimizer_config": {
        "adamw": {
            "betas": [
                0.9,
                0.98
            ],
            "eps": 1e-07,
            "weight_decay": 0.05,
            "bias_correction": true
        }
    },
    "output_path": "ethicalabs/kurtis-e1.2-granite-4.0-1b-MLX",
    "project_name": null,
    "report_to": "wandb",
    "resume_adapter_file": null,
    "save_every": 300,
    "seed": 42,
    "steps_per_eval": 200,
    "steps_per_report": 10,
    "test": false,
    "test_batches": 100,
    "train": true,
    "val_batches": 50,
    "wandb": "kurtis-mlx-granite-1b"
}
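
This file appears to be a fine-tuning configuration in the style consumed by mlx-lm's LoRA/DoRA training entry point (the keys fine_tune_type, lora_parameters, lr_schedule, hf_dataset, and adapter_path match that layout). As an illustrative reading only, the sketch below reconstructs the learning-rate schedule the lr_schedule block seems to describe: a linear warmup from warmup_init over the first 120 steps, then cosine decay, with the three arguments taken as (initial LR, decay steps, final LR). This is a minimal sketch under those assumptions, not the trainer's actual implementation.

import math

# Values taken from the config above.
INIT_LR, DECAY_STEPS, END_LR = 5e-05, 1200, 5e-06
WARMUP_STEPS, WARMUP_INIT = 120, 0.0

def learning_rate(step: int) -> float:
    """Approximate the configured schedule: linear warmup, then cosine decay.

    This mirrors the apparent intent of the lr_schedule block (an assumption,
    including whether the decay clock starts after warmup), not the exact
    code path inside the trainer.
    """
    if step < WARMUP_STEPS:
        # Linear ramp from warmup_init (0) up to the initial learning rate.
        return WARMUP_INIT + (INIT_LR - WARMUP_INIT) * step / WARMUP_STEPS
    # Cosine decay from INIT_LR down to END_LR over DECAY_STEPS.
    t = min(step - WARMUP_STEPS, DECAY_STEPS) / DECAY_STEPS
    return END_LR + (INIT_LR - END_LR) * 0.5 * (1.0 + math.cos(math.pi * t))

for s in (0, 60, 120, 600, 1200):
    print(f"step {s:>4}: lr = {learning_rate(s):.2e}")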