import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import os

# ============================================
# 0. Environment setup
# ============================================
os.environ["OMP_NUM_THREADS"] = "8"  # cap OpenMP threads used by CPU-side ops

# 1. Configurations
base_model = "mistralai/Mistral-7B-Instruct-v0.3"
new_model_dir = "./mistral-7b-brvm-finetuned"
output_dir = "./results"

# 2. Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Utilisation du périphérique: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} - "
          f"Mémoire: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")

# 3. Dataset
dataset = load_dataset("lamekemal/brvm_finetune")

# 4. Load model + tokenizer (native FP16, no quantization)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
model.config.use_cache = False  # KV cache is unused during training and conflicts with checkpointing
model.gradient_checkpointing_enable()  # trade extra compute for lower activation memory

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Mistral ships without a dedicated pad token
tokenizer.padding_side = "right"  # right padding is the safe default for fp16 causal-LM training

# ============================================
# 5. Preprocessing (max_length=512)
# ============================================
def tokenize_function(examples):
    texts = [
        f"Instruction: {instr}\nRéponse: {resp}"
        for instr, resp in zip(examples["instruction"], examples["response"])
    ]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True)
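
# Optional sanity check: after map(), each split should carry input_ids and
# attention_mask next to the original columns. Assumes the hub dataset
# exposes the "train" and "validation" splits used below.
print(tokenized_datasets)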

# ============================================
# 6. LoRA config (r=64 is affordable on an L40S; reduce it on smaller GPUs)
# ============================================
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
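
# Show how few parameters LoRA actually trains relative to the full 7B model.
model.print_trainable_parameters()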

# ============================================
# 7. TrainingArguments
# ============================================
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=8,   # larger batch; 16 may also fit on an L40S
    gradient_accumulation_steps=2,
    optim="adamw_torch_fused",       # fused AdamW, faster on recent NVIDIA GPUs
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-5,              # lower than typical 4-bit QLoRA settings
    fp16=True,
    max_grad_norm=1.0,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="steps",
    eval_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)
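
# Effective batch size per optimizer step: 8 (per device) x 2 (accumulation) = 16.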

# ============================================
# 8. Trainer
# ============================================
trainer = SFTTrainer(
    model=model,  # already LoRA-wrapped by get_peft_model above, so no peft_config here
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    processing_class=tokenizer,  # lets the trainer save/push the tokenizer with checkpoints
    args=training_args,
)

# 9. Fine-tuning
trainer.train()

# 10. Local save
trainer.save_model(new_model_dir)

# 11. Push model + tokenizer to the Hugging Face Hub
# Trainer.push_to_hub() treats its first argument as a commit message, not a
# repo id, so push the adapter and tokenizer directly to the target repo.
trainer.model.push_to_hub("lamekemal/mistral-7b-brvm-finetuned")
tokenizer.push_to_hub("lamekemal/mistral-7b-brvm-finetuned")

print(f"✅ Modèle LoRA sauvegardé localement dans {new_model_dir} et poussé sur Hugging Face Hub")