# brvm_finetuner / finetune.py
import torch
from datasets import load_dataset
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TrainingArguments,
BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import os
# ============================================
# 0. Environment setup
# ============================================
os.environ["OMP_NUM_THREADS"] = "8"
# 1. Configurations
base_model = "mistralai/Mistral-7B-Instruct-v0.3"
new_model_dir = "./mistral-7b-brvm-finetuned"
output_dir = "./results"
# 2. Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Utilisation du périphérique: {device}")
if torch.cuda.is_available():
print(f"GPU: {torch.cuda.get_device_name(0)} - "
f"Mémoire: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
# 3. Dataset
dataset = load_dataset("lamekemal/brvm_finetune")
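# The trainer below expects a "validation" split. A minimal sketch, assuming
# the hub dataset might ship only "train" (drop this if the split already exists):
# from datasets import DatasetDict
# if "validation" not in dataset:
#     split = dataset["train"].train_test_split(test_size=0.1, seed=42)
#     dataset = DatasetDict({"train": split["train"], "validation": split["test"]})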
# Alternative: 4-bit NF4 quantization (QLoRA) for even less VRAM:
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# Configuration for 8-bit loading + CPU offload of overflow modules
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
# 4. Load model + tokenizer (8-bit weights; layers that do not fit in VRAM
#    are offloaded to CPU in fp32, per the config above)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",  # or pass {"": "cpu"} for a full CPU offload
)
model.config.use_cache = False  # the KV cache is incompatible with gradient checkpointing
# Standard PEFT preparation for k-bit training: casts norm layers to fp32,
# enables input gradients, and turns on gradient checkpointing.
model = prepare_model_for_kbit_training(model)
tokenizer = AutoTokenizer.from_pretrained(base_model)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Mistral ships no dedicated pad token
tokenizer.padding_side = "right"
# ============================================
# 5. Preprocessing (max_seq_length=512)
# ============================================
def tokenize_function(examples):
    # The prompt template stays in French to match the BRVM dataset.
    texts = [
        f"Instruction: {instr}\nRéponse: {resp}"
        for instr, resp in zip(examples["instruction"], examples["response"])
    ]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
# Drop the raw text columns so only tokenized fields reach the trainer.
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)
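# Optional sanity check (sketch): decode one tokenized example to confirm
# the template survived preprocessing before launching a full run.
# sample = tokenized_datasets["train"][0]
# print(tokenizer.decode(sample["input_ids"], skip_special_tokens=True)[:200])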
# ============================================
# 6. LoRA config (r=64 is comfortable on an L40S; reduce r to save VRAM)
# ============================================
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,  # common heuristic: alpha = 2 * r
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
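# Sanity check: with LoRA, only the adapter weights should be trainable.
model.print_trainable_parameters()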
# ============================================
# 7. TrainingArguments
# ============================================
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=8,   # with accumulation below: effective batch of 16
    gradient_accumulation_steps=2,
    optim="adamw_torch_fused",       # fast fused optimizer for recent GPUs
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-5,              # lower than a typical 4-bit run
    fp16=True,
    max_grad_norm=1.0,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="steps",           # named `evaluation_strategy` in older transformers
    eval_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    hub_model_id="lamekemal/mistral-7b-brvm-finetuned",  # target repo for push_to_hub()
)
# ============================================
# 8. Trainer
# ============================================
trainer = SFTTrainer(
    model=model,  # already wrapped by get_peft_model above, so no peft_config here
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    args=training_args,
    processing_class=tokenizer,  # named `tokenizer=` in older TRL releases
)
# 9. Fine-tuning
trainer.train()
# 10. Save locally
trainer.save_model(new_model_dir)
# 11. Push model + tokenizer to the Hugging Face Hub
# Note: Trainer.push_to_hub's first positional argument is a commit message,
# not a repo id; the destination is the `hub_model_id` set above.
trainer.push_to_hub(commit_message="End of BRVM fine-tuning")
tokenizer.push_to_hub("lamekemal/mistral-7b-brvm-finetuned")
print(f"✅ LoRA model saved locally to {new_model_dir} and pushed to the Hugging Face Hub")