import os

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

# ============================================
# 0. Environment setup
# ============================================
os.environ["OMP_NUM_THREADS"] = "8"

# 1. Configuration
base_model = "mistralai/Mistral-7B-Instruct-v0.3"
new_model_dir = "./mistral-7b-brvm-finetuned"
output_dir = "./results"

# 2. Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} - "
          f"Memory: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")

# 3. Dataset
dataset = load_dataset("lamekemal/brvm_finetune")

# 4. Load model + tokenizer (native FP16, no quantization)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
model.config.use_cache = False  # the KV cache is incompatible with gradient checkpointing
model.gradient_checkpointing_enable()
# With a frozen FP16 base + LoRA adapters, gradient checkpointing needs the
# embedding outputs to require grads, otherwise the backward pass breaks.
model.enable_input_require_grads()

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# ============================================
# 5. Preprocessing (max_seq_length=512)
# ============================================
def tokenize_function(examples):
    # Prompt template kept in French to match the BRVM dataset
    texts = [
        f"Instruction: {instr}\nRéponse: {resp}"
        for instr, resp in zip(examples["instruction"], examples["response"])
    ]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# ============================================
# 6. LoRA config (r can be raised to 64 on an L40S)
# ============================================
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

# ============================================
# 7. TrainingArguments
# ============================================
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=8,  # larger batch; 16 is worth testing
    gradient_accumulation_steps=2,
    optim="adamw_torch_fused",  # fast fused optimizer for large GPUs
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-5,  # lower than with 4-bit quantization
    fp16=True,
    max_grad_norm=1.0,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="steps",
    eval_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    hub_model_id="lamekemal/mistral-7b-brvm-finetuned",  # target repo for push_to_hub
)

# ============================================
# 8. Trainer
# ============================================
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    peft_config=lora_config,
    args=training_args,
)

# 9. Fine-tuning
trainer.train()

# 10. Local save
trainer.save_model(new_model_dir)

# 11. Push model + tokenizer to the Hugging Face Hub
# (Trainer.push_to_hub takes a commit message, not a repo id; the target repo
#  is set via hub_model_id in TrainingArguments above.)
trainer.push_to_hub(commit_message="Fine-tuning on lamekemal/brvm_finetune")
tokenizer.push_to_hub("lamekemal/mistral-7b-brvm-finetuned")

print(f"✅ LoRA model saved locally to {new_model_dir} and pushed to the Hugging Face Hub")
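
# ============================================
# 12. Quick inference sanity check — a minimal sketch added here, not part of
# the original pipeline. It reuses the in-memory fine-tuned PEFT model and the
# same "Instruction: ...\nRéponse:" template used for training; the example
# question below is purely illustrative.
# ============================================
model.eval()
model.config.use_cache = True  # re-enable the KV cache for generation

prompt = "Instruction: Qu'est-ce que la BRVM ?\nRéponse:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=200, do_sample=False)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))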