import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
import os
# ============================================
# 0. Environment setup
# ============================================
os.environ["OMP_NUM_THREADS"] = "8"
# 1. Configurations
base_model = "mistralai/Mistral-7B-Instruct-v0.3"
new_model_dir = "./mistral-7b-brvm-finetuned"
output_dir = "./results"
# 2. Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Utilisation du périphérique: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} - "
          f"Memory: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
# 3. Dataset
dataset = load_dataset("lamekemal/brvm_finetune")
#bnb_config = BitsAndBytesConfig(
#    load_in_8bit=True,  # or load_in_4bit=True for even less VRAM
#    bnb_8bit_use_fp32_cpu_offload=True,  # offload some modules to the CPU
#)
# 8-bit configuration with CPU offload
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
# 4. Load model + tokenizer (8-bit quantization + CPU offload; FP16 alternative kept commented out below)
#model = AutoModelForCausalLM.from_pretrained(
# base_model,
# torch_dtype=torch.float16,
# device_map="auto",
# trust_remote_code=True,
# quantization_config=bnb_config
#)
model = AutoModelForCausalLM.from_pretrained(
"mistralai/Mistral-7B-Instruct-v0.3",
quantization_config=bnb_config,
device_map="auto" # ou tu peux préciser {'': 'cpu'} pour offload complet
)
model.config.use_cache = False
model.gradient_checkpointing_enable()
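# Assumption (standard PEFT recipe, not in the original script): prepare the
# quantized model for k-bit training so gradient checkpointing and LoRA work
# cleanly on the 8-bit base (casts norm layers to fp32, enables input grads).
from peft import prepare_model_for_kbit_training
model = prepare_model_for_kbit_training(model)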
tokenizer = AutoTokenizer.from_pretrained(base_model)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# ============================================
# 5. Preprocessing (max_seq_length=512)
# ============================================
def tokenize_function(examples):
    # Prompt labels stay in French ("Réponse") to match the French-language BRVM dataset
    texts = [
        f"Instruction: {instr}\nRéponse: {resp}"
        for instr, resp in zip(examples["instruction"], examples["response"])
    ]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
tokenized_datasets = dataset.map(tokenize_function, batched=True)
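# Optional sanity check (not in the original script): decode one tokenized
# training example to confirm the prompt template was applied as expected.
# Assumes the dataset exposes a "train" split, as used below.
print(tokenizer.decode(tokenized_datasets["train"][0]["input_ids"], skip_special_tokens=True)[:200])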
# ============================================
# 6. LoRA config (r=64 is comfortable on an L40S; lower it if VRAM is tight)
# ============================================
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.gradient_checkpointing_enable()
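# Optional: report how many parameters the LoRA adapters actually train
# (print_trainable_parameters() is provided by PEFT on the wrapped model).
model.print_trainable_parameters()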
# ============================================
# 7. TrainingArguments
# ============================================
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=8,  # larger batch; 16 may also fit on an L40S
    gradient_accumulation_steps=2,
    optim="adamw_torch_fused",      # fused optimizer, fast on large GPUs
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-5,             # lower than typical 4-bit learning rates
    fp16=True,
    max_grad_norm=1.0,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="steps",
    eval_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    hub_model_id="lamekemal/mistral-7b-brvm-finetuned",  # target repo for push_to_hub()
)
# ============================================
# 8. Trainer
# ============================================
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    args=training_args,
    # peft_config is not passed again: the model is already wrapped with
    # get_peft_model() above, and use_peft is not an SFTTrainer argument.
)
# 9. Fine-tuning
trainer.train()
# 10. Local save
trainer.save_model(new_model_dir)
# 11. Push model + tokenizer to the Hugging Face Hub
# Trainer.push_to_hub() takes a commit message, not a repo id; the target repo
# comes from hub_model_id set in TrainingArguments above.
trainer.push_to_hub()
tokenizer.push_to_hub("lamekemal/mistral-7b-brvm-finetuned")
print(f"✅ LoRA model saved locally to {new_model_dir} and pushed to the Hugging Face Hub")