# brvm_finetuner / finetune.py
import torch
from datasets import load_dataset
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TrainingArguments,
BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import os
# ============================================
# 0. Environment setup
# ============================================
os.environ["OMP_NUM_THREADS"] = "8"
# 1. Configurations
base_model = "mistralai/Mistral-7B-Instruct-v0.3"
new_model_dir = "./mistral-7b-brvm-finetuned"
output_dir = "./results"
# 2. Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Utilisation du périphérique: {device}")
if torch.cuda.is_available():
print(f"GPU: {torch.cuda.get_device_name(0)} - "
f"Mémoire: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
# 3. Dataset
dataset = load_dataset("lamekemal/brvm_finetune")
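# The trainer below expects a "validation" split. A minimal sketch, assuming
# the hub dataset might ship only "train" (drop this if the split already exists):
# from datasets import DatasetDict
# if "validation" not in dataset:
#     split = dataset["train"].train_test_split(test_size=0.1, seed=42)
#     dataset = DatasetDict({"train": split["train"], "validation": split["test"]})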
# Alternative: 4-bit NF4 quantization (QLoRA) for even less VRAM:
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )
# Configuration for 8-bit loading + CPU offload of overflow modules
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
# 4. Load model + tokenizer (8-bit weights; layers that do not fit in VRAM
#    are offloaded to CPU in fp32, per the config above)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",  # or pass {"": "cpu"} for a full CPU offload
)
model.config.use_cache = False  # the KV cache is incompatible with gradient checkpointing
# Standard PEFT preparation for k-bit training: casts norm layers to fp32,
# enables input gradients, and turns on gradient checkpointing.
model = prepare_model_for_kbit_training(model)
tokenizer = AutoTokenizer.from_pretrained(base_model)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Mistral ships no dedicated pad token
tokenizer.padding_side = "right"
# ============================================
# 5. Preprocessing (max_seq_length=512)
# ============================================
def tokenize_function(examples):
    # The prompt template stays in French to match the BRVM dataset.
    texts = [
        f"Instruction: {instr}\nRéponse: {resp}"
        for instr, resp in zip(examples["instruction"], examples["response"])
    ]
    return tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
# Drop the raw text columns so only tokenized fields reach the trainer.
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)
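# Optional sanity check (sketch): decode one tokenized example to confirm
# the template survived preprocessing before launching a full run.
# sample = tokenized_datasets["train"][0]
# print(tokenizer.decode(sample["input_ids"], skip_special_tokens=True)[:200])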
# ============================================
# 6. LoRA config (r=64 is comfortable on an L40S; reduce r to save VRAM)
# ============================================
lora_config = LoraConfig(
    r=64,
    lora_alpha=128,  # common heuristic: alpha = 2 * r
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
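# Sanity check: with LoRA, only the adapter weights should be trainable.
model.print_trainable_parameters()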
# ============================================
# 7. TrainingArguments
# ============================================
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=8,   # with accumulation below: effective batch of 16
    gradient_accumulation_steps=2,
    optim="adamw_torch_fused",       # fast fused optimizer for recent GPUs
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-5,              # lower than a typical 4-bit run
    fp16=True,
    max_grad_norm=1.0,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="steps",           # named `evaluation_strategy` in older transformers
    eval_steps=100,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    hub_model_id="lamekemal/mistral-7b-brvm-finetuned",  # target repo for push_to_hub()
)
# ============================================
# 8. Trainer
# ============================================
trainer = SFTTrainer(
    model=model,  # already wrapped by get_peft_model above, so no peft_config here
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    args=training_args,
    processing_class=tokenizer,  # named `tokenizer=` in older TRL releases
)
# 9. Fine-tuning
trainer.train()
# 10. Save locally
trainer.save_model(new_model_dir)
# 11. Push model + tokenizer to the Hugging Face Hub
# Note: Trainer.push_to_hub's first positional argument is a commit message,
# not a repo id; the destination is the `hub_model_id` set above.
trainer.push_to_hub(commit_message="End of BRVM fine-tuning")
tokenizer.push_to_hub("lamekemal/mistral-7b-brvm-finetuned")
print(f"✅ LoRA model saved locally to {new_model_dir} and pushed to the Hugging Face Hub")