Spaces:
Sleeping
Sleeping
Update finetune.py
Browse files- finetune.py +6 -1
finetune.py
CHANGED
|
@@ -29,13 +29,18 @@ if torch.cuda.is_available():
|
|
| 29 |
# 3. Dataset: instruction-tuning pairs for the BRVM fine-tune.
dataset = load_dataset("lamekemal/brvm_finetune")

# Quantization config: load the base model's weights in 8-bit to cut VRAM.
# NOTE: the original code passed `bnb_8bit_use_fp32_cpu_offload=True`, which is
# not a recognized BitsAndBytesConfig parameter and was silently ignored; the
# documented flag for FP32 CPU offload of non-fitting modules is
# `llm_int8_enable_fp32_cpu_offload`.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,  # or load_in_4bit=True for even less VRAM
    llm_int8_enable_fp32_cpu_offload=True,  # offload overflow modules to CPU in FP32
)

# 4. Load the model in 8-bit (bitsandbytes); non-quantized parts stay FP16.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,   # dtype for the non-quantized modules
    device_map="auto",           # let accelerate place layers across GPU/CPU
    trust_remote_code=True,
    quantization_config=bnb_config,
)
# Caching of past key/values is incompatible with gradient checkpointing
# during training, so disable it explicitly.
model.config.use_cache = False
model.gradient_checkpointing_enable()
|