Spaces:

lamekemal
/

brvm_finetuner

Sleeping

lamekemal committed on Sep 22

Commit

2bbbd42

verified ·

1 Parent(s): 513cce0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,11 +16,14 @@ def load_model():
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        device_map="auto",
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         trust_remote_code=True,
     )
-    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
     return pipe
 # On charge le pipeline une fois au démarrage

     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
+        device_map="auto",  # Accelerate gère la répartition CPU/GPU
         torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
         trust_remote_code=True,
     )
+    # Ne PAS passer device quand on utilise accelerate
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
     return pipe
 # On charge le pipeline une fois au démarrage