Commit 764dade (verified) · committed by cicdatopea · 1 Parent(s): ec01bb8

Update README.md
Files changed (1): README.md (+4 -9)
README.md CHANGED
````diff
@@ -10,16 +10,12 @@ This model is an int4 model with group_size 128 and symmetric quantization of [g
 
 Please follow the license of the original model.
 
-### Inference on CPU
-
-we found the unquantized layer must run on BF16 or FP32, so cuda inference is not available now.
+### Inference on CPU/XPU/CUDA
 
 Requirements
 
 ```bash
-pip install auto-round
-pip uninstall intel-extension-for-pytorch
-pip install intel-extension-for-transformers
+pip install 'auto-round>=0.5'
 ```
 
 ~~~python
@@ -27,13 +23,12 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 from PIL import Image
 import requests
 import torch
-from auto_round import AutoRoundConfig
+from auto_round import AutoRoundConfig  ## must import for the AutoRound format, or use transformers>4.51.3
 
 model_id = "OPEA/gemma-3-12b-it-int4-AutoRound"
 
 model = Gemma3ForConditionalGeneration.from_pretrained(
-    model_id, torch_dtype=torch.bfloat16, device_map="auto", quantization_config=quantization_config
-).eval()
+    model_id, torch_dtype=torch.bfloat16, device_map="auto").eval()
 
 processor = AutoProcessor.from_pretrained(model_id)
 
````
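The second hunk ends at the processor setup, so the diff shows the updated README snippet only partially. Below is a minimal end-to-end sketch of how that loading code is typically continued for multimodal inference; the image URL, prompt, and generation settings are illustrative assumptions and are not part of this commit.

```python
# Minimal usage sketch continuing the loading code shown in the diff above.
# The image URL, prompt, and generation settings are illustrative assumptions.
import torch
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from auto_round import AutoRoundConfig  # registers the AutoRound quantization format

model_id = "OPEA/gemma-3-12b-it-int4-AutoRound"
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
).eval()
processor = AutoProcessor.from_pretrained(model_id)

# Build a multimodal chat prompt (placeholder image URL and question).
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "https://example.com/sample.jpg"},  # placeholder
            {"type": "text", "text": "Describe this image."},
        ],
    }
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device, dtype=torch.bfloat16)

# Generate and decode only the newly produced tokens.
with torch.inference_mode():
    output_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
prompt_len = inputs["input_ids"].shape[-1]
print(processor.decode(output_ids[0][prompt_len:], skip_special_tokens=True))
```

With `device_map="auto"` the snippet places the model on whatever device is available, which is consistent with the section heading being retitled from "Inference on CPU" to "Inference on CPU/XPU/CUDA" in this commit.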