Update README.md
README.md CHANGED
@@ -10,16 +10,12 @@ This model is an int4 model with group_size 128 and symmetric quantization of [g
 
 Please follow the license of the original model.
 
-### Inference on CPU
-
-we found the unquantized layer must run on BF16 or FP32, so cuda inference is not available now.
+### Inference on CPU/XPU/CUDA
 
 Requirements
 
 ```bash
-pip install auto-round
-pip uninstall intel-extension-for-pytorch
-pip install intel-extension-for-transformers
+pip install 'auto-round>=0.5'
 ```
 
 ~~~python
@@ -27,13 +23,12 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 from PIL import Image
 import requests
 import torch
-from auto_round import AutoRoundConfig
+from auto_round import AutoRoundConfig  ## must import for the AutoRound format, or use transformers>4.51.3
 
 model_id = "OPEA/gemma-3-12b-it-int4-AutoRound"
 
 model = Gemma3ForConditionalGeneration.from_pretrained(
-    model_id, torch_dtype=torch.bfloat16, device_map="auto"
-).eval()
+    model_id, torch_dtype=torch.bfloat16, device_map="auto").eval()
 
 processor = AutoProcessor.from_pretrained(model_id)
 
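The diff's Python snippet cuts off after the processor is loaded. For context, here is a minimal sketch of how generation typically continues with Gemma 3's chat template; the image URL, prompt, and generation settings below are illustrative assumptions, not part of this commit.

~~~python
# Continues from the snippet above (model and processor already loaded).
# The message content, URL, and max_new_tokens are illustrative assumptions,
# not taken from the diff.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg"},
            {"type": "text", "text": "Describe this image."},
        ],
    }
]

inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    generation = model.generate(**inputs, max_new_tokens=128, do_sample=False)

# Decode only the newly generated tokens, skipping the prompt.
print(processor.decode(generation[0][input_len:], skip_special_tokens=True))
~~~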
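The new heading advertises CPU, XPU, and CUDA, while the snippet itself uses `device_map="auto"`. As a sketch of how one might pin the model to a specific backend instead, assuming an accelerate-style device string (the explicit values are illustrative, not shown in the diff):

~~~python
# Hypothetical backend selection; device_map="auto" (as in the README snippet)
# picks a device automatically. Pinning explicitly is an assumption, not part
# of this commit.
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="cpu",  # or "xpu" for Intel GPUs, "cuda:0" for NVIDIA GPUs
).eval()
~~~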