Update README.md
README.md CHANGED
---
license: mit
---

How to use this model with vLLM:
```python
from vllm import LLM, SamplingParams

# Example inputs
inputs = [
    "Who is the president of US?",
    "Can you speak Indonesian?"
]

# Initialize the LLM with the AWQ-quantized checkpoint
llm = LLM(model="jester6136/SeaLLMs-v3-1.5B-Chat-AWQ",
          quantization="AWQ",
          gpu_memory_utilization=0.9,
          max_model_len=2000,
          max_num_seqs=32)

sparams = SamplingParams(temperature=0.0, max_tokens=2000, top_p=0.95, top_k=40, repetition_penalty=1.05)

# Wrap each input in the chat template expected by the model
chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'
prompts = [chat_template.format(input=prompt) for prompt in inputs]

outputs = llm.generate(prompts, sparams)

# Print out the model responses
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt}\nResponse: {generated_text}\n\n")
```
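
The snippet above hard-codes the `chat_template` string, which must match the template the model was trained with. As a minimal sketch (not taken from this model card, and assuming the repo ships a chat template in its tokenizer config), the prompts can instead be built with the tokenizer's own template via `transformers`:

```python
# Sketch (assumption): render prompts from the tokenizer's bundled chat template
# instead of a hard-coded string; verify the repo actually provides one.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_id = "jester6136/SeaLLMs-v3-1.5B-Chat-AWQ"
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = [
    "Who is the president of US?",
    "Can you speak Indonesian?"
]

# Render each single-turn conversation into the model's expected prompt format
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": text}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for text in inputs
]

llm = LLM(model=model_id, quantization="AWQ", max_model_len=2000)
sparams = SamplingParams(temperature=0.0, max_tokens=2000, repetition_penalty=1.05)

for output in llm.generate(prompts, sparams):
    print(f"Prompt: {output.prompt}\nResponse: {output.outputs[0].text}\n")
```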