AGI / fast_api_service.py
Dmitry Beresnev
add fast api service
441479b
raw
history blame
907 Bytes
import torch
from fastapi import FastAPI, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face model identifier used for both the tokenizer and the model.
MODEL_NAME = "deepseek-ai/deepseek-coder-6.7b-instruct"

# Tokenizer and model are loaded once, at import time, and shared by every
# request handler in this module. The model is placed on CPU in float32.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float32, device_map="cpu"
)

# ASGI application object that the route decorators below register on.
app = FastAPI()
@app.post("/v1/chat/completions")
def chat(req: dict):
    """Generate an assistant reply for the last message of a chat request.

    Args:
        req: Parsed JSON request body. Its "messages" entry must be a
            non-empty list whose last element is an object carrying a string
            "content" field.

    Returns:
        A dict with a single-entry "choices" list; that entry's "message"
        holds the "assistant" role and the newly generated text.

    Raises:
        HTTPException: Status 400 when "messages" is missing, empty, not a
            list, or when its last entry has no string "content".
    """
    messages = req.get("messages")
    if not isinstance(messages, list) or not messages:
        raise HTTPException(
            status_code=400,
            detail="'messages' must be a non-empty list",
        )

    last_message = messages[-1]
    content = last_message.get("content") if isinstance(last_message, dict) else None
    if not isinstance(content, str):
        raise HTTPException(
            status_code=400,
            detail="the last message must have a string 'content' field",
        )

    # NOTE(review): only the last message's content is used as the prompt;
    # earlier turns and the "role" fields are ignored. Consider building the
    # prompt with tokenizer.apply_chat_template if multi-turn context matters.
    inputs = tokenizer(content, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        # temperature is only honoured when sampling is enabled; without
        # do_sample=True generation is greedy and the value is ignored.
        do_sample=True,
        temperature=0.7,
    )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the reply does not echo the user's message back.
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return {
        "choices": [
            {"message": {"role": "assistant", "content": response}},
        ]
    }
@app.get("/")
def root():
    """Return a fixed status payload indicating the service is reachable."""
    status_payload = {"status": "DeepSeek API is online"}
    return status_payload