Dmitry Beresnev committed on
Commit
441479b
·
1 Parent(s): ff5d4b2

add fast api service

Browse files
Files changed (2) hide show
  1. Dockerfile +8 -0
  2. fast_api_service.py +40 -0
Dockerfile ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
# CPU-only inference image for the DeepSeek FastAPI service.
FROM python:3.12

# --no-cache-dir keeps the image layer smaller; accelerate is needed for
# transformers' device_map handling.
RUN pip install --no-cache-dir fastapi uvicorn transformers accelerate torch

WORKDIR /app
COPY . /app

# BUG FIX: the application module is fast_api_service.py, so the ASGI target
# must be "fast_api_service:app" — the previous "app:app" would crash on
# startup with "Could not import module app". Port 7860 is the HF Spaces default.
CMD ["uvicorn", "fast_api_service:app", "--host", "0.0.0.0", "--port", "7860"]
fast_api_service.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
from fastapi import FastAPI
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub identifier of the model served by this API.
MODEL_NAME = "deepseek-ai/deepseek-coder-6.7b-instruct"

app = FastAPI()

# Load tokenizer and model once at import time. CPU-only, full fp32 precision
# (no GPU assumed in the serving container).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="cpu",
)
@app.post("/v1/chat/completions")
def chat(req: dict):
    """OpenAI-style chat-completions endpoint.

    Expects a JSON body with a ``messages`` list; only the LAST message's
    ``content`` is fed to the model as a raw prompt (no chat template is
    applied — TODO confirm whether the instruct model's template should be
    used here). Returns an OpenAI-shaped ``{"choices": [...]}`` payload.
    """
    messages = req.get("messages", [])
    if not messages:
        # Empty request previously raised IndexError -> HTTP 500; answer
        # with an empty completion instead.
        return {
            "choices": [{
                "message": {"role": "assistant", "content": ""}
            }]
        }
    content = messages[-1]["content"]

    inputs = tokenizer(content, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        # do_sample=True is required for temperature to take effect;
        # without it transformers ignores the sampling params and decodes
        # greedily.
        do_sample=True,
        temperature=0.7,
    )

    # Decode only the newly generated tokens: outputs[0] begins with the
    # prompt tokens, and the previous code echoed the prompt back in the
    # response.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    return {
        "choices": [{
            "message": {"role": "assistant", "content": response}
        }]
    }
36
+
37
+
@app.get("/")
def root():
    """Liveness probe: report that the service process is up."""
    status_payload = {"status": "DeepSeek API is online"}
    return status_payload