Update app.py
app.py CHANGED
@@ -6,6 +6,7 @@ import asyncio
 import json
 import time
 import logging
+import markdown
 from typing import List, Dict, Any, Optional, AsyncGenerator, Tuple, Union
 from fastapi import FastAPI, HTTPException, Depends, status
 from fastapi.responses import StreamingResponse, PlainTextResponse, HTMLResponse, JSONResponse
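
Note: the new dependency is presumably the Python-Markdown package (pip install markdown); its markdown.markdown() call converts Markdown source into an HTML fragment:

    import markdown

    html = markdown.markdown("**Hello**, *world*!")
    print(html)  # <p><strong>Hello</strong>, <em>world</em>!</p>
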
@@ -115,6 +116,7 @@ class GenerateRequest(BaseModel):
     use_cache: bool = Field(True)
     do_sample: bool = Field(True)
     tokenizer_kwargs: Optional[Dict[str, Any]] = None
+    return_only_text: bool = Field(False)
     max_time: Optional[float] = Field(None, ge=0.0)
     length_penalty: float = Field(1.0, ge=0.0)
     no_repeat_ngram_size: int = Field(0, ge=0)
@@ -134,6 +136,7 @@ class GenerateRequest(BaseModel):
     length_normalization_factor: Optional[float] = Field(None)
     min_new_tokens: int = Field(0, ge=0)
     do_normalize_logits: bool = Field(False)
+    return_full_text: bool = Field(False)
     @validator('stop_sequences')
     def validate_stop_sequences(cls, v):
         if v is not None:
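
Note: despite its name, return_full_text does not toggle prompt-plus-completion output here; per the hunks below, it gates Markdown-to-HTML rendering of the generated text, while return_only_text only changes how streaming errors are formatted. A minimal sketch of a request setting both flags, assuming the endpoint is mounted at /generate and takes prompt and stream fields (none of which are visible in this diff):

    import requests

    resp = requests.post(
        "http://localhost:8000/generate",  # route path assumed, not shown in the diff
        json={
            "prompt": "Write a haiku about autumn.",  # field name assumed
            "stream": False,                          # field name assumed
            "return_full_text": True,   # render output as an HTML fragment
            "return_only_text": False,  # keep structured JSON error payloads
        },
    )
    print(resp.status_code, resp.text[:200])
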
@@ -314,6 +317,8 @@ async def stream_generation_logic(req: GenerateRequest, initial_ids: torch.Tenso
                 final_text_raw = final_text_raw.split(stop_seq, 1)[0]
                 break
         final_text_processed = post_process_text(final_text_raw, req.strip_trailing_whitespace, req.remove_incomplete_sentences)
+        if req.return_full_text:
+            final_text_processed = markdown.markdown(final_text_processed)
         final_payload: Dict[str, Any] = {
             "type": "done",
             "total_prompt_tokens": initial_ids.shape[-1],
@@ -324,8 +329,11 @@
         }
         yield json.dumps(final_payload) + "\n"
     except Exception as e:
-        error_payload = {"type": "error", "message": str(e)}
-        yield json.dumps(error_payload) + "\n"
+        if req.return_only_text:
+            yield f"Error: {e}\n"
+        else:
+            error_payload = {"type": "error", "message": str(e)}
+            yield json.dumps(error_payload) + "\n"
     finally:
         await cleanup()
 async def non_stream_generation_logic(req: GenerateRequest, initial_ids: torch.Tensor, gen_cfg: GenerationConfig, device: str) -> Dict[str, Any]:
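
Note: on the streaming path every line is now a self-contained JSON object, except that with return_only_text errors arrive as bare text lines. A minimal consumer sketch, under the same hypothetical /generate route and request fields:

    import json
    import requests

    with requests.post("http://localhost:8000/generate",
                       json={"prompt": "...", "stream": True},  # field names assumed
                       stream=True) as resp:
        for line in resp.iter_lines(decode_unicode=True):
            if not line:
                continue
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                print("stream error:", line)  # bare-text error (return_only_text)
                break
            if event.get("type") == "error":
                print("stream error:", event["message"])
                break
            if event.get("type") == "done":
                print("prompt tokens:", event["total_prompt_tokens"])
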
@@ -675,6 +683,10 @@ async def generate_endpoint(req: GenerateRequest):
             return StreamingResponse(stream_generation_logic(req, ids, gen_cfg, device), media_type="application/json")
         else:
             response_payload = await non_stream_generation_logic(req, ids, gen_cfg, device)
+            if req.return_full_text and response_payload.get("generated_sequences"):
+                first_sequence = response_payload["generated_sequences"][0].get("text", "")
+                markdown_text = markdown.markdown(first_sequence)
+                return PlainTextResponse(markdown_text)
             return JSONResponse(response_payload)
     except Exception as e:
         raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Generation error: {e}")
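
Note: the non-streaming path now returns two different content types: application/json by default, and text/plain (PlainTextResponse's default media type, even though the body is HTML) when return_full_text selects the markdown-rendered first sequence. Clients therefore need to branch on the Content-Type header; a sketch, again assuming a /generate route and a prompt field:

    import requests

    resp = requests.post("http://localhost:8000/generate",           # path assumed
                         json={"prompt": "...", "return_full_text": True})
    if resp.headers.get("content-type", "").startswith("application/json"):
        data = resp.json()         # full structured payload
    else:
        html_fragment = resp.text  # markdown-rendered text of the first sequence
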