import html
import io
import os
import re
import string
import tempfile
import unicodedata
from pathlib import Path

import gradio as gr
import numpy as np
import soundfile as sf
import torch
import torchaudio
import torchcodec
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from pydub import AudioSegment
from pypinyin import pinyin, Style
from transformers import Wav2Vec2BertProcessor, AutoModelForCTC, VitsModel, AutoTokenizer
from umsc import UgMultiScriptConverter

from utils import preprocess_uyghur_text
from infer import infer
# Server-side token used to authorize requests to /synthesize
hf_token_env = os.environ.get("HF_TOKEN")
app = FastAPI()
# Allow specific domains or all (*) for testing
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Serve the current working directory at /files
app.mount("/files", StaticFiles(directory=".", html=True), name="files")
# infer("مەن پۈتۈن دۇنياغا كۈلۈپ قارايمەن.",
# "vibevoice/VibeVoice-1.5B",
# "VV_CV_Unique_Ug", 1.3, True)
@app.get("/", response_class=HTMLResponse)
def greet_html():
return """
<html>
<body>
<h1>
URL:
</h1>
</body>
</html>
"""
def generate_speech(text: str, checkpoint_path: str, cfg_scale: float):
    """Preprocess Uyghur text and run VibeVoice inference, returning WAV audio as a file-like object."""
    fixed_text = preprocess_uyghur_text(text)
    print(text)
    print(fixed_text)
    byte_io = infer(fixed_text, "vibevoice/VibeVoice-1.5B", checkpoint_path, cfg_scale)
    return byte_io
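# Rough local usage sketch (commented out, not executed on import), assuming
# infer() returns a file-like object of WAV bytes as implied by the
# StreamingResponse below:
#
#   audio = generate_speech("مەن پۈتۈن دۇنياغا كۈلۈپ قارايمەن.", "VV_CV_Unique_Ug", 1.3)
#   with open("speech.wav", "wb") as f:
#       f.write(audio.read())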
@app.post("/synthesize")
def synthesize(
text: str = Form(...),
checkpoint_path_name: str = Form("VV_CV_Unique_Ug"),
cfg_scale: float = Form(1.3),
hf_token: str = Form(..., description="Hugging Face authentication token")):
if (hf_token_env != hf_token):
raise HTTPException(status_code=401, detail="Invalid Hugging Face token")
try:
audio_bytes = generate_speech(text, checkpoint_path_name, cfg_scale)
return StreamingResponse(audio_bytes, media_type="audio/wav", headers={"Content-Disposition": "attachment; filename=speech.wav"})
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
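# Example client call (a minimal sketch, assuming the server runs locally on
# port 7860 and the submitted hf_token matches the server's HF_TOKEN):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/synthesize",
#       data={
#           "text": "مەن پۈتۈن دۇنياغا كۈلۈپ قارايمەن.",
#           "checkpoint_path_name": "VV_CV_Unique_Ug",
#           "cfg_scale": 1.3,
#           "hf_token": "<your HF_TOKEN>",
#       },
#   )
#   resp.raise_for_status()
#   with open("speech.wav", "wb") as f:
#       f.write(resp.content)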
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
# @app.get("/", response_class=HTMLResponse)
# def index():
# """HTML page listing items in the current working directory."""
# root = Path(".")
# rows = []
# for child in sorted(root.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower())):
# # Append "/" to directory names and link to /files/<name>
# label = child.name + ("/" if child.is_dir() else "")
# href = f"/files/{child.name}"
# rows.append(f'<li><a href="{href}">{html.escape(label)}</a></li>')
# return f"""
# <!doctype html>
# <html>
# <head>
# <meta charset="utf-8" />
# <title>Directory Listing</title>
# <style>
# body {{ font-family: system-ui, -apple-system, Segoe UI, Roboto, sans-serif; margin: 2rem; }}
# h1 {{ margin-bottom: 0.5rem; }}
# ul {{ line-height: 1.8; }}
# a {{ text-decoration: none; }}
# a:hover {{ text-decoration: underline; }}
# </style>
# </head>
# <body>
# <h1>Current folder</h1>
# <p>Serving <code>{html.escape(str(root.resolve()))}</code></p>
# <ul>
# {''.join(rows) if rows else '<li><em>(empty)</em></li>'}
# </ul>
# <p><a href="/api/ls">View as JSON</a></p>
# </body>
# </html>
# """
# @app.get("/api/ls")
# def list_json():
# """JSON endpoint listing files/directories in the CWD."""
# root = Path(".")
# return [
# {
# "name": p.name,
# "is_dir": p.is_dir(),
# "size": (p.stat().st_size if p.is_file() else None),
# "path": str(p.resolve()),
# "href": f"/files/{p.name}",
# }
# for p in sorted(root.iterdir(), key=lambda q: (not q.is_dir(), q.name.lower()))
# ]