mrleaf81 committed on
Commit
acf7945
·
verified ·
1 Parent(s): 6d3bfe7

Add app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -0
app.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ HOLLY TTS API - Maya1 FastAPI Service
4
+ Production-ready TTS microservice for HOLLY AI
5
+ """
6
+
7
import asyncio
import io
import os
import threading
from typing import Optional

import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import Response
from pydantic import BaseModel, Field

from holly_voice_generator import HollyVoiceGenerator, HOLLY_VOICE_DESCRIPTION
17
+
18
+ # Initialize FastAPI
19
app = FastAPI(
    title="HOLLY TTS API",
    description="Self-hosted Maya1 TTS microservice for HOLLY AI",
    version="1.0.0",
)

# CORS middleware
# Allowed origins are read from the comma-separated CORS_ALLOW_ORIGINS
# environment variable, e.g. "https://a.example,https://b.example".
# When it is unset or empty, every origin is allowed ("*").
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # A wildcard origin must not be combined with credentials: together they
    # let any website issue credentialed cross-origin requests. Credentials
    # are therefore enabled only once the origins are listed explicitly.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
33
+
34
+ # Global voice generator (lazy load)
35
# Module-wide generator instance; stays None until get_generator() is first called.
voice_generator: Optional[HollyVoiceGenerator] = None


def get_generator() -> HollyVoiceGenerator:
    """Return the shared HollyVoiceGenerator, constructing it on first use."""
    global voice_generator
    if voice_generator is not None:
        return voice_generator
    voice_generator = HollyVoiceGenerator()
    return voice_generator
44
+
45
+
46
class TTSRequest(BaseModel):
    """Request body accepted by the speech-generation endpoints."""

    # Required text to speak; between 1 and 5000 characters.
    text: str = Field(
        ...,
        description="Text to synthesize",
        min_length=1,
        max_length=5000,
    )
    # Optional voice description; None when the client does not send one.
    description: Optional[str] = Field(
        default=None,
        description="Voice description (defaults to HOLLY's signature voice)",
    )
    # Sampling temperature, constrained to [0.1, 1.0].
    temperature: float = Field(
        default=0.4,
        ge=0.1,
        le=1.0,
        description="Sampling temperature",
    )
    # Nucleus sampling threshold, constrained to [0.1, 1.0].
    top_p: float = Field(
        default=0.9,
        ge=0.1,
        le=1.0,
        description="Nucleus sampling threshold",
    )
55
+
56
+
57
class TTSResponse(BaseModel):
    """Metadata describing the outcome of a TTS generation (no audio bytes)."""

    # Whether generation completed.
    success: bool
    # Length of the generated audio, in seconds.
    duration_seconds: float
    # Sample rate in Hz; 24000 unless the caller overrides it.
    sample_rate: int = 24000
    # Human-readable summary of the result.
    message: str
63
+
64
+
65
@app.on_event("startup")
async def startup_event():
    """Print the startup banners.

    No model is loaded here. To load it eagerly instead, call
    get_generator() in this hook.
    """
    banners = (
        "🚀 HOLLY TTS API starting up...",
        "✅ HOLLY TTS API ready!",
    )
    for banner in banners:
        print(banner)
72
+
73
+
74
@app.get("/")
async def root():
    """Return a static description of the service (name, status, model, voice)."""
    service_summary = dict(
        service="HOLLY TTS API",
        status="online",
        model="maya-research/maya1",
        version="1.0.0",
        voice="HOLLY (Female, 30s, American, confident, intelligent, warm)",
    )
    return service_summary
84
+
85
+
86
@app.get("/health")
async def health():
    """Report liveness plus whether the voice generator has been created yet."""
    model_loaded = voice_generator is not None
    return {"status": "healthy", "model_loaded": model_loaded}
90
+
91
+
92
# Sample rate (Hz) used when encoding and when computing durations below.
_SAMPLE_RATE = 24000

# Serializes all access to the shared generator. Generation runs in worker
# threads (see _generate_audio), so without this lock two requests could
# create or drive the generator at the same time.
_generation_lock = threading.Lock()


def _synthesize(request: TTSRequest):
    """Run one synchronous generation for ``request`` and return the audio.

    The lock is held for the whole call so that the lazy creation of the
    generator and the generation itself never overlap between threads.
    """
    with _generation_lock:
        return get_generator().generate(
            text=request.text,
            description=request.description,
            temperature=request.temperature,
            top_p=request.top_p,
        )


async def _generate_audio(request: TTSRequest):
    """Generate audio for ``request`` without blocking the event loop.

    ``generate`` is a synchronous call; invoking it directly inside an
    ``async def`` handler would stall every other request (including the
    health checks) until it returns, so it runs in the default executor.
    """
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _synthesize, request)


@app.post("/generate", response_class=Response)
async def generate_speech(request: TTSRequest):
    """
    Generate speech from text using HOLLY's voice

    Returns the audio as a WAV file written at 24 kHz, with the duration and
    sample rate repeated in the X-Duration-Seconds / X-Sample-Rate headers.

    Raises:
        HTTPException: status 500 if generation or WAV encoding fails.
    """
    try:
        audio = await _generate_audio(request)

        # Encode to WAV entirely in memory.
        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, audio, _SAMPLE_RATE, format='WAV')
        wav_bytes = wav_buffer.getvalue()

        return Response(
            content=wav_bytes,
            media_type="audio/wav",
            headers={
                "Content-Disposition": "inline; filename=holly_speech.wav",
                "X-Duration-Seconds": str(len(audio) / _SAMPLE_RATE),
                "X-Sample-Rate": str(_SAMPLE_RATE),
            },
        )

    except Exception as e:
        # Top-level boundary: surface any failure as a 500 and keep the
        # original exception chained for server-side tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}",
        ) from e


@app.post("/generate/info")
async def generate_speech_info(request: TTSRequest) -> TTSResponse:
    """
    Generate speech and return metadata (without audio bytes)
    Useful for testing and monitoring

    Raises:
        HTTPException: status 500 if generation fails.
    """
    try:
        audio = await _generate_audio(request)

        duration = len(audio) / _SAMPLE_RATE

        return TTSResponse(
            success=True,
            duration_seconds=duration,
            message=f"Generated {len(audio)} samples ({duration:.2f}s)",
        )

    except Exception as e:
        # Same boundary handling as /generate.
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}",
        ) from e
157
+
158
+
159
@app.get("/voice/info")
async def voice_info():
    """Describe HOLLY's voice profile, supported emotion tags and a sample request."""
    emotions = [
        "laugh", "laugh_harder", "chuckle", "giggle",
        "whisper", "sigh", "gasp",
        "angry", "cry",
        "confident", "warm", "intelligent",
    ]
    example = {
        "text": "Hello Hollywood! <chuckle> Let's build something amazing.",
        "description": HOLLY_VOICE_DESCRIPTION,
    }
    profile = {
        "voice_name": "HOLLY",
        "description": HOLLY_VOICE_DESCRIPTION,
        "model": "maya-research/maya1",
        "sample_rate": 24000,
        "supported_emotions": emotions,
        "usage_example": example,
    }
    return profile
178
+
179
+
180
if __name__ == "__main__":
    import uvicorn

    # Listen on every interface; the port comes from PORT (default 8000).
    server_options = {
        "host": "0.0.0.0",
        "port": int(os.environ.get("PORT", 8000)),
        "workers": 1,  # Maya1 is memory-intensive, use 1 worker
        "log_level": "info",
    }
    uvicorn.run("app:app", **server_options)