|
|
from pydantic import BaseModel, Field, field_validator, model_validator |
|
|
from typing import Union, List, Annotated, Dict |
|
|
import re |
|
|
|
|
|
def clean_text(text: str) -> str:
    """Return *text* with HTML tags and URLs removed and whitespace normalized.

    Processing order:

    1. Remove HTML/XML-style tags (``<...>`` on a single line).
    2. Remove ``http(s)://`` and ``www.`` URLs.
    3. Collapse every run of whitespace (newlines included) to one space
       and trim both ends.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string; may be empty if nothing but markup, URLs or
        whitespace was present.
    """
    # Tags must go first: the URL pattern consumes every non-space character
    # after the scheme, so for '<a href="https://x.com">link</a>' it would
    # swallow the closing '">' and leave an unterminated '<a href="' that the
    # tag pattern can no longer match.
    text = re.sub(r'<.*?>', '', text)

    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # \s+ already covers '\n', so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text).strip()
|
|
|
|
|
class TextInput(BaseModel):
    """Request payload carrying a single text or a list of texts.

    Validation happens in two stages:

    1. ``validate_text`` rejects empty or whitespace-only raw input.
    2. ``clean_text_after`` normalizes every text with ``clean_text`` and
       rejects any text that is empty once URLs and markup are removed.
    """

    text: Annotated[
        Union[str, List[str]],
        Field(..., title="Input text(s)", description="Single string or list of strings"),
    ]

    @field_validator("text")
    @classmethod
    def validate_text(cls, value):
        """Reject empty raw input.

        Args:
            value: The already type-validated field value.

        Returns:
            The stripped string, or the list unchanged.

        Raises:
            ValueError: If the string is empty/whitespace-only, the list is
                empty, or a list item is not a non-empty string.
            TypeError: If the value is neither a string nor a list.
        """
        if isinstance(value, str):
            value = value.strip()
            if not value:
                raise ValueError("String input cannot be empty.")
        elif isinstance(value, list):
            if not value:
                raise ValueError("List input cannot be empty.")
            for i, v in enumerate(value):
                if not isinstance(v, str) or not v.strip():
                    raise ValueError(f"Item {i} in list is not a valid non-empty string.")
        else:
            # Defensive only: the field annotation should already have
            # restricted the value to str or list before this validator runs.
            raise TypeError("Input must be a string or a list of strings.")
        return value

    @model_validator(mode="after")
    def clean_text_after(self):
        """Clean the text(s) and make sure nothing became empty.

        The raw-input check in ``validate_text`` cannot catch inputs made up
        only of URLs or markup, because those are removed here. The emptiness
        check is therefore repeated on the cleaned result.

        Returns:
            The model instance with ``text`` replaced by its cleaned form.

        Raises:
            ValueError: If a text is empty after cleaning.
        """
        if isinstance(self.text, str):
            cleaned = clean_text(self.text)
            if not cleaned:
                raise ValueError("String input is empty after cleaning.")
        else:
            cleaned = [clean_text(item) for item in self.text]
            for i, item in enumerate(cleaned):
                if not item:
                    raise ValueError(f"Item {i} in list is empty after cleaning.")
        self.text = cleaned
        return self

    # Example payloads attached to the generated JSON schema.
    model_config = {
        "json_schema_extra": {
            "examples": [
                {"text": "Where can I get a new water connection?"},
                {"text": ["Where can I get a new water connection?", "My streetlight is broken."]},
            ]
        }
    }
|
|
|
|
|
|
|
|
|
|
|
class UrgencyClassificationOutput(BaseModel):
    # Response schema for one urgency classification result.
    # All three fields are required; `confidence` is constrained to [0, 1].

    label: Annotated[
        str,
        Field(description="Top predicted urgency label"),
    ]
    confidence: Annotated[
        float,
        Field(ge=0, le=1, description="Confidence score for top label"),
    ]
    scores: Annotated[
        Dict[str, float],
        Field(description="All label confidence scores"),
    ]
|
|
|