from typing import Union, List, Annotated, Dict from pydantic import BaseModel, Field, field_validator, model_validator import re # Text cleaning function def clean_text(text: str) -> str: """Clean grievance text by removing URLs, HTML tags, extra whitespace.""" text = re.sub(r'https?://\S+|www\.\S+', '', text) # Remove URLs text = re.sub(r'<.*?>', '', text) # Remove HTML tags text = re.sub(r'\n', ' ', text) # Replace newlines with space text = re.sub(r'\s+', ' ', text).strip() # Reduce multiple spaces return text # pydantic classes class TextInput(BaseModel): text: Annotated[ Union[str, List[str]], Field( ..., title="Input text(s)", description="A single string or a list of non-empty strings representing user input." ) ] # Validator to ensure non-empty strings in both str and list[str] forms @field_validator("text") def validate_text(cls, value): if isinstance(value, str): value = value.strip() if not value: raise ValueError("String input cannot be empty.") elif isinstance(value, list): if not value: raise ValueError("List input cannot be empty.") for i, v in enumerate(value): if not isinstance(v, str) or not v.strip(): raise ValueError(f"Item {i} in list is not a valid non-empty string.") else: raise TypeError("Input must be a string or a list of strings.") return value @model_validator(mode="after") def clean_texts(self): if isinstance(self.text, str): self.text = clean_text(self.text) else: self.text = [clean_text(t) for t in self.text] return self # Correct place for OpenAPI examples in Pydantic v2 model_config = { "json_schema_extra": { "examples": [ { "text": "Where can I get a new water connection?" }, { "text": [ "Where can I get a new water connection?", "My streetlight is broken." ] } ] } } class ClassificationOutput(BaseModel): label: str = Field(..., description="Top predicted label") confidence: float = Field(..., ge=0, le=1, description="Confidence score") scores: Dict[str, float] = Field(..., description="All label confidence scores")