Spaces:

MCP-1st-Birthday
/

MedLLM-Agent

Running on Zero

App Files Files Community

Y Phung Nguyen committed 27 days ago

Commit

ef322a1

1 Parent(s): b4f06b4

Upd langdetect acc

Browse files

Files changed (2) hide show

model.py +0 -128
utils.py +62 -4

model.py DELETED Viewed

@@ -1,128 +0,0 @@
-"""
-Model inference functions that require GPU.
-These functions are tagged with @spaces.GPU(max_duration=120) to ensure
-they only run on GPU and don't waste GPU time on CPU operations.
-"""
-import os
-import torch
-import logging
-from transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
-    TextIteratorStreamer,
-    StoppingCriteria,
-    StoppingCriteriaList,
-)
-from llama_index.llms.huggingface import HuggingFaceLLM
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-import spaces
-import threading
-logger = logging.getLogger(__name__)
-# Model configurations
-MEDSWIN_MODELS = {
-    "MedSwin SFT": "MedSwin/MedSwin-7B-SFT",
-    "MedSwin KD": "MedSwin/MedSwin-7B-KD",
-    "MedSwin TA": "MedSwin/MedSwin-Merged-TA-SFT-0.7"
-}
-DEFAULT_MEDICAL_MODEL = "MedSwin TA"
-EMBEDDING_MODEL = "abhinand/MedEmbed-large-v0.1"
-HF_TOKEN = os.environ.get("HF_TOKEN")
-# Global model storage (shared with app.py)
-# These will be initialized in app.py and accessed here
-global_medical_models = {}
-global_medical_tokenizers = {}
-def initialize_medical_model(model_name: str):
-    """Initialize medical model (MedSwin) - download on demand"""
-    global global_medical_models, global_medical_tokenizers
-    if model_name not in global_medical_models or global_medical_models[model_name] is None:
-        logger.info(f"Initializing medical model: {model_name}...")
-        model_path = MEDSWIN_MODELS[model_name]
-        tokenizer = AutoTokenizer.from_pretrained(model_path, token=HF_TOKEN)
-        model = AutoModelForCausalLM.from_pretrained(
-            model_path,
-            device_map="auto",
-            trust_remote_code=True,
-            token=HF_TOKEN,
-            torch_dtype=torch.float16
-        )
-        global_medical_models[model_name] = model
-        global_medical_tokenizers[model_name] = tokenizer
-        logger.info(f"Medical model {model_name} initialized successfully")
-    return global_medical_models[model_name], global_medical_tokenizers[model_name]
-@spaces.GPU(max_duration=120)
-def get_llm_for_rag(temperature=0.7, max_new_tokens=256, top_p=0.95, top_k=50):
-    """Get LLM for RAG indexing (uses medical model) - GPU only"""
-    # Use medical model for RAG indexing instead of translation model
-    medical_model_obj, medical_tokenizer = initialize_medical_model(DEFAULT_MEDICAL_MODEL)
-    return HuggingFaceLLM(
-        context_window=4096,
-        max_new_tokens=max_new_tokens,
-        tokenizer=medical_tokenizer,
-        model=medical_model_obj,
-        generate_kwargs={
-            "do_sample": True,
-            "temperature": temperature,
-            "top_k": top_k,
-            "top_p": top_p
-        }
-    )
-@spaces.GPU(max_duration=120)
-def get_embedding_model():
-    """Get embedding model for RAG - GPU only"""
-    return HuggingFaceEmbedding(model_name=EMBEDDING_MODEL, token=HF_TOKEN)
-@spaces.GPU(max_duration=120)
-def generate_with_medswin(
-    medical_model_obj,
-    medical_tokenizer,
-    prompt: str,
-    max_new_tokens: int,
-    temperature: float,
-    top_p: float,
-    top_k: int,
-    penalty: float,
-    eos_token_id: int,
-    pad_token_id: int,
-    stop_event: threading.Event,
-    streamer: TextIteratorStreamer,
-    stopping_criteria: StoppingCriteriaList
-):
-    """
-    Generate text with MedSwin model - GPU only
-    This function only performs the actual model inference on GPU.
-    All other operations (prompt preparation, post-processing) should be done outside.
-    """
-    # Tokenize prompt (this is a CPU operation but happens here for simplicity)
-    # The actual GPU work is in model.generate()
-    inputs = medical_tokenizer(prompt, return_tensors="pt").to(medical_model_obj.device)
-    # Prepare generation kwargs
-    generation_kwargs = dict(
-        **inputs,
-        streamer=streamer,
-        max_new_tokens=max_new_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        top_k=top_k,
-        repetition_penalty=penalty,
-        do_sample=True,
-        stopping_criteria=stopping_criteria,
-        eos_token_id=eos_token_id,
-        pad_token_id=pad_token_id
-    )
-    # Run generation on GPU - this is the only GPU operation
-    medical_model_obj.generate(**generation_kwargs)

utils.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Utility functions for translation, language detection, and formatting"""
 import asyncio
-from langdetect import detect, LangDetectException
 from logger import logger
 from client import MCP_AVAILABLE, call_agent
 from config import GEMINI_MODEL_LITE
@@ -33,13 +34,70 @@ def format_prompt_manually(messages: list, tokenizer) -> str:
     return prompt
 def detect_language(text: str) -> str:
-    """Detect language of input text"""
     try:
-        lang = detect(text)
-        return lang
     except LangDetectException:
         return "en"
 def format_url_as_domain(url: str) -> str:

 """Utility functions for translation, language detection, and formatting"""
 import asyncio
+import re
+from langdetect import detect_langs, LangDetectException
 from logger import logger
 from client import MCP_AVAILABLE, call_agent
 from config import GEMINI_MODEL_LITE
     return prompt
+# Pure-ASCII samples shorter than this many characters are reported as
+# English by detect_language() without running the detector.
+MIN_TEXT_LENGTH_FOR_DETECTION = 12
+# Minimum probability the top langdetect candidate must have; below it
+# detect_language() falls back to "en".
+LANG_CONFIDENCE_THRESHOLD = 0.8
+# ASCII-character ratio compared against in the English-override check of
+# detect_language().
+ASCII_DOMINANCE_THRESHOLD = 0.97
+# Minimum share of a text's words that must be in ENGLISH_HINT_WORDS for
+# _looks_english() to return True.
+ENGLISH_HINT_RATIO = 0.2
+# Lower-case words that _looks_english() counts as evidence of English.
+ENGLISH_HINT_WORDS = {
+    "the", "and", "with", "for", "you", "your", "have", "has", "that", "this",
+    "pain", "blood", "pressure", "please", "what", "how", "can", "should", "need"
+}
+def _ascii_ratio(text: str) -> float:
+    """Return the share of characters in ``text`` with code points below 128.
+
+    Empty (or otherwise falsy) input counts as fully ASCII and yields 1.0.
+    """
+    if not text:
+        return 1.0
+    ascii_count = 0
+    for char in text:
+        if ord(char) <= 127:
+            ascii_count += 1
+    # ``text`` is non-empty here, so its length is at least 1.
+    return ascii_count / len(text)
+def _looks_english(text: str) -> bool:
+    """Report whether enough words in ``text`` are common English hint words.
+
+    Words are runs of ASCII letters and apostrophes taken from the lower-cased
+    text. The result is True when the fraction of them found in
+    ENGLISH_HINT_WORDS reaches ENGLISH_HINT_RATIO, and False when the text
+    contains no such words at all.
+    """
+    tokens = re.findall(r"[A-Za-z']+", text.lower())
+    total = len(tokens)
+    if total == 0:
+        return False
+    matches = len([token for token in tokens if token in ENGLISH_HINT_WORDS])
+    return matches / total >= ENGLISH_HINT_RATIO
 def detect_language(text: str) -> str:
+    """Detect language of input text with basic confidence heuristics.
+
+    Returns the language code of the first langdetect candidate, or "en" when
+    the text is empty or whitespace-only, is short and pure ASCII, detection
+    raises, no candidates come back, the candidate's probability is below
+    LANG_CONFIDENCE_THRESHOLD, or the English-hint override applies.
+    """
+    if not text:
+        return "en"
+    sample = text.strip()
+    if not sample:
+        return "en"
+    ascii_ratio = _ascii_ratio(sample)
+    has_non_ascii = ascii_ratio < 1.0
+    # Short pure-ASCII input is reported as English without running the
+    # detector; short input containing any non-ASCII character is still
+    # passed to the detector.
+    if len(sample) < MIN_TEXT_LENGTH_FOR_DETECTION and not has_non_ascii:
+        return "en"
     try:
+        detections = detect_langs(sample)
     except LangDetectException:
         return "en"
+    # Any other failure in the detector is logged at debug level and also
+    # mapped to English, so this function never raises from detection.
+    except Exception as exc:
+        logger.debug(f"[LANG-DETECT] Unexpected error, defaulting to English: {exc}")
+        return "en"
+    if not detections:
+        return "en"
+    # Only the first candidate is considered (presumably the most probable
+    # one; confirm the ordering against the langdetect documentation).
+    top = detections[0]
+    lang_code = top.lang
+    # A candidate without a ``prob`` attribute is treated as zero confidence.
+    confidence = getattr(top, "prob", 0.0)
+    if confidence < LANG_CONFIDENCE_THRESHOLD:
+        return "en"
+    if lang_code == "en":
+        return "en"
+    # English override for confident non-English results on text that looks
+    # English by word hints.
+    # NOTE(review): ``not has_non_ascii`` already implies ascii_ratio == 1.0,
+    # so the ASCII_DOMINANCE_THRESHOLD comparison never decides the outcome.
+    # Confirm whether text with a few non-ASCII characters was meant to
+    # qualify for this override.
+    if not has_non_ascii and ascii_ratio >= ASCII_DOMINANCE_THRESHOLD and _looks_english(sample):
+        logger.info(f"[LANG-DETECT] Overrode {lang_code} due to English heuristics (ascii_ratio={ascii_ratio:.2f})")
+        return "en"
+    return lang_code
 def format_url_as_domain(url: str) -> str: