Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,27 +14,27 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
| 14 |
# Define a writable directory for NLTK data in Hugging Face Spaces
|
| 15 |
NLTK_DATA_DIR = "/tmp/nltk_data"
|
| 16 |
|
| 17 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
if NLTK_DATA_DIR not in nltk.data.path:
|
| 19 |
-
nltk.data.path.
|
| 20 |
-
logging.info(f"
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
# This
|
| 24 |
try:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
logging.info("NLTK 'punkt' tokenizer
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
except Exception as e:
|
| 35 |
-
logging.error(f"Failed to download NLTK 'punkt' tokenizer: {e}")
|
| 36 |
-
# Re-raise the error as it's a critical dependency for the application to function.
|
| 37 |
-
raise
|
| 38 |
|
| 39 |
# --- Groq API Configuration ---
|
| 40 |
# IMPORTANT: It's highly recommended to set your GROQ_API_KEY as an environment variable
|
|
|
|
| 14 |
# Define a writable directory for NLTK data in Hugging Face Spaces
NLTK_DATA_DIR = "/tmp/nltk_data"

# Tokenizer resources the app needs. 'punkt_tab' is a separate NLTK package
# (not a sub-component shipped inside 'punkt'), so it must be requested
# explicitly; newer NLTK releases load it instead of the pickled 'punkt' model.
NLTK_REQUIRED_RESOURCES = ("punkt", "punkt_tab")

# Ensure the NLTK data directory exists
os.makedirs(NLTK_DATA_DIR, exist_ok=True)
logging.info(f"Ensured NLTK data directory exists: {NLTK_DATA_DIR}")

# Add this directory to NLTK's data path and prioritize it
if NLTK_DATA_DIR not in nltk.data.path:
    nltk.data.path.insert(0, NLTK_DATA_DIR)  # Use insert(0) to prioritize this path
    logging.info(f"Prioritized {NLTK_DATA_DIR} in NLTK data path.")

# Verify/download the tokenizer resources on every start so they are always
# available in the (ephemeral) data directory.
try:
    for _resource in NLTK_REQUIRED_RESOURCES:
        logging.info(f"Attempting to verify/download NLTK '{_resource}' resource...")
        # nltk.download() signals failure by returning False rather than
        # raising; raise_on_error=True turns a failed download into an
        # exception so the handler below actually fires.
        # quiet=False provides more verbose output during download in logs.
        nltk.download(
            _resource,
            download_dir=NLTK_DATA_DIR,
            quiet=False,
            raise_on_error=True,
        )
    logging.info("NLTK 'punkt' tokenizer confirmed (or downloaded) successfully.")
except Exception as e:
    logging.error(f"Critical error: Failed to download NLTK tokenizer resources ('punkt'/'punkt_tab'): {e}")
    # Re-raise the error as the application cannot proceed without this resource.
    raise
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# --- Groq API Configuration ---
|
| 40 |
# IMPORTANT: It's highly recommended to set your GROQ_API_KEY as an environment variable
|