#!/usr/bin/env python3
"""
Startup script to handle environment setup and pre-checks.

This script ensures proper directory permissions and environment setup
before starting the app.
"""

import os
import sys
import tempfile
from pathlib import Path


def _probe_writable(directory) -> None:
    """Create *directory* if needed and prove a file can be written in it.

    The probe is a uniquely named temporary file that is removed when the
    ``with`` block exits, so an existing file in the directory is never
    overwritten or deleted by the check.

    Args:
        directory: Path (str or Path) of the directory to test.

    Raises:
        OSError: If the directory cannot be created or written to.
    """
    Path(directory).mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(dir=directory, prefix=".write_probe_") as probe:
        probe.write(b"test")
        # Flush so that a write failure surfaces here instead of at close.
        probe.flush()


def setup_environment() -> None:
    """Setup environment variables and directories.

    For each cache-related variable that is not already set, the first
    candidate directory that passes the write probe is exported. If no
    candidate works, a directory named after the variable under the system
    temp dir is tried. Thread/parallelism variables are then given defaults
    without overriding values that are already present.
    """
    # Set cache directories to writable locations
    cache_dirs = {
        'SENTENCE_TRANSFORMERS_HOME': ['/tmp/sentence_transformers', '/data/sentence_transformers'],
        'VECTOR_PERSIST_DIR': ['/tmp/vector_db', '/data/vector_db'],
        'HF_HOME': ['/tmp/huggingface', '/data/huggingface'],
        'TRANSFORMERS_CACHE': ['/tmp/transformers', '/data/transformers'],
    }

    for env_var, candidates in cache_dirs.items():
        # Respect values that are already configured.
        if env_var in os.environ:
            continue

        for candidate in candidates:
            try:
                _probe_writable(candidate)
            except OSError:
                continue
            os.environ[env_var] = candidate
            print(f"✅ Set {env_var} to {candidate}")
            break
        else:
            # Fallback to system temp (reached only when no candidate worked)
            temp_dir = os.path.join(tempfile.gettempdir(), env_var.lower())
            try:
                _probe_writable(temp_dir)
            except Exception as e:
                # Best effort: report and leave the variable unset.
                print(f"❌ Could not set {env_var}: {e}")
            else:
                os.environ[env_var] = temp_dir
                print(f"⚠️ Fallback: Set {env_var} to {temp_dir}")

    # Ensure tokenizer libs don't attempt parallelism post-fork (prevents hangs)
    os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')

    # Cap thread usage for BLAS backends to avoid CPU thrashing on shared runners
    os.environ.setdefault('OMP_NUM_THREADS', '1')
    os.environ.setdefault('OPENBLAS_NUM_THREADS', '1')
    os.environ.setdefault('MKL_NUM_THREADS', '1')
    os.environ.setdefault('NUMEXPR_NUM_THREADS', '1')


def check_permissions() -> None:
    """Check and report on directory permissions.

    Probes the two key cache directories, ``/tmp`` and the current working
    directory, printing one status line per directory. Failures are reported
    but never raised, so the check cannot abort startup.
    """
    dirs_to_check = [
        os.environ.get('SENTENCE_TRANSFORMERS_HOME', '/tmp/sentence_transformers'),
        os.environ.get('VECTOR_PERSIST_DIR', '/tmp/vector_db'),
        '/tmp',
        '.',
    ]

    print("\n📋 Directory Permissions Check:")
    for dir_path in dirs_to_check:
        # An environment variable set to an empty string yields no path.
        if not dir_path:
            continue

        try:
            _probe_writable(dir_path)
        except Exception as e:
            print(f"❌ {dir_path} - not writable: {e}")
        else:
            print(f"✅ {dir_path} - writable")


def main() -> bool:
    """Main startup routine.

    Returns:
        Always ``True``; directory problems are reported on stdout only.
    """
    print("🚀 Starting CVE Fact Checker setup...")

    setup_environment()
    check_permissions()

    print("\n🎯 Environment setup complete!")
    print("📁 Key directories:")
    for key in ['SENTENCE_TRANSFORMERS_HOME', 'VECTOR_PERSIST_DIR']:
        print(f" {key}: {os.environ.get(key, 'Not set')}")

    return True


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)