FROM debian:bookworm-slim AS builder

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    build-essential \
    cmake \
    ca-certificates \
    libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Clone and build llama.cpp with CPU-specific optimizations disabled
# (produces a portable baseline binary that also runs on older x86-64 CPUs)
WORKDIR /build
ARG CACHEBUST=4
RUN git clone https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    cmake -B build -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=OFF \
        -DGGML_AVX2=OFF \
        -DGGML_AVX=OFF \
        -DGGML_FMA=OFF \
        -DGGML_F16C=OFF && \
    cmake --build build --config Release --target llama-server -j1 && \
    echo "=== Binary dependencies ===" && \
    ldd build/bin/llama-server || true

# Runtime stage
FROM debian:bookworm-slim

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libcurl4 \
    ca-certificates \
    libgomp1 \
    libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Copy the llama-server binary and its shared libraries from the builder
# (the *.so* glob matches both versioned and unversioned library names)
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /build/llama.cpp/build/bin/*.so* /usr/local/lib/

# Refresh the shared-library cache so the copied libraries are found
RUN ldconfig

# Install the Python runtime and pip
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages (FastAPI app dependencies)
RUN pip3 install --no-cache-dir --break-system-packages \
    fastapi uvicorn requests pydantic duckduckgo-search beautifulsoup4 lxml

# Create a non-root user
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/.cache/llama.cpp && \
    chown -R user:user /home/user

# Copy application code
COPY --chown=user:user app.py /home/user/app.py

USER user
WORKDIR /home/user

# Set environment variables
ENV HOME=/home/user \
    LLAMA_CACHE=/home/user/.cache/llama.cpp \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1

EXPOSE 7860

# Start the FastAPI app (which manages llama-server internally)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
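
# Example build/run, as a sketch (the image tag "llama-fastapi" is illustrative):
#   docker build -t llama-fastapi .
#   docker run -p 7860:7860 llama-fastapi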