FROM debian:bookworm-slim AS builder

# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    build-essential \
    cmake \
    ca-certificates \
    libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Clone and build llama.cpp with CPU-specific optimizations disabled
# (produces a portable baseline binary that also runs on older x86-64 CPUs)
WORKDIR /build
ARG CACHEBUST=4
RUN git clone https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    cmake -B build -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=OFF \
        -DGGML_AVX2=OFF \
        -DGGML_AVX=OFF \
        -DGGML_FMA=OFF \
        -DGGML_F16C=OFF && \
    cmake --build build --config Release --target llama-server -j1 && \
    echo "=== Binary dependencies ===" && \
    ldd build/bin/llama-server || true

# Runtime stage
FROM debian:bookworm-slim

# Install runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    libcurl4 \
    ca-certificates \
    libgomp1 \
    libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Copy the llama-server binary and its shared libraries from the builder
# (the *.so* glob matches both versioned and unversioned library names)
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /build/llama.cpp/build/bin/*.so* /usr/local/lib/

# Refresh the shared-library cache so the copied libraries are found
RUN ldconfig

# Install the Python runtime and pip
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages (FastAPI app dependencies)
RUN pip3 install --no-cache-dir --break-system-packages \
    fastapi uvicorn requests pydantic duckduckgo-search beautifulsoup4 lxml

# Create a non-root user
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/.cache/llama.cpp && \
    chown -R user:user /home/user

# Copy application code
COPY --chown=user:user app.py /home/user/app.py

USER user
WORKDIR /home/user

# Set environment variables
ENV HOME=/home/user \
    LLAMA_CACHE=/home/user/.cache/llama.cpp \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1

EXPOSE 7860

# Start the FastAPI app (which manages llama-server internally)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
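
# Example build/run, as a sketch (the image tag "llama-fastapi" is illustrative):
#   docker build -t llama-fastapi .
#   docker run -p 7860:7860 llama-fastapi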