# =============================================================================
# ADUC-SDR Video Suite — High-Perf Diffusers for 8× L40S (SM 8.9)
# CUDA 12.8 | PyTorch 2.8.0+cu128 | Ubuntu 22.04
# =============================================================================
# Base image: CUDA 12.8.0 development image on Ubuntu 22.04.
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="Carlos Rodrigues dos Santos & Development Partner" \
      description="High-performance Diffusers stack with FA2/SDPA, 8×L40S" \
      version="4.4.0" \
      cuda_version="12.8.0" \
      python_version="3.10" \
      pytorch_version="2.8.0+cu128" \
      gpu_optimized_for="8x_NVIDIA_L40S"

# ---------------- Core env & caches ----------------
# DEBIAN_FRONTEND only matters while apt-get runs during the build, so it is a
# build ARG (still exported to every later RUN step in this stage) instead of
# an ENV that would persist into the runtime environment of the image.
ARG DEBIAN_FRONTEND=noninteractive

# Locale, Python and pip defaults that are kept in the final image.
ENV TZ=UTC LANG=C.UTF-8 LC_ALL=C.UTF-8 \
    PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1

# GPU/Compute: device indices 0-7 are listed explicitly, ordered by PCI bus ID,
# and the CUDA architecture list is restricted to 8.9.
ENV NVIDIA_VISIBLE_DEVICES=all \
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
    CUDA_DEVICE_ORDER=PCI_BUS_ID \
    CUDA_DEVICE_MAX_CONNECTIONS=32 \
    TORCH_CUDA_ARCH_LIST="8.9"

# Threads: OpenMP/MKL thread counts and the MAX_JOBS setting
# (presumably consumed by the native extension builds below - confirm).
ENV OMP_NUM_THREADS=8 \
    MKL_NUM_THREADS=8 \
    MAX_JOBS=160

# Allocator and CUDA cache settings (CUDA_CACHE_MAXSIZE is 2 GiB in bytes).
ENV PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:512,garbage_collection_threshold:0.8" \
    CUDA_LAUNCH_BLOCKING=0 \
    CUDA_CACHE_DISABLE=0 \
    CUDA_CACHE_MAXSIZE=2147483648

# Application home: every later relative path is resolved against /app.
ENV APP_HOME=/app
WORKDIR ${APP_HOME}

# Model storage: /app/models is a symlink; the real directory is
# /home/user/.cache/models, created here.
ENV MODELS_DIR=/app/models
RUN mkdir -p /home/user/.cache/models \
 && ln -sf /home/user/.cache/models /app/models

# ---------------- System packages & Python ----------------
# Packages are listed one per line in alphabetical order; the apt lists are
# removed in the same layer that created them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        git-lfs \
        gosu \
        ninja-build \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        tree \
        wget \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Point both `python3` and `python` at Python 3.10, then upgrade pip.
RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 \
 && ln -sf /usr/bin/python3.10 /usr/bin/python \
 && python3 -m pip install --upgrade pip


# ---------------- PyTorch cu128 (pinned) ----------------
# torch, torchvision and torchaudio are all resolved from the PyTorch cu128
# wheel index rather than from PyPI.
RUN pip install \
        torch==2.8.0+cu128 \
        torchvision==0.23.0+cu128 \
        torchaudio==2.8.0+cu128 \
        --index-url https://download.pytorch.org/whl/cu128

# ---------------- Toolchain, Triton, FA2 (no bitsandbytes in this step) ----------------
# Build helpers plus a pinned numpy.
RUN pip install \
        cmake \
        cython \
        hf_transfer \
        ninja \
        numpy==1.24.4 \
        packaging \
        pybind11 \
        scikit-build

# Triton 3.x (no triton.ops): remove any existing copy (a failed uninstall is
# ignored), then install the pinned release.
RUN (pip uninstall -y triton || true) \
 && pip install -v --no-build-isolation triton==3.4.0

# FlashAttention 2.8.x: try the pinned releases from newest to oldest and stop
# at the first one that installs. If every candidate fails, the loop ends with
# the last pip failure status and the build step fails.
RUN for fa_version in 2.8.3 2.8.2 2.8.1 2.8.0.post2; do \
        pip install "flash-attn==${fa_version}" --no-build-isolation && break; \
    done

# Stable (non-dev) releases of Diffusers, Transformers and companions, all pinned.
RUN pip install --no-cache-dir \
        accelerate==0.34.2 \
        diffusers==0.31.0 \
        omegaconf==2.3.0 \
        transformers==4.44.2

# Optional: the maintainer's optimization fork. No ref is given, so pip
# installs whatever the repository's default branch currently holds.
# NOTE(review): unpinned - consider pinning to a commit for reproducible builds.
RUN pip install --upgrade "git+https://github.com/carlex22/diffusers-aduc-sdr"

# ---------------- Application dependencies ----------------
# Only the manifest is copied before the install, so this layer is reused
# until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# bitsandbytes: installed/upgraded to the newest available release (unpinned).
RUN pip install -U bitsandbytes


# Prebuilt custom wheels (apex, dropout_layer_norm) for CPython 3.10 / linux_x86_64.
# pip downloads the wheel URLs itself, so no apt packages are installed here.
# The previous install-then-purge of curl in this step removed the curl binary
# that the system-packages step earlier in this file installs on purpose.
# NOTE(review): these wheels are fetched without checksum verification.
RUN echo "Instalando dependências .whl customizadas..." && \
    pip install --no-cache-dir \
        "https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl" \
        "https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/dropout_layer_norm-0.1-cp310-cp310-linux_x86_64.whl"

# ====================================================================


# Scripts and configs.
# The working directory is already /app, so the destination must be "./".
# The former "./app/<name>" destinations resolved to /app/app/<name>, leaving
# stray copies outside the /app/*.sh paths used by chmod and ENTRYPOINT below.
COPY info.sh builder.sh start.sh entrypoint.sh ./


# Copy the rest of the build context into the working directory (/app).
COPY . .

# Unprivileged runtime user with a fixed UID of 1000.
RUN useradd -m -u 1000 -s /bin/bash appuser

# Hand the application tree to the runtime user and make the scripts executable.
# /app/models is a symlink to /home/user/.cache/models, and a recursive chown
# of /app does not traverse symlinks, so the real directory is listed
# explicitly; otherwise it stays root-owned and appuser cannot write models.
RUN chown -R appuser:appuser /app /home/user/.cache/models && \
    chmod 0755 /app/entrypoint.sh /app/start.sh /app/info.sh /app/builder.sh

#USER appuser


# Persistent volume declaration for HF Spaces (currently disabled)
#VOLUME /data

# Cache locations under /data, followed by two Hugging Face runtime switches.
ENV HF_HOME=/data/.cache/huggingface \
    TORCH_HOME=/data/.cache/torch \
    HF_DATASETS_CACHE=/data/.cache/datasets \
    TRANSFORMERS_CACHE=/data/.cache/transformers \
    DIFFUSERS_CACHE=/data/.cache/diffusers \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    TOKENIZERS_PARALLELISM=false


#VOLUME ["/data/.cache/huggingface/hub"]

# ---------------- Entry ----------------
# The container runs as the unprivileged user. The entrypoint script is
# started with the default command (/app/start.sh) as its argument.
USER appuser
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["/app/start.sh"]