File size: 5,210 Bytes
010a7cc
 
 
 
 
fba9280
010a7cc
 
 
 
 
 
 
fba9280
010a7cc
 
 
 
 
 
 
 
 
 
 
 
ffd9da7
010a7cc
 
 
 
 
4e2d1c4
 
 
57900d5
 
1a3da98
823756e
2878eb1
518076a
808cbb9
 
9fa8180
 
4348ba8
33d1dd8
fba9280
 
010a7cc
f9ad7ef
010a7cc
 
94810f1
 
 
 
 
 
3561d5e
94810f1
1fcb490
94810f1
1fcb490
010a7cc
 
f0ae7e5
 
 
 
010a7cc
 
e396355
010a7cc
 
e396355
4e2d1c4
 
1ec3602
 
 
31010a8
7bbd5f8
5ada94d
 
4b66bfd
80f5a1b
 
 
 
5ada94d
c07a9c9
5ada94d
c07a9c9
5478056
c07a9c9
1316ea3
c07a9c9
 
1316ea3
 
33d1dd8
 
1316ea3
 
c07a9c9
 
7602bc8
385e8d9
 
 
f979f21
385e8d9
7602bc8
d27f64f
b68293c
91583b8
d27f64f
 
ed777aa
f8d6c86
b68293c
f8d6c86
 
 
 
 
 
 
ffd9da7
 
f8d6c86
91583b8
 
 
 
20d4e13
7602bc8
91583b8
cdd33e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# =============================================================================
# ADUC-SDR Video Suite — High-Perf Diffusers for 8× L40S (SM 8.9)
# CUDA 12.8 | PyTorch 2.8.0+cu128 | Ubuntu 22.04
# =============================================================================
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="Carlos Rodrigues dos Santos & Development Partner" \
      description="High-performance Diffusers stack with FA2/SDPA, 8×L40S" \
      version="4.4.0" \
      cuda_version="12.8.0" \
      python_version="3.10" \
      pytorch_version="2.8.0+cu128" \
      gpu_optimized_for="8x_NVIDIA_L40S"

# ---------------- Core environment ----------------
# Non-interactive apt, UTC timezone, UTF-8 locale, unbuffered Python without
# .pyc files, and pip with its cache and self-version check switched off.
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=UTC \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# GPU / compute: all devices visible, builds targeted at SM 8.9,
# devices enumerated in PCI bus order.
ENV NVIDIA_VISIBLE_DEVICES=all \
    TORCH_CUDA_ARCH_LIST="8.9" \
    CUDA_DEVICE_ORDER=PCI_BUS_ID \
    CUDA_DEVICE_MAX_CONNECTIONS=32

# Thread and parallel-job counts.
# NOTE(review): MAX_JOBS=160 is presumably read by the source builds below
# (e.g. flash-attn) — confirm the build host has the RAM for that many jobs.
ENV OMP_NUM_THREADS=8 \
    MKL_NUM_THREADS=8 \
    MAX_JOBS=160

# PyTorch CUDA allocator tuning and CUDA cache settings.
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,garbage_collection_threshold:0.8 \
    CUDA_LAUNCH_BLOCKING=0 \
    CUDA_CACHE_MAXSIZE=2147483648 \
    CUDA_CACHE_DISABLE=0

# Application home (also the working directory) and models location.
ENV APP_HOME=/app \
    MODELS_DIR=/app/models
WORKDIR ${APP_HOME}

# Create the /data cache tree and the real models directory, then expose the
# models directory at /app/models through a symlink.
RUN mkdir -p \
        /data \
        /data/.cache/huggingface \
        /home/user/.cache/models \
    && ln -sf /home/user/.cache/models /app/models




# ---------------- System packages & Python ----------------
# Build toolchain, VCS/download tools, ffmpeg and Python 3.10 with headers.
# The apt lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        git-lfs \
        gosu \
        ninja-build \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        tree \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Point both `python3` and `python` at the 3.10 interpreter, then upgrade pip.
RUN for name in python3 python; do \
        ln -sf /usr/bin/python3.10 "/usr/bin/${name}" || exit 1; \
    done \
    && python3 -m pip install -U pip


# ---------------- PyTorch cu128 (pinned) ----------------
# torch / torchvision / torchaudio are taken from the PyTorch cu128 wheel index.
RUN pip install \
        --index-url https://download.pytorch.org/whl/cu128 \
        torch==2.8.0+cu128 \
        torchvision==0.23.0+cu128 \
        torchaudio==2.8.0+cu128

# ---------------- Toolchain, Triton, FA2 (no bitsandbytes here) ----------------
# Python-side build helpers installed ahead of the Triton / flash-attn steps.
# The numpy requirement MUST be quoted: unquoted, the shell parses ">=1.24.4"
# as an output redirection, so pip receives a bare "numpy" (no version floor)
# and a stray file named "=1.24.4" is written into the working directory.
RUN pip install \
        packaging \
        ninja \
        cmake \
        pybind11 \
        scikit-build \
        cython \
        hf_transfer \
        "numpy>=1.24.4"

# Triton 3.x (without triton.ops)
# Drop any Triton already present (a failed uninstall is ignored), then
# install the pinned release without build isolation.
RUN { pip uninstall -y triton || true; } \
    && pip install --verbose --no-build-isolation triton==3.4.0

# FlashAttention 2.8.x
# Try each release from newest to oldest and stop at the first one that
# installs. If every attempt fails, the loop ends with the last pip failure
# status and the build step fails.
RUN for ver in 2.8.3 2.8.2 2.8.1 2.8.0.post2; do \
        pip install "flash-attn==${ver}" --no-build-isolation && break; \
    done

# Stable Diffusers/Transformers releases (no dev builds).
# The specifiers are quoted so the shell does not treat ">=" as a redirection if this line is re-enabled.
#RUN pip install --no-cache-dir "diffusers>=0.31.0" "transformers>=4.44.2" "accelerate>=0.34.2" "omegaconf>=2.3.0"

# Optional: your optimizations fork
#RUN pip install -U git+https://github.com/carlex22/diffusers-aduc-sdr

# ---------------- Application dependencies ----------------
# The manifest is copied on its own so this layer is only rebuilt when
# requirements.txt changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Upgrade bitsandbytes to the newest available release.
RUN pip install -U bitsandbytes


# Prebuilt custom wheels (apex, dropout_layer_norm) tagged cp310 / linux_x86_64.
# pip downloads the wheel URLs itself, so no curl install is needed here.
# The previous "apt-get install curl ... apt-get purge curl" sequence also
# removed the curl that the system-packages layer installs on purpose, and
# its autoremove could drop further packages; both steps are gone.
RUN echo "Instalando dependências .whl customizadas..." && \
    pip install --no-cache-dir \
        "https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/apex-0.1-cp310-cp310-linux_x86_64.whl" \
        "https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/dropout_layer_norm-0.1-cp310-cp310-linux_x86_64.whl"


# ====================================================================
# <<< BLOCO ATUALIZADO PARA INSTALAR LTX-VIDEO E Q8_KERNELS (com .whl) >>>
#RUN echo "Instalando q8_kernels (wheel) e LTX-Video (git)..." && \
    # 1. Instala q8_kernels a partir do seu arquivo .whl pré-compilado
    #pip install --no-cache-dir "https://huggingface.co/euIaxs22/Aduc-sdr/resolve/main/q8_kernels-0.0.5-cp310-cp310-linux_x86_64.whl" && \ 
    
    # 2. Instala a biblioteca LTX-Video a partir do fork (esta parte permanece igual)
    #git clone https://github.com/Lightricks/LTX-Video.git /data/LTX-Video && \
    #cd /data/LTX-Video && \
    #python -m venv env && \
    #source env/bin/activate  && \
    #python -m pip install -e .\[inference\] && \
    #cd ..
# ====================================================================

# Scripts and configs.
# WORKDIR is /app, so the destination has to be "./". The former "./app/..."
# destinations resolved to /app/app/*.sh, while the chmod below and the
# ENTRYPOINT/CMD all reference /app/*.sh.
COPY info.sh builder.sh start.sh entrypoint.sh ./

# Rest of the build context.
COPY . .

# Non-root runtime user (UID 1000) owning everything it runs or writes to.
# chown -R does not follow the /app/models symlink, so its target and the
# /data cache tree are listed explicitly; both were created by root.
# NOTE(review): assumes the app writes models/caches at runtime as appuser —
# confirm against entrypoint.sh / start.sh.
RUN useradd -m -u 1000 -s /bin/bash appuser && \
    chown -R appuser:appuser /app /data /home/user/.cache/models && \
    chmod 0755 /app/entrypoint.sh /app/start.sh /app/info.sh /app/builder.sh



# Persistent volume for HF Spaces.
VOLUME ["/data"]

# Cache locations under /data, plus Hugging Face runtime switches.
# NOTE(review): recent transformers releases warn that TRANSFORMERS_CACHE is
# deprecated in favour of HF_HOME — confirm whether it is still needed.
ENV HF_HOME=/data/.cache/huggingface \
    TORCH_HOME=/data/.cache/torch \
    HF_DATASETS_CACHE=/data/.cache/datasets \
    TRANSFORMERS_CACHE=/data/.cache/transformers \
    DIFFUSERS_CACHE=/data/.cache/diffusers \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    TOKENIZERS_PARALLELISM=false


# ---------------- Entry ----------------
# The container runs as the unprivileged user. Both instructions use exec
# form, so /app/start.sh is passed to /app/entrypoint.sh as its argument.
USER appuser
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["/app/start.sh"]