# video_service.py
import torch
import numpy as np
import random
import os
import yaml
from pathlib import Path
import imageio
import tempfile
import sys
import subprocess
import threading
import time
from huggingface_hub import hf_hub_download
# --- SETUP AND DEPENDENCY LOGIC ---
def run_setup():
    setup_script_path = "setup.py"
    if not os.path.exists(setup_script_path):
        print("WARNING: 'setup.py' script not found. Skipping dependency cloning.")
        return
    try:
        print("--- Running setup.py to make sure dependencies are present ---")
        subprocess.run([sys.executable, setup_script_path], check=True)
        print("--- Setup finished successfully ---")
    except subprocess.CalledProcessError as e:
        print(f"CRITICAL ERROR DURING SETUP: 'setup.py' failed with exit code {e.returncode}.")
        sys.exit(1)

DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"

if not LTX_VIDEO_REPO_DIR.exists():
    run_setup()
def add_deps_to_path():
    if not LTX_VIDEO_REPO_DIR.exists():
        raise FileNotFoundError(f"LTX-Video repository not found at '{LTX_VIDEO_REPO_DIR}'. Run the setup.")
    if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
        sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))

# The imports below live inside the cloned LTX-Video repository, so the repo
# directory must be on sys.path before they are executed.
add_deps_to_path()
# Model-specific imports
from inference import (
    create_ltx_video_pipeline, create_latent_upsampler,
    load_image_to_tensor_with_resize_and_crop, seed_everething,
    calculate_padding, load_media_file
)
# LTXMultiScalePipeline is required by the improve_texture (multi-scale) path below.
from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
# --- GPU DISTRIBUTION CONFIGURATION ---
GPU_MAPPING = [
    {'base': 'cuda:0', 'upscaler': 'cuda:2'},
    {'base': 'cuda:1', 'upscaler': 'cuda:3'}
]
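# Each mapping entry defines one worker: the diffusion pipeline runs on the
# 'base' GPU and its latent upsampler on the 'upscaler' GPU, so a single
# generation spreads its memory across two devices. The two entries above
# assume a 4-GPU host (see the device-count check in _ensure_models_are_loaded);
# adjust the mapping for a different GPU layout.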
class VideoService:
    def __init__(self):
        print("Initializing VideoService (lazy-loading mode)...")
        self.models_loaded = False
        self.workers = None
        self.config = self._load_config()
        self.models_dir = "downloaded_models"
        self.loading_lock = threading.Lock()  # Prevents multiple requests from starting the model load at the same time
    def _ensure_models_are_loaded(self):
        """Checks whether the models are loaded and loads them if they are not."""
        with self.loading_lock:
            if not self.models_loaded:
                print("First request received. Starting model loading...")
                if not torch.cuda.is_available() or torch.cuda.device_count() < 4:
                    raise RuntimeError(f"This service is configured for 4 GPUs, but only {torch.cuda.device_count()} were found.")
                self._download_model_files()
                self.workers = self._initialize_workers()
                self.models_loaded = True
                print(f"Models loaded successfully. {len(self.workers)} workers ready.")
    def _load_config(self):
        config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled.yaml"
        with open(config_file_path, "r") as file:
            return yaml.safe_load(file)

    def _download_model_files(self):
        Path(self.models_dir).mkdir(parents=True, exist_ok=True)
        LTX_REPO = "Lightricks/LTX-Video"
        print("Downloading model files (if needed)...")
        self.distilled_model_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["checkpoint_path"], local_dir=self.models_dir)
        self.spatial_upscaler_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"], local_dir=self.models_dir)
        print("Model download finished.")

    def _load_models_for_worker(self, base_device, upscaler_device):
        print(f"Loading base model onto {base_device} and upscaler onto {upscaler_device}")
        pipeline = create_ltx_video_pipeline(
            ckpt_path=self.distilled_model_path, precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"], device="cpu", enhance_prompt=False,
            prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
            prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
        )
        latent_upsampler = create_latent_upsampler(self.spatial_upscaler_path, device="cpu")
        pipeline.to(base_device)
        latent_upsampler.to(upscaler_device)
        return pipeline, latent_upsampler

    def _initialize_workers(self):
        workers = []
        for i, mapping in enumerate(GPU_MAPPING):
            print(f"--- Initializing Worker {i} ---")
            pipeline, latent_upsampler = self._load_models_for_worker(mapping['base'], mapping['upscaler'])
            workers.append({"id": i, "base_pipeline": pipeline, "latent_upsampler": latent_upsampler, "devices": mapping, "lock": threading.Lock()})
        return workers

    def _acquire_worker(self):
        # Poll the workers and grab the first one whose lock is free.
        while True:
            for worker in self.workers:
                if worker["lock"].acquire(blocking=False):
                    print(f"Worker {worker['id']} acquired for a new task.")
                    return worker
            time.sleep(0.1)
    def generate(self, prompt, negative_prompt, input_image_filepath=None, input_video_filepath=None,
                 height=512, width=704, mode="text-to-video", duration=2.0, frames_to_use=9,
                 seed=42, randomize_seed=True, guidance_scale=1.0,  # guidance_scale now actually overrides the YAML value
                 improve_texture=True, progress_callback=None):
        self._ensure_models_are_loaded()
        worker = self._acquire_worker()
        base_device = worker['devices']['base']
        upscaler_device = worker['devices']['upscaler']
        try:
            # Validation aligned with app-20.py
            if mode == "image-to-video" and not input_image_filepath:
                raise ValueError("An image path is required for image-to-video mode")
            if mode == "video-to-video" and not input_video_filepath:
                raise ValueError("A video path is required for video-to-video mode")
            used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
            seed_everething(used_seed)
            FPS = 30.0  # Aligned with app-20.py
            MAX_NUM_FRAMES = 257
            # The pipeline expects a frame count of the form 8*n + 1, clamped to [9, MAX_NUM_FRAMES].
            target_frames_ideal = duration * FPS
            target_frames_rounded = round(target_frames_ideal)
            if target_frames_rounded < 1:
                target_frames_rounded = 1
            n_val = round(float(target_frames_rounded - 1.0) / 8.0)
            actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
            actual_height = int(height)
            actual_width = int(width)
            # Spatial dimensions are padded up to the next multiple of 32.
            height_padded = (actual_height - 1) // 32 * 32 + 32
            width_padded = (actual_width - 1) // 32 * 32 + 32
            # Pad the frame count up to the next 8*n + 1 value, aligned with app-20.py.
            num_frames_padded = ((actual_num_frames - 2) // 8 + 1) * 8 + 1
            if num_frames_padded != actual_num_frames:
                print(f"Warning: actual_num_frames {actual_num_frames} and num_frames_padded {num_frames_padded} differ. Using num_frames_padded for pipeline.")
            padding_values = calculate_padding(actual_height, actual_width, height_padded, width_padded)
            pad_left, pad_right, pad_top, pad_bottom = padding_values

            # Base call kwargs aligned with app-20.py
            call_kwargs = {
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "height": height_padded,
                "width": width_padded,
                "num_frames": num_frames_padded,
                "frame_rate": int(FPS),
                "generator": torch.Generator(device=base_device).manual_seed(used_seed),
                "output_type": "pt",
                "conditioning_items": None,
                "media_items": None,
                "decode_timestep": self.config['decode_timestep'],
                "decode_noise_scale": self.config['decode_noise_scale'],
                "stochastic_sampling": self.config['stochastic_sampling'],
                "image_cond_noise_scale": 0.15,  # aligned with app-20.py
                "is_video": True,
                "vae_per_channel_normalize": True,
                "mixed_precision": (self.config['precision'] == "mixed_precision"),
                "offload_to_cpu": False,
                "enhance_prompt": False,
            }
            # Skip-layer strategy aligned with app-20.py
            stg_mode_str = self.config.get('stg_mode', 'attention_values')
            if stg_mode_str.lower() in ['stg_av', 'attention_values']:
                call_kwargs['skip_layer_strategy'] = SkipLayerStrategy.AttentionValues
            # ... (add the remaining elif branches as in app-20.py)
            # Conditioning for the different modes
            if mode == "image-to-video" and input_image_filepath:
                media_tensor = load_image_to_tensor_with_resize_and_crop(input_image_filepath, actual_height, actual_width)
                media_tensor = torch.nn.functional.pad(media_tensor, padding_values)
                call_kwargs['conditioning_items'] = [ConditioningItem(media_tensor.to(base_device), 0, 1.0)]
            elif mode == "video-to-video" and input_video_filepath:
                call_kwargs['media_items'] = load_media_file(media_path=input_video_filepath, height=actual_height, width=actual_width, max_frames=int(frames_to_use), padding=padding_values).to(base_device)
            result_images_tensor = None
            if improve_texture:
                # Exact alignment: use LTXMultiScalePipeline as in app-20.py
                active_latent_upsampler = worker['latent_upsampler']
                if not active_latent_upsampler:
                    raise ValueError("Spatial upscaler model not loaded; cannot use the multi-scale pipeline.")
                multi_scale_pipeline_obj = LTXMultiScalePipeline(worker['base_pipeline'], active_latent_upsampler)
                first_pass_args = self.config.get('first_pass', {}).copy()
                first_pass_args['guidance_scale'] = float(guidance_scale)  # override from the UI
                first_pass_args.pop('num_inference_steps', None)
                second_pass_args = self.config.get('second_pass', {}).copy()
                second_pass_args['guidance_scale'] = float(guidance_scale)  # override from the UI
                second_pass_args.pop('num_inference_steps', None)
                multi_scale_call_kwargs = call_kwargs.copy()
                multi_scale_call_kwargs.update({
                    "downscale_factor": self.config['downscale_factor'],
                    "first_pass": first_pass_args,
                    "second_pass": second_pass_args,
                })
                print(f"Calling multi-scale pipeline (eff. HxW {actual_height}x{actual_width}, frames {actual_num_frames}, padded {num_frames_padded}) on {base_device}")
                result_images_tensor = multi_scale_pipeline_obj(**multi_scale_call_kwargs).images
            else:
                # Single-pass path aligned with app-20.py
                single_pass_call_kwargs = call_kwargs.copy()
                first_pass_config_from_yaml = self.config.get('first_pass', {})
                single_pass_call_kwargs['timesteps'] = first_pass_config_from_yaml.get('timesteps')
                single_pass_call_kwargs['guidance_scale'] = float(guidance_scale)  # override from the UI
                single_pass_call_kwargs['stg_scale'] = first_pass_config_from_yaml.get('stg_scale')
                single_pass_call_kwargs['rescaling_scale'] = first_pass_config_from_yaml.get('rescaling_scale')
                single_pass_call_kwargs['skip_block_list'] = first_pass_config_from_yaml.get('skip_block_list')
                single_pass_call_kwargs.pop('num_inference_steps', None)
                single_pass_call_kwargs.pop('first_pass', None)
                single_pass_call_kwargs.pop('second_pass', None)
                single_pass_call_kwargs.pop('downscale_factor', None)
                print(f"Calling base pipeline (padded HxW {height_padded}x{width_padded}, frames {actual_num_frames}, padded {num_frames_padded}) on {base_device}")
                result_images_tensor = worker['base_pipeline'](**single_pass_call_kwargs).images
            if result_images_tensor is None:
                raise ValueError("Generation failed.")

            # Crop the padding away and save the video, aligned with app-20.py
            slice_h_end = -pad_bottom if pad_bottom > 0 else None
            slice_w_end = -pad_right if pad_right > 0 else None
            result_images_tensor = result_images_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
            video_np = result_images_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy()
            video_np = np.clip(video_np, 0, 1) * 255.0
            video_np = video_np.astype(np.uint8)
            temp_dir = tempfile.mkdtemp()
            output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
            try:
                with imageio.get_writer(output_video_path, fps=call_kwargs['frame_rate'], macro_block_size=1) as video_writer:
                    for frame_idx in range(video_np.shape[0]):
                        if progress_callback:
                            progress_callback(frame_idx / video_np.shape[0], desc="Saving video")
                        video_writer.append_data(video_np[frame_idx])
            except Exception as e:
                print(f"Error saving video with macro_block_size=1: {e}")
                # Fall back to an explicit FFMPEG writer if the default settings fail.
                with imageio.get_writer(output_video_path, fps=call_kwargs['frame_rate'], format='FFMPEG', codec='libx264', quality=8) as video_writer:
                    for frame_idx in range(video_np.shape[0]):
                        if progress_callback:
                            progress_callback(frame_idx / video_np.shape[0], desc="Saving video (FFMPEG fallback)")
                        video_writer.append_data(video_np[frame_idx])
            return output_video_path, used_seed
        except Exception as e:
            print(f"!!!!!!!! ERROR in Worker {worker['id']}: {e} !!!!!!!!")
            raise
        finally:
            print(f"Worker {worker['id']}: task finished. Clearing cache and releasing worker...")
            with torch.cuda.device(base_device):
                torch.cuda.empty_cache()
            with torch.cuda.device(upscaler_device):
                torch.cuda.empty_cache()
            worker['lock'].release()
# The service instance is created here, but the models are only loaded on the first request.
video_generation_service = VideoService()
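
# Minimal usage sketch (illustrative only; the __main__ guard, prompt text, and
# parameter values below are assumptions, not part of the original service):
if __name__ == "__main__":
    video_path, seed_used = video_generation_service.generate(
        prompt="A slow aerial shot over a foggy forest at sunrise",
        negative_prompt="blurry, distorted, low quality",
        mode="text-to-video",
        duration=2.0,
        height=512,
        width=704,
        guidance_scale=1.0,
        improve_texture=True,
    )
    print(f"Video written to {video_path} (seed {seed_used})")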