Aduc-sdr-2_5s

Paused

App Files Files Community

x2XcarleX2x committed on Sep 26

Commit

8902d93

verified ·

1 Parent(s): 35ed6ca

Update app_wan.py

Browse files

Files changed (1) hide show

app_wan.py +63 -41

app_wan.py CHANGED Viewed

@@ -14,9 +14,14 @@ MAX_FRAMES_MODEL = 81
 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
-# === Importa o serviço de geração (manager) ===
 from aduc_framework.managers.wan_manager import WanManager
 wan_manager = WanManager()
 # === Wrapper da UI para o Serviço ===
 def ui_generate_video(
@@ -33,42 +38,57 @@ def ui_generate_video(
     duration_seconds,
     steps,
     guidance_scale,
-    guidance_scale_2,
     seed,
     randomize_seed,
     progress=gr.Progress(track_tqdm=True),
 ):
-    def to_int_safe(v, default=0):
-        try: return int(v)
-        except: return default
-    def to_float_safe(v, default=1.0):
-        try: return float(v)
-        except: return default
-    # Prepara a lista de imagens de condição
-    start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
-    items = [start_item]
-    if handle_image_pil is not None:
-        items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
-    items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])
-    # Chama o manager, que agora retorna 4 valores
-    video_path, current_seed = wan_manager.generate_video_from_conditions(
-        images_condition_items=items,
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        duration_seconds=float(duration_seconds),
-        steps=int(steps),
-        guidance_scale=float(guidance_scale),
-        guidance_scale_2=float(guidance_scale_2),
-        seed=int(seed),
-        randomize_seed=bool(randomize_seed),
-    )
-    return video_path, current_seed,
 # === Interface Gradio ===
 css = '''
 .fillable{max-width: 1100px !important}
 .dark .progress-text {color: white}
@@ -77,50 +97,51 @@ css = '''
 with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
     gr.Markdown("# Wan 2.2 Aduca-SDR")
     with gr.Row(elem_id="general_items"):
         with gr.Column(scale=2):
             with gr.Group():
                 with gr.Row():
-                    # Coluna: Start
                     with gr.Column():
                         start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                         start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
-                    # Coluna: Handle (opcional)
                     with gr.Column():
-                        handle_image = gr.Image(type="pil", label="Handle Image", sources=["upload", "clipboard"])
                         handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
                         handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
-                    # Coluna: End
                     with gr.Column():
-                        end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
                         end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
                         end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
                 prompt = gr.Textbox(
                     label="Prompt",
-                    info="Descreva a transição e a cena. Ex: 'a beautiful woman walking on the beach, cinematic'."
                 )
                 with gr.Accordion("Advanced Settings", open=False):
                     duration_seconds_input = gr.Slider(
                         minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
-                        label="Video Duration (seconds)",
                         info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames."
                     )
                     negative_prompt_input = gr.Textbox(
                         label="Negative Prompt",
-                        value=wan_manager.default_negative_prompt,
                         lines=3
                     )
-                    steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
                     guidance_scale_input = gr.Slider(
-                        minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (High Noise)"
                     )
                     guidance_scale_2_input = gr.Slider(
-                        minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (Low Noise)"
                     )
                     with gr.Row():
                         seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
@@ -138,6 +159,7 @@ with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
         prompt, negative_prompt_input, duration_seconds_input,
         steps_slider, guidance_scale_input, guidance_scale_2_input,
         seed_input, randomize_seed_checkbox,
     ]
     ui_outputs = [output_video, seed_input]

 MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
 MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
+# === Importa os serviços de geração (managers) ===
 from aduc_framework.managers.wan_manager import WanManager
+from aduc_framework.managers.wan_manager_s2v import WanManagerS2V
+print("Initializing managers...")
 wan_manager = WanManager()
+wan_manager_s2v = WanManagerS2V()
+print("Managers initialized.")
 # === Wrapper da UI para o Serviço ===
 def ui_generate_video(
     duration_seconds,
     steps,
     guidance_scale,
+    guidance_scale_2, # Usado apenas no I2V
     seed,
     randomize_seed,
+    audio_path,
     progress=gr.Progress(track_tqdm=True),
 ):
+    # <<< LÓGICA DE DIRECIONAMENTO >>>
+    if audio_path and os.path.exists(audio_path):
+        print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
+        video_path, current_seed = wan_manager_s2v.generate_video(
+            start_image=start_image_pil,
+            audio_path=audio_path,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            steps=int(steps),
+            guidance_scale=float(guidance_scale), # S2V usa apenas um guidance_scale
+            seed=int(seed),
+            randomize_seed=bool(randomize_seed),
+        )
+    else:
+        print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")
+        def to_int_safe(v, default=0):
+            try: return int(v)
+            except: return default
+        def to_float_safe(v, default=1.0):
+            try: return float(v)
+            except: return default
+        # Prepara a lista de imagens de condição para o I2V
+        start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
+        items = [start_item]
+        if handle_image_pil is not None:
+            items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
+        items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])
+        video_path, current_seed = wan_manager.generate_video_from_conditions(
+            images_condition_items=items,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            duration_seconds=float(duration_seconds),
+            steps=int(steps),
+            guidance_scale=float(guidance_scale),
+            guidance_scale_2=float(guidance_scale_2),
+            seed=int(seed),
+            randomize_seed=bool(randomize_seed),
+        )
+    return video_path, current_seed
 # === Interface Gradio ===
+# ... (o restante da UI permanece o mesmo, pois os inputs já estão lá)
 css = '''
 .fillable{max-width: 1100px !important}
 .dark .progress-text {color: white}
 with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
     gr.Markdown("# Wan 2.2 Aduca-SDR")
+    gr.Markdown("Forneça um arquivo de áudio para usar o modo **Speech-to-Video**. Deixe em branco para usar o modo **Image-to-Video** (interpolação).")
     with gr.Row(elem_id="general_items"):
         with gr.Column(scale=2):
             with gr.Group():
                 with gr.Row():
                     with gr.Column():
                         start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
                         start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
                     with gr.Column():
+                        handle_image = gr.Image(type="pil", label="Handle Image (I2V only)", sources=["upload", "clipboard"])
                         handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
                         handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
                     with gr.Column():
+                        end_image = gr.Image(type="pil", label="End Frame (I2V only)", sources=["upload", "clipboard"])
                         end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
                         end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
                 prompt = gr.Textbox(
                     label="Prompt",
+                    info="Descreva a cena ou a ação. Ex: 'a beautiful woman singing a song'."
                 )
+                audio_input = gr.Audio(type="filepath", label="Audio (Optional, for S2V mode)")
                 with gr.Accordion("Advanced Settings", open=False):
                     duration_seconds_input = gr.Slider(
                         minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
+                        label="Video Duration (I2V only)",
                         info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames."
                     )
                     negative_prompt_input = gr.Textbox(
                         label="Negative Prompt",
+                        value=wan_manager.default_negative_prompt, # Pode usar o mesmo default
                         lines=3
                     )
+                    steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=20, label="Inference Steps")
                     guidance_scale_input = gr.Slider(
+                        minimum=0.0, maximum=10.0, step=0.5, value=4.5, label="Guidance Scale"
                     )
                     guidance_scale_2_input = gr.Slider(
+                        minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (Low Noise, I2V only)"
                     )
                     with gr.Row():
                         seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
         prompt, negative_prompt_input, duration_seconds_input,
         steps_slider, guidance_scale_input, guidance_scale_2_input,
         seed_input, randomize_seed_checkbox,
+        audio_input,
     ]
     ui_outputs = [output_video, seed_input]