x2XcarleX2x committed on
Commit
8902d93
·
verified ·
1 Parent(s): 35ed6ca

Update app_wan.py

Browse files
Files changed (1) hide show
  1. app_wan.py +63 -41
app_wan.py CHANGED
@@ -14,9 +14,14 @@ MAX_FRAMES_MODEL = 81
14
  MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
15
  MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
16
 
17
- # === Importa o serviço de geração (manager) ===
18
  from aduc_framework.managers.wan_manager import WanManager
 
 
 
19
  wan_manager = WanManager()
 
 
20
 
21
  # === Wrapper da UI para o Serviço ===
22
  def ui_generate_video(
@@ -33,42 +38,57 @@ def ui_generate_video(
33
  duration_seconds,
34
  steps,
35
  guidance_scale,
36
- guidance_scale_2,
37
  seed,
38
  randomize_seed,
 
39
  progress=gr.Progress(track_tqdm=True),
40
  ):
41
- def to_int_safe(v, default=0):
42
- try: return int(v)
43
- except: return default
44
- def to_float_safe(v, default=1.0):
45
- try: return float(v)
46
- except: return default
47
-
48
- # Prepara a lista de imagens de condição
49
- start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
50
- items = [start_item]
51
- if handle_image_pil is not None:
52
- items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
53
- items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])
54
-
55
- # Chama o manager, que agora retorna 4 valores
56
- video_path, current_seed = wan_manager.generate_video_from_conditions(
57
- images_condition_items=items,
58
- prompt=prompt,
59
- negative_prompt=negative_prompt,
60
- duration_seconds=float(duration_seconds),
61
- steps=int(steps),
62
- guidance_scale=float(guidance_scale),
63
- guidance_scale_2=float(guidance_scale_2),
64
- seed=int(seed),
65
- randomize_seed=bool(randomize_seed),
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
-
69
- return video_path, current_seed,
70
 
71
  # === Interface Gradio ===
 
72
  css = '''
73
  .fillable{max-width: 1100px !important}
74
  .dark .progress-text {color: white}
@@ -77,50 +97,51 @@ css = '''
77
 
78
  with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
79
  gr.Markdown("# Wan 2.2 Aduca-SDR")
 
 
80
 
81
  with gr.Row(elem_id="general_items"):
82
  with gr.Column(scale=2):
83
  with gr.Group():
84
  with gr.Row():
85
- # Coluna: Start
86
  with gr.Column():
87
  start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
88
  start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
89
 
90
- # Coluna: Handle (opcional)
91
  with gr.Column():
92
- handle_image = gr.Image(type="pil", label="Handle Image", sources=["upload", "clipboard"])
93
  handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
94
  handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
95
 
96
- # Coluna: End
97
  with gr.Column():
98
- end_image = gr.Image(type="pil", label="End Frame", sources=["upload", "clipboard"])
99
  end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
100
  end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
101
 
102
  prompt = gr.Textbox(
103
  label="Prompt",
104
- info="Descreva a transição e a cena. Ex: 'a beautiful woman walking on the beach, cinematic'."
105
  )
106
 
 
 
107
  with gr.Accordion("Advanced Settings", open=False):
108
  duration_seconds_input = gr.Slider(
109
  minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
110
- label="Video Duration (seconds)",
111
  info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames."
112
  )
113
  negative_prompt_input = gr.Textbox(
114
  label="Negative Prompt",
115
- value=wan_manager.default_negative_prompt,
116
  lines=3
117
  )
118
- steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=8, label="Inference Steps")
119
  guidance_scale_input = gr.Slider(
120
- minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (High Noise)"
121
  )
122
  guidance_scale_2_input = gr.Slider(
123
- minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (Low Noise)"
124
  )
125
  with gr.Row():
126
  seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
@@ -138,6 +159,7 @@ with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
138
  prompt, negative_prompt_input, duration_seconds_input,
139
  steps_slider, guidance_scale_input, guidance_scale_2_input,
140
  seed_input, randomize_seed_checkbox,
 
141
  ]
142
  ui_outputs = [output_video, seed_input]
143
 
 
14
  MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
15
  MAX_DURATION = round(MAX_FRAMES_MODEL / FIXED_FPS, 1)
16
 
17
+ # === Importa os serviços de geração (managers) ===
18
  from aduc_framework.managers.wan_manager import WanManager
19
+ from aduc_framework.managers.wan_manager_s2v import WanManagerS2V
20
+
21
+ print("Initializing managers...")
22
  wan_manager = WanManager()
23
+ wan_manager_s2v = WanManagerS2V()
24
+ print("Managers initialized.")
25
 
26
  # === Wrapper da UI para o Serviço ===
27
  def ui_generate_video(
 
38
  duration_seconds,
39
  steps,
40
  guidance_scale,
41
+ guidance_scale_2, # Usado apenas no I2V
42
  seed,
43
  randomize_seed,
44
+ audio_path,
45
  progress=gr.Progress(track_tqdm=True),
46
  ):
47
+ # <<< LÓGICA DE DIRECIONAMENTO >>>
48
+ if audio_path and os.path.exists(audio_path):
49
+ print("Audio file provided. Redirecting to Speech-to-Video (S2V) manager.")
50
+ video_path, current_seed = wan_manager_s2v.generate_video(
51
+ start_image=start_image_pil,
52
+ audio_path=audio_path,
53
+ prompt=prompt,
54
+ negative_prompt=negative_prompt,
55
+ steps=int(steps),
56
+ guidance_scale=float(guidance_scale), # S2V usa apenas um guidance_scale
57
+ seed=int(seed),
58
+ randomize_seed=bool(randomize_seed),
59
+ )
60
+ else:
61
+ print("No audio file provided. Using Image-to-Video (I2V) interpolation manager.")
62
+ def to_int_safe(v, default=0):
63
+ try: return int(v)
64
+ except: return default
65
+ def to_float_safe(v, default=1.0):
66
+ try: return float(v)
67
+ except: return default
68
+
69
+ # Prepara a lista de imagens de condição para o I2V
70
+ start_item = [start_image_pil, to_int_safe(start_frame_text, 0), 1.0]
71
+ items = [start_item]
72
+ if handle_image_pil is not None:
73
+ items.append([handle_image_pil, to_int_safe(handle_frame_text, 17), to_float_safe(handle_peso, 1.0)])
74
+ items.append([end_image_pil, to_int_safe(end_frame_text, MAX_FRAMES_MODEL - 1), to_float_safe(end_peso, 1.0)])
75
+
76
+ video_path, current_seed = wan_manager.generate_video_from_conditions(
77
+ images_condition_items=items,
78
+ prompt=prompt,
79
+ negative_prompt=negative_prompt,
80
+ duration_seconds=float(duration_seconds),
81
+ steps=int(steps),
82
+ guidance_scale=float(guidance_scale),
83
+ guidance_scale_2=float(guidance_scale_2),
84
+ seed=int(seed),
85
+ randomize_seed=bool(randomize_seed),
86
+ )
87
 
88
+ return video_path, current_seed
 
89
 
90
  # === Interface Gradio ===
91
+ # ... (o restante da UI permanece o mesmo, pois os inputs já estão lá)
92
  css = '''
93
  .fillable{max-width: 1100px !important}
94
  .dark .progress-text {color: white}
 
97
 
98
  with gr.Blocks(theme=gr.themes.Glass(), css=css) as app:
99
  gr.Markdown("# Wan 2.2 Aduca-SDR")
100
+ gr.Markdown("Forneça um arquivo de áudio para usar o modo **Speech-to-Video**. Deixe em branco para usar o modo **Image-to-Video** (interpolação).")
101
+
102
 
103
  with gr.Row(elem_id="general_items"):
104
  with gr.Column(scale=2):
105
  with gr.Group():
106
  with gr.Row():
 
107
  with gr.Column():
108
  start_image = gr.Image(type="pil", label="Start Frame", sources=["upload", "clipboard"])
109
  start_frame_tb = gr.Textbox(label="Start Frame Index", value="0", interactive=False)
110
 
 
111
  with gr.Column():
112
+ handle_image = gr.Image(type="pil", label="Handle Image (I2V only)", sources=["upload", "clipboard"])
113
  handle_frame_tb = gr.Textbox(label="Handle Frame Index", value="17")
114
  handle_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Handle Weight")
115
 
 
116
  with gr.Column():
117
+ end_image = gr.Image(type="pil", label="End Frame (I2V only)", sources=["upload", "clipboard"])
118
  end_frame_tb = gr.Textbox(label="End Frame Index", value=str(MAX_FRAMES_MODEL - 1), interactive=False)
119
  end_peso_sl = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="End Weight")
120
 
121
  prompt = gr.Textbox(
122
  label="Prompt",
123
+ info="Descreva a cena ou a ação. Ex: 'a beautiful woman singing a song'."
124
  )
125
 
126
+ audio_input = gr.Audio(type="filepath", label="Audio (Optional, for S2V mode)")
127
+
128
  with gr.Accordion("Advanced Settings", open=False):
129
  duration_seconds_input = gr.Slider(
130
  minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.2,
131
+ label="Video Duration (I2V only)",
132
  info=f"Será ajustado para o formato 4n+1. Mín: {MIN_FRAMES_MODEL} frames, Máx: {MAX_FRAMES_MODEL} frames."
133
  )
134
  negative_prompt_input = gr.Textbox(
135
  label="Negative Prompt",
136
+ value=wan_manager.default_negative_prompt, # Pode usar o mesmo default
137
  lines=3
138
  )
139
+ steps_slider = gr.Slider(minimum=1, maximum=40, step=1, value=20, label="Inference Steps")
140
  guidance_scale_input = gr.Slider(
141
+ minimum=0.0, maximum=10.0, step=0.5, value=4.5, label="Guidance Scale"
142
  )
143
  guidance_scale_2_input = gr.Slider(
144
+ minimum=0.0, maximum=10.0, step=0.5, value=1.0, label="Guidance Scale (Low Noise, I2V only)"
145
  )
146
  with gr.Row():
147
  seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
 
159
  prompt, negative_prompt_input, duration_seconds_input,
160
  steps_slider, guidance_scale_input, guidance_scale_2_input,
161
  seed_input, randomize_seed_checkbox,
162
+ audio_input,
163
  ]
164
  ui_outputs = [output_video, seed_input]
165