Spaces:
Paused
Paused
Update fish_speech/models/text2semantic/inference.py
Browse files
fish_speech/models/text2semantic/inference.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import os
|
| 2 |
import queue
|
| 3 |
import threading
|
|
@@ -135,7 +136,8 @@ def decode_one_token_ar(
|
|
| 135 |
layer.attention.kv_cache.k_cache.fill_(0)
|
| 136 |
layer.attention.kv_cache.v_cache.fill_(0)
|
| 137 |
|
| 138 |
-
input_pos = torch.tensor([0], device=hidden_states.device, dtype=torch.long)
|
|
|
|
| 139 |
model.forward_generate_fast(hidden_states, input_pos)
|
| 140 |
a = codebooks[0] - model.tokenizer.semantic_begin_id
|
| 141 |
a[a < 0] = 0
|
|
@@ -143,9 +145,8 @@ def decode_one_token_ar(
|
|
| 143 |
codebooks.append(a)
|
| 144 |
|
| 145 |
for codebook_idx in range(1, model.config.num_codebooks):
|
| 146 |
-
input_pos = torch.tensor(
|
| 147 |
-
    [codebook_idx], dtype=torch.long
|
| 148 |
-
).to(hidden_states.device)
|
| 149 |
logits = model.forward_generate_fast(hidden_states, input_pos)
|
| 150 |
|
| 151 |
short_logits = logits[:, :, :1024]
|
|
@@ -704,3 +705,4 @@ def main(
|
|
| 704 |
|
| 705 |
if __name__ == "__main__":
|
| 706 |
main()
|
|
|
|
|
|
| 1 |
+
|
| 2 |
import os
|
| 3 |
import queue
|
| 4 |
import threading
|
|
|
|
| 136 |
layer.attention.kv_cache.k_cache.fill_(0)
|
| 137 |
layer.attention.kv_cache.v_cache.fill_(0)
|
| 138 |
|
| 139 |
+
# FIX: Use new_zeros to avoid torch.compile issues with device argument
|
| 140 |
+
input_pos = hidden_states.new_zeros((1,), dtype=torch.long)
|
| 141 |
model.forward_generate_fast(hidden_states, input_pos)
|
| 142 |
a = codebooks[0] - model.tokenizer.semantic_begin_id
|
| 143 |
a[a < 0] = 0
|
|
|
|
| 145 |
codebooks.append(a)
|
| 146 |
|
| 147 |
for codebook_idx in range(1, model.config.num_codebooks):
|
| 148 |
+
# FIX: Use new_full to avoid torch.compile issues with device argument
|
| 149 |
+
input_pos = hidden_states.new_full((1,), codebook_idx, dtype=torch.long)
|
|
|
|
| 150 |
logits = model.forward_generate_fast(hidden_states, input_pos)
|
| 151 |
|
| 152 |
short_logits = logits[:, :, :1024]
|
|
|
|
| 705 |
|
| 706 |
if __name__ == "__main__":
|
| 707 |
main()
|
| 708 |
+
|