longlian
/

text-to-video-lvd-ms

Model card Files Files and versions

longlian committed on Apr 13, 2024

Commit

89ec139

·

verified ·

1 Parent(s): d0a6163

Update lvd_pipeline.py

Files changed (1) hide show

lvd_pipeline.py +2 -1

lvd_pipeline.py CHANGED Viewed

@@ -758,7 +758,8 @@ class GroundedTextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMix
                 # we represent the location information as (xmin,ymin,xmax,ymax)
                 boxes = torch.zeros(max_objs, 4, device=device,
                                     dtype=self.text_encoder.dtype)
-                boxes[:n_objs] = torch.tensor(lvd_gligen_boxes_frame)
                 text_embeddings = torch.zeros(
                     max_objs, self.unet.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
                 )

                 # we represent the location information as (xmin,ymin,xmax,ymax)
                 boxes = torch.zeros(max_objs, 4, device=device,
                                     dtype=self.text_encoder.dtype)
+                if n_objs:
+                    boxes[:n_objs] = torch.tensor(lvd_gligen_boxes_frame)
                 text_embeddings = torch.zeros(
                     max_objs, self.unet.cross_attention_dim, device=device, dtype=self.text_encoder.dtype
                 )