dinov3-viz-sat493m

Running on Zero

pszemraj committed on Oct 9

Commit

be45a3f

verified ·

1 Parent(s): 169b580

update docs

It's actually called axial RoPE (in 2D).

Files changed (1) hide show

app.py CHANGED Viewed

@@ -67,7 +67,7 @@ def compute_dynamic_size(height, width, max_dim: int = 720, patch_size: int = 16
 def load_model(name):
-    """Load model with CORRECT dtype"""
     global processor, model
     cleanup_memory()
@@ -91,7 +91,7 @@ load_model(DEFAULT_NAME)
 def preprocess_image(img):
     """
     Custom preprocessing that respects aspect ratio & uses dynamic sizing.
-    DINOv3's 3D RoPE handles variable sizes, no need to force 224x224
     """
     # Convert to RGB if needed
     if img.mode != "RGB":
@@ -359,7 +359,7 @@ with gr.Blocks(
         Satellite-pretrained models are intended for: geographic patterns, land use classification. structural analysis, etc. Try comparing similarity maps for the same image created by the model pretrained on sat493m vs. the one on lvd1689m (<i>general web</i>).
         <br><br>
         <b>Dynamic Resolution:</b> Images are processed at up to {MAX_IMAGE_DIM}px (longer side) while preserving aspect ratio.
-        DINOv3's 3D RoPE embeddings handle variable sizes.
         <br>
     </div>
     """

 def load_model(name):
+    """Load model with dtype"""
     global processor, model
     cleanup_memory()
 def preprocess_image(img):
     """
     Custom preprocessing that respects aspect ratio & uses dynamic sizing.
+    DINOv3's 2D axial RoPE handles variable sizes, no need to force 224x224
     """
     # Convert to RGB if needed
     if img.mode != "RGB":
         Satellite-pretrained models are intended for: geographic patterns, land use classification. structural analysis, etc. Try comparing similarity maps for the same image created by the model pretrained on sat493m vs. the one on lvd1689m (<i>general web</i>).
         <br><br>
         <b>Dynamic Resolution:</b> Images are processed at up to {MAX_IMAGE_DIM}px (longer side) while preserving aspect ratio.
+        DINOv3's 2D axial RoPE embeddings handle variable sizes.
         <br>
     </div>
     """