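Fix bindings: split `gelu` into separate `gelu` and `gelu_tanh` ops (dropping the `approximation` string argument) and register `gelu`, `gelu_tanh`, and `silu` in the Torch bindings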

Files changed (5) hide show

activation/activation_kernels.cu CHANGED Viewed

@@ -226,16 +226,15 @@ void gelu_quick(torch::Tensor& out,    // [..., d]
 }
 void gelu(torch::Tensor& out,    // [..., d]
-          torch::Tensor& input,
-          std::string approximation)  // [..., d]
 {
-  if (approximation == "none") {
-    LAUNCH_ACTIVATION_KERNEL(vllm::gelu_kernel);
-  } else if (approximation == "tanh") {
-    LAUNCH_ACTIVATION_KERNEL(vllm::gelu_tanh_kernel);
-  } else {
-    throw std::invalid_argument("Invalid approximation");
-  }
 }
 void silu(torch::Tensor& out,    // [..., d]

 }
 void gelu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
 {
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_kernel);
+}
+void gelu_tanh(torch::Tensor& out,    // [..., d]
+               torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_tanh_kernel);
 }
 void silu(torch::Tensor& out,    // [..., d]

torch-ext/activation/__init__.py CHANGED Viewed

@@ -30,8 +30,8 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
-def gelu(out: torch.Tensor, x: torch.Tensor, approximation: str = "none") -> None:
-    ops.gelu(out, x, approximation)
     return out
 def silu(out: torch.Tensor, x: torch.Tensor) -> None:
@@ -39,6 +39,11 @@ def silu(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
@@ -56,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
 __all__ = [
     "silu_and_mul",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
     "layers",
 ]

     return out
+def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu(out, x)
     return out
 def silu(out: torch.Tensor, x: torch.Tensor) -> None:
     return out
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu_tanh(out, x)
+    return out
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
 __all__ = [
     "silu_and_mul",
+    "mul_and_silu",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
     "layers",
 ]

torch-ext/activation/layers.py CHANGED Viewed

@@ -52,11 +52,29 @@ class Gelu(nn.Module):
     can_torch_compile: bool = True
-    def forward(self, x: torch.Tensor, approximation: str = "none"):
         out = torch.empty_like(x)
-        ops.gelu(out, x, approximation)
         return out
 class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.

     can_torch_compile: bool = True
+    def forward(self, x: torch.Tensor):
         out = torch.empty_like(x)
+        ops.gelu(out, x)
         return out
+class GeluTanh(nn.Module):
+    """An activation function for GELU with `tanh` approximation.
+    The function computes x -> gelu_tanh(x).
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+    can_torch_compile: bool = True
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu_tanh(out, x)
+        return out
 class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.

torch-ext/torch_binding.cpp CHANGED Viewed

@@ -35,6 +35,18 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
   // Quick GELU implementation.
   ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
   ops.impl("gelu_quick", torch::kCUDA, &gelu_quick);
 }
 REGISTER_EXTENSION(TORCH_EXTENSION_NAME)

   // Quick GELU implementation.
   ops.def("gelu_quick(Tensor! out, Tensor input) -> ()");
   ops.impl("gelu_quick", torch::kCUDA, &gelu_quick);
+  // GELU with `tanh` approximation.
+  ops.def("gelu_tanh(Tensor! out, Tensor input) -> ()");
+  ops.impl("gelu_tanh", torch::kCUDA, &gelu_tanh);
+  // SiLU implementation.
+  ops.def("silu(Tensor! out, Tensor input) -> ()");
+  ops.impl("silu", torch::kCUDA, &silu);
+  // GELU without approximation (the former approximation="none" path).
+  ops.def("gelu(Tensor! out, Tensor input) -> ()");
+  ops.impl("gelu", torch::kCUDA, &gelu);
 }
 REGISTER_EXTENSION(TORCH_EXTENSION_NAME)

torch-ext/torch_binding.h CHANGED Viewed

@@ -18,3 +18,9 @@ void gelu_new(torch::Tensor &out, torch::Tensor &input);
 void gelu_fast(torch::Tensor &out, torch::Tensor &input);
 void gelu_quick(torch::Tensor &out, torch::Tensor &input);

 void gelu_fast(torch::Tensor &out, torch::Tensor &input);
 void gelu_quick(torch::Tensor &out, torch::Tensor &input);
+void gelu_tanh(torch::Tensor &out, torch::Tensor &input);
+void silu(torch::Tensor &out, torch::Tensor &input);
+void gelu(torch::Tensor &out, torch::Tensor &input);