add_act_kernels #4
opened by medmekk (HF Staff)

This view is limited to 50 files because the pull request contains too many changes; see the raw diff for the complete change set.

Files changed:
- activation/activation_kernels.cu +20 -1
- build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py +18 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu118-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so} +2 -2
- build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py +51 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py +18 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu126-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so} +2 -2
- build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py +51 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py +18 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch27-cxx11-cu128-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so} +2 -2
- build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
- build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py +51 -0
- build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py +18 -0
- build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu126-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so} +2 -2
- build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py +3 -3
- build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py +51 -0
- build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py +18 -0
- build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so +3 -0
- build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +0 -3
- build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py +3 -3
- build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py +51 -0
- build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py +18 -0
- build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc +0 -0
- build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so +3 -0
- build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so +0 -3
- build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py +3 -3
- build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py +51 -0
- tests/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/kernels/__pycache__/__init__.cpython-312.pyc +0 -0
- tests/kernels/__pycache__/allclose_default.cpython-312.pyc +0 -0
- tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc +0 -0
- tests/kernels/__pycache__/utils.cpython-312.pyc +0 -0
activation/activation_kernels.cu  CHANGED (+20 -1)

@@ -44,7 +44,7 @@ __device__ __forceinline__ T gelu_kernel(const T& x) {
   // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L36-L38
   const float f = (float)x;
   constexpr float ALPHA = M_SQRT1_2;
-  return (T)(f * 0.5f * (1.0f +
+  return (T)(f * 0.5f * (1.0f + erf(f * ALPHA)));
 }
 
 template <typename T>
@@ -183,6 +183,7 @@ __global__ void activation_kernel(
 
 namespace vllm {
 
+
 template <typename T>
 __device__ __forceinline__ T gelu_new_kernel(const T& x) {
   const float x3 = (float)(x * x * x);
@@ -223,3 +224,21 @@ void gelu_quick(torch::Tensor& out, // [..., d]
 {
   LAUNCH_ACTIVATION_KERNEL(vllm::gelu_quick_kernel);
 }
+
+void gelu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_kernel);
+}
+
+void gelu_tanh(torch::Tensor& out,    // [..., d]
+               torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::gelu_tanh_kernel);
+}
+
+void silu(torch::Tensor& out,    // [..., d]
+          torch::Tensor& input)  // [..., d]
+{
+  LAUNCH_ACTIVATION_KERNEL(vllm::silu_kernel);
+}
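The hunks above complete the erf-based gelu_kernel and register elementwise launchers for gelu, gelu_tanh, and silu alongside the existing gelu_quick. As a sanity reference, a minimal PyTorch sketch of the functions these launchers are expected to compute; the erf formula mirrors the kernel above, while the tanh and silu formulas are the standard definitions and are assumed here rather than taken from this diff:

import math
import torch

def gelu_ref(x: torch.Tensor) -> torch.Tensor:
    # erf-based GELU, mirroring the kernel: x * 0.5 * (1 + erf(x / sqrt(2)))
    return x * 0.5 * (1.0 + torch.erf(x * math.sqrt(0.5)))

def gelu_tanh_ref(x: torch.Tensor) -> torch.Tensor:
    # standard tanh approximation of GELU (assumed semantics of gelu_tanh_kernel)
    return torch.nn.functional.gelu(x, approximate="tanh")

def silu_ref(x: torch.Tensor) -> torch.Tensor:
    # SiLU / swish: x * sigmoid(x)
    return x * torch.sigmoid(x)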
build/torch27-cxx11-cu118-x86_64-linux/activation/__init__.py  CHANGED (+18 -0)

@@ -30,6 +30,20 @@ def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0)
     return out
 
 
+def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu(out, x)
+    return out
+
+def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.silu(out, x)
+    return out
+
+
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+    ops.gelu_tanh(out, x)
+    return out
+
+
 def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
     ops.gelu_fast(out, x)
     return out
@@ -47,11 +61,15 @@ def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
 
 __all__ = [
     "silu_and_mul",
+    "mul_and_silu",
     "gelu_and_mul",
     "gelu_tanh_and_mul",
     "fatrelu_and_mul",
     "gelu_fast",
     "gelu_new",
     "gelu_quick",
+    "gelu_tanh",
+    "silu",
+    "gelu",
     "layers",
 ]
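These wrappers follow the out-parameter convention of the existing functions: the caller allocates `out` and the op writes into it (they return `out` despite the `-> None` annotation, matching the surrounding wrappers). A hypothetical usage sketch, assuming the built package imports as `activation`:

import torch
import activation  # assumed import name for the built package

x = torch.randn(8, 4096, device="cuda", dtype=torch.float16)
out = torch.empty_like(x)
activation.gelu(out, x)       # writes erf-based gelu(x) into out
activation.silu(out, x)       # writes silu(x) into out
activation.gelu_tanh(out, x)  # writes tanh-approximated gelu(x) into out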
build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu118-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu118-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so}  RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:618cdba5f19eabc1f9c1d33e130ef03ab1b11b52f1e7b00b73f2a10d5cf1e62f
+size 2773664
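The .so files are stored as Git LFS pointers, so the diff shows only pointer metadata; the oid is the SHA-256 of the actual binary. A small sketch for verifying a fetched object against the pointer above (the local file path is assumed):

import hashlib

# Verify the downloaded shared object against the LFS pointer metadata.
with open("_activation_20250917153858.abi3.so", "rb") as f:  # path assumed
    blob = f.read()
assert len(blob) == 2773664
assert hashlib.sha256(blob).hexdigest() == (
    "618cdba5f19eabc1f9c1d33e130ef03ab1b11b52f1e7b00b73f2a10d5cf1e62f"
)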
build/torch27-cxx11-cu118-x86_64-linux/activation/_ops.py  CHANGED (+3 -3)

@@ -1,9 +1,9 @@
 import torch
-from . import _activation_be5bedb_dirty
-ops = torch.ops._activation_be5bedb_dirty
+from . import _activation_20250917153858
+ops = torch.ops._activation_20250917153858
 
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_activation_be5bedb_dirty::{op_name}"
+    return f"_activation_20250917153858::{op_name}"
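Importing the versioned extension module registers its custom ops with PyTorch's dispatcher, after which they are reachable under torch.ops.<module_name>; add_op_namespace_prefix builds the fully qualified op name for schema lookups. A sketch of how a caller goes through this indirection (the package-relative import path is assumed):

from activation import _ops  # assumed import path within the built package

# _ops.ops is torch.ops._activation_20250917153858; attributes resolve lazily
# to the C++ ops registered when the .so was imported.
gelu_op = _ops.ops.gelu  # the op packet for the new gelu launcher

assert _ops.add_op_namespace_prefix("gelu") == "_activation_20250917153858::gelu"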
build/torch27-cxx11-cu118-x86_64-linux/activation/layers.py  CHANGED (+51 -0)

@@ -23,6 +23,57 @@ class SiluAndMul(nn.Module):
         ops.silu_and_mul(out, x)
         return out
 
+class Silu(nn.Module):
+    """An activation function for SiLU.
+
+    The function computes x -> silu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.silu(out, x)
+        return out
+
+class Gelu(nn.Module):
+    """An activation function for GELU.
+
+    The function computes x -> gelu(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu(out, x)
+        return out
+
+class GeluTanh(nn.Module):
+    """An activation function for GELU with `tanh` approximation.
+
+    The function computes x -> gelu_tanh(x).
+
+    Shapes:
+        x: (num_tokens, d) or (batch_size, seq_len, d)
+        return: (num_tokens, d) or (batch_size, seq_len, d)
+    """
+
+    can_torch_compile: bool = True
+
+    def forward(self, x: torch.Tensor):
+        out = torch.empty_like(x)
+        ops.gelu_tanh(out, x)
+        return out
+
 
 class MulAndSilu(nn.Module):
     """An activation function for SwiGLU.
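Each new module allocates the output tensor and dispatches to the corresponding op, and can_torch_compile = True flags it as safe inside a compiled graph. A hypothetical usage sketch (import path assumed):

import torch
from activation.layers import Gelu, GeluTanh, Silu  # assumed import path

act = Silu()
x = torch.randn(4, 128, 1024, device="cuda", dtype=torch.bfloat16)
y = act(x)  # same shape and dtype as x

# The can_torch_compile flag signals the module can be traced, e.g.:
compiled_act = torch.compile(GeluTanh())
y2 = compiled_act(x)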
build/torch27-cxx11-cu126-x86_64-linux/activation/__init__.py  CHANGED (+18 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the gelu, silu, and gelu_tanh wrappers and extends __all__ with "mul_and_silu", "gelu_tanh", "silu", and "gelu".
build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu126-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so}  RENAMED
New LFS pointer: oid sha256:87ee9280b670b3323378c17d75ee7082f419987a568769fe8479bf509ee6c245, size 2852232
build/torch27-cxx11-cu126-x86_64-linux/activation/_ops.py  CHANGED (+3 -3)
Same diff as the torch27-cxx11-cu118 variant above: points the import, the torch.ops handle, and the namespace prefix at _activation_20250917153858.
build/torch27-cxx11-cu126-x86_64-linux/activation/layers.py  CHANGED (+51 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the Silu, Gelu, and GeluTanh modules.
build/torch27-cxx11-cu128-x86_64-linux/activation/__init__.py  CHANGED (+18 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the gelu, silu, and gelu_tanh wrappers and extends __all__ with "mul_and_silu", "gelu_tanh", "silu", and "gelu".
build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc  CHANGED (binary files differ)
build/torch27-cxx11-cu128-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so}  RENAMED
New LFS pointer: oid sha256:28ca9a3e35c49ae49694d7c6c77f85f3664622cad9c857bf13dfbf3bc144ae1b, size 4127912
build/torch27-cxx11-cu128-x86_64-linux/activation/_ops.py  CHANGED (+3 -3)
Same diff as the torch27-cxx11-cu118 variant above: points the import, the torch.ops handle, and the namespace prefix at _activation_20250917153858.
build/torch27-cxx11-cu128-x86_64-linux/activation/layers.py  CHANGED (+51 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the Silu, Gelu, and GeluTanh modules.
build/torch28-cxx11-cu126-x86_64-linux/activation/__init__.py  CHANGED (+18 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the gelu, silu, and gelu_tanh wrappers and extends __all__ with "mul_and_silu", "gelu_tanh", "silu", and "gelu".
build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu126-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu126-x86_64-linux/activation/{_activation_be5bedb_dirty.abi3.so → _activation_20250917153858.abi3.so}  RENAMED
New LFS pointer: oid sha256:0fcd47dd765bba10bb09f65388f6c1b357b117b2611c17bae5bf8214499a9e39, size 2837224
build/torch28-cxx11-cu126-x86_64-linux/activation/_ops.py  CHANGED (+3 -3)
Same diff as the torch27-cxx11-cu118 variant above: points the import, the torch.ops handle, and the namespace prefix at _activation_20250917153858.
build/torch28-cxx11-cu126-x86_64-linux/activation/layers.py  CHANGED (+51 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the Silu, Gelu, and GeluTanh modules.
build/torch28-cxx11-cu128-x86_64-linux/activation/__init__.py  CHANGED (+18 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the gelu, silu, and gelu_tanh wrappers and extends __all__ with "mul_and_silu", "gelu_tanh", "silu", and "gelu".
build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu128-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_20250917153858.abi3.so  ADDED
New LFS pointer: oid sha256:0e6d88c71eebabc842f6a566de7cfaf24d3d90a30572eae584a3b51dcb7e838e, size 4117000
build/torch28-cxx11-cu128-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so  DELETED
Removed LFS pointer: oid sha256:cf784c7ab178c476fc6268efe820b1948c7c5b8f049c046c851b03067da5dd59, size 3558616
build/torch28-cxx11-cu128-x86_64-linux/activation/_ops.py  CHANGED (+3 -3)
Same diff as the torch27-cxx11-cu118 variant above: points the import, the torch.ops handle, and the namespace prefix at _activation_20250917153858.
build/torch28-cxx11-cu128-x86_64-linux/activation/layers.py  CHANGED (+51 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the Silu, Gelu, and GeluTanh modules.
build/torch28-cxx11-cu129-x86_64-linux/activation/__init__.py  CHANGED (+18 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the gelu, silu, and gelu_tanh wrappers and extends __all__ with "mul_and_silu", "gelu_tanh", "silu", and "gelu".
build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/__init__.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/_ops.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu129-x86_64-linux/activation/__pycache__/layers.cpython-313.pyc  CHANGED (binary files differ)
build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_20250917153858.abi3.so  ADDED
New LFS pointer: oid sha256:f3c1b86db31b04bd5fe75b0c9d6915ba2766a2456ea9bd1a20f2d75c4b1acf35, size 4154880
build/torch28-cxx11-cu129-x86_64-linux/activation/_activation_be5bedb_dirty.abi3.so  DELETED
Removed LFS pointer: oid sha256:9e7cca3169eea8cbd67c61706d102548e49aadc936f8c2943efef3e7c4c0ee0d, size 3592400
build/torch28-cxx11-cu129-x86_64-linux/activation/_ops.py  CHANGED (+3 -3)
Same diff as the torch27-cxx11-cu118 variant above: points the import, the torch.ops handle, and the namespace prefix at _activation_20250917153858.
build/torch28-cxx11-cu129-x86_64-linux/activation/layers.py  CHANGED (+51 -0)
Same diff as the torch27-cxx11-cu118 variant above: adds the Silu, Gelu, and GeluTanh modules.
tests/__pycache__/__init__.cpython-312.pyc  ADDED (binary file, 142 Bytes)
tests/kernels/__pycache__/__init__.cpython-312.pyc  ADDED (binary file, 150 Bytes)
tests/kernels/__pycache__/allclose_default.cpython-312.pyc  ADDED (binary file, 842 Bytes)
tests/kernels/__pycache__/test_activation.cpython-312-pytest-8.4.2.pyc  ADDED (binary file, 11.7 kB)
tests/kernels/__pycache__/utils.cpython-312.pyc  ADDED (binary file, 2.75 kB)