Swish is infinitely differentiable, i.e., class $C^\infty$.

It is defined as

$$\operatorname{swish}(x) = x \sigma(x),$$

where $\sigma$ is the sigmoid function, $\sigma(x) = 1 / (1 + e^{-x})$.

## Visualizations

Swish

Derivative of Swish

## Code

def swish(x, alpha=1.0):
    """Return the Swish activation ``x * sigmoid(alpha * x)``.

    Args:
        x: Input tensor.
        alpha: Scale applied to the sigmoid argument. The default of 1.0
            gives the standard Swish ``x * sigmoid(x)``.

    Returns:
        Tensor holding the activation evaluated element-wise on ``x``.
    """
    # alpha used to be accepted and silently ignored; it now takes effect.
    return x * torch.sigmoid(alpha * x)


from torch import nn
import matplotlib.pyplot as plt
import torch
from typing import Union, Optional
from pathlib import Path
import json

def visualize_activation(
    x: torch.Tensor, acti: torch.nn.Module,
    save_path: Optional[Union[str, Path]] = None
) -> dict:
    """Evaluate an activation function and its derivative on the points in x.

    Args:
        x: Floating-point tensor of sample points. The caller's tensor is
            left untouched; gradients are taken with respect to a detached
            copy of it.
        acti: Activation module to evaluate.
        save_path: Optional destination file. When given, the result is also
            written there as JSON and missing parent directories are created.

    Returns:
        A dict with keys ``"x"`` (sample points), ``"y"`` (activation values)
        and ``"yp"`` (gradient of ``sum(y)`` with respect to ``x``), each
        converted to plain Python lists.
    """
    # Differentiate against a private leaf tensor so that autograd tracks the
    # computation and no .grad is accumulated on the caller's tensor.
    inputs = x.detach().clone().requires_grad_(True)
    y = acti(inputs)

    # Reducing with sum() gives a scalar to call backward() on; for an
    # element-wise activation the resulting gradient is the pointwise
    # derivative.
    y.sum().backward()
    yp = inputs.grad

    activation_dict = {
        "x": inputs.detach().tolist(),
        "y": y.detach().tolist(),
        "yp": yp.detach().tolist(),
    }

    if save_path is not None:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(activation_dict, f, indent=4)

    return activation_dict

class Swish(nn.Module):
    """Swish activation module computing ``x * sigmoid(x)`` element-wise."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply Swish to ``x`` and return the result."""
        gate = torch.sigmoid(x)
        return x * gate

    def __str__(self) -> str:
        """Return the default module description with a descriptive prefix."""
        module_text = super().__str__()
        return "Activation Function: " + module_text

if __name__ == "__main__":
    # Demo: print the module, dump its curve and derivative to JSON, and
    # plot both series on one set of axes.
    activation = Swish()
    print(activation)

    grid = torch.linspace(-2, 2, 1000)
    data = visualize_activation(
        grid, activation, save_path="data/activations/swish.json"
    )

    figure, axes = plt.subplots()
    # First the activation values, then its derivative.
    for series in ("y", "yp"):
        axes.plot(data["x"], data[series])
    axes.set_title("Swish")
    plt.show()


Planted: 11 April 2018 by L Ma

Dynamic Backlinks to cards/machine-learning/neural-networks/activation-swish:
cards/machine-learning/neural-networks/activation-swish Links to:

L Ma (2018). 'Swish', Datumorphism, 11 April. Available at: https://datumorphism.leima.is/cards/machine-learning/neural-networks/activation-swish/.