Both ReLU and Leaky ReLU have derivatives that are discontinuous at $x = 0$. ELU with $\alpha = 1$, as in the definition below, has a continuous first derivative, i.e., it is of class $C^1$.

$$\mathrm{ELU}(x) = \begin{cases} x, & \text{if } x \geq 0 \\ \exp(x) - 1, & \text{otherwise.} \end{cases}$$

## Visualizations

ELU

Derivative of ELU

## Code

def elu(x, alpha):
    """Return the ELU activation of ``x`` with scale ``alpha``.

    Computes ``x`` where ``x >= 0`` and ``alpha * (exp(x) - 1)`` elsewhere,
    element-wise.

    Args:
        x: Input tensor.
        alpha: Scale of the negative branch; ``alpha = 1`` gives the
            formula stated in this note.

    Returns:
        A tensor with the same shape as ``x``.
    """
    # Clamp before exponentiating so the negative branch never overflows for
    # large positive inputs; an overflow there would turn the gradient of
    # the (unselected) branch into NaN inside torch.where.
    negative_branch = alpha * torch.expm1(torch.clamp(x, max=0.0))
    return torch.where(x >= 0, x, negative_branch)


from torch import nn
import matplotlib.pyplot as plt
import torch
from typing import Union, Optional
from pathlib import Path
import json

def visualize_activation(
    x: torch.Tensor, acti: torch.nn.Module,
    save_path: Optional[Union[str, Path]] = None
) -> dict:
    """Evaluate an activation function and its derivative on the points of x.

    Args:
        x: Points at which the activation is evaluated. Must have a
            floating-point dtype so it can be differentiated. The caller's
            tensor is left untouched; a detached copy is differentiated.
        acti: Activation module to apply to ``x``.
        save_path: Optional path of a JSON file to write the result to.
            Missing parent directories are created.

    Returns:
        A dict of plain Python lists with keys ``"x"`` (inputs), ``"y"``
        (activation values) and ``"yp"`` (derivative of the activation
        with respect to each input).
    """
    # Differentiate a detached leaf copy: autograd only populates `.grad`
    # on leaf tensors that require grad, and the caller's tensor should not
    # be mutated as a side effect.
    x = x.detach().clone().requires_grad_(True)

    # enable_grad makes this work even when called inside torch.no_grad().
    with torch.enable_grad():
        y = acti(x)
        # For an element-wise activation, d(sum(y))/dx_i equals dy_i/dx_i,
        # so one backward pass yields the derivative at every point.
        y.sum().backward()
    yp = x.grad

    activation_dict = {
        "x": x.detach().tolist(),
        "y": y.detach().tolist(),
        "yp": yp.detach().tolist(),
    }

    if save_path is not None:
        save_path = Path(save_path)
        save_path.parent.mkdir(parents=True, exist_ok=True)
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(activation_dict, f, indent=4)

    return activation_dict

class ELU(nn.Module):
    """Exponential Linear Unit activation.

    Applies, element-wise, ``x`` where ``x >= 0`` and
    ``alpha * (exp(x) - 1)`` elsewhere.

    Args:
        alpha: Scale of the negative branch. The default of 1.0 matches the
            formula in this note and makes the first derivative continuous
            at zero.
    """

    def __init__(self, alpha: float = 1.0) -> None:
        super().__init__()
        self.alpha = alpha

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the ELU of ``x``; the output has the same shape as ``x``."""
        # Clamp before exponentiating: torch.where evaluates both branches,
        # and exp of a large positive input overflows to inf, which would
        # make the gradient of the unselected branch NaN (0 * inf).
        negative_branch = self.alpha * torch.expm1(torch.clamp(x, max=0.0))
        return torch.where(x >= 0, x, negative_branch)

    def __str__(self) -> str:
        return f"Activation Function: {super().__str__()}"

if __name__ == "__main__":
    # Build the activation and show its string representation.
    elu = ELU()
    print(elu)

    # Sample the activation and its derivative on [-2, 2] and store the
    # sampled curves as JSON.
    x = torch.linspace(-2, 2, 1000)
    save_path = "data/activations/elu.json"
    data = visualize_activation(x, elu, save_path=save_path)

    # Draw the activation first, then its derivative, on the same axes.
    fig, ax = plt.subplots()
    for curve in ("y", "yp"):
        ax.plot(data["x"], data[curve])
    ax.set_title("ELU")
    plt.show()


Planted: by ;

No backlinks identified. Reference this note using the Note ID cards/machine-learning/neural-networks/activation-elu.md in other notes to connect them.

L Ma (2018). 'ELU', Datumorphism, 11 April. Available at: https://datumorphism.leima.is/cards/machine-learning/neural-networks/activation-elu/.