Source code for tint.models.transformer

import torch as th
import torch.nn as nn
import torch.nn.functional as F


TIME_DIM = 1


class TransformerEncoder(nn.Module):
    """
    A base transformer encoder model class.

    Args:
        d_model (int): Input size of the model.
        nhead (int): Number of heads. Defaults to 1
        dim_feedforward (int): Dimension of the feedforward network model.
            Defaults to 32
        num_layers (int): Number of layers. Defaults to 1
        dropout (float): Dropout rate. Defaults to 0.0
        activation (str): Activation function. Defaults to ``'relu'``
        layer_norm_eps (float): Eps value in layer normalization components.
            Defaults to 1e-5
        norm_first (bool): If ``True``, layer norm is done prior to attention
            and feedforward operations. Defaults to ``False``
        enable_nested_tensor (bool): If ``True``, the input will automatically
            be converted to a nested tensor. Defaults to ``False``
        many_to_one (bool): Whether to reduce the temporal dimension.
            Defaults to ``False``

    References:
        https://pytorch.org/docs/stable/nn.html#transformer-layers

    Examples:
        >>> from tint.models import TransformerEncoder
        <BLANKLINE>
        >>> transformer = TransformerEncoder(10)
        >>> transformer = TransformerEncoder(10, nhead=2, dropout=0.1)
    """

    def __init__(
        self,
        d_model: int,
        nhead: int = 1,
        dim_feedforward: int = 32,
        num_layers: int = 1,
        dropout: float = 0.0,
        activation: str = "relu",
        layer_norm_eps: float = 1e-5,
        norm_first: bool = False,
        enable_nested_tensor: bool = False,
        many_to_one: bool = False,
    ):
        super().__init__()

        # Single encoder layer; batch_first=True means inputs are expected
        # as (batch, time, features).
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            activation=activation,
            layer_norm_eps=layer_norm_eps,
            norm_first=norm_first,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer,
            num_layers=num_layers,
            enable_nested_tensor=enable_nested_tensor,
        )

        self.many_to_one = many_to_one
        self._size = 1

    @property
    def src_mask(self):
        """
        Generate a square mask for the sequence.

        The masked positions are filled with float('-inf').
        Unmasked positions are filled with float(0.0).

        Returns:
            th.Tensor: A mask.
        """
        # Lower-triangular mask: position i may only attend to positions
        # j <= i. Zeros are kept, -inf positions are dropped by the softmax.
        mask = (th.triu(th.ones(self._size, self._size)) == 1).transpose(0, 1)
        mask = (
            mask.float()
            .masked_fill(mask == 0, float("-inf"))
            .masked_fill(mask == 1, float(0.0))
        )
        return mask
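For reference, the property above builds the standard additive causal mask: zeros on and below the diagonal, -inf above it, so that once the mask is added to the attention scores, position i can only attend to positions j <= i. A minimal standalone sketch for a sequence length of 4 (the printed tensor is the expected result):

import torch as th

size = 4
mask = (th.triu(th.ones(size, size)) == 1).transpose(0, 1)
mask = (
    mask.float()
    .masked_fill(mask == 0, float("-inf"))
    .masked_fill(mask == 1, float(0.0))
)
print(mask)
# tensor([[0., -inf, -inf, -inf],
#         [0., 0., -inf, -inf],
#         [0., 0., 0., -inf],
#         [0., 0., 0., 0.]])

Recent PyTorch versions also expose nn.Transformer.generate_square_subsequent_mask, a static helper that produces the same mask.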
    def forward(self, x: th.Tensor) -> th.Tensor:
        # Update size given inputs
        self._size = x.shape[1]

        # Apply self attention. The mask is created on CPU, so move it to
        # the input's device to avoid a device mismatch on GPU.
        out = self.transformer_encoder(
            src=x,
            mask=self.src_mask.to(x.device),
        )

        # L2-normalize outputs along the feature dimension
        out = F.normalize(out, dim=-1, p=2)

        # If many_to_one, reduce the temporal dimension by summing over it
        if self.many_to_one:
            out = out.sum(TIME_DIM)
        return out
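A short usage sketch of the full module, reusing the import path from the docstring's Examples; the shapes in the comments follow from batch_first=True and the sum over TIME_DIM:

import torch as th
from tint.models import TransformerEncoder

x = th.randn(32, 50, 10)  # (batch, time, features)

encoder = TransformerEncoder(d_model=10, nhead=2)
out = encoder(x)
print(out.shape)  # torch.Size([32, 50, 10])

# With many_to_one=True, the time dimension is summed out.
encoder = TransformerEncoder(d_model=10, nhead=2, many_to_one=True)
out = encoder(x)
print(out.shape)  # torch.Size([32, 10])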