Source code for deel.torchlip.modules.linear
# -*- coding: utf-8 -*-
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
# CRIAQ and ANITI - https://www.deel.ai/
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Copyright IRT Antoine de Saint Exupéry et Université Paul Sabatier Toulouse III - All
# rights reserved. DEEL is a research program operated by IVADO, IRT Saint Exupéry,
# CRIAQ and ANITI - https://www.deel.ai/
# =====================================================================================
import torch
from torch.nn.utils.parametrizations import spectral_norm
from ..utils import bjorck_norm
from ..normalizers import DEFAULT_EPS_BJORCK
from ..normalizers import DEFAULT_EPS_SPECTRAL
from ..utils import frobenius_norm
from .module import LipschitzModule
[docs]class SpectralLinear(torch.nn.Linear, LipschitzModule):
def __init__(
self,
in_features: int,
out_features: int,
bias: bool = True,
k_coef_lip: float = 1.0,
eps_spectral: int = DEFAULT_EPS_SPECTRAL,
eps_bjorck: int = DEFAULT_EPS_BJORCK,
):
"""
This class is a Linear Layer constrained such that all singular of it's kernel
are 1. The computation based on BjorckNormalizer algorithm.
The computation is done in two steps:
1. reduce the larget singular value to 1, using iterated power method.
2. increase other singular values to 1, using BjorckNormalizer algorithm.
Args:
in_features: Size of each input sample.
out_features: Size of each output sample.
bias: If ``False``, the layer will not learn an additive bias.
k_coef_lip: Lipschitz constant to ensure.
eps_spectral: stopping criterion for the iterative power algorithm.
eps_bjorck: stopping criterion Bjorck algorithm.
Shape:
- Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
additional dimensions and :math:`H_{in} = \\text{in\\_features}`
- Output: :math:`(N, *, H_{out})` where all but the last dimension
are the same shape as the input and
:math:`H_{out} = \\text{out\\_features}`.
This documentation reuse the body of the original torch.nn.Linear doc.
"""
torch.nn.Linear.__init__(
self,
in_features=in_features,
out_features=out_features,
bias=bias,
)
LipschitzModule.__init__(self, k_coef_lip)
torch.nn.init.orthogonal_(self.weight)
if self.bias is not None:
self.bias.data.fill_(0.0)
spectral_norm(
self,
name="weight",
eps=eps_spectral,
)
bjorck_norm(self, name="weight", eps=eps_bjorck)
self.apply_lipschitz_factor()
def vanilla_export(self) -> torch.nn.Linear:
layer = torch.nn.Linear(
in_features=self.in_features,
out_features=self.out_features,
bias=self.bias is not None,
)
layer.weight.data = self.weight.detach()
if self.bias is not None:
layer.bias.data = self.bias.detach()
return layer
[docs]class FrobeniusLinear(torch.nn.Linear, LipschitzModule):
"""
This class is a Linear Layer constrained such that the Frobenius norm of the weight
is 1. In the case of a single output neuron, it is equivalent and faster than the
SpectralLinear layer. For multi-neuron case, the "disjoint_neurons" parameter
affects the behaviour:
- if ``disjoint_neurons`` is True (default), it corresponds to the stacking of
independent 1-Lipschitz neurons.
- if ``disjoint_neurons`` is False, the matrix weight is normalized by its Frobenius
norm.
Args:
in_features: Size of each input sample.
out_features: Size of each output sample.
bias: If ``False``, the layer will not learn an additive bias.
disjoint_neurons: Normalize, independently per neuron or not, the matrix weight.
k_coef_lip: Lipschitz constant to ensure.
"""
def __init__(
self,
in_features: int,
out_features: int,
bias: bool = True,
disjoint_neurons: bool = True,
k_coef_lip: float = 1.0,
):
torch.nn.Linear.__init__(
self,
in_features=in_features,
out_features=out_features,
bias=bias,
)
LipschitzModule.__init__(self, k_coef_lip)
torch.nn.init.orthogonal_(self.weight)
if self.bias is not None:
self.bias.data.fill_(0.0)
frobenius_norm(self, name="weight", disjoint_neurons=disjoint_neurons)
self.apply_lipschitz_factor()
def vanilla_export(self):
layer = torch.nn.Linear(
in_features=self.in_features,
out_features=self.out_features,
bias=self.bias is not None,
)
layer.weight.data = self.weight.detach()
if self.bias is not None:
layer.bias.data = self.bias.detach()
return layer