r"""

Simple Model Architectures
--------------------------

In this file, you can find a number of models that are commonly used in FL community.
These models are used in `Communication-Efficient Learning of Deep Networks from
Decentralized Data`_.

.. _Communication-Efficient Learning of Deep Networks from Decentralized
    Data: https://arxiv.org/abs/1602.05629
"""

# adapted from the following repository:
# https://github.com/c-gabri/Federated-Learning-PyTorch/blob/master/src/models.py
from torch import nn
from torch.nn import functional as F
from torchvision.transforms import Resize

from .utils import get_output_size
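
# NOTE: ``get_output_size`` is defined in ``fedsim.models.utils``; it is
# assumed here to implement the standard convolution/pooling size arithmetic,
#
#     out = floor((in + 2 * padding - kernel) / stride) + 1
#
# (with string paddings such as "same" handled specially). The spatial
# bookkeeping in the CNN constructors below relies on this formula.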


class SimpleMLP(nn.Module):
    """A simple two-layer Multi-Layer Perceptron. This is referred to as 2NN
    in McMahan's FedAvg paper.

    Args:
        num_classes (int, optional): number of classes. Defaults to 10.
            Assigning None or a non-positive integer means no classifier.
        num_channels (int, optional): number of channels of the input.
            Defaults to 1.
        in_height (int, optional): input height to resize to. Defaults to 28.
        in_width (int, optional): input width to resize to. Defaults to 28.
        feature_size (int, optional): number of features. Defaults to 200.
    """

    def __init__(
        self,
        num_classes=10,
        num_channels=1,
        in_height=28,
        in_width=28,
        feature_size=200,
    ):
        super(SimpleMLP, self).__init__()
        self.feature_size = feature_size
        self.resize = Resize((in_height, in_width))
        self.fc1 = nn.Linear(num_channels * in_height * in_width, feature_size)
        self.fc2 = nn.Linear(feature_size, feature_size)
        if num_classes is not None and num_classes > 0:
            self.classifier = nn.Linear(feature_size, num_classes)
        else:
            self.classifier = None

    def forward(self, x):
        x = self.get_features(x)
        if self.classifier is not None:
            x = self.classifier(x)
        return x

    def get_features(self, x):
        r"""Gets the extracted features. Goes through all cells except the
        classifier.

        Args:
            x (Tensor): input tensor with shape
                :math:`(N\times C\times D_1\times D_2\times \dots\times D_n)`
                where ``N`` is the batch size and ``C`` is determined by
                ``num_channels``.

        Returns:
            Tensor: output tensor with shape :math:`(N\times O)` where ``O``
            is determined by ``feature_size``.
        """
        x = self.resize(x)
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
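
# A minimal usage sketch for ``SimpleMLP`` (illustrative only; the shapes
# below assume the default MNIST-like configuration):
#
#     import torch
#
#     model = SimpleMLP()
#     batch = torch.randn(32, 1, 28, 28)     # (N, C, H, W)
#     logits = model(batch)                  # shape: (32, 10)
#     features = model.get_features(batch)   # shape: (32, 200)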


class SimpleCNN(nn.Module):
    """A simple two-layer CNN.

    Args:
        num_classes (int, optional): number of classes. Defaults to 10.
            Assigning None or a non-positive integer means no classifier.
        num_channels (int, optional): number of channels of the input.
            Defaults to 1.
        in_height (int, optional): input height to resize to. Defaults to 28.
        in_width (int, optional): input width to resize to. Defaults to 28.
        num_filters1 (int, optional): number of filters in the first
            convolution. Defaults to 32.
        num_filters2 (int, optional): number of filters in the second
            convolution. Defaults to 64.
        feature_size (int, optional): number of features. Defaults to 512.
    """

    def __init__(
        self,
        num_classes=10,
        num_channels=1,
        in_height=28,
        in_width=28,
        num_filters1=32,
        num_filters2=64,
        feature_size=512,
    ):
        super(SimpleCNN, self).__init__()
        self.feature_size = feature_size
        k = 5
        s = 1
        p = 1
        self.resize = Resize((in_height, in_width))
        self.conv1 = nn.Conv2d(
            num_channels, num_filters1, kernel_size=k, stride=s, padding=p
        )
        self.conv2 = nn.Conv2d(
            num_filters1, num_filters2, kernel_size=k, stride=s, padding=p
        )
        # calculate the output size
        # 1st conv
        out_h = get_output_size(in_height, p, k, s)
        out_w = get_output_size(in_width, p, k, s)
        # 1st maxpool
        out_h = get_output_size(out_h, 1, 2, 2)
        out_w = get_output_size(out_w, 1, 2, 2)
        # 2nd conv
        out_h = get_output_size(out_h, p, k, s)
        out_w = get_output_size(out_w, p, k, s)
        # 2nd maxpool
        out_h = get_output_size(out_h, 1, 2, 2)
        out_w = get_output_size(out_w, 1, 2, 2)
        self.fc = nn.Linear(num_filters2 * out_w * out_h, feature_size)
        if num_classes is not None and num_classes > 0:
            self.classifier = nn.Linear(feature_size, num_classes)
        else:
            self.classifier = None

    def forward(self, x):
        x = self.get_features(x)
        if self.classifier is not None:
            x = self.classifier(x)
        return x

    def get_features(self, x):
        r"""Gets the extracted features. Goes through all cells except the
        classifier.

        Args:
            x (Tensor): input tensor with shape
                :math:`(N\times C\times D_1\times D_2\times \dots\times D_n)`
                where ``N`` is the batch size and ``C`` is determined by
                ``num_channels``.

        Returns:
            Tensor: output tensor with shape :math:`(N\times O)` where ``O``
            is determined by ``feature_size``.
        """
        x = self.resize(x)
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2, padding=1)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2, padding=1)
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc(x))
        return x
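
# With the default SimpleCNN configuration (in_height = in_width = 28, k = 5,
# s = 1, p = 1) the bookkeeping above works out as follows, assuming the
# floor-based formula noted near the imports:
#
#     1st conv:    floor((28 + 2 - 5) / 1) + 1 = 26
#     1st maxpool: floor((26 + 2 - 2) / 2) + 1 = 14
#     2nd conv:    floor((14 + 2 - 5) / 1) + 1 = 12
#     2nd maxpool: floor((12 + 2 - 2) / 2) + 1 = 7
#
# so ``self.fc`` receives 64 * 7 * 7 = 3136 input features.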


class SimpleCNN2(nn.Module):
    """A simple two-layer CNN. This is similar to the CNN model in McMahan's
    FedAvg paper.

    Args:
        num_classes (int, optional): number of classes. Defaults to 10.
            Assigning None or a non-positive integer means no classifier.
        num_channels (int, optional): number of channels of the input.
            Defaults to 3.
        in_height (int, optional): input height to resize to. Defaults to 24.
        in_width (int, optional): input width to resize to. Defaults to 24.
        num_filters1 (int, optional): number of filters in the first
            convolution. Defaults to 64.
        num_filters2 (int, optional): number of filters in the second
            convolution. Defaults to 64.
        hidden_size (int, optional): number of hidden neurons. Defaults to 384.
        feature_size (int, optional): number of features. Defaults to 192.
    """

    def __init__(
        self,
        num_classes=10,
        num_channels=3,
        in_height=24,
        in_width=24,
        num_filters1=64,
        num_filters2=64,
        hidden_size=384,
        feature_size=192,
    ):
        super(SimpleCNN2, self).__init__()
        self.feature_size = feature_size
        k = 5
        s = 1
        p = "same"
        self.resize = Resize((in_height, in_width))
        self.conv1 = nn.Conv2d(
            num_channels, num_filters1, kernel_size=k, stride=s, padding=p
        )
        self.conv2 = nn.Conv2d(
            num_filters1, num_filters2, kernel_size=k, stride=s, padding=p
        )
        # calculate the output size, mirroring the ops in ``get_features``
        # 1st conv ("same" padding keeps the spatial size)
        out_h = get_output_size(in_height, p, k, s)
        out_w = get_output_size(in_width, p, k, s)
        # 1st maxpool (``get_features`` pads one extra row/column beforehand)
        out_h = get_output_size(out_h + 1, 0, 3, 2)
        out_w = get_output_size(out_w + 1, 0, 3, 2)
        # 2nd conv
        out_h = get_output_size(out_h, p, k, s)
        out_w = get_output_size(out_w, p, k, s)
        # 2nd maxpool (``get_features`` pads one extra row/column beforehand)
        out_h = get_output_size(out_h + 1, 0, 3, 2)
        out_w = get_output_size(out_w + 1, 0, 3, 2)
        self.fc1 = nn.Linear(num_filters2 * out_w * out_h, hidden_size)
        self.fc2 = nn.Linear(hidden_size, feature_size)
        if num_classes is not None and num_classes > 0:
            self.classifier = nn.Linear(feature_size, num_classes)
        else:
            self.classifier = None

    def forward(self, x):
        x = self.get_features(x)
        if self.classifier is not None:
            x = self.classifier(x)
        return x

    def get_features(self, x):
        r"""Gets the extracted features. Goes through all cells except the
        classifier.

        Args:
            x (Tensor): input tensor with shape
                :math:`(N\times C\times D_1\times D_2\times \dots\times D_n)`
                where ``N`` is the batch size and ``C`` is determined by
                ``num_channels``.

        Returns:
            Tensor: output tensor with shape :math:`(N\times O)` where ``O``
            is determined by ``feature_size``.
        """
        x = self.resize(x)
        x = F.relu(self.conv1(x))
        x = F.pad(x, (0, 1, 0, 1), value=0)
        x = F.local_response_norm(x, size=4, alpha=0.001 / 9)
        x = F.max_pool2d(x, kernel_size=3, stride=2, padding=0)
        x = F.relu(self.conv2(x))
        x = F.local_response_norm(x, size=4, alpha=0.001 / 9)
        x = F.pad(x, (0, 1, 0, 1), value=0)
        x = F.max_pool2d(x, kernel_size=3, stride=2, padding=0)
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
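
# For SimpleCNN2's defaults (24x24 input, "same" conv padding, and a one-pixel
# asymmetric pad before each kernel-3, stride-2 maxpool) the same formula
# gives 24 -> 25 -> 12 -> 12 -> 13 -> 6 per spatial dimension, so ``fc1``
# receives 64 * 6 * 6 = 2304 input features.


if __name__ == "__main__":
    # Hypothetical smoke test (not part of the original module): build each
    # model with its defaults and check the logits' shape on random input.
    import torch

    for model, shape in (
        (SimpleMLP(), (2, 1, 28, 28)),
        (SimpleCNN(), (2, 1, 28, 28)),
        (SimpleCNN2(), (2, 3, 24, 24)),
    ):
        logits = model(torch.randn(*shape))
        assert logits.shape == (2, 10), logits.shape
        print(type(model).__name__, "->", tuple(logits.shape))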