Vbai-DPA-2.4

1
6 languages
license:cc-by-nc-sa-4.0
by
Neurazum
Image Model
OTHER
New
0 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary

Vbai-DPA-2.4 is an image-classification model by Neurazum, distributed as a PyTorch checkpoint; the code below reconstructs the checkpoint's architecture.

Code Examples

Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 image classifier matching the released checkpoint layout.

    ``model_type`` selects the capacity variant: 'f' (16/32/64 ch, 3 stages),
    'c' (32/64/128 ch, 3 stages), 'q' (64/128/256/512 ch, 4 stages).  Each
    stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention block.  An
    optional single-channel edge map can be fused with the main features
    through a separate combined head.

    NOTE(review): the hard-coded flattened sizes (C*28*28 / C*14*14, and
    64*56*56 for the edge branch) presumably assume 224x224 inputs — TODO
    confirm against the published preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the selected variant.

        Args:
            model_type: 'f', 'c' or 'q'.
            num_classes: output size of both heads (default 6, matching the
                released checkpoint, so existing callers are unaffected).

        Raises:
            ValueError: for an unrecognized ``model_type`` (the original code
                silently built a broken model and crashed later with a
                NameError on ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a single-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 corresponds to a 224x224 edge map after two 2x pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head: main hidden features + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and either the main or the combined head.

        Args:
            x: RGB input batch.
            edge_x: optional single-channel edge-map batch.

        Returns:
            (logits, attention_map) — ``attention_map`` is the final
            attention-weighted feature map (useful for visualization).
        """
        # Each Sequential slice [4k:4k+4] is one Conv+BN+ReLU+Pool stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — the only remaining variant (__init__ validated it)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; 'features' is the first
        # hidden representation (dropout active only in training mode).
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: a shape/device mismatch in the edge
                # branch (torch raises RuntimeError) falls back to the main
                # head instead of crashing inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel attention followed by spatial attention.

    The layer layout (bias-free 1x1 convs for the channel branch, one
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so its weights load without key/shape mismatches.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial gating; output has the same shape as x."""
        # Squeeze spatial dims to 1x1 descriptors for the channel gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        # Same bottleneck MLP (1x1 convs) applied to both descriptors.
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel mean/max across channels feed the spatial gate.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 image classifier matching the released checkpoint layout.

    model_type selects the capacity variant: 'f' (16/32/64 ch, 3 stages),
    'c' (32/64/128 ch, 3 stages), 'q' (64/128/256/512 ch, 4 stages).  Each
    stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention block.  An
    optional single-channel edge map can be fused with the main features
    through a separate combined head.

    NOTE(review): num_classes is stored but both heads are hard-wired to 6
    outputs; an unrecognized model_type builds a broken instance that fails
    later (fc_input / attention_map undefined) — confirm intended contract.
    The flattened sizes (C*28*28 / C*14*14, and 64*56*56 for the edge
    branch) presumably assume 224x224 inputs — TODO confirm.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map (useful for visualization)."""
        # Each Sequential slice [4k:4k+4] is one Conv+BN+ReLU+Pool stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; 'features' is the first
        # hidden representation (dropout active only in training mode).
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            # NOTE(review): broad catch silently swallows ANY edge-branch
            # failure (e.g. wrong edge_x resolution) and falls back to the
            # main head; 'e' is unused.
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel attention followed by spatial attention.

    The layer layout (bias-free 1x1 convs for the channel branch, one
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so its weights load without key/shape mismatches.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial gating; output has the same shape as x."""
        # Squeeze spatial dims to 1x1 descriptors for the channel gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        # Same bottleneck MLP (1x1 convs) applied to both descriptors.
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel mean/max across channels feed the spatial gate.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 image classifier matching the released checkpoint layout.

    model_type selects the capacity variant: 'f' (16/32/64 ch, 3 stages),
    'c' (32/64/128 ch, 3 stages), 'q' (64/128/256/512 ch, 4 stages).  Each
    stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention block.  An
    optional single-channel edge map can be fused with the main features
    through a separate combined head.

    NOTE(review): num_classes is stored but both heads are hard-wired to 6
    outputs; an unrecognized model_type builds a broken instance that fails
    later (fc_input / attention_map undefined) — confirm intended contract.
    The flattened sizes (C*28*28 / C*14*14, and 64*56*56 for the edge
    branch) presumably assume 224x224 inputs — TODO confirm.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map (useful for visualization)."""
        # Each Sequential slice [4k:4k+4] is one Conv+BN+ReLU+Pool stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; 'features' is the first
        # hidden representation (dropout active only in training mode).
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            # NOTE(review): broad catch silently swallows ANY edge-branch
            # failure (e.g. wrong edge_x resolution) and falls back to the
            # main head; 'e' is unused.
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel attention followed by spatial attention.

    The layer layout (bias-free 1x1 convs for the channel branch, one
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so its weights load without key/shape mismatches.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial gating; output has the same shape as x."""
        # Squeeze spatial dims to 1x1 descriptors for the channel gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        # Same bottleneck MLP (1x1 convs) applied to both descriptors.
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel mean/max across channels feed the spatial gate.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 image classifier matching the released checkpoint layout.

    model_type selects the capacity variant: 'f' (16/32/64 ch, 3 stages),
    'c' (32/64/128 ch, 3 stages), 'q' (64/128/256/512 ch, 4 stages).  Each
    stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention block.  An
    optional single-channel edge map can be fused with the main features
    through a separate combined head.

    NOTE(review): num_classes is stored but both heads are hard-wired to 6
    outputs; an unrecognized model_type builds a broken instance that fails
    later (fc_input / attention_map undefined) — confirm intended contract.
    The flattened sizes (C*28*28 / C*14*14, and 64*56*56 for the edge
    branch) presumably assume 224x224 inputs — TODO confirm.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map (useful for visualization)."""
        # Each Sequential slice [4k:4k+4] is one Conv+BN+ReLU+Pool stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; 'features' is the first
        # hidden representation (dropout active only in training mode).
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            # NOTE(review): broad catch silently swallows ANY edge-branch
            # failure (e.g. wrong edge_x resolution) and falls back to the
            # main head; 'e' is unused.
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel attention followed by spatial attention.

    The layer layout (bias-free 1x1 convs for the channel branch, one
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so its weights load without key/shape mismatches.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial gating; output has the same shape as x."""
        # Squeeze spatial dims to 1x1 descriptors for the channel gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        # Same bottleneck MLP (1x1 convs) applied to both descriptors.
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel mean/max across channels feed the spatial gate.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share one architecture family:
      'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels
    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), followed by a
    CBAM attention module. An optional edge branch (edge_conv1/2 + edge_fc)
    can be fused with the backbone features through ``combined_classifier``.

    Fixes vs the naive version: ``num_classes`` is actually used for the
    output layers (previously hard-coded to 6; the default is unchanged),
    and an unknown ``model_type`` raises ``ValueError`` immediately instead
    of failing later with ``NameError``.
    """

    # Per-variant config: (channel progression incl. input, FC hidden sizes).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._VARIANTS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage. Building it
        # in a loop keeps the nn.Sequential indices (and therefore the
        # state-dict keys) identical to the original per-variant listings.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM module per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Flattened feature size assumes 224x224 input: the spatial side
        # halves once per stage (224/8 = 28 for 'f'/'c', 224/16 = 14 for 'q').
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial

        # Edge detection branch. edge_fc expects a 1-channel 224x224 edge
        # map (two pools -> 56x56 feature maps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two (Linear -> ReLU -> Dropout) stages, then the
        # logit layer sized by num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classification head.

        Args:
            x: (N, 3, 224, 224) input batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given and
                shape-compatible, its features are fused into the head.

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attention-weighted feature map (for visualisation).
        """
        # Stage-wise: conv group i (4 sequential layers) then CBAM module i.
        # The loop length (3 or 4) follows the variant's module list.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map (edge_fc
                # expects 224x224 input) falls back to the plain head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the avg- and
    max-pooled channel descriptors, summed and squashed with sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max
    maps. All convolutions are bias-free, mirroring the checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, as 1x1 convs (no bias).
        # max(..., 1) guards against channels < reduction.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # 7x7 conv (no bias) producing a single-channel spatial logit map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: weight each channel by pooled context ---
        descriptors = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = sum(self.channel_attention(d) for d in descriptors)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: weight each location by channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share one architecture family:
      'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels
    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), followed by a
    CBAM attention module. An optional edge branch (edge_conv1/2 + edge_fc)
    can be fused with the backbone features through ``combined_classifier``.

    Fixes vs the naive version: ``num_classes`` is actually used for the
    output layers (previously hard-coded to 6; the default is unchanged),
    and an unknown ``model_type`` raises ``ValueError`` immediately instead
    of failing later with ``NameError``.
    """

    # Per-variant config: (channel progression incl. input, FC hidden sizes).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._VARIANTS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage. Building it
        # in a loop keeps the nn.Sequential indices (and therefore the
        # state-dict keys) identical to the original per-variant listings.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM module per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Flattened feature size assumes 224x224 input: the spatial side
        # halves once per stage (224/8 = 28 for 'f'/'c', 224/16 = 14 for 'q').
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial

        # Edge detection branch. edge_fc expects a 1-channel 224x224 edge
        # map (two pools -> 56x56 feature maps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two (Linear -> ReLU -> Dropout) stages, then the
        # logit layer sized by num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classification head.

        Args:
            x: (N, 3, 224, 224) input batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given and
                shape-compatible, its features are fused into the head.

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attention-weighted feature map (for visualisation).
        """
        # Stage-wise: conv group i (4 sequential layers) then CBAM module i.
        # The loop length (3 or 4) follows the variant's module list.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map (edge_fc
                # expects 224x224 input) falls back to the plain head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the avg- and
    max-pooled channel descriptors, summed and squashed with sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max
    maps. All convolutions are bias-free, mirroring the checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, as 1x1 convs (no bias).
        # max(..., 1) guards against channels < reduction.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # 7x7 conv (no bias) producing a single-channel spatial logit map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: weight each channel by pooled context ---
        descriptors = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = sum(self.channel_attention(d) for d in descriptors)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: weight each location by channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share one architecture family:
      'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels
    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), followed by a
    CBAM attention module. An optional edge branch (edge_conv1/2 + edge_fc)
    can be fused with the backbone features through ``combined_classifier``.

    Fixes vs the naive version: ``num_classes`` is actually used for the
    output layers (previously hard-coded to 6; the default is unchanged),
    and an unknown ``model_type`` raises ``ValueError`` immediately instead
    of failing later with ``NameError``.
    """

    # Per-variant config: (channel progression incl. input, FC hidden sizes).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._VARIANTS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage. Building it
        # in a loop keeps the nn.Sequential indices (and therefore the
        # state-dict keys) identical to the original per-variant listings.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM module per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Flattened feature size assumes 224x224 input: the spatial side
        # halves once per stage (224/8 = 28 for 'f'/'c', 224/16 = 14 for 'q').
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial

        # Edge detection branch. edge_fc expects a 1-channel 224x224 edge
        # map (two pools -> 56x56 feature maps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two (Linear -> ReLU -> Dropout) stages, then the
        # logit layer sized by num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classification head.

        Args:
            x: (N, 3, 224, 224) input batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given and
                shape-compatible, its features are fused into the head.

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attention-weighted feature map (for visualisation).
        """
        # Stage-wise: conv group i (4 sequential layers) then CBAM module i.
        # The loop length (3 or 4) follows the variant's module list.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map (edge_fc
                # expects 224x224 input) falls back to the plain head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the avg- and
    max-pooled channel descriptors, summed and squashed with sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max
    maps. All convolutions are bias-free, mirroring the checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, as 1x1 convs (no bias).
        # max(..., 1) guards against channels < reduction.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # 7x7 conv (no bias) producing a single-channel spatial logit map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: weight each channel by pooled context ---
        descriptors = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = sum(self.channel_attention(d) for d in descriptors)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: weight each location by channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share one architecture family:
      'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels
    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), followed by a
    CBAM attention module. An optional edge branch (edge_conv1/2 + edge_fc)
    can be fused with the backbone features through ``combined_classifier``.

    Fixes vs the naive version: ``num_classes`` is actually used for the
    output layers (previously hard-coded to 6; the default is unchanged),
    and an unknown ``model_type`` raises ``ValueError`` immediately instead
    of failing later with ``NameError``.
    """

    # Per-variant config: (channel progression incl. input, FC hidden sizes).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._VARIANTS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage. Building it
        # in a loop keeps the nn.Sequential indices (and therefore the
        # state-dict keys) identical to the original per-variant listings.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM module per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Flattened feature size assumes 224x224 input: the spatial side
        # halves once per stage (224/8 = 28 for 'f'/'c', 224/16 = 14 for 'q').
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial

        # Edge detection branch. edge_fc expects a 1-channel 224x224 edge
        # map (two pools -> 56x56 feature maps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two (Linear -> ReLU -> Dropout) stages, then the
        # logit layer sized by num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classification head.

        Args:
            x: (N, 3, 224, 224) input batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given and
                shape-compatible, its features are fused into the head.

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attention-weighted feature map (for visualisation).
        """
        # Stage-wise: conv group i (4 sequential layers) then CBAM module i.
        # The loop length (3 or 4) follows the variant's module list.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map (edge_fc
                # expects 224x224 input) falls back to the plain head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the avg- and
    max-pooled channel descriptors, summed and squashed with sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max
    maps. All convolutions are bias-free, mirroring the checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, as 1x1 convs (no bias).
        # max(..., 1) guards against channels < reduction.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # 7x7 conv (no bias) producing a single-channel spatial logit map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: weight each channel by pooled context ---
        descriptors = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = sum(self.channel_attention(d) for d in descriptors)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: weight each location by channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share one architecture family:
      'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels
    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2), followed by a
    CBAM attention module. An optional edge branch (edge_conv1/2 + edge_fc)
    can be fused with the backbone features through ``combined_classifier``.

    Fixes vs the naive version: ``num_classes`` is actually used for the
    output layers (previously hard-coded to 6; the default is unchanged),
    and an unknown ``model_type`` raises ``ValueError`` immediately instead
    of failing later with ``NameError``.
    """

    # Per-variant config: (channel progression incl. input, FC hidden sizes).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._VARIANTS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage. Building it
        # in a loop keeps the nn.Sequential indices (and therefore the
        # state-dict keys) identical to the original per-variant listings.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM module per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Flattened feature size assumes 224x224 input: the spatial side
        # halves once per stage (224/8 = 28 for 'f'/'c', 224/16 = 14 for 'q').
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial

        # Edge detection branch. edge_fc expects a 1-channel 224x224 edge
        # map (two pools -> 56x56 feature maps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two (Linear -> ReLU -> Dropout) stages, then the
        # logit layer sized by num_classes.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classification head.

        Args:
            x: (N, 3, 224, 224) input batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given and
                shape-compatible, its features are fused into the head.

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attention-weighted feature map (for visualisation).
        """
        # Stage-wise: conv group i (4 sequential layers) then CBAM module i.
        # The loop length (3 or 4) follows the variant's module list.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map (edge_fc
                # expects 224x224 input) falls back to the plain head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's parameter layout.

    Channel attention: two bias-free 1x1 convolutions with a reduction
    bottleneck, applied to both avg- and max-pooled descriptors.
    Spatial attention: one bias-free 7x7 convolution over the channel-wise
    [mean, max] summary maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP expressed as 1x1 convs (no bias, as in the
        # released checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # 7x7 conv over the 2-channel spatial summary (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, parameter-layout compatible with the
    released checkpoints.

    Variants selected by ``model_type``:
        'f': 3 stages, 16/32/64 channels,  fc widths 256/128
        'c': 3 stages, 32/64/128 channels, fc widths 512/256
        'q': 4 stages, 64/128/256/512 ch., fc widths 1024/512

    Each stage is a Conv(3x3)+BN+ReLU+MaxPool(2) quartet followed by a CBAM
    attention module.  The fully-connected input sizes assume 224x224 RGB
    input (28x28 maps after 3 pools, 14x14 after 4); the optional edge
    branch assumes a 224x224 single-channel edge map (edge_fc expects
    64*56*56 inputs).

    Args:
        model_type: 'f', 'c' or 'q'; anything else raises ValueError.
        num_classes: size of the output layer (released checkpoints use 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel plan and classifier widths.  The original
        # hard-coded 6 outputs are generalized to num_classes (default 6,
        # so existing checkpoints still load unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            fc_input = 64 * 28 * 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            fc_input = 128 * 28 * 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            fc_input = 512 * 14 * 14
        else:
            # Previously an unknown type surfaced later as a confusing
            # NameError/UnboundLocalError; fail fast instead.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Conv backbone: one Conv+BN+ReLU+Pool quartet per stage.  Building
        # it in order keeps the state-dict keys identical to the checkpoint
        # (conv_layers.0, conv_layers.1, ...).
        stages = []
        for cin, cout in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge detection branch (fixed geometry: 224x224 -> 56x56 after the
        # two poolings applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        attention_map is the feature map after the last CBAM block.  When
        edge_x is given, the edge branch's 128-d features are concatenated
        onto the shared features and routed through combined_classifier; on
        a shape mismatch the model falls back to the plain head.
        """
        # Run each Conv/BN/ReLU/Pool quartet followed by its CBAM module;
        # conv_layers holds exactly 4 submodules per attention module, so
        # this loop covers both the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the feature vector shared by
        # both classifier heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: a wrongly sized edge map (edge_fc is
                # fixed at 64*56*56 inputs) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
## Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's parameter layout.

    Channel attention: two bias-free 1x1 convolutions with a reduction
    bottleneck, applied to both avg- and max-pooled descriptors.
    Spatial attention: one bias-free 7x7 convolution over the channel-wise
    [mean, max] summary maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP expressed as 1x1 convs (no bias, as in the
        # released checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # 7x7 conv over the 2-channel spatial summary (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, parameter-layout compatible with the
    released checkpoints.

    Variants selected by ``model_type``:
        'f': 3 stages, 16/32/64 channels,  fc widths 256/128
        'c': 3 stages, 32/64/128 channels, fc widths 512/256
        'q': 4 stages, 64/128/256/512 ch., fc widths 1024/512

    Each stage is a Conv(3x3)+BN+ReLU+MaxPool(2) quartet followed by a CBAM
    attention module.  The fully-connected input sizes assume 224x224 RGB
    input (28x28 maps after 3 pools, 14x14 after 4); the optional edge
    branch assumes a 224x224 single-channel edge map (edge_fc expects
    64*56*56 inputs).

    Args:
        model_type: 'f', 'c' or 'q'; anything else raises ValueError.
        num_classes: size of the output layer (released checkpoints use 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel plan and classifier widths.  The original
        # hard-coded 6 outputs are generalized to num_classes (default 6,
        # so existing checkpoints still load unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            fc_input = 64 * 28 * 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            fc_input = 128 * 28 * 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            fc_input = 512 * 14 * 14
        else:
            # Previously an unknown type surfaced later as a confusing
            # NameError/UnboundLocalError; fail fast instead.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Conv backbone: one Conv+BN+ReLU+Pool quartet per stage.  Building
        # it in order keeps the state-dict keys identical to the checkpoint
        # (conv_layers.0, conv_layers.1, ...).
        stages = []
        for cin, cout in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge detection branch (fixed geometry: 224x224 -> 56x56 after the
        # two poolings applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        attention_map is the feature map after the last CBAM block.  When
        edge_x is given, the edge branch's 128-d features are concatenated
        onto the shared features and routed through combined_classifier; on
        a shape mismatch the model falls back to the plain head.
        """
        # Run each Conv/BN/ReLU/Pool quartet followed by its CBAM module;
        # conv_layers holds exactly 4 submodules per attention module, so
        # this loop covers both the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the feature vector shared by
        # both classifier heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: a wrongly sized edge map (edge_fc is
                # fixed at 64*56*56 inputs) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
## Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's parameter layout.

    Channel attention: two bias-free 1x1 convolutions with a reduction
    bottleneck, applied to both avg- and max-pooled descriptors.
    Spatial attention: one bias-free 7x7 convolution over the channel-wise
    [mean, max] summary maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP expressed as 1x1 convs (no bias, as in the
        # released checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # 7x7 conv over the 2-channel spatial summary (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, parameter-layout compatible with the
    released checkpoints.

    Variants selected by ``model_type``:
        'f': 3 stages, 16/32/64 channels,  fc widths 256/128
        'c': 3 stages, 32/64/128 channels, fc widths 512/256
        'q': 4 stages, 64/128/256/512 ch., fc widths 1024/512

    Each stage is a Conv(3x3)+BN+ReLU+MaxPool(2) quartet followed by a CBAM
    attention module.  The fully-connected input sizes assume 224x224 RGB
    input (28x28 maps after 3 pools, 14x14 after 4); the optional edge
    branch assumes a 224x224 single-channel edge map (edge_fc expects
    64*56*56 inputs).

    Args:
        model_type: 'f', 'c' or 'q'; anything else raises ValueError.
        num_classes: size of the output layer (released checkpoints use 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel plan and classifier widths.  The original
        # hard-coded 6 outputs are generalized to num_classes (default 6,
        # so existing checkpoints still load unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            fc_input = 64 * 28 * 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            fc_input = 128 * 28 * 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            fc_input = 512 * 14 * 14
        else:
            # Previously an unknown type surfaced later as a confusing
            # NameError/UnboundLocalError; fail fast instead.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Conv backbone: one Conv+BN+ReLU+Pool quartet per stage.  Building
        # it in order keeps the state-dict keys identical to the checkpoint
        # (conv_layers.0, conv_layers.1, ...).
        stages = []
        for cin, cout in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge detection branch (fixed geometry: 224x224 -> 56x56 after the
        # two poolings applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        attention_map is the feature map after the last CBAM block.  When
        edge_x is given, the edge branch's 128-d features are concatenated
        onto the shared features and routed through combined_classifier; on
        a shape mismatch the model falls back to the plain head.
        """
        # Run each Conv/BN/ReLU/Pool quartet followed by its CBAM module;
        # conv_layers holds exactly 4 submodules per attention module, so
        # this loop covers both the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the feature vector shared by
        # both classifier heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: a wrongly sized edge map (edge_fc is
                # fixed at 64*56*56 inputs) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
## Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's parameter layout.

    Channel attention: two bias-free 1x1 convolutions with a reduction
    bottleneck, applied to both avg- and max-pooled descriptors.
    Spatial attention: one bias-free 7x7 convolution over the channel-wise
    [mean, max] summary maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP expressed as 1x1 convs (no bias, as in the
        # released checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # 7x7 conv over the 2-channel spatial summary (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, parameter-layout compatible with the
    released checkpoints.

    Variants selected by ``model_type``:
        'f': 3 stages, 16/32/64 channels,  fc widths 256/128
        'c': 3 stages, 32/64/128 channels, fc widths 512/256
        'q': 4 stages, 64/128/256/512 ch., fc widths 1024/512

    Each stage is a Conv(3x3)+BN+ReLU+MaxPool(2) quartet followed by a CBAM
    attention module.  The fully-connected input sizes assume 224x224 RGB
    input (28x28 maps after 3 pools, 14x14 after 4); the optional edge
    branch assumes a 224x224 single-channel edge map (edge_fc expects
    64*56*56 inputs).

    Args:
        model_type: 'f', 'c' or 'q'; anything else raises ValueError.
        num_classes: size of the output layer (released checkpoints use 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel plan and classifier widths.  The original
        # hard-coded 6 outputs are generalized to num_classes (default 6,
        # so existing checkpoints still load unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            fc_input = 64 * 28 * 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            fc_input = 128 * 28 * 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            fc_input = 512 * 14 * 14
        else:
            # Previously an unknown type surfaced later as a confusing
            # NameError/UnboundLocalError; fail fast instead.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Conv backbone: one Conv+BN+ReLU+Pool quartet per stage.  Building
        # it in order keeps the state-dict keys identical to the checkpoint
        # (conv_layers.0, conv_layers.1, ...).
        stages = []
        for cin, cout in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge detection branch (fixed geometry: 224x224 -> 56x56 after the
        # two poolings applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        attention_map is the feature map after the last CBAM block.  When
        edge_x is given, the edge branch's 128-d features are concatenated
        onto the shared features and routed through combined_classifier; on
        a shape mismatch the model falls back to the plain head.
        """
        # Run each Conv/BN/ReLU/Pool quartet followed by its CBAM module;
        # conv_layers holds exactly 4 submodules per attention module, so
        # this loop covers both the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the feature vector shared by
        # both classifier heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: a wrongly sized edge map (edge_fc is
                # fixed at 64*56*56 inputs) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
## Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's parameter layout.

    Channel attention: two bias-free 1x1 convolutions with a reduction
    bottleneck, applied to both avg- and max-pooled descriptors.
    Spatial attention: one bias-free 7x7 convolution over the channel-wise
    [mean, max] summary maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP expressed as 1x1 convs (no bias, as in the
        # released checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # 7x7 conv over the 2-channel spatial summary (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layout-matched to the
    released checkpoint (attribute names and layer order must not change,
    or `load_state_dict` breaks)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as two 1x1 convolutions; the checkpoint
        # stores no bias terms.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 conv over the stacked
        # mean/max maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled squeezes ---
        squeezes = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = self.channel_attention(squeezes[0]) + self.channel_attention(squeezes[1])
        x = x * torch.sigmoid(logits)

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        peak_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(self.spatial_attention(torch.cat((mean_map, peak_map), dim=1)))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier with per-stage CBAM attention and an
    optional edge-map branch.

    Variants (selected by `model_type`):
      'f': 3 conv stages (16/32/64 ch),       fc 256->128->num_classes
      'c': 3 conv stages (32/64/128 ch),      fc 512->256->num_classes
      'q': 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Attribute names and module layout deliberately mirror the released
    checkpoint so `load_state_dict` needs no key remapping.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: number of output logits (default 6, matching the
                released checkpoint; previously this argument was stored
                but ignored).

        Raises:
            ValueError: if `model_type` is unknown.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        # Fail fast: an unknown type previously fell through the if/elif
        # chain and crashed later with a NameError on `fc_input`.
        if model_type not in ('f', 'c', 'q'):
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, one CBAM
        # module per stage.  fc_input assumes 224x224 inputs (three 2x pools
        # -> 28x28, four -> 14x14) — TODO confirm against the training code.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q'
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch: two conv+pool steps on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps
        # -> 56x56 after two 2x pools — TODO confirm).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout triple doubles as a feature extractor.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch (N, 3, H, W); H/W must match the sizes the fc
                layers were built for.
            edge_x: optional (N, 1, H, W) edge map; when provided (and
                shape-compatible), its features are fused via
                ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            output of the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # __init__ validates model_type, but keep forward() safe too
            # instead of hitting UnboundLocalError on `attention_map`.
            raise ValueError(f"unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout triple produces the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain classifier when
                # the edge branch fails (e.g. unexpected edge_x shape).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layout-matched to the
    released checkpoint (attribute names and layer order must not change,
    or `load_state_dict` breaks)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as two 1x1 convolutions; the checkpoint
        # stores no bias terms.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 conv over the stacked
        # mean/max maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled squeezes ---
        squeezes = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = self.channel_attention(squeezes[0]) + self.channel_attention(squeezes[1])
        x = x * torch.sigmoid(logits)

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        peak_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(self.spatial_attention(torch.cat((mean_map, peak_map), dim=1)))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier with per-stage CBAM attention and an
    optional edge-map branch.

    Variants (selected by `model_type`):
      'f': 3 conv stages (16/32/64 ch),       fc 256->128->num_classes
      'c': 3 conv stages (32/64/128 ch),      fc 512->256->num_classes
      'q': 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Attribute names and module layout deliberately mirror the released
    checkpoint so `load_state_dict` needs no key remapping.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: number of output logits (default 6, matching the
                released checkpoint; previously this argument was stored
                but ignored).

        Raises:
            ValueError: if `model_type` is unknown.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        # Fail fast: an unknown type previously fell through the if/elif
        # chain and crashed later with a NameError on `fc_input`.
        if model_type not in ('f', 'c', 'q'):
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, one CBAM
        # module per stage.  fc_input assumes 224x224 inputs (three 2x pools
        # -> 28x28, four -> 14x14) — TODO confirm against the training code.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q'
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch: two conv+pool steps on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps
        # -> 56x56 after two 2x pools — TODO confirm).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout triple doubles as a feature extractor.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch (N, 3, H, W); H/W must match the sizes the fc
                layers were built for.
            edge_x: optional (N, 1, H, W) edge map; when provided (and
                shape-compatible), its features are fused via
                ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            output of the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # __init__ validates model_type, but keep forward() safe too
            # instead of hitting UnboundLocalError on `attention_map`.
            raise ValueError(f"unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout triple produces the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain classifier when
                # the edge branch fails (e.g. unexpected edge_x shape).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layout-matched to the
    released checkpoint (attribute names and layer order must not change,
    or `load_state_dict` breaks)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as two 1x1 convolutions; the checkpoint
        # stores no bias terms.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 conv over the stacked
        # mean/max maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled squeezes ---
        squeezes = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = self.channel_attention(squeezes[0]) + self.channel_attention(squeezes[1])
        x = x * torch.sigmoid(logits)

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        peak_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(self.spatial_attention(torch.cat((mean_map, peak_map), dim=1)))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier with per-stage CBAM attention and an
    optional edge-map branch.

    Variants (selected by `model_type`):
      'f': 3 conv stages (16/32/64 ch),       fc 256->128->num_classes
      'c': 3 conv stages (32/64/128 ch),      fc 512->256->num_classes
      'q': 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Attribute names and module layout deliberately mirror the released
    checkpoint so `load_state_dict` needs no key remapping.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: number of output logits (default 6, matching the
                released checkpoint; previously this argument was stored
                but ignored).

        Raises:
            ValueError: if `model_type` is unknown.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        # Fail fast: an unknown type previously fell through the if/elif
        # chain and crashed later with a NameError on `fc_input`.
        if model_type not in ('f', 'c', 'q'):
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, one CBAM
        # module per stage.  fc_input assumes 224x224 inputs (three 2x pools
        # -> 28x28, four -> 14x14) — TODO confirm against the training code.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q'
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch: two conv+pool steps on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps
        # -> 56x56 after two 2x pools — TODO confirm).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout triple doubles as a feature extractor.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch (N, 3, H, W); H/W must match the sizes the fc
                layers were built for.
            edge_x: optional (N, 1, H, W) edge map; when provided (and
                shape-compatible), its features are fused via
                ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            output of the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # __init__ validates model_type, but keep forward() safe too
            # instead of hitting UnboundLocalError on `attention_map`.
            raise ValueError(f"unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout triple produces the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain classifier when
                # the edge branch fails (e.g. unexpected edge_x shape).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layout-matched to the
    released checkpoint (attribute names and layer order must not change,
    or `load_state_dict` breaks)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as two 1x1 convolutions; the checkpoint
        # stores no bias terms.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 conv over the stacked
        # mean/max maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled squeezes ---
        squeezes = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = self.channel_attention(squeezes[0]) + self.channel_attention(squeezes[1])
        x = x * torch.sigmoid(logits)

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        peak_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(self.spatial_attention(torch.cat((mean_map, peak_map), dim=1)))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier with per-stage CBAM attention and an
    optional edge-map branch.

    Variants (selected by `model_type`):
      'f': 3 conv stages (16/32/64 ch),       fc 256->128->num_classes
      'c': 3 conv stages (32/64/128 ch),      fc 512->256->num_classes
      'q': 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Attribute names and module layout deliberately mirror the released
    checkpoint so `load_state_dict` needs no key remapping.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: number of output logits (default 6, matching the
                released checkpoint; previously this argument was stored
                but ignored).

        Raises:
            ValueError: if `model_type` is unknown.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        # Fail fast: an unknown type previously fell through the if/elif
        # chain and crashed later with a NameError on `fc_input`.
        if model_type not in ('f', 'c', 'q'):
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, one CBAM
        # module per stage.  fc_input assumes 224x224 inputs (three 2x pools
        # -> 28x28, four -> 14x14) — TODO confirm against the training code.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q'
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch: two conv+pool steps on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps
        # -> 56x56 after two 2x pools — TODO confirm).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout triple doubles as a feature extractor.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch (N, 3, H, W); H/W must match the sizes the fc
                layers were built for.
            edge_x: optional (N, 1, H, W) edge map; when provided (and
                shape-compatible), its features are fused via
                ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            output of the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # __init__ validates model_type, but keep forward() safe too
            # instead of hitting UnboundLocalError on `attention_map`.
            raise ValueError(f"unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout triple produces the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain classifier when
                # the edge branch fails (e.g. unexpected edge_x shape).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layout-matched to the
    released checkpoint (attribute names and layer order must not change,
    or `load_state_dict` breaks)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as two 1x1 convolutions; the checkpoint
        # stores no bias terms.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 conv over the stacked
        # mean/max maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled squeezes ---
        squeezes = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        logits = self.channel_attention(squeezes[0]) + self.channel_attention(squeezes[1])
        x = x * torch.sigmoid(logits)

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        peak_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(self.spatial_attention(torch.cat((mean_map, peak_map), dim=1)))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier, rebuilt to load a serialized checkpoint.

    Submodule names (``conv_layers``, ``attention_modules``, ``classifier``,
    ``edge_conv1``/``edge_conv2``/``edge_fc``, ``combined_classifier``) and the
    layer ordering inside each ``nn.Sequential`` must stay exactly as they are
    so ``load_state_dict`` can match the checkpoint keys.

    Args:
        model_type: Capacity variant — ``'f'`` (3 conv stages, 16/32/64
            channels), ``'c'`` (3 stages, 32/64/128) or ``'q'`` (4 stages,
            64/128/256/512).
        num_classes: Number of output logits (default 6, matching the released
            checkpoints; other values change the final Linear shapes and will
            not load a 6-class checkpoint).

    Raises:
        ValueError: If ``model_type`` is not ``'f'``, ``'c'`` or ``'q'``.

    NOTE(review): the flattened feature sizes assume a 224x224 RGB input
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    single-channel edge map (224 / 2**2 = 56) — confirm against the
    preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages (4 modules
        # per stage; forward() slices the Sequential in groups of 4).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Was hard-coded to 6; num_classes keeps the default behaviour
            # identical while letting the head generalize.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 1/4 the resolution of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Image-only head; forward() splits it at index 3 so the edge branch
        # can reuse the first-stage features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image features + 128-d
        # edge embedding).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch ``(N, 3, H, W)``, sized so the flattened features
                match ``fc_input`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge map ``(N, 1, H, W)``; when
                given (and correctly sized) the combined head is used.
        """
        # Stage-wise backbone: conv stage i (4 sequential modules), then its
        # CBAM attention module; the last attended map is returned for viz.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear + ReLU + Dropout) yields the shared
        # feature vector consumed by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the image-only head when the edge
                # branch fails (e.g. a mis-sized edge_x).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) laid out to match a checkpoint.

    The submodule names (``channel_attention``, ``spatial_attention``) and the
    bias-free conv layers mirror the serialized state_dict — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP realised as two 1x1 convs (no bias, per checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier, rebuilt to load a serialized checkpoint.

    Submodule names (``conv_layers``, ``attention_modules``, ``classifier``,
    ``edge_conv1``/``edge_conv2``/``edge_fc``, ``combined_classifier``) and the
    layer ordering inside each ``nn.Sequential`` must stay exactly as they are
    so ``load_state_dict`` can match the checkpoint keys.

    Args:
        model_type: Capacity variant — ``'f'`` (3 conv stages, 16/32/64
            channels), ``'c'`` (3 stages, 32/64/128) or ``'q'`` (4 stages,
            64/128/256/512).
        num_classes: Number of output logits (default 6, matching the released
            checkpoints; other values change the final Linear shapes and will
            not load a 6-class checkpoint).

    Raises:
        ValueError: If ``model_type`` is not ``'f'``, ``'c'`` or ``'q'``.

    NOTE(review): the flattened feature sizes assume a 224x224 RGB input
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    single-channel edge map (224 / 2**2 = 56) — confirm against the
    preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages (4 modules
        # per stage; forward() slices the Sequential in groups of 4).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Was hard-coded to 6; num_classes keeps the default behaviour
            # identical while letting the head generalize.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 1/4 the resolution of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Image-only head; forward() splits it at index 3 so the edge branch
        # can reuse the first-stage features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image features + 128-d
        # edge embedding).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch ``(N, 3, H, W)``, sized so the flattened features
                match ``fc_input`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge map ``(N, 1, H, W)``; when
                given (and correctly sized) the combined head is used.
        """
        # Stage-wise backbone: conv stage i (4 sequential modules), then its
        # CBAM attention module; the last attended map is returned for viz.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear + ReLU + Dropout) yields the shared
        # feature vector consumed by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the image-only head when the edge
                # branch fails (e.g. a mis-sized edge_x).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) laid out to match a checkpoint.

    The submodule names (``channel_attention``, ``spatial_attention``) and the
    bias-free conv layers mirror the serialized state_dict — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP realised as two 1x1 convs (no bias, per checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier, rebuilt to load a serialized checkpoint.

    Submodule names (``conv_layers``, ``attention_modules``, ``classifier``,
    ``edge_conv1``/``edge_conv2``/``edge_fc``, ``combined_classifier``) and the
    layer ordering inside each ``nn.Sequential`` must stay exactly as they are
    so ``load_state_dict`` can match the checkpoint keys.

    Args:
        model_type: Capacity variant — ``'f'`` (3 conv stages, 16/32/64
            channels), ``'c'`` (3 stages, 32/64/128) or ``'q'`` (4 stages,
            64/128/256/512).
        num_classes: Number of output logits (default 6, matching the released
            checkpoints; other values change the final Linear shapes and will
            not load a 6-class checkpoint).

    Raises:
        ValueError: If ``model_type`` is not ``'f'``, ``'c'`` or ``'q'``.

    NOTE(review): the flattened feature sizes assume a 224x224 RGB input
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    single-channel edge map (224 / 2**2 = 56) — confirm against the
    preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages (4 modules
        # per stage; forward() slices the Sequential in groups of 4).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Was hard-coded to 6; num_classes keeps the default behaviour
            # identical while letting the head generalize.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 1/4 the resolution of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Image-only head; forward() splits it at index 3 so the edge branch
        # can reuse the first-stage features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image features + 128-d
        # edge embedding).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch ``(N, 3, H, W)``, sized so the flattened features
                match ``fc_input`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge map ``(N, 1, H, W)``; when
                given (and correctly sized) the combined head is used.
        """
        # Stage-wise backbone: conv stage i (4 sequential modules), then its
        # CBAM attention module; the last attended map is returned for viz.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear + ReLU + Dropout) yields the shared
        # feature vector consumed by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the image-only head when the edge
                # branch fails (e.g. a mis-sized edge_x).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) laid out to match a checkpoint.

    The submodule names (``channel_attention``, ``spatial_attention``) and the
    bias-free conv layers mirror the serialized state_dict — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP realised as two 1x1 convs (no bias, per checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier, rebuilt to load a serialized checkpoint.

    Submodule names (``conv_layers``, ``attention_modules``, ``classifier``,
    ``edge_conv1``/``edge_conv2``/``edge_fc``, ``combined_classifier``) and the
    layer ordering inside each ``nn.Sequential`` must stay exactly as they are
    so ``load_state_dict`` can match the checkpoint keys.

    Args:
        model_type: Capacity variant — ``'f'`` (3 conv stages, 16/32/64
            channels), ``'c'`` (3 stages, 32/64/128) or ``'q'`` (4 stages,
            64/128/256/512).
        num_classes: Number of output logits (default 6, matching the released
            checkpoints; other values change the final Linear shapes and will
            not load a 6-class checkpoint).

    Raises:
        ValueError: If ``model_type`` is not ``'f'``, ``'c'`` or ``'q'``.

    NOTE(review): the flattened feature sizes assume a 224x224 RGB input
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    single-channel edge map (224 / 2**2 = 56) — confirm against the
    preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages (4 modules
        # per stage; forward() slices the Sequential in groups of 4).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Was hard-coded to 6; num_classes keeps the default behaviour
            # identical while letting the head generalize.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 1/4 the resolution of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Image-only head; forward() splits it at index 3 so the edge branch
        # can reuse the first-stage features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image features + 128-d
        # edge embedding).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch ``(N, 3, H, W)``, sized so the flattened features
                match ``fc_input`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge map ``(N, 1, H, W)``; when
                given (and correctly sized) the combined head is used.
        """
        # Stage-wise backbone: conv stage i (4 sequential modules), then its
        # CBAM attention module; the last attended map is returned for viz.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear + ReLU + Dropout) yields the shared
        # feature vector consumed by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the image-only head when the edge
                # branch fails (e.g. a mis-sized edge_x).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) laid out to match a checkpoint.

    The submodule names (``channel_attention``, ``spatial_attention``) and the
    bias-free conv layers mirror the serialized state_dict — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP realised as two 1x1 convs (no bias, per checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier, rebuilt to load a serialized checkpoint.

    Submodule names (``conv_layers``, ``attention_modules``, ``classifier``,
    ``edge_conv1``/``edge_conv2``/``edge_fc``, ``combined_classifier``) and the
    layer ordering inside each ``nn.Sequential`` must stay exactly as they are
    so ``load_state_dict`` can match the checkpoint keys.

    Args:
        model_type: Capacity variant — ``'f'`` (3 conv stages, 16/32/64
            channels), ``'c'`` (3 stages, 32/64/128) or ``'q'`` (4 stages,
            64/128/256/512).
        num_classes: Number of output logits (default 6, matching the released
            checkpoints; other values change the final Linear shapes and will
            not load a 6-class checkpoint).

    Raises:
        ValueError: If ``model_type`` is not ``'f'``, ``'c'`` or ``'q'``.

    NOTE(review): the flattened feature sizes assume a 224x224 RGB input
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    single-channel edge map (224 / 2**2 = 56) — confirm against the
    preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages (4 modules
        # per stage; forward() slices the Sequential in groups of 4).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Was hard-coded to 6; num_classes keeps the default behaviour
            # identical while letting the head generalize.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 1/4 the resolution of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Image-only head; forward() splits it at index 3 so the edge branch
        # can reuse the first-stage features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image features + 128-d
        # edge embedding).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch ``(N, 3, H, W)``, sized so the flattened features
                match ``fc_input`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge map ``(N, 1, H, W)``; when
                given (and correctly sized) the combined head is used.
        """
        # Stage-wise backbone: conv stage i (4 sequential modules), then its
        # CBAM attention module; the last attended map is returned for viz.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear + ReLU + Dropout) yields the shared
        # feature vector consumed by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the image-only head when the edge
                # branch fails (e.g. a mis-sized edge_x).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) matching the checkpoint layout.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over the per-pixel
    channel mean and max). All convolutions are bias-free so parameter names
    and shapes line up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Squeeze width for the channel MLP; never collapses below 1.
        hidden = max(channels // reduction, 1)

        # Channel attention: 1x1 conv squeeze/excite (bias-free per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages (3 stages for the
    'f' and 'c' variants, 4 for 'q'), each followed by a CBAM attention
    module. Features are flattened through a 3-layer classifier. When a
    single-channel edge map is supplied, a small conv branch extracts a 128-d
    edge vector that is fused with the intermediate classifier features.

    Args:
        model_type: 'f', 'c' or 'q' -- selects channel widths and fc sizes.
        num_classes: number of output classes. Previously the head was
            hard-coded to 6 outputs and this argument was ignored; the
            default of 6 keeps backward compatibility.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    # Per-variant configuration: stage output channels, flattened conv
    # feature size (assumes 224x224 input), and the two hidden fc widths.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast with a clear error instead of a NameError below.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_input, hidden_sizes = self._CONFIGS[model_type]
        # Final width now honours num_classes (was fixed at 6).
        fc_sizes = hidden_sizes + [num_classes]

        # Conv backbone: Conv+BN+ReLU+Pool per stage, one CBAM per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 input pooled twice (/4) -> 56x56 spatial grid.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB image batch; fc sizes assume 224x224 spatial input.
            edge_x: optional single-channel edge-map batch matching ``x``.

        Returns:
            (output, attention_map): class logits and the final attended
            conv feature map (useful for visualisation).
        """
        # Each stage is 4 consecutive Sequential entries (Conv, BN, ReLU,
        # Pool) followed by its CBAM module; the loop replaces the previous
        # per-variant copy/paste and works for both 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # the edge branch fuses with.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the image-only head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) matching the checkpoint layout.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over the per-pixel
    channel mean and max). All convolutions are bias-free so parameter names
    and shapes line up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Squeeze width for the channel MLP; never collapses below 1.
        hidden = max(channels // reduction, 1)

        # Channel attention: 1x1 conv squeeze/excite (bias-free per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages (3 stages for the
    'f' and 'c' variants, 4 for 'q'), each followed by a CBAM attention
    module. Features are flattened through a 3-layer classifier. When a
    single-channel edge map is supplied, a small conv branch extracts a 128-d
    edge vector that is fused with the intermediate classifier features.

    Args:
        model_type: 'f', 'c' or 'q' -- selects channel widths and fc sizes.
        num_classes: number of output classes. Previously the head was
            hard-coded to 6 outputs and this argument was ignored; the
            default of 6 keeps backward compatibility.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    # Per-variant configuration: stage output channels, flattened conv
    # feature size (assumes 224x224 input), and the two hidden fc widths.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast with a clear error instead of a NameError below.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_input, hidden_sizes = self._CONFIGS[model_type]
        # Final width now honours num_classes (was fixed at 6).
        fc_sizes = hidden_sizes + [num_classes]

        # Conv backbone: Conv+BN+ReLU+Pool per stage, one CBAM per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 input pooled twice (/4) -> 56x56 spatial grid.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB image batch; fc sizes assume 224x224 spatial input.
            edge_x: optional single-channel edge-map batch matching ``x``.

        Returns:
            (output, attention_map): class logits and the final attended
            conv feature map (useful for visualisation).
        """
        # Each stage is 4 consecutive Sequential entries (Conv, BN, ReLU,
        # Pool) followed by its CBAM module; the loop replaces the previous
        # per-variant copy/paste and works for both 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # the edge branch fuses with.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the image-only head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) matching the checkpoint layout.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over the per-pixel
    channel mean and max). All convolutions are bias-free so parameter names
    and shapes line up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Squeeze width for the channel MLP; never collapses below 1.
        hidden = max(channels // reduction, 1)

        # Channel attention: 1x1 conv squeeze/excite (bias-free per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages (3 stages for the
    'f' and 'c' variants, 4 for 'q'), each followed by a CBAM attention
    module. Features are flattened through a 3-layer classifier. When a
    single-channel edge map is supplied, a small conv branch extracts a 128-d
    edge vector that is fused with the intermediate classifier features.

    Args:
        model_type: 'f', 'c' or 'q' -- selects channel widths and fc sizes.
        num_classes: number of output classes. Previously the head was
            hard-coded to 6 outputs and this argument was ignored; the
            default of 6 keeps backward compatibility.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    # Per-variant configuration: stage output channels, flattened conv
    # feature size (assumes 224x224 input), and the two hidden fc widths.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast with a clear error instead of a NameError below.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_input, hidden_sizes = self._CONFIGS[model_type]
        # Final width now honours num_classes (was fixed at 6).
        fc_sizes = hidden_sizes + [num_classes]

        # Conv backbone: Conv+BN+ReLU+Pool per stage, one CBAM per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 input pooled twice (/4) -> 56x56 spatial grid.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB image batch; fc sizes assume 224x224 spatial input.
            edge_x: optional single-channel edge-map batch matching ``x``.

        Returns:
            (output, attention_map): class logits and the final attended
            conv feature map (useful for visualisation).
        """
        # Each stage is 4 consecutive Sequential entries (Conv, BN, ReLU,
        # Pool) followed by its CBAM module; the loop replaces the previous
        # per-variant copy/paste and works for both 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # the edge branch fuses with.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the image-only head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) matching the checkpoint layout.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over the per-pixel
    channel mean and max). All convolutions are bias-free so parameter names
    and shapes line up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Squeeze width for the channel MLP; never collapses below 1.
        hidden = max(channels // reduction, 1)

        # Channel attention: 1x1 conv squeeze/excite (bias-free per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages (3 stages for the
    'f' and 'c' variants, 4 for 'q'), each followed by a CBAM attention
    module. Features are flattened through a 3-layer classifier. When a
    single-channel edge map is supplied, a small conv branch extracts a 128-d
    edge vector that is fused with the intermediate classifier features.

    Args:
        model_type: 'f', 'c' or 'q' -- selects channel widths and fc sizes.
        num_classes: number of output classes. Previously the head was
            hard-coded to 6 outputs and this argument was ignored; the
            default of 6 keeps backward compatibility.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    # Per-variant configuration: stage output channels, flattened conv
    # feature size (assumes 224x224 input), and the two hidden fc widths.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast with a clear error instead of a NameError below.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_input, hidden_sizes = self._CONFIGS[model_type]
        # Final width now honours num_classes (was fixed at 6).
        fc_sizes = hidden_sizes + [num_classes]

        # Conv backbone: Conv+BN+ReLU+Pool per stage, one CBAM per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 input pooled twice (/4) -> 56x56 spatial grid.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB image batch; fc sizes assume 224x224 spatial input.
            edge_x: optional single-channel edge-map batch matching ``x``.

        Returns:
            (output, attention_map): class logits and the final attended
            conv feature map (useful for visualisation).
        """
        # Each stage is 4 consecutive Sequential entries (Conv, BN, ReLU,
        # Pool) followed by its CBAM module; the loop replaces the previous
        # per-variant copy/paste and works for both 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # the edge branch fuses with.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the image-only head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) matching the checkpoint layout.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over the per-pixel
    channel mean and max). All convolutions are bias-free so parameter names
    and shapes line up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Squeeze width for the channel MLP; never collapses below 1.
        hidden = max(channels // reduction, 1)

        # Channel attention: 1x1 conv squeeze/excite (bias-free per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier laid out to match the released checkpoints.

    Capacity variants, selected by ``model_type``:
      * ``'f'``: 3 conv stages, 16->32->64 channels,   FC 256->128
      * ``'c'``: 3 conv stages, 32->64->128 channels,  FC 512->256
      * ``'q'``: 4 conv stages, 64->128->256->512,     FC 1024->512

    Each stage is Conv -> BN -> ReLU -> MaxPool followed by a CBAM
    attention module. The hard-coded FC input sizes assume 224x224 RGB
    input (28x28 after 3 poolings, 14x14 after 4).

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6, matching the
            released checkpoints; other values only resize the final
            Linear layers).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Module order/indices must stay exactly as in the checkpoint.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC size was hard-coded to 6, silently
            # ignoring the num_classes argument. Default preserves the
            # checkpoint shapes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type surfaced later as an opaque
            # NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; FC sizing assumes (B, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (B, 1, 224, 224).

        Returns:
            (logits, attention_map) where attention_map is the feature
            map after the last attention module.
        """
        # Interleave conv stages (slices of 4 = Conv+BN+ReLU+Pool)
        # with their attention modules.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run the classifier up to (and including) the first Dropout to
        # obtain intermediate features for the optional edge fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a mis-sized edge map
                # (anything but 224x224) fails in edge_fc; fall back to
                # the main classifier instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model code listing (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention: a shared 1x1-conv bottleneck MLP applied to both
    the average- and max-pooled channel descriptors. Spatial attention:
    a single 7x7 conv over the channel-wise avg/max maps. All convs are
    bias-free so the stored checkpoint weights load directly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (1x1 convs, NO BIAS, as in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv, NO BIAS, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from pooled descriptors ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier laid out to match the released checkpoints.

    Capacity variants, selected by ``model_type``:
      * ``'f'``: 3 conv stages, 16->32->64 channels,   FC 256->128
      * ``'c'``: 3 conv stages, 32->64->128 channels,  FC 512->256
      * ``'q'``: 4 conv stages, 64->128->256->512,     FC 1024->512

    Each stage is Conv -> BN -> ReLU -> MaxPool followed by a CBAM
    attention module. The hard-coded FC input sizes assume 224x224 RGB
    input (28x28 after 3 poolings, 14x14 after 4).

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6, matching the
            released checkpoints; other values only resize the final
            Linear layers).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Module order/indices must stay exactly as in the checkpoint.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC size was hard-coded to 6, silently
            # ignoring the num_classes argument. Default preserves the
            # checkpoint shapes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type surfaced later as an opaque
            # NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; FC sizing assumes (B, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (B, 1, 224, 224).

        Returns:
            (logits, attention_map) where attention_map is the feature
            map after the last attention module.
        """
        # Interleave conv stages (slices of 4 = Conv+BN+ReLU+Pool)
        # with their attention modules.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run the classifier up to (and including) the first Dropout to
        # obtain intermediate features for the optional edge fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a mis-sized edge map
                # (anything but 224x224) fails in edge_fc; fall back to
                # the main classifier instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model code listing (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention: a shared 1x1-conv bottleneck MLP applied to both
    the average- and max-pooled channel descriptors. Spatial attention:
    a single 7x7 conv over the channel-wise avg/max maps. All convs are
    bias-free so the stored checkpoint weights load directly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (1x1 convs, NO BIAS, as in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv, NO BIAS, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from pooled descriptors ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier laid out to match the released checkpoints.

    Capacity variants, selected by ``model_type``:
      * ``'f'``: 3 conv stages, 16->32->64 channels,   FC 256->128
      * ``'c'``: 3 conv stages, 32->64->128 channels,  FC 512->256
      * ``'q'``: 4 conv stages, 64->128->256->512,     FC 1024->512

    Each stage is Conv -> BN -> ReLU -> MaxPool followed by a CBAM
    attention module. The hard-coded FC input sizes assume 224x224 RGB
    input (28x28 after 3 poolings, 14x14 after 4).

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6, matching the
            released checkpoints; other values only resize the final
            Linear layers).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Module order/indices must stay exactly as in the checkpoint.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC size was hard-coded to 6, silently
            # ignoring the num_classes argument. Default preserves the
            # checkpoint shapes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type surfaced later as an opaque
            # NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; FC sizing assumes (B, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (B, 1, 224, 224).

        Returns:
            (logits, attention_map) where attention_map is the feature
            map after the last attention module.
        """
        # Interleave conv stages (slices of 4 = Conv+BN+ReLU+Pool)
        # with their attention modules.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run the classifier up to (and including) the first Dropout to
        # obtain intermediate features for the optional edge fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a mis-sized edge map
                # (anything but 224x224) fails in edge_fc; fall back to
                # the main classifier instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model code listing (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention: a shared 1x1-conv bottleneck MLP applied to both
    the average- and max-pooled channel descriptors. Spatial attention:
    a single 7x7 conv over the channel-wise avg/max maps. All convs are
    bias-free so the stored checkpoint weights load directly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (1x1 convs, NO BIAS, as in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv, NO BIAS, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from pooled descriptors ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier laid out to match the released checkpoints.

    Capacity variants, selected by ``model_type``:
      * ``'f'``: 3 conv stages, 16->32->64 channels,   FC 256->128
      * ``'c'``: 3 conv stages, 32->64->128 channels,  FC 512->256
      * ``'q'``: 4 conv stages, 64->128->256->512,     FC 1024->512

    Each stage is Conv -> BN -> ReLU -> MaxPool followed by a CBAM
    attention module. The hard-coded FC input sizes assume 224x224 RGB
    input (28x28 after 3 poolings, 14x14 after 4).

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6, matching the
            released checkpoints; other values only resize the final
            Linear layers).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Module order/indices must stay exactly as in the checkpoint.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC size was hard-coded to 6, silently
            # ignoring the num_classes argument. Default preserves the
            # checkpoint shapes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type surfaced later as an opaque
            # NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; FC sizing assumes (B, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (B, 1, 224, 224).

        Returns:
            (logits, attention_map) where attention_map is the feature
            map after the last attention module.
        """
        # Interleave conv stages (slices of 4 = Conv+BN+ReLU+Pool)
        # with their attention modules.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run the classifier up to (and including) the first Dropout to
        # obtain intermediate features for the optional edge fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a mis-sized edge map
                # (anything but 224x224) fails in edge_fc; fall back to
                # the main classifier instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model code listing (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention: a shared 1x1-conv bottleneck MLP applied to both
    the average- and max-pooled channel descriptors. Spatial attention:
    a single 7x7 conv over the channel-wise avg/max maps. All convs are
    bias-free so the stored checkpoint weights load directly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (1x1 convs, NO BIAS, as in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv, NO BIAS, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from pooled descriptors ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier laid out to match the released checkpoints.

    Capacity variants, selected by ``model_type``:
      * ``'f'``: 3 conv stages, 16->32->64 channels,   FC 256->128
      * ``'c'``: 3 conv stages, 32->64->128 channels,  FC 512->256
      * ``'q'``: 4 conv stages, 64->128->256->512,     FC 1024->512

    Each stage is Conv -> BN -> ReLU -> MaxPool followed by a CBAM
    attention module. The hard-coded FC input sizes assume 224x224 RGB
    input (28x28 after 3 poolings, 14x14 after 4).

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6, matching the
            released checkpoints; other values only resize the final
            Linear layers).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Module order/indices must stay exactly as in the checkpoint.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC size was hard-coded to 6, silently
            # ignoring the num_classes argument. Default preserves the
            # checkpoint shapes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type surfaced later as an opaque
            # NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; FC sizing assumes (B, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (B, 1, 224, 224).

        Returns:
            (logits, attention_map) where attention_map is the feature
            map after the last attention module.
        """
        # Interleave conv stages (slices of 4 = Conv+BN+ReLU+Pool)
        # with their attention modules.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run the classifier up to (and including) the first Dropout to
        # obtain intermediate features for the optional edge fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a mis-sized edge map
                # (anything but 224x224) fails in edge_fc; fall back to
                # the main classifier instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# Model code listing (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Both branches use bias-free convolutions so parameter names and shapes
    line up with the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: 1x1-conv bottleneck, squeezed by `reduction`.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled features.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (classic): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    An optional 1-channel edge-map branch can be fused with the backbone
    features before classification.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not recognised.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern (4 modules
        # per stage, sliced as conv_layers[4*i : 4*i + 4] in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # Assumes 224x224 input: 224 / 2**3 = 28.
            fc_input = 64 * 28 * 28
            # FIX: final layer size now honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # Four poolings: 224 / 2**4 = 14.
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # FIX: fail fast with a clear message instead of a confusing
            # NameError on fc_input below.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 poolings: a 224x224 edge map ends up 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear+ReLU+Dropout) x2 then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier consuming backbone features + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run backbone + attention, then classify.

        Args:
            x: RGB image batch; spatial size must match the fc sizing chosen
               in __init__ (224x224 per the comments there).
            edge_x: optional 1-channel edge map for the edge-fusion branch.

        Returns:
            (output, attention_map): class logits and the feature map after
            the last attention module.
        """
        # One CBAM module per 4-module (Conv+BN+ReLU+Pool) backbone stage,
        # for any variant — replaces the duplicated per-type branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout yields the features shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: any edge-branch failure (e.g. a
                # size-mismatched edge map) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Both branches use bias-free convolutions so parameter names and shapes
    line up with the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: 1x1-conv bottleneck, squeezed by `reduction`.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled features.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (classic): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    An optional 1-channel edge-map branch can be fused with the backbone
    features before classification.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not recognised.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern (4 modules
        # per stage, sliced as conv_layers[4*i : 4*i + 4] in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # Assumes 224x224 input: 224 / 2**3 = 28.
            fc_input = 64 * 28 * 28
            # FIX: final layer size now honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # Four poolings: 224 / 2**4 = 14.
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # FIX: fail fast with a clear message instead of a confusing
            # NameError on fc_input below.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 poolings: a 224x224 edge map ends up 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear+ReLU+Dropout) x2 then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier consuming backbone features + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run backbone + attention, then classify.

        Args:
            x: RGB image batch; spatial size must match the fc sizing chosen
               in __init__ (224x224 per the comments there).
            edge_x: optional 1-channel edge map for the edge-fusion branch.

        Returns:
            (output, attention_map): class logits and the feature map after
            the last attention module.
        """
        # One CBAM module per 4-module (Conv+BN+ReLU+Pool) backbone stage,
        # for any variant — replaces the duplicated per-type branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout yields the features shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: any edge-branch failure (e.g. a
                # size-mismatched edge map) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Both branches use bias-free convolutions so parameter names and shapes
    line up with the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: 1x1-conv bottleneck, squeezed by `reduction`.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled features.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (classic): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    An optional 1-channel edge-map branch can be fused with the backbone
    features before classification.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not recognised.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern (4 modules
        # per stage, sliced as conv_layers[4*i : 4*i + 4] in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # Assumes 224x224 input: 224 / 2**3 = 28.
            fc_input = 64 * 28 * 28
            # FIX: final layer size now honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # Four poolings: 224 / 2**4 = 14.
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # FIX: fail fast with a clear message instead of a confusing
            # NameError on fc_input below.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 poolings: a 224x224 edge map ends up 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear+ReLU+Dropout) x2 then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier consuming backbone features + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run backbone + attention, then classify.

        Args:
            x: RGB image batch; spatial size must match the fc sizing chosen
               in __init__ (224x224 per the comments there).
            edge_x: optional 1-channel edge map for the edge-fusion branch.

        Returns:
            (output, attention_map): class logits and the feature map after
            the last attention module.
        """
        # One CBAM module per 4-module (Conv+BN+ReLU+Pool) backbone stage,
        # for any variant — replaces the duplicated per-type branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout yields the features shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: any edge-branch failure (e.g. a
                # size-mismatched edge map) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Both branches use bias-free convolutions so parameter names and shapes
    line up with the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: 1x1-conv bottleneck, squeezed by `reduction`.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled features.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (classic): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    An optional 1-channel edge-map branch can be fused with the backbone
    features before classification.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not recognised.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern (4 modules
        # per stage, sliced as conv_layers[4*i : 4*i + 4] in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # Assumes 224x224 input: 224 / 2**3 = 28.
            fc_input = 64 * 28 * 28
            # FIX: final layer size now honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # Four poolings: 224 / 2**4 = 14.
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # FIX: fail fast with a clear message instead of a confusing
            # NameError on fc_input below.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 poolings: a 224x224 edge map ends up 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear+ReLU+Dropout) x2 then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier consuming backbone features + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run backbone + attention, then classify.

        Args:
            x: RGB image batch; spatial size must match the fc sizing chosen
               in __init__ (224x224 per the comments there).
            edge_x: optional 1-channel edge map for the edge-fusion branch.

        Returns:
            (output, attention_map): class logits and the feature map after
            the last attention module.
        """
        # One CBAM module per 4-module (Conv+BN+ReLU+Pool) backbone stage,
        # for any variant — replaces the duplicated per-type branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout yields the features shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: any edge-branch failure (e.g. a
                # size-mismatched edge map) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Both branches use bias-free convolutions so parameter names and shapes
    line up with the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: 1x1-conv bottleneck, squeezed by `reduction`.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled features.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-attended CNN plus an optional edge branch.

    Attribute names and ``nn.Sequential`` ordering mirror the released
    checkpoint so ``load_state_dict`` maps one-to-one. Three capacity
    variants are selected by ``model_type``:

    * ``'f'`` -- 3 conv stages, 16/32/64 channels.
    * ``'c'`` -- 3 conv stages, 32/64/128 channels.
    * ``'q'`` -- 4 conv stages, 64/128/256/512 channels.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer. Defaults to 6 as in the
            checkpoint. (Previously this argument was accepted but ignored
            and 6 was hard-coded; it is now honoured, with an identical
            default.)

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+MaxPool stages kept in a single
        # nn.Sequential (sliced stage-by-stage in forward) so state_dict keys
        # match the checkpoint layout.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after three 2x pools implies a 224x224 input -- TODO confirm.
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast instead of leaving fc_input/fc_sizes unbound, which
            # previously surfaced as a confusing NameError below.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56x56 here implies a 224x224 edge map fed to the branch -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward splits it at index 3 so the first
        # Linear+ReLU+Dropout triple also serves as the feature extractor
        # for the combined (edge-aware) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features concatenated with the 128-dim
        # edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion head.

        Args:
            x: 3-channel input image batch.
            edge_x: Optional 1-channel edge-map batch; when given, its
                features are fused via ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attention-weighted feature map of the backbone.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three 4-layer stages with CBAM after each.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four 4-layer stages for the large variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            raise ValueError(f"unknown model_type {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) doubles as the feature
        # extractor for the edge-fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (typically a
                # RuntimeError from a spatial size that does not match
                # edge_fc), fall back to the plain classifier head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: a channel gate followed by a spatial gate.

    The module layout mirrors the released checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention: 1x1 conv -> ReLU -> 1x1 conv, no biases,
        # exactly as stored in the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate_c = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate_c

        # --- spatial gate: 7x7 conv over channel-wise mean/max descriptors ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate_s = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate_s


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-attended CNN plus an optional edge branch.

    Attribute names and ``nn.Sequential`` ordering mirror the released
    checkpoint so ``load_state_dict`` maps one-to-one. Three capacity
    variants are selected by ``model_type``:

    * ``'f'`` -- 3 conv stages, 16/32/64 channels.
    * ``'c'`` -- 3 conv stages, 32/64/128 channels.
    * ``'q'`` -- 4 conv stages, 64/128/256/512 channels.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer. Defaults to 6 as in the
            checkpoint. (Previously this argument was accepted but ignored
            and 6 was hard-coded; it is now honoured, with an identical
            default.)

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+MaxPool stages kept in a single
        # nn.Sequential (sliced stage-by-stage in forward) so state_dict keys
        # match the checkpoint layout.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after three 2x pools implies a 224x224 input -- TODO confirm.
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast instead of leaving fc_input/fc_sizes unbound, which
            # previously surfaced as a confusing NameError below.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56x56 here implies a 224x224 edge map fed to the branch -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward splits it at index 3 so the first
        # Linear+ReLU+Dropout triple also serves as the feature extractor
        # for the combined (edge-aware) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features concatenated with the 128-dim
        # edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion head.

        Args:
            x: 3-channel input image batch.
            edge_x: Optional 1-channel edge-map batch; when given, its
                features are fused via ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attention-weighted feature map of the backbone.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three 4-layer stages with CBAM after each.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four 4-layer stages for the large variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            raise ValueError(f"unknown model_type {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) doubles as the feature
        # extractor for the edge-fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (typically a
                # RuntimeError from a spatial size that does not match
                # edge_fc), fall back to the plain classifier head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: a channel gate followed by a spatial gate.

    The module layout mirrors the released checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention: 1x1 conv -> ReLU -> 1x1 conv, no biases,
        # exactly as stored in the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate_c = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate_c

        # --- spatial gate: 7x7 conv over channel-wise mean/max descriptors ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate_s = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate_s


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-attended CNN plus an optional edge branch.

    Attribute names and ``nn.Sequential`` ordering mirror the released
    checkpoint so ``load_state_dict`` maps one-to-one. Three capacity
    variants are selected by ``model_type``:

    * ``'f'`` -- 3 conv stages, 16/32/64 channels.
    * ``'c'`` -- 3 conv stages, 32/64/128 channels.
    * ``'q'`` -- 4 conv stages, 64/128/256/512 channels.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer. Defaults to 6 as in the
            checkpoint. (Previously this argument was accepted but ignored
            and 6 was hard-coded; it is now honoured, with an identical
            default.)

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+MaxPool stages kept in a single
        # nn.Sequential (sliced stage-by-stage in forward) so state_dict keys
        # match the checkpoint layout.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after three 2x pools implies a 224x224 input -- TODO confirm.
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast instead of leaving fc_input/fc_sizes unbound, which
            # previously surfaced as a confusing NameError below.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56x56 here implies a 224x224 edge map fed to the branch -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward splits it at index 3 so the first
        # Linear+ReLU+Dropout triple also serves as the feature extractor
        # for the combined (edge-aware) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features concatenated with the 128-dim
        # edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion head.

        Args:
            x: 3-channel input image batch.
            edge_x: Optional 1-channel edge-map batch; when given, its
                features are fused via ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attention-weighted feature map of the backbone.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three 4-layer stages with CBAM after each.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four 4-layer stages for the large variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            raise ValueError(f"unknown model_type {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) doubles as the feature
        # extractor for the edge-fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (typically a
                # RuntimeError from a spatial size that does not match
                # edge_fc), fall back to the plain classifier head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: a channel gate followed by a spatial gate.

    The module layout mirrors the released checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention: 1x1 conv -> ReLU -> 1x1 conv, no biases,
        # exactly as stored in the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate_c = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate_c

        # --- spatial gate: 7x7 conv over channel-wise mean/max descriptors ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate_s = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate_s


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-attended CNN plus an optional edge branch.

    Attribute names and ``nn.Sequential`` ordering mirror the released
    checkpoint so ``load_state_dict`` maps one-to-one. Three capacity
    variants are selected by ``model_type``:

    * ``'f'`` -- 3 conv stages, 16/32/64 channels.
    * ``'c'`` -- 3 conv stages, 32/64/128 channels.
    * ``'q'`` -- 4 conv stages, 64/128/256/512 channels.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer. Defaults to 6 as in the
            checkpoint. (Previously this argument was accepted but ignored
            and 6 was hard-coded; it is now honoured, with an identical
            default.)

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+MaxPool stages kept in a single
        # nn.Sequential (sliced stage-by-stage in forward) so state_dict keys
        # match the checkpoint layout.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after three 2x pools implies a 224x224 input -- TODO confirm.
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast instead of leaving fc_input/fc_sizes unbound, which
            # previously surfaced as a confusing NameError below.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56x56 here implies a 224x224 edge map fed to the branch -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward splits it at index 3 so the first
        # Linear+ReLU+Dropout triple also serves as the feature extractor
        # for the combined (edge-aware) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features concatenated with the 128-dim
        # edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion head.

        Args:
            x: 3-channel input image batch.
            edge_x: Optional 1-channel edge-map batch; when given, its
                features are fused via ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attention-weighted feature map of the backbone.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three 4-layer stages with CBAM after each.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four 4-layer stages for the large variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            raise ValueError(f"unknown model_type {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) doubles as the feature
        # extractor for the edge-fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (typically a
                # RuntimeError from a spatial size that does not match
                # edge_fc), fall back to the plain classifier head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: a channel gate followed by a spatial gate.

    The module layout mirrors the released checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention: 1x1 conv -> ReLU -> 1x1 conv, no biases,
        # exactly as stored in the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate_c = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate_c

        # --- spatial gate: 7x7 conv over channel-wise mean/max descriptors ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate_s = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate_s


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-attended CNN plus an optional edge branch.

    Attribute names and ``nn.Sequential`` ordering mirror the released
    checkpoint so ``load_state_dict`` maps one-to-one. Three capacity
    variants are selected by ``model_type``:

    * ``'f'`` -- 3 conv stages, 16/32/64 channels.
    * ``'c'`` -- 3 conv stages, 32/64/128 channels.
    * ``'q'`` -- 4 conv stages, 64/128/256/512 channels.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer. Defaults to 6 as in the
            checkpoint. (Previously this argument was accepted but ignored
            and 6 was hard-coded; it is now honoured, with an identical
            default.)

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+MaxPool stages kept in a single
        # nn.Sequential (sliced stage-by-stage in forward) so state_dict keys
        # match the checkpoint layout.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after three 2x pools implies a 224x224 input -- TODO confirm.
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast instead of leaving fc_input/fc_sizes unbound, which
            # previously surfaced as a confusing NameError below.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56x56 here implies a 224x224 edge map fed to the branch -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward splits it at index 3 so the first
        # Linear+ReLU+Dropout triple also serves as the feature extractor
        # for the combined (edge-aware) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features concatenated with the 128-dim
        # edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion head.

        Args:
            x: 3-channel input image batch.
            edge_x: Optional 1-channel edge-map batch; when given, its
                features are fused via ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attention-weighted feature map of the backbone.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three 4-layer stages with CBAM after each.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four 4-layer stages for the large variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            raise ValueError(f"unknown model_type {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) doubles as the feature
        # extractor for the edge-fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (typically a
                # RuntimeError from a spatial size that does not match
                # edge_fc), fall back to the plain classifier head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies channel attention (a shared 1x1-conv MLP over average- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv over
    the channel-wise mean/max maps). All convolutions are bias-free, as
    required by the checkpoint's state_dict.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        hidden = max(channels // reduction, 1)

        # Channel attention: two 1x1 convs act as a shared MLP (NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # -- channel attention: squeeze spatial dims two ways, share the MLP.
        squeezed_avg = x.mean(dim=(2, 3), keepdim=True)
        squeezed_max = x.amax(dim=(2, 3), keepdim=True)
        channel_gate = (self.channel_attention(squeezed_avg)
                        + self.channel_attention(squeezed_max)).sigmoid()
        x = x * channel_gate

        # -- spatial attention: collapse channels two ways, stack, gate.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        ).sigmoid()
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module after each stage; they differ only in width/depth:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The flattened fc sizes (``channels * 28 * 28`` after three pools,
    ``channels * 14 * 14`` after four) assume a 224x224 RGB input; the edge
    branch likewise assumes a 224x224 single-channel edge map.

    Args:
        model_type: Variant selector, one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes. Defaults to 6, matching the
            released checkpoints.

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final size now honors num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown variant crashed later with a confusing
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch: two conv+pool stages over a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (224 -> 56 after two pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier head (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the attention CNN (plus optional edge branch) and classify.

        Args:
            x: Image batch whose spatial size matches the fc sizes chosen in
               ``__init__`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge-map batch; when given, edge
                features are fused and ``combined_classifier`` is used, with
                a silent fallback to the plain head if the branch fails.

        Returns:
            Tuple ``(output, attention_map)``: class logits and the feature
            map emitted by the final attention stage.
        """
        # Backbone: each Conv+BN+ReLU+Pool stage, then its CBAM module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: model_type could have been mutated after __init__.
            raise ValueError(f"unsupported model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge branch: any failure (typically a spatial-size
            # mismatch with edge_fc) falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:  # deliberate best-effort fallback
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies channel attention (a shared 1x1-conv MLP over average- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv over
    the channel-wise mean/max maps). All convolutions are bias-free, as
    required by the checkpoint's state_dict.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        hidden = max(channels // reduction, 1)

        # Channel attention: two 1x1 convs act as a shared MLP (NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # -- channel attention: squeeze spatial dims two ways, share the MLP.
        squeezed_avg = x.mean(dim=(2, 3), keepdim=True)
        squeezed_max = x.amax(dim=(2, 3), keepdim=True)
        channel_gate = (self.channel_attention(squeezed_avg)
                        + self.channel_attention(squeezed_max)).sigmoid()
        x = x * channel_gate

        # -- spatial attention: collapse channels two ways, stack, gate.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        ).sigmoid()
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module after each stage; they differ only in width/depth:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The flattened fc sizes (``channels * 28 * 28`` after three pools,
    ``channels * 14 * 14`` after four) assume a 224x224 RGB input; the edge
    branch likewise assumes a 224x224 single-channel edge map.

    Args:
        model_type: Variant selector, one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes. Defaults to 6, matching the
            released checkpoints.

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final size now honors num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown variant crashed later with a confusing
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch: two conv+pool stages over a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (224 -> 56 after two pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier head (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the attention CNN (plus optional edge branch) and classify.

        Args:
            x: Image batch whose spatial size matches the fc sizes chosen in
               ``__init__`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge-map batch; when given, edge
                features are fused and ``combined_classifier`` is used, with
                a silent fallback to the plain head if the branch fails.

        Returns:
            Tuple ``(output, attention_map)``: class logits and the feature
            map emitted by the final attention stage.
        """
        # Backbone: each Conv+BN+ReLU+Pool stage, then its CBAM module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: model_type could have been mutated after __init__.
            raise ValueError(f"unsupported model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge branch: any failure (typically a spatial-size
            # mismatch with edge_fc) falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:  # deliberate best-effort fallback
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies channel attention (a shared 1x1-conv MLP over average- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv over
    the channel-wise mean/max maps). All convolutions are bias-free, as
    required by the checkpoint's state_dict.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        hidden = max(channels // reduction, 1)

        # Channel attention: two 1x1 convs act as a shared MLP (NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # -- channel attention: squeeze spatial dims two ways, share the MLP.
        squeezed_avg = x.mean(dim=(2, 3), keepdim=True)
        squeezed_max = x.amax(dim=(2, 3), keepdim=True)
        channel_gate = (self.channel_attention(squeezed_avg)
                        + self.channel_attention(squeezed_max)).sigmoid()
        x = x * channel_gate

        # -- spatial attention: collapse channels two ways, stack, gate.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        ).sigmoid()
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module after each stage; they differ only in width/depth:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The flattened fc sizes (``channels * 28 * 28`` after three pools,
    ``channels * 14 * 14`` after four) assume a 224x224 RGB input; the edge
    branch likewise assumes a 224x224 single-channel edge map.

    Args:
        model_type: Variant selector, one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes. Defaults to 6, matching the
            released checkpoints.

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final size now honors num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown variant crashed later with a confusing
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch: two conv+pool stages over a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (224 -> 56 after two pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier head (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the attention CNN (plus optional edge branch) and classify.

        Args:
            x: Image batch whose spatial size matches the fc sizes chosen in
               ``__init__`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge-map batch; when given, edge
                features are fused and ``combined_classifier`` is used, with
                a silent fallback to the plain head if the branch fails.

        Returns:
            Tuple ``(output, attention_map)``: class logits and the feature
            map emitted by the final attention stage.
        """
        # Backbone: each Conv+BN+ReLU+Pool stage, then its CBAM module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: model_type could have been mutated after __init__.
            raise ValueError(f"unsupported model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge branch: any failure (typically a spatial-size
            # mismatch with edge_fc) falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:  # deliberate best-effort fallback
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies channel attention (a shared 1x1-conv MLP over average- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv over
    the channel-wise mean/max maps). All convolutions are bias-free, as
    required by the checkpoint's state_dict.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        hidden = max(channels // reduction, 1)

        # Channel attention: two 1x1 convs act as a shared MLP (NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # -- channel attention: squeeze spatial dims two ways, share the MLP.
        squeezed_avg = x.mean(dim=(2, 3), keepdim=True)
        squeezed_max = x.amax(dim=(2, 3), keepdim=True)
        channel_gate = (self.channel_attention(squeezed_avg)
                        + self.channel_attention(squeezed_max)).sigmoid()
        x = x * channel_gate

        # -- spatial attention: collapse channels two ways, stack, gate.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        ).sigmoid()
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module after each stage; they differ only in width/depth:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The flattened fc sizes (``channels * 28 * 28`` after three pools,
    ``channels * 14 * 14`` after four) assume a 224x224 RGB input; the edge
    branch likewise assumes a 224x224 single-channel edge map.

    Args:
        model_type: Variant selector, one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes. Defaults to 6, matching the
            released checkpoints.

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final size now honors num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown variant crashed later with a confusing
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch: two conv+pool stages over a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (224 -> 56 after two pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier head (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the attention CNN (plus optional edge branch) and classify.

        Args:
            x: Image batch whose spatial size matches the fc sizes chosen in
               ``__init__`` (224x224 for the released checkpoints).
            edge_x: Optional single-channel edge-map batch; when given, edge
                features are fused and ``combined_classifier`` is used, with
                a silent fallback to the plain head if the branch fails.

        Returns:
            Tuple ``(output, attention_map)``: class logits and the feature
            map emitted by the final attention stage.
        """
        # Backbone: each Conv+BN+ReLU+Pool stage, then its CBAM module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: model_type could have been mutated after __init__.
            raise ValueError(f"unsupported model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge branch: any failure (typically a spatial-size
            # mismatch with edge_fc) falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:  # deliberate best-effort fallback
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies channel attention (a shared 1x1-conv MLP over average- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv over
    the channel-wise mean/max maps). All convolutions are bias-free, as
    required by the checkpoint's state_dict.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        hidden = max(channels // reduction, 1)

        # Channel attention: two 1x1 convs act as a shared MLP (NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # -- channel attention: squeeze spatial dims two ways, share the MLP.
        squeezed_avg = x.mean(dim=(2, 3), keepdim=True)
        squeezed_max = x.amax(dim=(2, 3), keepdim=True)
        channel_gate = (self.channel_attention(squeezed_avg)
                        + self.channel_attention(squeezed_max)).sigmoid()
        x = x * channel_gate

        # -- spatial attention: collapse channels two ways, stack, gate.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = self.spatial_attention(
            torch.cat((mean_map, max_map), dim=1)
        ).sigmoid()
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with a CBAM module after every conv stage and an
    optional edge-map fusion branch.

    Three capacity presets share one layout of repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages:

    * ``'f'``: 3 stages, 16/32/64 channels      (224x224 input -> 28x28 maps)
    * ``'c'``: 3 stages, 32/64/128 channels     (224x224 input -> 28x28 maps)
    * ``'q'``: 4 stages, 64/128/256/512 channels (224x224 input -> 14x14 maps)

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the feature map after the final attention stage.
    """

    # Entries per stage in ``conv_layers``: Conv2d, BatchNorm2d, ReLU, MaxPool2d.
    _STAGE_LEN = 4

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: width of the output logit vector.

        Raises:
            ValueError: if ``model_type`` is not a known preset.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-preset configuration: stage channels, flattened conv-output
        # size (assumes a 224x224 input -- TODO confirm against checkpoint),
        # and the two hidden FC widths.
        presets = {
            'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in presets:
            # BUGFIX: the original fell through silently and later crashed
            # with UnboundLocalError on fc_input for unknown model types.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,))
        channels, fc_input, hidden = presets[model_type]
        # BUGFIX: the final layer width was hard-coded to 6, silently
        # ignoring the num_classes argument (default unchanged, so loading
        # the released 6-class checkpoints is unaffected).
        fc_sizes = hidden + [num_classes]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, plus one
        # CBAM module per stage kept in a parallel ModuleList so state_dict
        # keys match the released checkpoint layout.
        stage_layers = []
        in_ch = 3
        for out_ch in channels:
            stage_layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stage_layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Edge-detection branch: small 2-conv CNN over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice by 2.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: three Linear layers with ReLU+Dropout between.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion branch.

        Args:
            x: image batch, shape (B, 3, H, W); H = W = 224 for the stock
               FC sizes.
            edge_x: optional 1-channel edge-map batch, shape (B, 1, 224, 224).

        Returns:
            ``(logits, attention_map)``: logits of shape (B, num_classes)
            and the post-attention feature map of the final stage.
        """
        # One attention module per Conv+BN+ReLU+Pool group; this loop
        # replaces the duplicated per-model_type slicing of the original.
        step = self._STAGE_LEN
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[i * step:(i + 1) * step](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout); its output doubles as
        # the feature vector fed to the edge-fusion head.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. an edge map whose size does not match edge_fc), fall
                # through to the main head instead of crashing.
                pass

        return self.classifier[3:](features), attention_map
Model — python · pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module: channel then spatial attention.

    Layer shapes mirror the released checkpoint: a bias-free 1x1-conv MLP for
    the channel gate and a single bias-free 7x7 conv for the spatial gate.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP applied to both pooled descriptors (no bias, matching
        # the checkpoint's parameter layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv over the stacked [avg; max] channel summary.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate_logits = sum(self.channel_attention(p) for p in pooled)
        x = x * torch.sigmoid(gate_logits)

        # --- spatial attention: gate each location by channel summary ---
        summary = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(summary))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with a CBAM module after every conv stage and an
    optional edge-map fusion branch.

    Three capacity presets share one layout of repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages:

    * ``'f'``: 3 stages, 16/32/64 channels      (224x224 input -> 28x28 maps)
    * ``'c'``: 3 stages, 32/64/128 channels     (224x224 input -> 28x28 maps)
    * ``'q'``: 4 stages, 64/128/256/512 channels (224x224 input -> 14x14 maps)

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the feature map after the final attention stage.
    """

    # Entries per stage in ``conv_layers``: Conv2d, BatchNorm2d, ReLU, MaxPool2d.
    _STAGE_LEN = 4

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: width of the output logit vector.

        Raises:
            ValueError: if ``model_type`` is not a known preset.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-preset configuration: stage channels, flattened conv-output
        # size (assumes a 224x224 input -- TODO confirm against checkpoint),
        # and the two hidden FC widths.
        presets = {
            'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in presets:
            # BUGFIX: the original fell through silently and later crashed
            # with UnboundLocalError on fc_input for unknown model types.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,))
        channels, fc_input, hidden = presets[model_type]
        # BUGFIX: the final layer width was hard-coded to 6, silently
        # ignoring the num_classes argument (default unchanged, so loading
        # the released 6-class checkpoints is unaffected).
        fc_sizes = hidden + [num_classes]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, plus one
        # CBAM module per stage kept in a parallel ModuleList so state_dict
        # keys match the released checkpoint layout.
        stage_layers = []
        in_ch = 3
        for out_ch in channels:
            stage_layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stage_layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Edge-detection branch: small 2-conv CNN over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice by 2.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: three Linear layers with ReLU+Dropout between.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion branch.

        Args:
            x: image batch, shape (B, 3, H, W); H = W = 224 for the stock
               FC sizes.
            edge_x: optional 1-channel edge-map batch, shape (B, 1, 224, 224).

        Returns:
            ``(logits, attention_map)``: logits of shape (B, num_classes)
            and the post-attention feature map of the final stage.
        """
        # One attention module per Conv+BN+ReLU+Pool group; this loop
        # replaces the duplicated per-model_type slicing of the original.
        step = self._STAGE_LEN
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[i * step:(i + 1) * step](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout); its output doubles as
        # the feature vector fed to the edge-fusion head.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. an edge map whose size does not match edge_fc), fall
                # through to the main head instead of crashing.
                pass

        return self.classifier[3:](features), attention_map
Model — python · pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module: channel then spatial attention.

    Layer shapes mirror the released checkpoint: a bias-free 1x1-conv MLP for
    the channel gate and a single bias-free 7x7 conv for the spatial gate.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP applied to both pooled descriptors (no bias, matching
        # the checkpoint's parameter layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv over the stacked [avg; max] channel summary.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate_logits = sum(self.channel_attention(p) for p in pooled)
        x = x * torch.sigmoid(gate_logits)

        # --- spatial attention: gate each location by channel summary ---
        summary = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(summary))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with a CBAM module after every conv stage and an
    optional edge-map fusion branch.

    Three capacity presets share one layout of repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages:

    * ``'f'``: 3 stages, 16/32/64 channels      (224x224 input -> 28x28 maps)
    * ``'c'``: 3 stages, 32/64/128 channels     (224x224 input -> 28x28 maps)
    * ``'q'``: 4 stages, 64/128/256/512 channels (224x224 input -> 14x14 maps)

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the feature map after the final attention stage.
    """

    # Entries per stage in ``conv_layers``: Conv2d, BatchNorm2d, ReLU, MaxPool2d.
    _STAGE_LEN = 4

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: width of the output logit vector.

        Raises:
            ValueError: if ``model_type`` is not a known preset.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-preset configuration: stage channels, flattened conv-output
        # size (assumes a 224x224 input -- TODO confirm against checkpoint),
        # and the two hidden FC widths.
        presets = {
            'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in presets:
            # BUGFIX: the original fell through silently and later crashed
            # with UnboundLocalError on fc_input for unknown model types.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,))
        channels, fc_input, hidden = presets[model_type]
        # BUGFIX: the final layer width was hard-coded to 6, silently
        # ignoring the num_classes argument (default unchanged, so loading
        # the released 6-class checkpoints is unaffected).
        fc_sizes = hidden + [num_classes]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, plus one
        # CBAM module per stage kept in a parallel ModuleList so state_dict
        # keys match the released checkpoint layout.
        stage_layers = []
        in_ch = 3
        for out_ch in channels:
            stage_layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stage_layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Edge-detection branch: small 2-conv CNN over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice by 2.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: three Linear layers with ReLU+Dropout between.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion branch.

        Args:
            x: image batch, shape (B, 3, H, W); H = W = 224 for the stock
               FC sizes.
            edge_x: optional 1-channel edge-map batch, shape (B, 1, 224, 224).

        Returns:
            ``(logits, attention_map)``: logits of shape (B, num_classes)
            and the post-attention feature map of the final stage.
        """
        # One attention module per Conv+BN+ReLU+Pool group; this loop
        # replaces the duplicated per-model_type slicing of the original.
        step = self._STAGE_LEN
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[i * step:(i + 1) * step](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout); its output doubles as
        # the feature vector fed to the edge-fusion head.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. an edge map whose size does not match edge_fc), fall
                # through to the main head instead of crashing.
                pass

        return self.classifier[3:](features), attention_map
Model — python · pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module: channel then spatial attention.

    Layer shapes mirror the released checkpoint: a bias-free 1x1-conv MLP for
    the channel gate and a single bias-free 7x7 conv for the spatial gate.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP applied to both pooled descriptors (no bias, matching
        # the checkpoint's parameter layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv over the stacked [avg; max] channel summary.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate_logits = sum(self.channel_attention(p) for p in pooled)
        x = x * torch.sigmoid(gate_logits)

        # --- spatial attention: gate each location by channel summary ---
        summary = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(summary))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with a CBAM module after every conv stage and an
    optional edge-map fusion branch.

    Three capacity presets share one layout of repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages:

    * ``'f'``: 3 stages, 16/32/64 channels      (224x224 input -> 28x28 maps)
    * ``'c'``: 3 stages, 32/64/128 channels     (224x224 input -> 28x28 maps)
    * ``'q'``: 4 stages, 64/128/256/512 channels (224x224 input -> 14x14 maps)

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the feature map after the final attention stage.
    """

    # Entries per stage in ``conv_layers``: Conv2d, BatchNorm2d, ReLU, MaxPool2d.
    _STAGE_LEN = 4

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: width of the output logit vector.

        Raises:
            ValueError: if ``model_type`` is not a known preset.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-preset configuration: stage channels, flattened conv-output
        # size (assumes a 224x224 input -- TODO confirm against checkpoint),
        # and the two hidden FC widths.
        presets = {
            'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in presets:
            # BUGFIX: the original fell through silently and later crashed
            # with UnboundLocalError on fc_input for unknown model types.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,))
        channels, fc_input, hidden = presets[model_type]
        # BUGFIX: the final layer width was hard-coded to 6, silently
        # ignoring the num_classes argument (default unchanged, so loading
        # the released 6-class checkpoints is unaffected).
        fc_sizes = hidden + [num_classes]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, plus one
        # CBAM module per stage kept in a parallel ModuleList so state_dict
        # keys match the released checkpoint layout.
        stage_layers = []
        in_ch = 3
        for out_ch in channels:
            stage_layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stage_layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Edge-detection branch: small 2-conv CNN over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice by 2.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: three Linear layers with ReLU+Dropout between.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion branch.

        Args:
            x: image batch, shape (B, 3, H, W); H = W = 224 for the stock
               FC sizes.
            edge_x: optional 1-channel edge-map batch, shape (B, 1, 224, 224).

        Returns:
            ``(logits, attention_map)``: logits of shape (B, num_classes)
            and the post-attention feature map of the final stage.
        """
        # One attention module per Conv+BN+ReLU+Pool group; this loop
        # replaces the duplicated per-model_type slicing of the original.
        step = self._STAGE_LEN
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[i * step:(i + 1) * step](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout); its output doubles as
        # the feature vector fed to the edge-fusion head.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. an edge map whose size does not match edge_fc), fall
                # through to the main head instead of crashing.
                pass

        return self.classifier[3:](features), attention_map
Model — python · pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module: channel then spatial attention.

    Layer shapes mirror the released checkpoint: a bias-free 1x1-conv MLP for
    the channel gate and a single bias-free 7x7 conv for the spatial gate.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP applied to both pooled descriptors (no bias, matching
        # the checkpoint's parameter layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv over the stacked [avg; max] channel summary.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate_logits = sum(self.channel_attention(p) for p in pooled)
        x = x * torch.sigmoid(gate_logits)

        # --- spatial attention: gate each location by channel summary ---
        summary = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(summary))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with a CBAM module after every conv stage and an
    optional edge-map fusion branch.

    Three capacity presets share one layout of repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages:

    * ``'f'``: 3 stages, 16/32/64 channels      (224x224 input -> 28x28 maps)
    * ``'c'``: 3 stages, 32/64/128 channels     (224x224 input -> 28x28 maps)
    * ``'q'``: 4 stages, 64/128/256/512 channels (224x224 input -> 14x14 maps)

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the feature map after the final attention stage.
    """

    # Entries per stage in ``conv_layers``: Conv2d, BatchNorm2d, ReLU, MaxPool2d.
    _STAGE_LEN = 4

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: width of the output logit vector.

        Raises:
            ValueError: if ``model_type`` is not a known preset.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-preset configuration: stage channels, flattened conv-output
        # size (assumes a 224x224 input -- TODO confirm against checkpoint),
        # and the two hidden FC widths.
        presets = {
            'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in presets:
            # BUGFIX: the original fell through silently and later crashed
            # with UnboundLocalError on fc_input for unknown model types.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,))
        channels, fc_input, hidden = presets[model_type]
        # BUGFIX: the final layer width was hard-coded to 6, silently
        # ignoring the num_classes argument (default unchanged, so loading
        # the released 6-class checkpoints is unaffected).
        fc_sizes = hidden + [num_classes]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, plus one
        # CBAM module per stage kept in a parallel ModuleList so state_dict
        # keys match the released checkpoint layout.
        stage_layers = []
        in_ch = 3
        for out_ch in channels:
            stage_layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stage_layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Edge-detection branch: small 2-conv CNN over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice by 2.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: three Linear layers with ReLU+Dropout between.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge-fusion branch.

        Args:
            x: image batch, shape (B, 3, H, W); H = W = 224 for the stock
               FC sizes.
            edge_x: optional 1-channel edge-map batch, shape (B, 1, 224, 224).

        Returns:
            ``(logits, attention_map)``: logits of shape (B, num_classes)
            and the post-attention feature map of the final stage.
        """
        # One attention module per Conv+BN+ReLU+Pool group; this loop
        # replaces the duplicated per-model_type slicing of the original.
        step = self._STAGE_LEN
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[i * step:(i + 1) * step](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout); its output doubles as
        # the feature vector fed to the edge-fusion head.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. an edge map whose size does not match edge_fc), fall
                # through to the main head instead of crashing.
                pass

        return self.classifier[3:](features), attention_map
Model — python · pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier matching the released checkpoint layout.

    Variants 'f', 'c' and 'q' share a Conv+BN+ReLU+MaxPool stage pattern
    with a CBAMAttentionCheckpoint module after every stage; they differ
    only in stage count and channel widths.  The hard-coded flatten sizes
    (28 = 224 / 2**3, 14 = 224 / 2**4) assume a 224x224 RGB input —
    TODO confirm against the training pipeline.

    An optional 1-channel edge map can be passed to ``forward``; it is
    reduced to 128 features and fused by ``combined_classifier``.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q'.
            num_classes: number of output logits (default 6, matching the
                released checkpoints).

        Raises:
            ValueError: for an unknown ``model_type``.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # BUGFIX: fc_sizes previously hard-coded 6 outputs, silently
        # ignoring num_classes; it now honors the parameter (default 6
        # keeps checkpoint compatibility).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: previously fell through and crashed with a NameError
            # on fc_input below; fail fast with a clear message.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Edge detection branch (1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        # (two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB image batch whose spatial size must match ``fc_input``.
            edge_x: optional 1-channel edge map; on any error in the edge
                branch the model falls back to the main head.

        Raises:
            ValueError: if ``self.model_type`` is not 'f', 'c' or 'q'.
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # BUGFIX: previously crashed later with an UnboundLocalError
            # on attention_map.
            raise ValueError(f"unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # Linear -> ReLU -> Dropout produce the features reused for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: keep the original silent fallback to
                # the main head if the edge branch fails.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (a squeeze/excite MLP over 1x1 convs)
    followed by spatial attention (a 7x7 conv over channel-wise mean/max
    maps).  Output has the same shape as the input.
    """

    def __init__(self, channels, reduction=8):
        """Build the attention layers.

        Args:
            channels: number of input/output feature channels.
            reduction: bottleneck reduction factor for channel attention.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled stats.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Spatial attention: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier matching the released checkpoint layout.

    Variants 'f', 'c' and 'q' share a Conv+BN+ReLU+MaxPool stage pattern
    with a CBAMAttentionCheckpoint module after every stage; they differ
    only in stage count and channel widths.  The hard-coded flatten sizes
    (28 = 224 / 2**3, 14 = 224 / 2**4) suggest a 224x224 RGB input —
    TODO confirm against the training pipeline.

    An optional 1-channel edge map can be passed to ``forward``; it is
    reduced to 128 features and fused by ``combined_classifier``.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q'.  NOTE(review): any other value
                leaves the conv stack undefined and crashes later with a
                NameError instead of a clear message.
            num_classes: stored on the instance but otherwise unused — the
                heads below hard-code 6 outputs.  NOTE(review): likely a
                bug for callers passing a different value.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        # (two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB image batch whose spatial size must match ``fc_input``.
            edge_x: optional 1-channel edge map; on any error in the edge
                branch the model silently falls back to the main head.
        """
        # NOTE(review): attention_map stays unbound for an unknown
        # model_type and raises UnboundLocalError at the return.
        if self.model_type == 'f' or self.model_type == 'c':
            # Stage i occupies conv_layers[4*i : 4*i+4] (Conv, BN, ReLU, Pool).
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Linear -> ReLU -> Dropout produce the features reused for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            # NOTE(review): broad silent fallback; 'e' is never used.
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (a squeeze/excite MLP over 1x1 convs)
    followed by spatial attention (a 7x7 conv over channel-wise mean/max
    maps).  Output has the same shape as the input.
    """

    def __init__(self, channels, reduction=8):
        """Build the attention layers.

        Args:
            channels: number of input/output feature channels.
            reduction: bottleneck reduction factor for channel attention.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled stats.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Spatial attention: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier (checkpoint-compatible layout).

    Variants: 'f' -> 3 stages (16/32/64 ch), 'c' -> 3 stages
    (32/64/128 ch), 'q' -> 4 stages (64/128/256/512 ch).  Every stage is
    Conv3x3 + BatchNorm + ReLU + MaxPool2 followed by a CBAM attention
    module.  An optional edge branch contributes 128 features merged by
    ``combined_classifier``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths and classifier geometry.
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Conv trunk: Conv+BN+ReLU+Pool per stage.  All conv-stage modules
        # are created first and the attention modules afterwards so the
        # parameter creation/init order matches the checkpoint layout.
        trunk = []
        in_ch = 3
        for out_ch in stage_channels:
            trunk += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*trunk)

        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge detection branch (1-channel input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        # Run every (conv stage, attention) pair; stage i occupies
        # conv_layers[4*i : 4*i + 4].
        if self.model_type in ('f', 'c'):
            num_stages = 3
        elif self.model_type == 'q':
            num_stages = 4
        for stage in range(num_stages):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x

        # Linear -> ReLU -> Dropout produce the fusion features.
        features = self.classifier[0:3](x.view(x.size(0), -1))

        if edge_x is None:
            return self.classifier[3:](features), attention_map

        try:
            e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
            e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
            edge_features = self.edge_fc(e.view(e.size(0), -1))
            merged = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(merged)
        except Exception:
            # Best-effort fallback, as in the original implementation.
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier matching the released checkpoint layout.

    Variants 'f', 'c' and 'q' share a Conv+BN+ReLU+MaxPool stage pattern
    with a CBAMAttentionCheckpoint module after every stage; they differ
    only in stage count and channel widths.  The hard-coded flatten sizes
    (28 = 224 / 2**3, 14 = 224 / 2**4) suggest a 224x224 RGB input —
    TODO confirm against the training pipeline.

    An optional 1-channel edge map can be passed to ``forward``; it is
    reduced to 128 features and fused by ``combined_classifier``.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q'.  NOTE(review): any other value
                leaves the conv stack undefined and crashes later with a
                NameError instead of a clear message.
            num_classes: stored on the instance but otherwise unused — the
                heads below hard-code 6 outputs.  NOTE(review): likely a
                bug for callers passing a different value.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        # (two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB image batch whose spatial size must match ``fc_input``.
            edge_x: optional 1-channel edge map; on any error in the edge
                branch the model silently falls back to the main head.
        """
        # NOTE(review): attention_map stays unbound for an unknown
        # model_type and raises UnboundLocalError at the return.
        if self.model_type == 'f' or self.model_type == 'c':
            # Stage i occupies conv_layers[4*i : 4*i+4] (Conv, BN, ReLU, Pool).
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Linear -> ReLU -> Dropout produce the features reused for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            # NOTE(review): broad silent fallback; 'e' is never used.
            except Exception as e:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (a squeeze/excite MLP over 1x1 convs)
    followed by spatial attention (a 7x7 conv over channel-wise mean/max
    maps).  Output has the same shape as the input.
    """

    def __init__(self, channels, reduction=8):
        """Build the attention layers.

        Args:
            channels: number of input/output feature channels.
            reduction: bottleneck reduction factor for channel attention.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled stats.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Spatial attention: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Three capacity variants ('f', 'c', 'q') built from Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after each stage, plus an optional
    edge-image branch whose features are fused into a combined classifier.
    Spatial constants (28x28 / 14x14 backbone, 56x56 edge branch) assume
    224x224 inputs — TODO confirm against the training pipeline.
    """

    # Per-variant (channel plan, hidden fc widths). The channel plan starts
    # at the 3-channel RGB input; each subsequent entry is one conv stage.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f', 'c' or 'q' — selects network capacity.
            num_classes: number of output classes. Default 6 matches the
                released checkpoints (the original code hard-coded 6 and
                silently ignored this argument).

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool stage per channel transition,
        # flattened into a single Sequential so checkpoint indices line up
        # with the original hand-written layout.
        stages = []
        for in_ch, out_ch in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, matching that stage's channel width.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels[1:]
        )

        # Each stage halves the spatial size: 224 -> 28 (3 stages) or
        # 14 (4 stages), reproducing the original hard-coded fc_input.
        num_stages = len(channels) - 1
        spatial = 224 // (2 ** num_stages)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (expects a 1-channel 224x224 edge map:
        # two /2 pools give 56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head fusing backbone features with the 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch; the fc sizes assume (N, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (N, 1, 224, 224).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the final
            attended feature map.
        """
        # Run each Conv+BN+ReLU+Pool stage followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # that the edge branch fuses with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g. an
                # edge_x size that doesn't match edge_fc), fall back to the
                # main head rather than crash — preserves original behavior.
                pass

        # Remaining classifier stages produce the logits.
        return self.classifier[3:](features), attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared two-layer 1x1-conv bottleneck (no bias);
    spatial attention is a single bias-free 7x7 conv — exactly the modules
    stored in the checkpoint's state_dict, so attribute names must not change.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width: floor-divide, but never below one channel.
        squeezed = max(channels // reduction, 1)

        # Channel-attention MLP expressed as 1x1 convs, bias-free to match
        # the checkpoint weights.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled maps
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        channel_gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel channel statistics
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Three capacity variants ('f', 'c', 'q') built from Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after each stage, plus an optional
    edge-image branch whose features are fused into a combined classifier.
    Spatial constants (28x28 / 14x14 backbone, 56x56 edge branch) assume
    224x224 inputs — TODO confirm against the training pipeline.
    """

    # Per-variant (channel plan, hidden fc widths). The channel plan starts
    # at the 3-channel RGB input; each subsequent entry is one conv stage.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f', 'c' or 'q' — selects network capacity.
            num_classes: number of output classes. Default 6 matches the
                released checkpoints (the original code hard-coded 6 and
                silently ignored this argument).

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool stage per channel transition,
        # flattened into a single Sequential so checkpoint indices line up
        # with the original hand-written layout.
        stages = []
        for in_ch, out_ch in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, matching that stage's channel width.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels[1:]
        )

        # Each stage halves the spatial size: 224 -> 28 (3 stages) or
        # 14 (4 stages), reproducing the original hard-coded fc_input.
        num_stages = len(channels) - 1
        spatial = 224 // (2 ** num_stages)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (expects a 1-channel 224x224 edge map:
        # two /2 pools give 56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head fusing backbone features with the 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch; the fc sizes assume (N, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (N, 1, 224, 224).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the final
            attended feature map.
        """
        # Run each Conv+BN+ReLU+Pool stage followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # that the edge branch fuses with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g. an
                # edge_x size that doesn't match edge_fc), fall back to the
                # main head rather than crash — preserves original behavior.
                pass

        # Remaining classifier stages produce the logits.
        return self.classifier[3:](features), attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared two-layer 1x1-conv bottleneck (no bias);
    spatial attention is a single bias-free 7x7 conv — exactly the modules
    stored in the checkpoint's state_dict, so attribute names must not change.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width: floor-divide, but never below one channel.
        squeezed = max(channels // reduction, 1)

        # Channel-attention MLP expressed as 1x1 convs, bias-free to match
        # the checkpoint weights.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled maps
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        channel_gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel channel statistics
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Three capacity variants ('f', 'c', 'q') built from Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after each stage, plus an optional
    edge-image branch whose features are fused into a combined classifier.
    Spatial constants (28x28 / 14x14 backbone, 56x56 edge branch) assume
    224x224 inputs — TODO confirm against the training pipeline.
    """

    # Per-variant (channel plan, hidden fc widths). The channel plan starts
    # at the 3-channel RGB input; each subsequent entry is one conv stage.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f', 'c' or 'q' — selects network capacity.
            num_classes: number of output classes. Default 6 matches the
                released checkpoints (the original code hard-coded 6 and
                silently ignored this argument).

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool stage per channel transition,
        # flattened into a single Sequential so checkpoint indices line up
        # with the original hand-written layout.
        stages = []
        for in_ch, out_ch in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, matching that stage's channel width.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels[1:]
        )

        # Each stage halves the spatial size: 224 -> 28 (3 stages) or
        # 14 (4 stages), reproducing the original hard-coded fc_input.
        num_stages = len(channels) - 1
        spatial = 224 // (2 ** num_stages)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (expects a 1-channel 224x224 edge map:
        # two /2 pools give 56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head fusing backbone features with the 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch; the fc sizes assume (N, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (N, 1, 224, 224).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the final
            attended feature map.
        """
        # Run each Conv+BN+ReLU+Pool stage followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # that the edge branch fuses with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g. an
                # edge_x size that doesn't match edge_fc), fall back to the
                # main head rather than crash — preserves original behavior.
                pass

        # Remaining classifier stages produce the logits.
        return self.classifier[3:](features), attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared two-layer 1x1-conv bottleneck (no bias);
    spatial attention is a single bias-free 7x7 conv — exactly the modules
    stored in the checkpoint's state_dict, so attribute names must not change.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width: floor-divide, but never below one channel.
        squeezed = max(channels // reduction, 1)

        # Channel-attention MLP expressed as 1x1 convs, bias-free to match
        # the checkpoint weights.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled maps
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        channel_gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel channel statistics
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Three capacity variants ('f', 'c', 'q') built from Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after each stage, plus an optional
    edge-image branch whose features are fused into a combined classifier.
    Spatial constants (28x28 / 14x14 backbone, 56x56 edge branch) assume
    224x224 inputs — TODO confirm against the training pipeline.
    """

    # Per-variant (channel plan, hidden fc widths). The channel plan starts
    # at the 3-channel RGB input; each subsequent entry is one conv stage.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f', 'c' or 'q' — selects network capacity.
            num_classes: number of output classes. Default 6 matches the
                released checkpoints (the original code hard-coded 6 and
                silently ignored this argument).

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool stage per channel transition,
        # flattened into a single Sequential so checkpoint indices line up
        # with the original hand-written layout.
        stages = []
        for in_ch, out_ch in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, matching that stage's channel width.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels[1:]
        )

        # Each stage halves the spatial size: 224 -> 28 (3 stages) or
        # 14 (4 stages), reproducing the original hard-coded fc_input.
        num_stages = len(channels) - 1
        spatial = 224 // (2 ** num_stages)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (expects a 1-channel 224x224 edge map:
        # two /2 pools give 56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head fusing backbone features with the 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch; the fc sizes assume (N, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (N, 1, 224, 224).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the final
            attended feature map.
        """
        # Run each Conv+BN+ReLU+Pool stage followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # that the edge branch fuses with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g. an
                # edge_x size that doesn't match edge_fc), fall back to the
                # main head rather than crash — preserves original behavior.
                pass

        # Remaining classifier stages produce the logits.
        return self.classifier[3:](features), attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared two-layer 1x1-conv bottleneck (no bias);
    spatial attention is a single bias-free 7x7 conv — exactly the modules
    stored in the checkpoint's state_dict, so attribute names must not change.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width: floor-divide, but never below one channel.
        squeezed = max(channels // reduction, 1)

        # Channel-attention MLP expressed as 1x1 convs, bias-free to match
        # the checkpoint weights.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled maps
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        channel_gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel channel statistics
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Three capacity variants ('f', 'c', 'q') built from Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after each stage, plus an optional
    edge-image branch whose features are fused into a combined classifier.
    Spatial constants (28x28 / 14x14 backbone, 56x56 edge branch) assume
    224x224 inputs — TODO confirm against the training pipeline.
    """

    # Per-variant (channel plan, hidden fc widths). The channel plan starts
    # at the 3-channel RGB input; each subsequent entry is one conv stage.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f', 'c' or 'q' — selects network capacity.
            num_classes: number of output classes. Default 6 matches the
                released checkpoints (the original code hard-coded 6 and
                silently ignored this argument).

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool stage per channel transition,
        # flattened into a single Sequential so checkpoint indices line up
        # with the original hand-written layout.
        stages = []
        for in_ch, out_ch in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block per stage, matching that stage's channel width.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels[1:]
        )

        # Each stage halves the spatial size: 224 -> 28 (3 stages) or
        # 14 (4 stages), reproducing the original hard-coded fc_input.
        num_stages = len(channels) - 1
        spatial = 224 // (2 ** num_stages)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (expects a 1-channel 224x224 edge map:
        # two /2 pools give 56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head fusing backbone features with the 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch; the fc sizes assume (N, 3, 224, 224).
            edge_x: optional edge-map batch, assumed (N, 1, 224, 224).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the final
            attended feature map.
        """
        # Run each Conv+BN+ReLU+Pool stage followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) yields the features
        # that the edge branch fuses with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g. an
                # edge_x size that doesn't match edge_fc), fall back to the
                # main head rather than crash — preserves original behavior.
                pass

        # Remaining classifier stages produce the logits.
        return self.classifier[3:](features), attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled channel descriptors) followed by spatial attention (a 7x7
    conv over the channel-wise avg/max maps).  All convolutions are
    bias-free to match the stored checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP implemented with 1x1 convolutions (no bias),
        # exactly as the checkpoint stores it.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 convolution over the
        # 2-channel (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        gate_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        gate_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(gate_avg + gate_max)

        # --- spatial attention ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    Three size variants share one layout of Conv->BN->ReLU->MaxPool stages,
    each followed by a CBAM attention module:

      * 'f': 3 stages, 16/32/64 channels
      * 'c': 3 stages, 32/64/128 channels
      * 'q': 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the mid-level
    image features.  Attribute names (conv_layers, attention_modules,
    classifier, combined_classifier, edge_conv1/2, edge_fc) are kept exactly
    as in the original checkpoint so saved state_dicts load unchanged.

    Args:
        model_type: 'f', 'c' or 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths and classifier sizes.  fc_input is sized
        # for 224x224 inputs (each stage halves H and W).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28          # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28         # 224 / 2**3 = 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14         # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage;
        # forward() slices conv_layers[4*i : 4*i + 4] per stage.
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)

        # One CBAM module per stage, applied after the stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge input (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  classifier[0:3] (Linear+ReLU+Dropout) produces
        # the intermediate features; classifier[3:] maps them to logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are fused with image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def _edge_features(self, edge_x):
        """Two conv+pool steps then FC; returns (B, 128) edge features."""
        edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
        edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
        return self.edge_fc(edge_x.view(edge_x.size(0), -1))

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given, its features are fused with the image features.

        Returns:
            Tuple ``(logits, attention_map)``: class scores of shape
            (B, num_classes) and the final attention-weighted feature map.
        """
        # Run every Conv+BN+ReLU+Pool stage followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Intermediate features from classifier[0:3] (Linear+ReLU+Dropout).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        output = None
        if edge_x is not None:
            try:
                edge_features = self._edge_features(edge_x)
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. an
                # unexpected edge_x shape), fall back to the plain head.
                output = None
        if output is None:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled channel descriptors) followed by spatial attention (a 7x7
    conv over the channel-wise avg/max maps).  All convolutions are
    bias-free to match the stored checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP implemented with 1x1 convolutions (no bias),
        # exactly as the checkpoint stores it.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 convolution over the
        # 2-channel (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        gate_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        gate_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(gate_avg + gate_max)

        # --- spatial attention ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    Three size variants share one layout of Conv->BN->ReLU->MaxPool stages,
    each followed by a CBAM attention module:

      * 'f': 3 stages, 16/32/64 channels
      * 'c': 3 stages, 32/64/128 channels
      * 'q': 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the mid-level
    image features.  Attribute names (conv_layers, attention_modules,
    classifier, combined_classifier, edge_conv1/2, edge_fc) are kept exactly
    as in the original checkpoint so saved state_dicts load unchanged.

    Args:
        model_type: 'f', 'c' or 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths and classifier sizes.  fc_input is sized
        # for 224x224 inputs (each stage halves H and W).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28          # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28         # 224 / 2**3 = 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14         # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage;
        # forward() slices conv_layers[4*i : 4*i + 4] per stage.
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)

        # One CBAM module per stage, applied after the stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge input (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  classifier[0:3] (Linear+ReLU+Dropout) produces
        # the intermediate features; classifier[3:] maps them to logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are fused with image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def _edge_features(self, edge_x):
        """Two conv+pool steps then FC; returns (B, 128) edge features."""
        edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
        edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
        return self.edge_fc(edge_x.view(edge_x.size(0), -1))

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given, its features are fused with the image features.

        Returns:
            Tuple ``(logits, attention_map)``: class scores of shape
            (B, num_classes) and the final attention-weighted feature map.
        """
        # Run every Conv+BN+ReLU+Pool stage followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Intermediate features from classifier[0:3] (Linear+ReLU+Dropout).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        output = None
        if edge_x is not None:
            try:
                edge_features = self._edge_features(edge_x)
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. an
                # unexpected edge_x shape), fall back to the plain head.
                output = None
        if output is None:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled channel descriptors) followed by spatial attention (a 7x7
    conv over the channel-wise avg/max maps).  All convolutions are
    bias-free to match the stored checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP implemented with 1x1 convolutions (no bias),
        # exactly as the checkpoint stores it.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 convolution over the
        # 2-channel (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        gate_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        gate_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(gate_avg + gate_max)

        # --- spatial attention ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    Three size variants share one layout of Conv->BN->ReLU->MaxPool stages,
    each followed by a CBAM attention module:

      * 'f': 3 stages, 16/32/64 channels
      * 'c': 3 stages, 32/64/128 channels
      * 'q': 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the mid-level
    image features.  Attribute names (conv_layers, attention_modules,
    classifier, combined_classifier, edge_conv1/2, edge_fc) are kept exactly
    as in the original checkpoint so saved state_dicts load unchanged.

    Args:
        model_type: 'f', 'c' or 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths and classifier sizes.  fc_input is sized
        # for 224x224 inputs (each stage halves H and W).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28          # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28         # 224 / 2**3 = 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14         # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage;
        # forward() slices conv_layers[4*i : 4*i + 4] per stage.
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)

        # One CBAM module per stage, applied after the stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge input (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  classifier[0:3] (Linear+ReLU+Dropout) produces
        # the intermediate features; classifier[3:] maps them to logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are fused with image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def _edge_features(self, edge_x):
        """Two conv+pool steps then FC; returns (B, 128) edge features."""
        edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
        edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
        return self.edge_fc(edge_x.view(edge_x.size(0), -1))

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given, its features are fused with the image features.

        Returns:
            Tuple ``(logits, attention_map)``: class scores of shape
            (B, num_classes) and the final attention-weighted feature map.
        """
        # Run every Conv+BN+ReLU+Pool stage followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Intermediate features from classifier[0:3] (Linear+ReLU+Dropout).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        output = None
        if edge_x is not None:
            try:
                edge_features = self._edge_features(edge_x)
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. an
                # unexpected edge_x shape), fall back to the plain head.
                output = None
        if output is None:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled channel descriptors) followed by spatial attention (a 7x7
    conv over the channel-wise avg/max maps).  All convolutions are
    bias-free to match the stored checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP implemented with 1x1 convolutions (no bias),
        # exactly as the checkpoint stores it.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 convolution over the
        # 2-channel (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        gate_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        gate_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(gate_avg + gate_max)

        # --- spatial attention ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    Three size variants share one layout of Conv->BN->ReLU->MaxPool stages,
    each followed by a CBAM attention module:

      * 'f': 3 stages, 16/32/64 channels
      * 'c': 3 stages, 32/64/128 channels
      * 'q': 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the mid-level
    image features.  Attribute names (conv_layers, attention_modules,
    classifier, combined_classifier, edge_conv1/2, edge_fc) are kept exactly
    as in the original checkpoint so saved state_dicts load unchanged.

    Args:
        model_type: 'f', 'c' or 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths and classifier sizes.  fc_input is sized
        # for 224x224 inputs (each stage halves H and W).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28          # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28         # 224 / 2**3 = 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14         # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage;
        # forward() slices conv_layers[4*i : 4*i + 4] per stage.
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)

        # One CBAM module per stage, applied after the stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge detection branch: two conv+pool steps on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge input (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  classifier[0:3] (Linear+ReLU+Dropout) produces
        # the intermediate features; classifier[3:] maps them to logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are fused with image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def _edge_features(self, edge_x):
        """Two conv+pool steps then FC; returns (B, 128) edge features."""
        edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
        edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
        return self.edge_fc(edge_x.view(edge_x.size(0), -1))

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given, its features are fused with the image features.

        Returns:
            Tuple ``(logits, attention_map)``: class scores of shape
            (B, num_classes) and the final attention-weighted feature map.
        """
        # Run every Conv+BN+ReLU+Pool stage followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Intermediate features from classifier[0:3] (Linear+ReLU+Dropout).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        output = None
        if edge_x is not None:
            try:
                edge_features = self._edge_features(edge_x)
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. an
                # unexpected edge_x shape), fall back to the plain head.
                output = None
        if output is None:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled channel descriptors) followed by spatial attention (a 7x7
    conv over the channel-wise avg/max maps).  All convolutions are
    bias-free to match the stored checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP implemented with 1x1 convolutions (no bias),
        # exactly as the checkpoint stores it.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 convolution over the
        # 2-channel (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        gate_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        gate_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(gate_avg + gate_max)

        # --- spatial attention ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose module layout mirrors the released checkpoint.

    Three size variants share the same Conv+BN+ReLU+MaxPool stage pattern,
    each stage followed by a CBAM attention module:

      'f': 3 stages, 16 -> 32 -> 64 channels
      'c': 3 stages, 32 -> 64 -> 128 channels
      'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    An optional single-channel edge branch can be fused with the main
    features through a combined classifier.

    Args:
        model_type: variant selector, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28 = 224 / 2**3 -- assumes 224x224 input; TODO confirm with caller.
            fc_input = 64 * 28 * 28
            # BUGFIX: final size follows num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # 14 = 224 / 2**4 (one extra pooling stage in 'q').
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown type previously fell through and crashed
            # later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56 = 224 / 2**2 after the two pools in forward() -- assumes a
        # 224x224 edge map; other sizes hit the best-effort fallback below.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two Linear+ReLU+Dropout stages, then output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128-dim edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and the optional edge branch).

        Args:
            x: RGB batch (B, 3, H, W); the fc layers assume H = W = 224.
            edge_x: optional (B, 1, H, W) edge map.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the last attention-weighted feature map.
        """
        # Each stage is 4 sequential children (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor rejects anything else)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear, ReLU, Dropout) produces the
        # feature vector the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. edge_fc shape mismatch for non-224 inputs), finish
                # with the main classifier instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate followed by spatial gate).

    The sub-module names and bias-free convolutions deliberately mirror the
    serialized checkpoint so its state_dict loads without key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel gate: 1x1 conv bottleneck (squeeze -> excite), no bias.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the stacked channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        gate_logits = sum(
            self.channel_attention(pool(x, 1))
            for pool in (F.adaptive_avg_pool2d, F.adaptive_max_pool2d)
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial attention: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose module layout mirrors the released checkpoint.

    Three size variants share the same Conv+BN+ReLU+MaxPool stage pattern,
    each stage followed by a CBAM attention module:

      'f': 3 stages, 16 -> 32 -> 64 channels
      'c': 3 stages, 32 -> 64 -> 128 channels
      'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    An optional single-channel edge branch can be fused with the main
    features through a combined classifier.

    Args:
        model_type: variant selector, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28 = 224 / 2**3 -- assumes 224x224 input; TODO confirm with caller.
            fc_input = 64 * 28 * 28
            # BUGFIX: final size follows num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # 14 = 224 / 2**4 (one extra pooling stage in 'q').
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown type previously fell through and crashed
            # later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56 = 224 / 2**2 after the two pools in forward() -- assumes a
        # 224x224 edge map; other sizes hit the best-effort fallback below.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two Linear+ReLU+Dropout stages, then output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128-dim edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and the optional edge branch).

        Args:
            x: RGB batch (B, 3, H, W); the fc layers assume H = W = 224.
            edge_x: optional (B, 1, H, W) edge map.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the last attention-weighted feature map.
        """
        # Each stage is 4 sequential children (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor rejects anything else)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear, ReLU, Dropout) produces the
        # feature vector the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. edge_fc shape mismatch for non-224 inputs), finish
                # with the main classifier instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate followed by spatial gate).

    The sub-module names and bias-free convolutions deliberately mirror the
    serialized checkpoint so its state_dict loads without key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel gate: 1x1 conv bottleneck (squeeze -> excite), no bias.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the stacked channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        gate_logits = sum(
            self.channel_attention(pool(x, 1))
            for pool in (F.adaptive_avg_pool2d, F.adaptive_max_pool2d)
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial attention: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose module layout mirrors the released checkpoint.

    Three size variants share the same Conv+BN+ReLU+MaxPool stage pattern,
    each stage followed by a CBAM attention module:

      'f': 3 stages, 16 -> 32 -> 64 channels
      'c': 3 stages, 32 -> 64 -> 128 channels
      'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    An optional single-channel edge branch can be fused with the main
    features through a combined classifier.

    Args:
        model_type: variant selector, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28 = 224 / 2**3 -- assumes 224x224 input; TODO confirm with caller.
            fc_input = 64 * 28 * 28
            # BUGFIX: final size follows num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # 14 = 224 / 2**4 (one extra pooling stage in 'q').
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown type previously fell through and crashed
            # later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56 = 224 / 2**2 after the two pools in forward() -- assumes a
        # 224x224 edge map; other sizes hit the best-effort fallback below.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two Linear+ReLU+Dropout stages, then output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128-dim edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and the optional edge branch).

        Args:
            x: RGB batch (B, 3, H, W); the fc layers assume H = W = 224.
            edge_x: optional (B, 1, H, W) edge map.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the last attention-weighted feature map.
        """
        # Each stage is 4 sequential children (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor rejects anything else)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear, ReLU, Dropout) produces the
        # feature vector the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. edge_fc shape mismatch for non-224 inputs), finish
                # with the main classifier instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate followed by spatial gate).

    The sub-module names and bias-free convolutions deliberately mirror the
    serialized checkpoint so its state_dict loads without key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel gate: 1x1 conv bottleneck (squeeze -> excite), no bias.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the stacked channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        gate_logits = sum(
            self.channel_attention(pool(x, 1))
            for pool in (F.adaptive_avg_pool2d, F.adaptive_max_pool2d)
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial attention: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose module layout mirrors the released checkpoint.

    Three size variants share the same Conv+BN+ReLU+MaxPool stage pattern,
    each stage followed by a CBAM attention module:

      'f': 3 stages, 16 -> 32 -> 64 channels
      'c': 3 stages, 32 -> 64 -> 128 channels
      'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    An optional single-channel edge branch can be fused with the main
    features through a combined classifier.

    Args:
        model_type: variant selector, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28 = 224 / 2**3 -- assumes 224x224 input; TODO confirm with caller.
            fc_input = 64 * 28 * 28
            # BUGFIX: final size follows num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # 14 = 224 / 2**4 (one extra pooling stage in 'q').
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown type previously fell through and crashed
            # later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56 = 224 / 2**2 after the two pools in forward() -- assumes a
        # 224x224 edge map; other sizes hit the best-effort fallback below.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two Linear+ReLU+Dropout stages, then output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128-dim edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and the optional edge branch).

        Args:
            x: RGB batch (B, 3, H, W); the fc layers assume H = W = 224.
            edge_x: optional (B, 1, H, W) edge map.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the last attention-weighted feature map.
        """
        # Each stage is 4 sequential children (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor rejects anything else)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear, ReLU, Dropout) produces the
        # feature vector the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. edge_fc shape mismatch for non-224 inputs), finish
                # with the main classifier instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate followed by spatial gate).

    The sub-module names and bias-free convolutions deliberately mirror the
    serialized checkpoint so its state_dict loads without key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel gate: 1x1 conv bottleneck (squeeze -> excite), no bias.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the stacked channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        gate_logits = sum(
            self.channel_attention(pool(x, 1))
            for pool in (F.adaptive_avg_pool2d, F.adaptive_max_pool2d)
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial attention: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose module layout mirrors the released checkpoint.

    Three size variants share the same Conv+BN+ReLU+MaxPool stage pattern,
    each stage followed by a CBAM attention module:

      'f': 3 stages, 16 -> 32 -> 64 channels
      'c': 3 stages, 32 -> 64 -> 128 channels
      'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    An optional single-channel edge branch can be fused with the main
    features through a combined classifier.

    Args:
        model_type: variant selector, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28 = 224 / 2**3 -- assumes 224x224 input; TODO confirm with caller.
            fc_input = 64 * 28 * 28
            # BUGFIX: final size follows num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            # 14 = 224 / 2**4 (one extra pooling stage in 'q').
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown type previously fell through and crashed
            # later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 56 = 224 / 2**2 after the two pools in forward() -- assumes a
        # 224x224 edge map; other sizes hit the best-effort fallback below.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two Linear+ReLU+Dropout stages, then output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128-dim edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and the optional edge branch).

        Args:
            x: RGB batch (B, 3, H, W); the fc layers assume H = W = 224.
            edge_x: optional (B, 1, H, W) edge map.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the last attention-weighted feature map.
        """
        # Each stage is 4 sequential children (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor rejects anything else)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear, ReLU, Dropout) produces the
        # feature vector the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. edge_fc shape mismatch for non-224 inputs), finish
                # with the main classifier instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel gate followed by spatial gate.

    The layer layout mirrors the serialized checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution, so state_dict keys line up when loading.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convs (no bias, per checkpoint);
        # the bottleneck never shrinks below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the 2-channel [avg, max] map
        # (no bias, per checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_logits = (
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * torch.sigmoid(channel_logits)

        # --- spatial gate: 7x7 conv over per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged CNN + CBAM attention + optional edge branch.

    model_type selects the capacity:
        'f': 3 stages (16/32/64 ch),  fc 256 -> 128 -> num_classes
        'c': 3 stages (32/64/128 ch), fc 512 -> 256 -> num_classes
        'q': 4 stages (64..512 ch),   fc 1024 -> 512 -> num_classes

    The fully-connected input sizes (28x28 / 14x14 final maps) assume a
    224x224 RGB input — TODO confirm against the training pipeline.

    forward(x, edge_x=None) -> (logits, attention_map).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: repeated Conv -> BN -> ReLU -> MaxPool stages (four
        # modules per stage), one CBAM attention module per stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the last fc size was hard-coded to 6, silently ignoring
            # the num_classes argument (default behavior unchanged).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown model_type crashed later with a NameError
            # on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+Pool steps over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() taps the first three layers
        # (Linear -> ReLU -> Dropout) to obtain the shared feature vector of
        # size fc_sizes[0], then applies classifier[3:] as the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features (128-dim) are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); see the class docstring."""
        # Each stage is four sequential layers followed by its attention
        # module; iterating over attention_modules covers both the 3-stage
        # ('f'/'c') and 4-stage ('q') layouts without duplicated branches,
        # and removes the UnboundLocalError the old if/elif chain hit on an
        # unexpected model_type.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared feature vector: first Linear -> ReLU -> Dropout.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a wrongly sized edge_x falls
                # back to the plain classifier head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: channel gate followed by spatial gate.

    The layer layout mirrors the serialized checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution, so state_dict keys line up when loading.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input (and output) feature channels.
            reduction: channel-MLP bottleneck ratio; floored at 1 channel.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # Channel gate: shared MLP over global average- and max-pooled maps.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        # Both gates are sigmoids, so each multiplication only rescales
        # activations into (0, 1) of their original magnitude.
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Spatial gate: 7x7 conv over per-pixel channel mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged CNN + CBAM attention + optional edge branch.

    model_type selects the capacity:
        'f': 3 stages (16/32/64 ch),  fc 256 -> 128 -> num_classes
        'c': 3 stages (32/64/128 ch), fc 512 -> 256 -> num_classes
        'q': 4 stages (64..512 ch),   fc 1024 -> 512 -> num_classes

    The fully-connected input sizes (28x28 / 14x14 final maps) assume a
    224x224 RGB input — TODO confirm against the training pipeline.

    forward(x, edge_x=None) -> (logits, attention_map).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: repeated Conv -> BN -> ReLU -> MaxPool stages (four
        # modules per stage), one CBAM attention module per stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the last fc size was hard-coded to 6, silently ignoring
            # the num_classes argument (default behavior unchanged).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown model_type crashed later with a NameError
            # on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+Pool steps over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() taps the first three layers
        # (Linear -> ReLU -> Dropout) to obtain the shared feature vector of
        # size fc_sizes[0], then applies classifier[3:] as the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features (128-dim) are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); see the class docstring."""
        # Each stage is four sequential layers followed by its attention
        # module; iterating over attention_modules covers both the 3-stage
        # ('f'/'c') and 4-stage ('q') layouts without duplicated branches,
        # and removes the UnboundLocalError the old if/elif chain hit on an
        # unexpected model_type.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared feature vector: first Linear -> ReLU -> Dropout.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a wrongly sized edge_x falls
                # back to the plain classifier head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel gate followed by spatial gate.

    The layer layout mirrors the serialized checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution, so state_dict keys line up when loading.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convs (no bias, per checkpoint);
        # the bottleneck never shrinks below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the 2-channel [avg, max] map
        # (no bias, per checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_logits = (
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * torch.sigmoid(channel_logits)

        # --- spatial gate: 7x7 conv over per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged CNN + CBAM attention + optional edge branch.

    model_type selects the capacity:
        'f': 3 stages (16/32/64 ch),  fc 256 -> 128 -> num_classes
        'c': 3 stages (32/64/128 ch), fc 512 -> 256 -> num_classes
        'q': 4 stages (64..512 ch),   fc 1024 -> 512 -> num_classes

    The fully-connected input sizes (28x28 / 14x14 final maps) assume a
    224x224 RGB input — TODO confirm against the training pipeline.

    forward(x, edge_x=None) -> (logits, attention_map).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: repeated Conv -> BN -> ReLU -> MaxPool stages (four
        # modules per stage), one CBAM attention module per stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the last fc size was hard-coded to 6, silently ignoring
            # the num_classes argument (default behavior unchanged).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown model_type crashed later with a NameError
            # on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+Pool steps over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() taps the first three layers
        # (Linear -> ReLU -> Dropout) to obtain the shared feature vector of
        # size fc_sizes[0], then applies classifier[3:] as the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features (128-dim) are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); see the class docstring."""
        # Each stage is four sequential layers followed by its attention
        # module; iterating over attention_modules covers both the 3-stage
        # ('f'/'c') and 4-stage ('q') layouts without duplicated branches,
        # and removes the UnboundLocalError the old if/elif chain hit on an
        # unexpected model_type.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared feature vector: first Linear -> ReLU -> Dropout.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a wrongly sized edge_x falls
                # back to the plain classifier head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel gate followed by spatial gate.

    The layer layout mirrors the serialized checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution, so state_dict keys line up when loading.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convs (no bias, per checkpoint);
        # the bottleneck never shrinks below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the 2-channel [avg, max] map
        # (no bias, per checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_logits = (
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * torch.sigmoid(channel_logits)

        # --- spatial gate: 7x7 conv over per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged CNN + CBAM attention + optional edge branch.

    model_type selects the capacity:
        'f': 3 stages (16/32/64 ch),  fc 256 -> 128 -> num_classes
        'c': 3 stages (32/64/128 ch), fc 512 -> 256 -> num_classes
        'q': 4 stages (64..512 ch),   fc 1024 -> 512 -> num_classes

    The fully-connected input sizes (28x28 / 14x14 final maps) assume a
    224x224 RGB input — TODO confirm against the training pipeline.

    forward(x, edge_x=None) -> (logits, attention_map).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: repeated Conv -> BN -> ReLU -> MaxPool stages (four
        # modules per stage), one CBAM attention module per stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the last fc size was hard-coded to 6, silently ignoring
            # the num_classes argument (default behavior unchanged).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown model_type crashed later with a NameError
            # on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+Pool steps over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() taps the first three layers
        # (Linear -> ReLU -> Dropout) to obtain the shared feature vector of
        # size fc_sizes[0], then applies classifier[3:] as the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features (128-dim) are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); see the class docstring."""
        # Each stage is four sequential layers followed by its attention
        # module; iterating over attention_modules covers both the 3-stage
        # ('f'/'c') and 4-stage ('q') layouts without duplicated branches,
        # and removes the UnboundLocalError the old if/elif chain hit on an
        # unexpected model_type.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared feature vector: first Linear -> ReLU -> Dropout.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: a wrongly sized edge_x falls
                # back to the plain classifier head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: channel gate followed by spatial gate.

    The layer layout mirrors the serialized checkpoint: the channel MLP is a
    pair of bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution, so state_dict keys line up when loading.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convs (no bias, per checkpoint);
        # the bottleneck never shrinks below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the 2-channel [avg, max] map
        # (no bias, per checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_logits = (
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * torch.sigmoid(channel_logits)

        # --- spatial gate: 7x7 conv over per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier built to load the released checkpoint weights.

    Three variants share one layout of Conv->BN->ReLU->MaxPool stages, each
    stage followed by a CBAM attention module:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,    FC 256/128
      * ``'c'`` (compact): 3 stages, 32/64/128 channels,   FC 512/256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 chans, FC 1024/512

    An optional edge branch (two conv+pool stages and a 128-d projection)
    can be fused with the main features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: size of the output layer. Default 6 matches the
            checkpoint; other values resize only the final Linear layers.

    Raises:
        ValueError: if ``model_type`` is not recognised.

    NOTE(review): the hard-coded flatten sizes assume 224x224 RGB input
    (28x28 maps after 3 pools, 14x14 after 4) and a 224x224 single-channel
    edge map (56x56 after 2 pools) -- confirm against the data pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (first entry = RGB input),
        # hidden FC widths, and final feature-map side length.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        fc_hidden = {'f': [256, 128], 'c': [512, 256], 'q': [1024, 512]}
        feat_side = {'f': 28, 'c': 28, 'q': 14}

        if model_type not in stage_channels:
            # Fail fast instead of a later NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        chans = stage_channels[model_type]
        # Conv layers: Sequential with the checkpoint's Conv+BN+ReLU+Pool
        # pattern (4 entries per stage, so indices stay checkpoint-compatible).
        layers = []
        for c_in, c_out in zip(chans[:-1], chans[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in chans[1:]
        )
        fc_input = chans[-1] * feat_side[model_type] ** 2
        # Final layer sized by num_classes (was hard-coded to 6).
        fc_sizes = fc_hidden[model_type] + [num_classes]

        # Edge detection branch (single-channel edge map in, 128-d features out).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head, optionally fusing edge features.

        Args:
            x: RGB batch; the flatten sizes assume 224x224 input.
            edge_x: optional single-channel edge-map batch. When given, the
                edge branch is fused via ``combined_classifier``; any failure
                in the branch falls back to the plain head (best-effort).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the last
            attention-weighted feature map.
        """
        # One Conv/BN/ReLU/Pool stage (4 Sequential entries) per attention
        # module; works uniformly for the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort: a mismatched edge map must not crash inference;
                # fall through to the plain classifier head instead.
                pass

        return self.classifier[3:](features), attention_map
        return output, attention_map
Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the pretrained checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the global avg- and
    max-pooled descriptors, summed and squashed with a sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max maps.
    All convolutions are bias-free so parameter names and shapes line up
    with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP, expressed as 1x1 convs (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier built to load the released checkpoint weights.

    Three variants share one layout of Conv->BN->ReLU->MaxPool stages, each
    stage followed by a CBAM attention module:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,    FC 256/128
      * ``'c'`` (compact): 3 stages, 32/64/128 channels,   FC 512/256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 chans, FC 1024/512

    An optional edge branch (two conv+pool stages and a 128-d projection)
    can be fused with the main features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: size of the output layer. Default 6 matches the
            checkpoint; other values resize only the final Linear layers.

    Raises:
        ValueError: if ``model_type`` is not recognised.

    NOTE(review): the hard-coded flatten sizes assume 224x224 RGB input
    (28x28 maps after 3 pools, 14x14 after 4) and a 224x224 single-channel
    edge map (56x56 after 2 pools) -- confirm against the data pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (first entry = RGB input),
        # hidden FC widths, and final feature-map side length.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        fc_hidden = {'f': [256, 128], 'c': [512, 256], 'q': [1024, 512]}
        feat_side = {'f': 28, 'c': 28, 'q': 14}

        if model_type not in stage_channels:
            # Fail fast instead of a later NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        chans = stage_channels[model_type]
        # Conv layers: Sequential with the checkpoint's Conv+BN+ReLU+Pool
        # pattern (4 entries per stage, so indices stay checkpoint-compatible).
        layers = []
        for c_in, c_out in zip(chans[:-1], chans[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in chans[1:]
        )
        fc_input = chans[-1] * feat_side[model_type] ** 2
        # Final layer sized by num_classes (was hard-coded to 6).
        fc_sizes = fc_hidden[model_type] + [num_classes]

        # Edge detection branch (single-channel edge map in, 128-d features out).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head, optionally fusing edge features.

        Args:
            x: RGB batch; the flatten sizes assume 224x224 input.
            edge_x: optional single-channel edge-map batch. When given, the
                edge branch is fused via ``combined_classifier``; any failure
                in the branch falls back to the plain head (best-effort).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the last
            attention-weighted feature map.
        """
        # One Conv/BN/ReLU/Pool stage (4 Sequential entries) per attention
        # module; works uniformly for the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort: a mismatched edge map must not crash inference;
                # fall through to the plain classifier head instead.
                pass

        return self.classifier[3:](features), attention_map
Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the pretrained checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the global avg- and
    max-pooled descriptors, summed and squashed with a sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max maps.
    All convolutions are bias-free so parameter names and shapes line up
    with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP, expressed as 1x1 convs (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier built to load the released checkpoint weights.

    Three variants share one layout of Conv->BN->ReLU->MaxPool stages, each
    stage followed by a CBAM attention module:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,    FC 256/128
      * ``'c'`` (compact): 3 stages, 32/64/128 channels,   FC 512/256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 chans, FC 1024/512

    An optional edge branch (two conv+pool stages and a 128-d projection)
    can be fused with the main features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: size of the output layer. Default 6 matches the
            checkpoint; other values resize only the final Linear layers.

    Raises:
        ValueError: if ``model_type`` is not recognised.

    NOTE(review): the hard-coded flatten sizes assume 224x224 RGB input
    (28x28 maps after 3 pools, 14x14 after 4) and a 224x224 single-channel
    edge map (56x56 after 2 pools) -- confirm against the data pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (first entry = RGB input),
        # hidden FC widths, and final feature-map side length.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        fc_hidden = {'f': [256, 128], 'c': [512, 256], 'q': [1024, 512]}
        feat_side = {'f': 28, 'c': 28, 'q': 14}

        if model_type not in stage_channels:
            # Fail fast instead of a later NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        chans = stage_channels[model_type]
        # Conv layers: Sequential with the checkpoint's Conv+BN+ReLU+Pool
        # pattern (4 entries per stage, so indices stay checkpoint-compatible).
        layers = []
        for c_in, c_out in zip(chans[:-1], chans[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in chans[1:]
        )
        fc_input = chans[-1] * feat_side[model_type] ** 2
        # Final layer sized by num_classes (was hard-coded to 6).
        fc_sizes = fc_hidden[model_type] + [num_classes]

        # Edge detection branch (single-channel edge map in, 128-d features out).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head, optionally fusing edge features.

        Args:
            x: RGB batch; the flatten sizes assume 224x224 input.
            edge_x: optional single-channel edge-map batch. When given, the
                edge branch is fused via ``combined_classifier``; any failure
                in the branch falls back to the plain head (best-effort).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the last
            attention-weighted feature map.
        """
        # One Conv/BN/ReLU/Pool stage (4 Sequential entries) per attention
        # module; works uniformly for the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort: a mismatched edge map must not crash inference;
                # fall through to the plain classifier head instead.
                pass

        return self.classifier[3:](features), attention_map
Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the pretrained checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the global avg- and
    max-pooled descriptors, summed and squashed with a sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max maps.
    All convolutions are bias-free so parameter names and shapes line up
    with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP, expressed as 1x1 convs (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier built to load the released checkpoint weights.

    Three variants share one layout of Conv->BN->ReLU->MaxPool stages, each
    stage followed by a CBAM attention module:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,    FC 256/128
      * ``'c'`` (compact): 3 stages, 32/64/128 channels,   FC 512/256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 chans, FC 1024/512

    An optional edge branch (two conv+pool stages and a 128-d projection)
    can be fused with the main features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: size of the output layer. Default 6 matches the
            checkpoint; other values resize only the final Linear layers.

    Raises:
        ValueError: if ``model_type`` is not recognised.

    NOTE(review): the hard-coded flatten sizes assume 224x224 RGB input
    (28x28 maps after 3 pools, 14x14 after 4) and a 224x224 single-channel
    edge map (56x56 after 2 pools) -- confirm against the data pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (first entry = RGB input),
        # hidden FC widths, and final feature-map side length.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        fc_hidden = {'f': [256, 128], 'c': [512, 256], 'q': [1024, 512]}
        feat_side = {'f': 28, 'c': 28, 'q': 14}

        if model_type not in stage_channels:
            # Fail fast instead of a later NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        chans = stage_channels[model_type]
        # Conv layers: Sequential with the checkpoint's Conv+BN+ReLU+Pool
        # pattern (4 entries per stage, so indices stay checkpoint-compatible).
        layers = []
        for c_in, c_out in zip(chans[:-1], chans[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in chans[1:]
        )
        fc_input = chans[-1] * feat_side[model_type] ** 2
        # Final layer sized by num_classes (was hard-coded to 6).
        fc_sizes = fc_hidden[model_type] + [num_classes]

        # Edge detection branch (single-channel edge map in, 128-d features out).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head, optionally fusing edge features.

        Args:
            x: RGB batch; the flatten sizes assume 224x224 input.
            edge_x: optional single-channel edge-map batch. When given, the
                edge branch is fused via ``combined_classifier``; any failure
                in the branch falls back to the plain head (best-effort).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the last
            attention-weighted feature map.
        """
        # One Conv/BN/ReLU/Pool stage (4 Sequential entries) per attention
        # module; works uniformly for the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort: a mismatched edge map must not crash inference;
                # fall through to the plain classifier head instead.
                pass

        return self.classifier[3:](features), attention_map
Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the pretrained checkpoint.

    Channel attention: a shared 1x1-conv MLP applied to the global avg- and
    max-pooled descriptors, summed and squashed with a sigmoid.
    Spatial attention: a single 7x7 conv over the channel-wise mean/max maps.
    All convolutions are bias-free so parameter names and shapes line up
    with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP, expressed as 1x1 convs (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier built to load the released checkpoint weights.

    Three variants share one layout of Conv->BN->ReLU->MaxPool stages, each
    stage followed by a CBAM attention module:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,    FC 256/128
      * ``'c'`` (compact): 3 stages, 32/64/128 channels,   FC 512/256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 chans, FC 1024/512

    An optional edge branch (two conv+pool stages and a 128-d projection)
    can be fused with the main features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: size of the output layer. Default 6 matches the
            checkpoint; other values resize only the final Linear layers.

    Raises:
        ValueError: if ``model_type`` is not recognised.

    NOTE(review): the hard-coded flatten sizes assume 224x224 RGB input
    (28x28 maps after 3 pools, 14x14 after 4) and a 224x224 single-channel
    edge map (56x56 after 2 pools) -- confirm against the data pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (first entry = RGB input),
        # hidden FC widths, and final feature-map side length.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        fc_hidden = {'f': [256, 128], 'c': [512, 256], 'q': [1024, 512]}
        feat_side = {'f': 28, 'c': 28, 'q': 14}

        if model_type not in stage_channels:
            # Fail fast instead of a later NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        chans = stage_channels[model_type]
        # Conv layers: Sequential with the checkpoint's Conv+BN+ReLU+Pool
        # pattern (4 entries per stage, so indices stay checkpoint-compatible).
        layers = []
        for c_in, c_out in zip(chans[:-1], chans[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in chans[1:]
        )
        fc_input = chans[-1] * feat_side[model_type] ** 2
        # Final layer sized by num_classes (was hard-coded to 6).
        fc_sizes = fc_hidden[model_type] + [num_classes]

        # Edge detection branch (single-channel edge map in, 128-d features out).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head, optionally fusing edge features.

        Args:
            x: RGB batch; the flatten sizes assume 224x224 input.
            edge_x: optional single-channel edge-map batch. When given, the
                edge branch is fused via ``combined_classifier``; any failure
                in the branch falls back to the plain head (best-effort).

        Returns:
            ``(logits, attention_map)`` where ``attention_map`` is the last
            attention-weighted feature map.
        """
        # One Conv/BN/ReLU/Pool stage (4 Sequential entries) per attention
        # module; works uniformly for the 3-stage and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort: a mismatched edge map must not crash inference;
                # fall through to the plain classifier head instead.
                pass

        return self.classifier[3:](features), attention_map
Model definition (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention matching the checkpoint layout.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck; spatial
    attention is a single bias-free 7x7 conv over the stacked mean/max
    channel maps.  Submodule names mirror the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Squeeze-excite style bottleneck built from 1x1 convs (no bias, to
        # match the checkpoint's parameter shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the 2-channel [avg, max] map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention: gate each position by channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load a pretrained checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels.
    * ``'c'`` (classic): 3 stages, 32/64/128 channels.
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels.

    The flattened FC input sizes (28*28 resp. 14*14 maps) imply a 224x224
    input image -- TODO confirm against the training pipeline.

    An optional one-channel edge map can be fused with the backbone
    features through a small conv branch; when the branch fails or no edge
    map is given, the plain classifier head is used instead.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output classes (default 6, the value baked
            into the released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  Layer
        # order and channel widths must match the checkpoint exactly.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Fix: honour num_classes (previously hard-coded to 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original code fell through and died later with
            # a confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 max-pools shrink a 224x224 edge map to 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head (Linear/ReLU/Dropout x2, then final Linear).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Image batch (B, 3, H, W); H = W = 224 is assumed by the FC
               layer sizes.
            edge_x: Optional edge-map batch (B, 1, H, W).

        Returns:
            Tuple of class logits (B, num_classes) and the feature map
            produced by the last attention stage.
        """
        # Each conv_layers[i:i+4] slice is one Conv+BN+ReLU+Pool stage;
        # CBAM attention is applied after every stage.  model_type was
        # validated in __init__, so attention_map is always bound.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the plain head when
                # the edge branch fails (e.g. an edge-map size that does not
                # match the fixed-size edge_fc layer).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention matching the checkpoint layout.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck; spatial
    attention is a single bias-free 7x7 conv over the stacked mean/max
    channel maps.  Submodule names mirror the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Squeeze-excite style bottleneck built from 1x1 convs (no bias, to
        # match the checkpoint's parameter shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the 2-channel [avg, max] map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention: gate each position by channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load a pretrained checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels.
    * ``'c'`` (classic): 3 stages, 32/64/128 channels.
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels.

    The flattened FC input sizes (28*28 resp. 14*14 maps) imply a 224x224
    input image -- TODO confirm against the training pipeline.

    An optional one-channel edge map can be fused with the backbone
    features through a small conv branch; when the branch fails or no edge
    map is given, the plain classifier head is used instead.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output classes (default 6, the value baked
            into the released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  Layer
        # order and channel widths must match the checkpoint exactly.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Fix: honour num_classes (previously hard-coded to 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original code fell through and died later with
            # a confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 max-pools shrink a 224x224 edge map to 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head (Linear/ReLU/Dropout x2, then final Linear).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Image batch (B, 3, H, W); H = W = 224 is assumed by the FC
               layer sizes.
            edge_x: Optional edge-map batch (B, 1, H, W).

        Returns:
            Tuple of class logits (B, num_classes) and the feature map
            produced by the last attention stage.
        """
        # Each conv_layers[i:i+4] slice is one Conv+BN+ReLU+Pool stage;
        # CBAM attention is applied after every stage.  model_type was
        # validated in __init__, so attention_map is always bound.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the plain head when
                # the edge branch fails (e.g. an edge-map size that does not
                # match the fixed-size edge_fc layer).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention matching the checkpoint layout.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck; spatial
    attention is a single bias-free 7x7 conv over the stacked mean/max
    channel maps.  Submodule names mirror the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Squeeze-excite style bottleneck built from 1x1 convs (no bias, to
        # match the checkpoint's parameter shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the 2-channel [avg, max] map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention: gate each position by channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load a pretrained checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels.
    * ``'c'`` (classic): 3 stages, 32/64/128 channels.
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels.

    The flattened FC input sizes (28*28 resp. 14*14 maps) imply a 224x224
    input image -- TODO confirm against the training pipeline.

    An optional one-channel edge map can be fused with the backbone
    features through a small conv branch; when the branch fails or no edge
    map is given, the plain classifier head is used instead.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output classes (default 6, the value baked
            into the released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  Layer
        # order and channel widths must match the checkpoint exactly.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Fix: honour num_classes (previously hard-coded to 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original code fell through and died later with
            # a confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 max-pools shrink a 224x224 edge map to 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head (Linear/ReLU/Dropout x2, then final Linear).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Image batch (B, 3, H, W); H = W = 224 is assumed by the FC
               layer sizes.
            edge_x: Optional edge-map batch (B, 1, H, W).

        Returns:
            Tuple of class logits (B, num_classes) and the feature map
            produced by the last attention stage.
        """
        # Each conv_layers[i:i+4] slice is one Conv+BN+ReLU+Pool stage;
        # CBAM attention is applied after every stage.  model_type was
        # validated in __init__, so attention_map is always bound.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the plain head when
                # the edge branch fails (e.g. an edge-map size that does not
                # match the fixed-size edge_fc layer).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention matching the checkpoint layout.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck; spatial
    attention is a single bias-free 7x7 conv over the stacked mean/max
    channel maps.  Submodule names mirror the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Squeeze-excite style bottleneck built from 1x1 convs (no bias, to
        # match the checkpoint's parameter shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the 2-channel [avg, max] map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention: gate each position by channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load a pretrained checkpoint.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels.
    * ``'c'`` (classic): 3 stages, 32/64/128 channels.
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels.

    The flattened FC input sizes (28*28 resp. 14*14 maps) imply a 224x224
    input image -- TODO confirm against the training pipeline.

    An optional one-channel edge map can be fused with the backbone
    features through a small conv branch; when the branch fails or no edge
    map is given, the plain classifier head is used instead.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output classes (default 6, the value baked
            into the released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  Layer
        # order and channel widths must match the checkpoint exactly.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Fix: honour num_classes (previously hard-coded to 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original code fell through and died later with
            # a confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 max-pools shrink a 224x224 edge map to 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head (Linear/ReLU/Dropout x2, then final Linear).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Image batch (B, 3, H, W); H = W = 224 is assumed by the FC
               layer sizes.
            edge_x: Optional edge-map batch (B, 1, H, W).

        Returns:
            Tuple of class logits (B, num_classes) and the feature map
            produced by the last attention stage.
        """
        # Each conv_layers[i:i+4] slice is one Conv+BN+ReLU+Pool stage;
        # CBAM attention is applied after every stage.  model_type was
        # validated in __init__, so attention_map is always bound.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the plain head when
                # the edge branch fails (e.g. an edge-map size that does not
                # match the fixed-size edge_fc layer).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention matching the checkpoint layout.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck; spatial
    attention is a single bias-free 7x7 conv over the stacked mean/max
    channel maps.  Submodule names mirror the pretrained checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Squeeze-excite style bottleneck built from 1x1 convs (no bias, to
        # match the checkpoint's parameter shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the 2-channel [avg, max] map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention: gate each position by channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All variants stack Conv+BN+ReLU+MaxPool stages with a CBAM attention
    block after every stage:

    * ``'f'`` -- 3 stages, 16/32/64 channels, FC head 256/128
    * ``'c'`` -- 3 stages, 32/64/128 channels, FC head 512/256
    * ``'q'`` -- 4 stages, 64/128/256/512 channels, FC head 1024/512

    The flattened sizes (``C * 28 * 28`` after 3 pools, ``C * 14 * 14``
    after 4, ``64 * 56 * 56`` after the edge branch's 2 pools) all imply a
    224x224 input -- TODO confirm against the preprocessing pipeline.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer (default 6, matching the
            published checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: previously hard-coded 6 outputs, ignoring num_classes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown model_type previously fell through and
            # surfaced later as a confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+pool stages on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout twice, then the output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB input batch; spatial size must match the FC head sizes
                chosen in ``__init__`` (224x224 for the checkpoint).
            edge_x: Optional single-channel edge map; when given (and
                compatible), its features are fused before classification.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the final attention-weighted feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM block; 'f'/'c' have 3 stages, 'q' has 4, so iterating
        # the attention modules covers both layouts.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Penultimate features: first Linear + ReLU + Dropout of the head.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared bias-free 1x1-conv MLP over the
    avg- and max-pooled channel descriptors) followed by spatial attention
    (a bias-free 7x7 conv over the channel-wise avg/max maps). Both gates
    are sigmoids multiplied onto the input.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, expressed as 1x1 convs (NO BIAS,
        # matching the checkpoint's parameter names/shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv producing the spatial attention logits (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: squeeze to 1x1 with avg and max pooling, push
        # both descriptors through the shared MLP, gate with their sum.
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate

        # Spatial attention: collapse channels via mean/max, concatenate,
        # and gate every spatial location with the 7x7-conv sigmoid map.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        location_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * location_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All variants stack Conv+BN+ReLU+MaxPool stages with a CBAM attention
    block after every stage:

    * ``'f'`` -- 3 stages, 16/32/64 channels, FC head 256/128
    * ``'c'`` -- 3 stages, 32/64/128 channels, FC head 512/256
    * ``'q'`` -- 4 stages, 64/128/256/512 channels, FC head 1024/512

    The flattened sizes (``C * 28 * 28`` after 3 pools, ``C * 14 * 14``
    after 4, ``64 * 56 * 56`` after the edge branch's 2 pools) all imply a
    224x224 input -- TODO confirm against the preprocessing pipeline.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer (default 6, matching the
            published checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: previously hard-coded 6 outputs, ignoring num_classes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown model_type previously fell through and
            # surfaced later as a confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+pool stages on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout twice, then the output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB input batch; spatial size must match the FC head sizes
                chosen in ``__init__`` (224x224 for the checkpoint).
            edge_x: Optional single-channel edge map; when given (and
                compatible), its features are fused before classification.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the final attention-weighted feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM block; 'f'/'c' have 3 stages, 'q' has 4, so iterating
        # the attention modules covers both layouts.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Penultimate features: first Linear + ReLU + Dropout of the head.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared bias-free 1x1-conv MLP over the
    avg- and max-pooled channel descriptors) followed by spatial attention
    (a bias-free 7x7 conv over the channel-wise avg/max maps). Both gates
    are sigmoids multiplied onto the input.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, expressed as 1x1 convs (NO BIAS,
        # matching the checkpoint's parameter names/shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv producing the spatial attention logits (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: squeeze to 1x1 with avg and max pooling, push
        # both descriptors through the shared MLP, gate with their sum.
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate

        # Spatial attention: collapse channels via mean/max, concatenate,
        # and gate every spatial location with the 7x7-conv sigmoid map.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        location_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * location_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All variants stack Conv+BN+ReLU+MaxPool stages with a CBAM attention
    block after every stage:

    * ``'f'`` -- 3 stages, 16/32/64 channels, FC head 256/128
    * ``'c'`` -- 3 stages, 32/64/128 channels, FC head 512/256
    * ``'q'`` -- 4 stages, 64/128/256/512 channels, FC head 1024/512

    The flattened sizes (``C * 28 * 28`` after 3 pools, ``C * 14 * 14``
    after 4, ``64 * 56 * 56`` after the edge branch's 2 pools) all imply a
    224x224 input -- TODO confirm against the preprocessing pipeline.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer (default 6, matching the
            published checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: previously hard-coded 6 outputs, ignoring num_classes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown model_type previously fell through and
            # surfaced later as a confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+pool stages on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout twice, then the output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB input batch; spatial size must match the FC head sizes
                chosen in ``__init__`` (224x224 for the checkpoint).
            edge_x: Optional single-channel edge map; when given (and
                compatible), its features are fused before classification.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the final attention-weighted feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM block; 'f'/'c' have 3 stages, 'q' has 4, so iterating
        # the attention modules covers both layouts.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Penultimate features: first Linear + ReLU + Dropout of the head.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared bias-free 1x1-conv MLP over the
    avg- and max-pooled channel descriptors) followed by spatial attention
    (a bias-free 7x7 conv over the channel-wise avg/max maps). Both gates
    are sigmoids multiplied onto the input.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, expressed as 1x1 convs (NO BIAS,
        # matching the checkpoint's parameter names/shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv producing the spatial attention logits (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: squeeze to 1x1 with avg and max pooling, push
        # both descriptors through the shared MLP, gate with their sum.
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate

        # Spatial attention: collapse channels via mean/max, concatenate,
        # and gate every spatial location with the 7x7-conv sigmoid map.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        location_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * location_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All variants stack Conv+BN+ReLU+MaxPool stages with a CBAM attention
    block after every stage:

    * ``'f'`` -- 3 stages, 16/32/64 channels, FC head 256/128
    * ``'c'`` -- 3 stages, 32/64/128 channels, FC head 512/256
    * ``'q'`` -- 4 stages, 64/128/256/512 channels, FC head 1024/512

    The flattened sizes (``C * 28 * 28`` after 3 pools, ``C * 14 * 14``
    after 4, ``64 * 56 * 56`` after the edge branch's 2 pools) all imply a
    224x224 input -- TODO confirm against the preprocessing pipeline.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer (default 6, matching the
            published checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: previously hard-coded 6 outputs, ignoring num_classes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown model_type previously fell through and
            # surfaced later as a confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+pool stages on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout twice, then the output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB input batch; spatial size must match the FC head sizes
                chosen in ``__init__`` (224x224 for the checkpoint).
            edge_x: Optional single-channel edge map; when given (and
                compatible), its features are fused before classification.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the final attention-weighted feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM block; 'f'/'c' have 3 stages, 'q' has 4, so iterating
        # the attention modules covers both layouts.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Penultimate features: first Linear + ReLU + Dropout of the head.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared bias-free 1x1-conv MLP over the
    avg- and max-pooled channel descriptors) followed by spatial attention
    (a bias-free 7x7 conv over the channel-wise avg/max maps). Both gates
    are sigmoids multiplied onto the input.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Shared MLP for channel attention, expressed as 1x1 convs (NO BIAS,
        # matching the checkpoint's parameter names/shapes).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv producing the spatial attention logits (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: squeeze to 1x1 with avg and max pooling, push
        # both descriptors through the shared MLP, gate with their sum.
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate

        # Spatial attention: collapse channels via mean/max, concatenate,
        # and gate every spatial location with the 7x7-conv sigmoid map.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        location_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * location_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All variants stack Conv+BN+ReLU+MaxPool stages with a CBAM attention
    block after every stage:

    * ``'f'`` -- 3 stages, 16/32/64 channels, FC head 256/128
    * ``'c'`` -- 3 stages, 32/64/128 channels, FC head 512/256
    * ``'q'`` -- 4 stages, 64/128/256/512 channels, FC head 1024/512

    The flattened sizes (``C * 28 * 28`` after 3 pools, ``C * 14 * 14``
    after 4, ``64 * 56 * 56`` after the edge branch's 2 pools) all imply a
    224x224 input -- TODO confirm against the preprocessing pipeline.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Size of the output layer (default 6, matching the
            published checkpoint).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: previously hard-coded 6 outputs, ignoring num_classes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown model_type previously fell through and
            # surfaced later as a confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: two Conv+pool stages on a 1-channel input.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout twice, then the output layer.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB input batch; spatial size must match the FC head sizes
                chosen in ``__init__`` (224x224 for the checkpoint).
            edge_x: Optional single-channel edge map; when given (and
                compatible), its features are fused before classification.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the final attention-weighted feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM block; 'f'/'c' have 3 stages, 'q' has 4, so iterating
        # the attention modules covers both layouts.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Penultimate features: first Linear + ReLU + Dropout of the head.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the released checkpoint layout.

    Channel attention applies a shared two-layer 1x1-conv MLP (no bias) to
    both the global average- and max-pooled descriptors; spatial attention
    applies a single bias-free 7x7 conv to the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP for channel attention, kept as bias-free 1x1 convs so
        # parameter names/shapes line up with the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv for spatial attention (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * gate

        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        gate = torch.sigmoid(self.spatial_attention(stats))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier with CBAM attention.

    Three size variants share the same stage layout (Conv+BN+ReLU+MaxPool,
    each stage halving the spatial dims), with a CBAM block after every stage:

        'f' (fast):    3 stages, 16/32/64 channels
        'c' (compact): 3 stages, 32/64/128 channels
        'q' (quality): 4 stages, 64/128/256/512 channels

    An optional edge branch (1-channel input) produces 128 extra features
    that are fused with the main features by a separate combined classifier.

    NOTE: the fully-connected layer sizes assume 224x224 inputs.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the variant selected by *model_type*.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes (default 6, matching the
                released checkpoints).

        Raises:
            ValueError: if *model_type* is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (input channels first) and hidden
        # classifier widths.  Module construction order below matches the
        # original literal Sequentials, so state_dict keys are unchanged.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        hidden_sizes = {
            'f': [256, 128],
            'c': [512, 256],
            'q': [1024, 512],
        }
        if model_type not in stage_channels:
            # Fail fast with a clear error instead of the NameError the old
            # code raised when reaching fc_input below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))

        channels = stage_channels[model_type]
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        # Each stage halves the spatial dims: 224 -> 112 -> 56 -> 28 (-> 14).
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial
        # BUGFIX: the output width was hard-coded to 6, silently ignoring
        # the num_classes argument.
        fc_sizes = hidden_sizes[model_type] + [num_classes]

        # Edge detection branch (expects a 1-channel map; two 2x2 pools
        # reduce 224 -> 56, matching edge_fc's input size).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout (x2) then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: image batch, presumably (B, 3, 224, 224) — the fc-layer sizes
                above assume 224x224 inputs.
            edge_x: optional (B, 1, H, W) edge-map batch.

        Returns:
            (output, attention_map): class logits of shape (B, num_classes)
            and the attention-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.  This
        # replaces the duplicated per-variant branches of the original code.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size for edge_fc), fall back to
                # the main head rather than crashing.
                output = self.classifier[3:](features)
        else:
            # No edge input: finish the main head (ReLU+Dropout+Linear).
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the released checkpoint layout.

    Channel attention applies a shared two-layer 1x1-conv MLP (no bias) to
    both the global average- and max-pooled descriptors; spatial attention
    applies a single bias-free 7x7 conv to the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP for channel attention, kept as bias-free 1x1 convs so
        # parameter names/shapes line up with the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv for spatial attention (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * gate

        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        gate = torch.sigmoid(self.spatial_attention(stats))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier with CBAM attention.

    Three size variants share the same stage layout (Conv+BN+ReLU+MaxPool,
    each stage halving the spatial dims), with a CBAM block after every stage:

        'f' (fast):    3 stages, 16/32/64 channels
        'c' (compact): 3 stages, 32/64/128 channels
        'q' (quality): 4 stages, 64/128/256/512 channels

    An optional edge branch (1-channel input) produces 128 extra features
    that are fused with the main features by a separate combined classifier.

    NOTE: the fully-connected layer sizes assume 224x224 inputs.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the variant selected by *model_type*.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes (default 6, matching the
                released checkpoints).

        Raises:
            ValueError: if *model_type* is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (input channels first) and hidden
        # classifier widths.  Module construction order below matches the
        # original literal Sequentials, so state_dict keys are unchanged.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        hidden_sizes = {
            'f': [256, 128],
            'c': [512, 256],
            'q': [1024, 512],
        }
        if model_type not in stage_channels:
            # Fail fast with a clear error instead of the NameError the old
            # code raised when reaching fc_input below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))

        channels = stage_channels[model_type]
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        # Each stage halves the spatial dims: 224 -> 112 -> 56 -> 28 (-> 14).
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial
        # BUGFIX: the output width was hard-coded to 6, silently ignoring
        # the num_classes argument.
        fc_sizes = hidden_sizes[model_type] + [num_classes]

        # Edge detection branch (expects a 1-channel map; two 2x2 pools
        # reduce 224 -> 56, matching edge_fc's input size).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout (x2) then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: image batch, presumably (B, 3, 224, 224) — the fc-layer sizes
                above assume 224x224 inputs.
            edge_x: optional (B, 1, H, W) edge-map batch.

        Returns:
            (output, attention_map): class logits of shape (B, num_classes)
            and the attention-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.  This
        # replaces the duplicated per-variant branches of the original code.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size for edge_fc), fall back to
                # the main head rather than crashing.
                output = self.classifier[3:](features)
        else:
            # No edge input: finish the main head (ReLU+Dropout+Linear).
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the released checkpoint layout.

    Channel attention applies a shared two-layer 1x1-conv MLP (no bias) to
    both the global average- and max-pooled descriptors; spatial attention
    applies a single bias-free 7x7 conv to the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP for channel attention, kept as bias-free 1x1 convs so
        # parameter names/shapes line up with the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv for spatial attention (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * gate

        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        gate = torch.sigmoid(self.spatial_attention(stats))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier with CBAM attention.

    Three size variants share the same stage layout (Conv+BN+ReLU+MaxPool,
    each stage halving the spatial dims), with a CBAM block after every stage:

        'f' (fast):    3 stages, 16/32/64 channels
        'c' (compact): 3 stages, 32/64/128 channels
        'q' (quality): 4 stages, 64/128/256/512 channels

    An optional edge branch (1-channel input) produces 128 extra features
    that are fused with the main features by a separate combined classifier.

    NOTE: the fully-connected layer sizes assume 224x224 inputs.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the variant selected by *model_type*.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes (default 6, matching the
                released checkpoints).

        Raises:
            ValueError: if *model_type* is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (input channels first) and hidden
        # classifier widths.  Module construction order below matches the
        # original literal Sequentials, so state_dict keys are unchanged.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        hidden_sizes = {
            'f': [256, 128],
            'c': [512, 256],
            'q': [1024, 512],
        }
        if model_type not in stage_channels:
            # Fail fast with a clear error instead of the NameError the old
            # code raised when reaching fc_input below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))

        channels = stage_channels[model_type]
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        # Each stage halves the spatial dims: 224 -> 112 -> 56 -> 28 (-> 14).
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial
        # BUGFIX: the output width was hard-coded to 6, silently ignoring
        # the num_classes argument.
        fc_sizes = hidden_sizes[model_type] + [num_classes]

        # Edge detection branch (expects a 1-channel map; two 2x2 pools
        # reduce 224 -> 56, matching edge_fc's input size).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout (x2) then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: image batch, presumably (B, 3, 224, 224) — the fc-layer sizes
                above assume 224x224 inputs.
            edge_x: optional (B, 1, H, W) edge-map batch.

        Returns:
            (output, attention_map): class logits of shape (B, num_classes)
            and the attention-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.  This
        # replaces the duplicated per-variant branches of the original code.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size for edge_fc), fall back to
                # the main head rather than crashing.
                output = self.classifier[3:](features)
        else:
            # No edge input: finish the main head (ReLU+Dropout+Linear).
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the released checkpoint layout.

    Channel attention applies a shared two-layer 1x1-conv MLP (no bias) to
    both the global average- and max-pooled descriptors; spatial attention
    applies a single bias-free 7x7 conv to the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP for channel attention, kept as bias-free 1x1 convs so
        # parameter names/shapes line up with the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv for spatial attention (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * gate

        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        gate = torch.sigmoid(self.spatial_attention(stats))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier with CBAM attention.

    Three size variants share the same stage layout (Conv+BN+ReLU+MaxPool,
    each stage halving the spatial dims), with a CBAM block after every stage:

        'f' (fast):    3 stages, 16/32/64 channels
        'c' (compact): 3 stages, 32/64/128 channels
        'q' (quality): 4 stages, 64/128/256/512 channels

    An optional edge branch (1-channel input) produces 128 extra features
    that are fused with the main features by a separate combined classifier.

    NOTE: the fully-connected layer sizes assume 224x224 inputs.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the variant selected by *model_type*.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes (default 6, matching the
                released checkpoints).

        Raises:
            ValueError: if *model_type* is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression (input channels first) and hidden
        # classifier widths.  Module construction order below matches the
        # original literal Sequentials, so state_dict keys are unchanged.
        stage_channels = {
            'f': [3, 16, 32, 64],
            'c': [3, 32, 64, 128],
            'q': [3, 64, 128, 256, 512],
        }
        hidden_sizes = {
            'f': [256, 128],
            'c': [512, 256],
            'q': [1024, 512],
        }
        if model_type not in stage_channels:
            # Fail fast with a clear error instead of the NameError the old
            # code raised when reaching fc_input below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))

        channels = stage_channels[model_type]
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        # Each stage halves the spatial dims: 224 -> 112 -> 56 -> 28 (-> 14).
        spatial = 224 // (2 ** (len(channels) - 1))
        fc_input = channels[-1] * spatial * spatial
        # BUGFIX: the output width was hard-coded to 6, silently ignoring
        # the num_classes argument.
        fc_sizes = hidden_sizes[model_type] + [num_classes]

        # Edge detection branch (expects a 1-channel map; two 2x2 pools
        # reduce 224 -> 56, matching edge_fc's input size).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: Linear+ReLU+Dropout (x2) then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: image batch, presumably (B, 3, 224, 224) — the fc-layer sizes
                above assume 224x224 inputs.
            edge_x: optional (B, 1, H, W) edge-map batch.

        Returns:
            (output, attention_map): class logits of shape (B, num_classes)
            and the attention-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.  This
        # replaces the duplicated per-variant branches of the original code.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size for edge_fc), fall back to
                # the main head rather than crashing.
                output = self.classifier[3:](features)
        else:
            # No edge input: finish the main head (ReLU+Dropout+Linear).
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the released checkpoint layout.

    Channel attention applies a shared two-layer 1x1-conv MLP (no bias) to
    both the global average- and max-pooled descriptors; spatial attention
    applies a single bias-free 7x7 conv to the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP for channel attention, kept as bias-free 1x1 convs so
        # parameter names/shapes line up with the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv for spatial attention (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled = (F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1))
        gate = torch.sigmoid(sum(self.channel_attention(p) for p in pooled))
        x = x * gate

        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        stats = torch.cat((mean_map, max_map), dim=1)
        gate = torch.sigmoid(self.spatial_attention(stats))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier network matching the released checkpoint.

    Variants are selected by ``model_type``:

    * ``'f'``: 3 conv stages (16/32/64 ch),     FC 256/128
    * ``'c'``: 3 conv stages (32/64/128 ch),    FC 512/256
    * ``'q'``: 4 conv stages (64/128/256/512),  FC 1024/512

    Each stage is Conv(3x3)+BN+ReLU+MaxPool(2) followed by a CBAM attention
    module. The flattened sizes (28x28 / 14x14) assume a 224x224 input —
    TODO confirm against the released preprocessing.

    An optional single-channel edge image may be passed to :meth:`forward`;
    its features are fused with the main features by ``combined_classifier``.

    Args:
        model_type: One of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes (default 6, the value the
            released checkpoint was trained with).

    Raises:
        ValueError: If ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: Conv+BN+ReLU+Pool pattern (4 modules per stage);
        # forward() slices this Sequential in groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width was hard-coded to 6; honor num_classes
            # (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (1-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice — TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() uses [0:3] for features, [3:] for logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: Image batch of shape (B, 3, H, W); the FC sizes assume
                H = W = 224.
            edge_x: Optional edge-map batch of shape (B, 1, H, W). When
                given (and shaped as expected) edge features are fused
                with the main features.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is
            the feature map produced after the last attention stage.
        """
        # Backbone: each stage is conv_layers[4k:4k+4] then attention k.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — constructor guarantees a valid model_type
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge fusion: a mismatched edge input falls back
            # to the plain classifier rather than aborting inference.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the released checkpoint.

    Channel attention: a shared two-layer 1x1-conv bottleneck MLP applied to
    both the average- and max-pooled channel descriptors, summed and gated
    with a sigmoid. Spatial attention: a single 7x7 conv over the
    channel-wise mean/max maps. All convolutions are bias-free, matching the
    checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP for the channel branch (Conv2d 1x1, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial branch: one 7x7 conv over the [mean, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of MLP(avg-pool) + MLP(max-pool).
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # Spatial gate: sigmoid of a 7x7 conv over channel mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier network matching the released checkpoint.

    Variants are selected by ``model_type``:

    * ``'f'``: 3 conv stages (16/32/64 ch),     FC 256/128
    * ``'c'``: 3 conv stages (32/64/128 ch),    FC 512/256
    * ``'q'``: 4 conv stages (64/128/256/512),  FC 1024/512

    Each stage is Conv(3x3)+BN+ReLU+MaxPool(2) followed by a CBAM attention
    module. The flattened sizes (28x28 / 14x14) assume a 224x224 input —
    TODO confirm against the released preprocessing.

    An optional single-channel edge image may be passed to :meth:`forward`;
    its features are fused with the main features by ``combined_classifier``.

    Args:
        model_type: One of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes (default 6, the value the
            released checkpoint was trained with).

    Raises:
        ValueError: If ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: Conv+BN+ReLU+Pool pattern (4 modules per stage);
        # forward() slices this Sequential in groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width was hard-coded to 6; honor num_classes
            # (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (1-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice — TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() uses [0:3] for features, [3:] for logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: Image batch of shape (B, 3, H, W); the FC sizes assume
                H = W = 224.
            edge_x: Optional edge-map batch of shape (B, 1, H, W). When
                given (and shaped as expected) edge features are fused
                with the main features.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is
            the feature map produced after the last attention stage.
        """
        # Backbone: each stage is conv_layers[4k:4k+4] then attention k.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — constructor guarantees a valid model_type
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge fusion: a mismatched edge input falls back
            # to the plain classifier rather than aborting inference.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the released checkpoint.

    Channel attention: a shared two-layer 1x1-conv bottleneck MLP applied to
    both the average- and max-pooled channel descriptors, summed and gated
    with a sigmoid. Spatial attention: a single 7x7 conv over the
    channel-wise mean/max maps. All convolutions are bias-free, matching the
    checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP for the channel branch (Conv2d 1x1, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial branch: one 7x7 conv over the [mean, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of MLP(avg-pool) + MLP(max-pool).
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # Spatial gate: sigmoid of a 7x7 conv over channel mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier network matching the released checkpoint.

    Variants are selected by ``model_type``:

    * ``'f'``: 3 conv stages (16/32/64 ch),     FC 256/128
    * ``'c'``: 3 conv stages (32/64/128 ch),    FC 512/256
    * ``'q'``: 4 conv stages (64/128/256/512),  FC 1024/512

    Each stage is Conv(3x3)+BN+ReLU+MaxPool(2) followed by a CBAM attention
    module. The flattened sizes (28x28 / 14x14) assume a 224x224 input —
    TODO confirm against the released preprocessing.

    An optional single-channel edge image may be passed to :meth:`forward`;
    its features are fused with the main features by ``combined_classifier``.

    Args:
        model_type: One of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes (default 6, the value the
            released checkpoint was trained with).

    Raises:
        ValueError: If ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: Conv+BN+ReLU+Pool pattern (4 modules per stage);
        # forward() slices this Sequential in groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width was hard-coded to 6; honor num_classes
            # (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (1-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice — TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() uses [0:3] for features, [3:] for logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: Image batch of shape (B, 3, H, W); the FC sizes assume
                H = W = 224.
            edge_x: Optional edge-map batch of shape (B, 1, H, W). When
                given (and shaped as expected) edge features are fused
                with the main features.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is
            the feature map produced after the last attention stage.
        """
        # Backbone: each stage is conv_layers[4k:4k+4] then attention k.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — constructor guarantees a valid model_type
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge fusion: a mismatched edge input falls back
            # to the plain classifier rather than aborting inference.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the released checkpoint.

    Channel attention: a shared two-layer 1x1-conv bottleneck MLP applied to
    both the average- and max-pooled channel descriptors, summed and gated
    with a sigmoid. Spatial attention: a single 7x7 conv over the
    channel-wise mean/max maps. All convolutions are bias-free, matching the
    checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP for the channel branch (Conv2d 1x1, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial branch: one 7x7 conv over the [mean, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of MLP(avg-pool) + MLP(max-pool).
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # Spatial gate: sigmoid of a 7x7 conv over channel mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier network matching the released checkpoint.

    Variants are selected by ``model_type``:

    * ``'f'``: 3 conv stages (16/32/64 ch),     FC 256/128
    * ``'c'``: 3 conv stages (32/64/128 ch),    FC 512/256
    * ``'q'``: 4 conv stages (64/128/256/512),  FC 1024/512

    Each stage is Conv(3x3)+BN+ReLU+MaxPool(2) followed by a CBAM attention
    module. The flattened sizes (28x28 / 14x14) assume a 224x224 input —
    TODO confirm against the released preprocessing.

    An optional single-channel edge image may be passed to :meth:`forward`;
    its features are fused with the main features by ``combined_classifier``.

    Args:
        model_type: One of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes (default 6, the value the
            released checkpoint was trained with).

    Raises:
        ValueError: If ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: Conv+BN+ReLU+Pool pattern (4 modules per stage);
        # forward() slices this Sequential in groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width was hard-coded to 6; honor num_classes
            # (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (1-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice — TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() uses [0:3] for features, [3:] for logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: Image batch of shape (B, 3, H, W); the FC sizes assume
                H = W = 224.
            edge_x: Optional edge-map batch of shape (B, 1, H, W). When
                given (and shaped as expected) edge features are fused
                with the main features.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is
            the feature map produced after the last attention stage.
        """
        # Backbone: each stage is conv_layers[4k:4k+4] then attention k.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — constructor guarantees a valid model_type
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge fusion: a mismatched edge input falls back
            # to the plain classifier rather than aborting inference.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the released checkpoint.

    Channel attention: a shared two-layer 1x1-conv bottleneck MLP applied to
    both the average- and max-pooled channel descriptors, summed and gated
    with a sigmoid. Spatial attention: a single 7x7 conv over the
    channel-wise mean/max maps. All convolutions are bias-free, matching the
    checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared bottleneck MLP for the channel branch (Conv2d 1x1, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial branch: one 7x7 conv over the [mean, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of MLP(avg-pool) + MLP(max-pool).
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # Spatial gate: sigmoid of a 7x7 conv over channel mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier network matching the released checkpoint.

    Variants are selected by ``model_type``:

    * ``'f'``: 3 conv stages (16/32/64 ch),     FC 256/128
    * ``'c'``: 3 conv stages (32/64/128 ch),    FC 512/256
    * ``'q'``: 4 conv stages (64/128/256/512),  FC 1024/512

    Each stage is Conv(3x3)+BN+ReLU+MaxPool(2) followed by a CBAM attention
    module. The flattened sizes (28x28 / 14x14) assume a 224x224 input —
    TODO confirm against the released preprocessing.

    An optional single-channel edge image may be passed to :meth:`forward`;
    its features are fused with the main features by ``combined_classifier``.

    Args:
        model_type: One of ``'f'``, ``'c'``, ``'q'``.
        num_classes: Number of output classes (default 6, the value the
            released checkpoint was trained with).

    Raises:
        ValueError: If ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: Conv+BN+ReLU+Pool pattern (4 modules per stage);
        # forward() slices this Sequential in groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width was hard-coded to 6; honor num_classes
            # (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (1-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice — TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() uses [0:3] for features, [3:] for logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images.

        Args:
            x: Image batch of shape (B, 3, H, W); the FC sizes assume
                H = W = 224.
            edge_x: Optional edge-map batch of shape (B, 1, H, W). When
                given (and shaped as expected) edge features are fused
                with the main features.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is
            the feature map produced after the last attention stage.
        """
        # Backbone: each stage is conv_layers[4k:4k+4] then attention k.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — constructor guarantees a valid model_type
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort edge fusion: a mismatched edge input falls back
            # to the plain classifier rather than aborting inference.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, weight-compatible with the
    released checkpoint (bias-free 1x1 convs for the channel MLP, one
    bias-free 7x7 conv for the spatial gate)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions, matching the
        # checkpoint's parameter layout (no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single 7x7 convolution over the stacked
        # [mean, max] channel maps (no bias, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        squeezed = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        logits = sum(self.channel_attention(s) for s in squeezed)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: 7x7 conv over per-pixel mean/max features ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layout-compatible with the released checkpoints.

    Variants (selected by ``model_type``):
      * 'f': 3 conv stages (16/32/64 ch),   classifier hidden sizes 256/128
      * 'c': 3 conv stages (32/64/128 ch),  classifier hidden sizes 512/256
      * 'q': 4 conv stages (64/128/256/512 ch), hidden sizes 1024/512

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module.  Inputs are assumed to be 224x224 RGB images (the
    flattened feature sizes are derived from that).  An optional
    single-channel edge map can be fused through a small conv branch and a
    combined classifier head.
    """

    # Per-variant stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _HIDDEN_SIZES = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Number of output classes.  Defaults to 6, matching
                the released checkpoints; other values generalize the heads
                (the original code stored but ignored this parameter).

        Raises:
            ValueError: if ``model_type`` is unknown (the original code fell
                through silently and later crashed with a NameError on
                ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"Unsupported model_type: {model_type!r}")

        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]

        # Conv backbone, flattened into one Sequential so checkpoint keys
        # (conv_layers.0, conv_layers.1, ...) line up with the released
        # weights: Conv+BN+ReLU+Pool per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # 224 input halved once per stage: 3 stages -> 28, 4 stages -> 14.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [*self._HIDDEN_SIZES[model_type], num_classes]

        # Edge detection branch (expects a 1x224x224 edge map: the two /2
        # pools in forward() yield 64x56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: Optional (B, 1, 224, 224) edge-map batch.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the feature map produced after the final attention module.
        """
        # Each backbone stage = 4 consecutive conv_layers modules followed
        # by its CBAM module; iterating the attention list covers both the
        # 3-stage and 4-stage variants with one code path.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) yields the
        # feature vector that the edge branch is fused with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)

                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)

                edge_features = self.edge_fc(edge.view(edge.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge input has an unexpected
                # size, fall back to the main head instead of failing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, weight-compatible with the
    released checkpoint (bias-free 1x1 convs for the channel MLP, one
    bias-free 7x7 conv for the spatial gate)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions, matching the
        # checkpoint's parameter layout (no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single 7x7 convolution over the stacked
        # [mean, max] channel maps (no bias, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        squeezed = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        logits = sum(self.channel_attention(s) for s in squeezed)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: 7x7 conv over per-pixel mean/max features ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layout-compatible with the released checkpoints.

    Variants (selected by ``model_type``):
      * 'f': 3 conv stages (16/32/64 ch),   classifier hidden sizes 256/128
      * 'c': 3 conv stages (32/64/128 ch),  classifier hidden sizes 512/256
      * 'q': 4 conv stages (64/128/256/512 ch), hidden sizes 1024/512

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module.  Inputs are assumed to be 224x224 RGB images (the
    flattened feature sizes are derived from that).  An optional
    single-channel edge map can be fused through a small conv branch and a
    combined classifier head.
    """

    # Per-variant stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _HIDDEN_SIZES = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Number of output classes.  Defaults to 6, matching
                the released checkpoints; other values generalize the heads
                (the original code stored but ignored this parameter).

        Raises:
            ValueError: if ``model_type`` is unknown (the original code fell
                through silently and later crashed with a NameError on
                ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"Unsupported model_type: {model_type!r}")

        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]

        # Conv backbone, flattened into one Sequential so checkpoint keys
        # (conv_layers.0, conv_layers.1, ...) line up with the released
        # weights: Conv+BN+ReLU+Pool per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # 224 input halved once per stage: 3 stages -> 28, 4 stages -> 14.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [*self._HIDDEN_SIZES[model_type], num_classes]

        # Edge detection branch (expects a 1x224x224 edge map: the two /2
        # pools in forward() yield 64x56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: Optional (B, 1, 224, 224) edge-map batch.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the feature map produced after the final attention module.
        """
        # Each backbone stage = 4 consecutive conv_layers modules followed
        # by its CBAM module; iterating the attention list covers both the
        # 3-stage and 4-stage variants with one code path.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) yields the
        # feature vector that the edge branch is fused with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)

                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)

                edge_features = self.edge_fc(edge.view(edge.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge input has an unexpected
                # size, fall back to the main head instead of failing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, weight-compatible with the
    released checkpoint (bias-free 1x1 convs for the channel MLP, one
    bias-free 7x7 conv for the spatial gate)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions, matching the
        # checkpoint's parameter layout (no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single 7x7 convolution over the stacked
        # [mean, max] channel maps (no bias, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        squeezed = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        logits = sum(self.channel_attention(s) for s in squeezed)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: 7x7 conv over per-pixel mean/max features ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layout-compatible with the released checkpoints.

    Variants (selected by ``model_type``):
      * 'f': 3 conv stages (16/32/64 ch),   classifier hidden sizes 256/128
      * 'c': 3 conv stages (32/64/128 ch),  classifier hidden sizes 512/256
      * 'q': 4 conv stages (64/128/256/512 ch), hidden sizes 1024/512

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module.  Inputs are assumed to be 224x224 RGB images (the
    flattened feature sizes are derived from that).  An optional
    single-channel edge map can be fused through a small conv branch and a
    combined classifier head.
    """

    # Per-variant stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _HIDDEN_SIZES = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Number of output classes.  Defaults to 6, matching
                the released checkpoints; other values generalize the heads
                (the original code stored but ignored this parameter).

        Raises:
            ValueError: if ``model_type`` is unknown (the original code fell
                through silently and later crashed with a NameError on
                ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"Unsupported model_type: {model_type!r}")

        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]

        # Conv backbone, flattened into one Sequential so checkpoint keys
        # (conv_layers.0, conv_layers.1, ...) line up with the released
        # weights: Conv+BN+ReLU+Pool per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # 224 input halved once per stage: 3 stages -> 28, 4 stages -> 14.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [*self._HIDDEN_SIZES[model_type], num_classes]

        # Edge detection branch (expects a 1x224x224 edge map: the two /2
        # pools in forward() yield 64x56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: Optional (B, 1, 224, 224) edge-map batch.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the feature map produced after the final attention module.
        """
        # Each backbone stage = 4 consecutive conv_layers modules followed
        # by its CBAM module; iterating the attention list covers both the
        # 3-stage and 4-stage variants with one code path.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) yields the
        # feature vector that the edge branch is fused with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)

                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)

                edge_features = self.edge_fc(edge.view(edge.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge input has an unexpected
                # size, fall back to the main head instead of failing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, weight-compatible with the
    released checkpoint (bias-free 1x1 convs for the channel MLP, one
    bias-free 7x7 conv for the spatial gate)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions, matching the
        # checkpoint's parameter layout (no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single 7x7 convolution over the stacked
        # [mean, max] channel maps (no bias, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        squeezed = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        logits = sum(self.channel_attention(s) for s in squeezed)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: 7x7 conv over per-pixel mean/max features ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layout-compatible with the released checkpoints.

    Variants (selected by ``model_type``):
      * 'f': 3 conv stages (16/32/64 ch),   classifier hidden sizes 256/128
      * 'c': 3 conv stages (32/64/128 ch),  classifier hidden sizes 512/256
      * 'q': 4 conv stages (64/128/256/512 ch), hidden sizes 1024/512

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module.  Inputs are assumed to be 224x224 RGB images (the
    flattened feature sizes are derived from that).  An optional
    single-channel edge map can be fused through a small conv branch and a
    combined classifier head.
    """

    # Per-variant stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _HIDDEN_SIZES = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Number of output classes.  Defaults to 6, matching
                the released checkpoints; other values generalize the heads
                (the original code stored but ignored this parameter).

        Raises:
            ValueError: if ``model_type`` is unknown (the original code fell
                through silently and later crashed with a NameError on
                ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"Unsupported model_type: {model_type!r}")

        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]

        # Conv backbone, flattened into one Sequential so checkpoint keys
        # (conv_layers.0, conv_layers.1, ...) line up with the released
        # weights: Conv+BN+ReLU+Pool per stage.
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # 224 input halved once per stage: 3 stages -> 28, 4 stages -> 14.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = [*self._HIDDEN_SIZES[model_type], num_classes]

        # Edge detection branch (expects a 1x224x224 edge map: the two /2
        # pools in forward() yield 64x56x56 before edge_fc).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: Optional (B, 1, 224, 224) edge-map batch.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the feature map produced after the final attention module.
        """
        # Each backbone stage = 4 consecutive conv_layers modules followed
        # by its CBAM module; iterating the attention list covers both the
        # 3-stage and 4-stage variants with one code path.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) yields the
        # feature vector that the edge branch is fused with.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)

                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)

                edge_features = self.edge_fc(edge.view(edge.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge input has an unexpected
                # size, fall back to the main head instead of failing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, weight-compatible with the
    released checkpoint (bias-free 1x1 convs for the channel MLP, one
    bias-free 7x7 conv for the spatial gate)."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions, matching the
        # checkpoint's parameter layout (no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single 7x7 convolution over the stacked
        # [mean, max] channel maps (no bias, as in the checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        squeezed = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        logits = sum(self.channel_attention(s) for s in squeezed)
        x = x * torch.sigmoid(logits)

        # --- spatial attention: 7x7 conv over per-pixel mean/max features ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module applied after every stage:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,       FC 256 -> 128
      * ``'c'`` (classic): 3 stages, 32/64/128 channels,      FC 512 -> 256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 channels, FC 1024 -> 512

    An optional edge branch (two 3x3 convs over a single-channel edge map) can
    be fused with the intermediate FC features by ``combined_classifier``.

    NOTE(review): the hard-coded flatten sizes (28*28 or 14*14 spatial for the
    main path, 56*56 for the edge branch) assume a 224x224 input image —
    confirm against the preprocessing pipeline.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'`` (see above).
        num_classes: number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC width honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast with a clear error instead of a confusing
            # NameError on fc_input/fc_sizes further down.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout yields the intermediate "features".
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features concatenated with edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: input image batch (the FC sizes assume 224x224 inputs).
            edge_x: optional single-channel edge-map batch; when given (and
                the edge branch succeeds) its features are fused in.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            feature map produced after the last attention module.
        """
        # Each conv stage occupies four Sequential slots (Conv, BN, ReLU,
        # Pool), so stages are addressed by slicing in steps of 4.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the shared intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main classifier if
                # the edge branch fails (e.g. unexpected edge-map size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free so the state
    dict lines up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, implemented as 1x1 convs (no bias).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv (no bias) that produces the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: squeeze with both avg and max pooling,
        # run both descriptors through the shared MLP, then gate.
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- Spatial attention: channel-wise mean/max maps -> 7x7 conv gate.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module applied after every stage:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,       FC 256 -> 128
      * ``'c'`` (classic): 3 stages, 32/64/128 channels,      FC 512 -> 256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 channels, FC 1024 -> 512

    An optional edge branch (two 3x3 convs over a single-channel edge map) can
    be fused with the intermediate FC features by ``combined_classifier``.

    NOTE(review): the hard-coded flatten sizes (28*28 or 14*14 spatial for the
    main path, 56*56 for the edge branch) assume a 224x224 input image —
    confirm against the preprocessing pipeline.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'`` (see above).
        num_classes: number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC width honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast with a clear error instead of a confusing
            # NameError on fc_input/fc_sizes further down.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout yields the intermediate "features".
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features concatenated with edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: input image batch (the FC sizes assume 224x224 inputs).
            edge_x: optional single-channel edge-map batch; when given (and
                the edge branch succeeds) its features are fused in.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            feature map produced after the last attention module.
        """
        # Each conv stage occupies four Sequential slots (Conv, BN, ReLU,
        # Pool), so stages are addressed by slicing in steps of 4.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the shared intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main classifier if
                # the edge branch fails (e.g. unexpected edge-map size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free so the state
    dict lines up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, implemented as 1x1 convs (no bias).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv (no bias) that produces the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: squeeze with both avg and max pooling,
        # run both descriptors through the shared MLP, then gate.
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- Spatial attention: channel-wise mean/max maps -> 7x7 conv gate.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module applied after every stage:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,       FC 256 -> 128
      * ``'c'`` (classic): 3 stages, 32/64/128 channels,      FC 512 -> 256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 channels, FC 1024 -> 512

    An optional edge branch (two 3x3 convs over a single-channel edge map) can
    be fused with the intermediate FC features by ``combined_classifier``.

    NOTE(review): the hard-coded flatten sizes (28*28 or 14*14 spatial for the
    main path, 56*56 for the edge branch) assume a 224x224 input image —
    confirm against the preprocessing pipeline.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'`` (see above).
        num_classes: number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC width honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast with a clear error instead of a confusing
            # NameError on fc_input/fc_sizes further down.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout yields the intermediate "features".
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features concatenated with edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: input image batch (the FC sizes assume 224x224 inputs).
            edge_x: optional single-channel edge-map batch; when given (and
                the edge branch succeeds) its features are fused in.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            feature map produced after the last attention module.
        """
        # Each conv stage occupies four Sequential slots (Conv, BN, ReLU,
        # Pool), so stages are addressed by slicing in steps of 4.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the shared intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main classifier if
                # the edge branch fails (e.g. unexpected edge-map size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free so the state
    dict lines up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, implemented as 1x1 convs (no bias).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv (no bias) that produces the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: squeeze with both avg and max pooling,
        # run both descriptors through the shared MLP, then gate.
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- Spatial attention: channel-wise mean/max maps -> 7x7 conv gate.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module applied after every stage:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,       FC 256 -> 128
      * ``'c'`` (classic): 3 stages, 32/64/128 channels,      FC 512 -> 256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 channels, FC 1024 -> 512

    An optional edge branch (two 3x3 convs over a single-channel edge map) can
    be fused with the intermediate FC features by ``combined_classifier``.

    NOTE(review): the hard-coded flatten sizes (28*28 or 14*14 spatial for the
    main path, 56*56 for the edge branch) assume a 224x224 input image —
    confirm against the preprocessing pipeline.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'`` (see above).
        num_classes: number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC width honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast with a clear error instead of a confusing
            # NameError on fc_input/fc_sizes further down.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout yields the intermediate "features".
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features concatenated with edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: input image batch (the FC sizes assume 224x224 inputs).
            edge_x: optional single-channel edge-map batch; when given (and
                the edge branch succeeds) its features are fused in.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            feature map produced after the last attention module.
        """
        # Each conv stage occupies four Sequential slots (Conv, BN, ReLU,
        # Pool), so stages are addressed by slicing in steps of 4.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the shared intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main classifier if
                # the edge branch fails (e.g. unexpected edge-map size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free so the state
    dict lines up with the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, implemented as 1x1 convs (no bias).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv (no bias) that produces the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: squeeze with both avg and max pooling,
        # run both descriptors through the shared MLP, then gate.
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- Spatial attention: channel-wise mean/max maps -> 7x7 conv gate.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three variants share the same Conv+BN+ReLU+MaxPool stage pattern, with a
    CBAM attention module applied after every stage:

      * ``'f'`` (fast):    3 stages, 16/32/64 channels,       FC 256 -> 128
      * ``'c'`` (classic): 3 stages, 32/64/128 channels,      FC 512 -> 256
      * ``'q'`` (quality): 4 stages, 64/128/256/512 channels, FC 1024 -> 512

    An optional edge branch (two 3x3 convs over a single-channel edge map) can
    be fused with the intermediate FC features by ``combined_classifier``.

    NOTE(review): the hard-coded flatten sizes (28*28 or 14*14 spatial for the
    main path, 56*56 for the edge branch) assume a 224x224 input image —
    confirm against the preprocessing pipeline.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'`` (see above).
        num_classes: number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final FC width honours num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast with a clear error instead of a confusing
            # NameError on fc_input/fc_sizes further down.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first
        # Linear+ReLU+Dropout yields the intermediate "features".
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features concatenated with edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: input image batch (the FC sizes assume 224x224 inputs).
            edge_x: optional single-channel edge-map batch; when given (and
                the edge branch succeeds) its features are fused in.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            feature map produced after the last attention module.
        """
        # Each conv stage occupies four Sequential slots (Conv, BN, ReLU,
        # Pool), so stages are addressed by slicing in steps of 4.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout produces the shared intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main classifier if
                # the edge branch fails (e.g. unexpected edge-map size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention uses two bias-free 1x1 convolutions around a ReLU;
    spatial attention uses a single bias-free 7x7 convolution over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width for the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention as 1x1 convs (no bias, as stored in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, kernel_size=1, bias=False),
        )

        # Spatial attention: 7x7 conv over 2 pooled maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate from global avg- and max-pooled stats ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- Spatial attention: mask from per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        mask = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * mask


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Three variants share the same stage pattern (Conv+BN+ReLU+MaxPool, each
    stage followed by a CBAM attention module):

      * ``'f'``: 3 stages, 16 -> 32 -> 64 channels
      * ``'c'``: 3 stages, 32 -> 64 -> 128 channels
      * ``'q'``: 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (``64*28*28`` etc.) assume 224x224 input
    images (224 / 2**stages) -- TODO confirm against preprocessing.

    An optional single-channel edge-map input can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (checkpoint default: 6).

    Raises:
        ValueError: if ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the final width previously hard-coded 6, ignoring the
            # num_classes parameter (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUG FIX: an unknown model_type previously skipped layer
            # construction silently and crashed later (AttributeError /
            # unbound attention_map in forward). Fail fast instead.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge detection branch (input: 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps:
        # two 2x2 pools -> 56x56) -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: 3-channel image batch; spatial size must match the fc layout
               (224x224 for the released checkpoints -- TODO confirm).
            edge_x: optional 1-channel edge-map batch; when provided and
               shapes line up, edge features are fused via
               ``combined_classifier``; on any failure, the main head is
               used as a best-effort fallback.
        """
        # Each stage is a fixed slice of 4 layers (Conv, BN, ReLU, Pool)
        # followed by its CBAM module; the number of stages equals
        # len(self.attention_modules) (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; its output is the
        # feature vector that the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. unexpected edge-map size), use the main head only.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention uses two bias-free 1x1 convolutions around a ReLU;
    spatial attention uses a single bias-free 7x7 convolution over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width for the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention as 1x1 convs (no bias, as stored in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, kernel_size=1, bias=False),
        )

        # Spatial attention: 7x7 conv over 2 pooled maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate from global avg- and max-pooled stats ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- Spatial attention: mask from per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        mask = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * mask


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Three variants share the same stage pattern (Conv+BN+ReLU+MaxPool, each
    stage followed by a CBAM attention module):

      * ``'f'``: 3 stages, 16 -> 32 -> 64 channels
      * ``'c'``: 3 stages, 32 -> 64 -> 128 channels
      * ``'q'``: 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (``64*28*28`` etc.) assume 224x224 input
    images (224 / 2**stages) -- TODO confirm against preprocessing.

    An optional single-channel edge-map input can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (checkpoint default: 6).

    Raises:
        ValueError: if ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the final width previously hard-coded 6, ignoring the
            # num_classes parameter (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUG FIX: an unknown model_type previously skipped layer
            # construction silently and crashed later (AttributeError /
            # unbound attention_map in forward). Fail fast instead.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge detection branch (input: 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps:
        # two 2x2 pools -> 56x56) -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: 3-channel image batch; spatial size must match the fc layout
               (224x224 for the released checkpoints -- TODO confirm).
            edge_x: optional 1-channel edge-map batch; when provided and
               shapes line up, edge features are fused via
               ``combined_classifier``; on any failure, the main head is
               used as a best-effort fallback.
        """
        # Each stage is a fixed slice of 4 layers (Conv, BN, ReLU, Pool)
        # followed by its CBAM module; the number of stages equals
        # len(self.attention_modules) (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; its output is the
        # feature vector that the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. unexpected edge-map size), use the main head only.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention uses two bias-free 1x1 convolutions around a ReLU;
    spatial attention uses a single bias-free 7x7 convolution over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width for the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention as 1x1 convs (no bias, as stored in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, kernel_size=1, bias=False),
        )

        # Spatial attention: 7x7 conv over 2 pooled maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate from global avg- and max-pooled stats ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- Spatial attention: mask from per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        mask = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * mask


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Three variants share the same stage pattern (Conv+BN+ReLU+MaxPool, each
    stage followed by a CBAM attention module):

      * ``'f'``: 3 stages, 16 -> 32 -> 64 channels
      * ``'c'``: 3 stages, 32 -> 64 -> 128 channels
      * ``'q'``: 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (``64*28*28`` etc.) assume 224x224 input
    images (224 / 2**stages) -- TODO confirm against preprocessing.

    An optional single-channel edge-map input can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (checkpoint default: 6).

    Raises:
        ValueError: if ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the final width previously hard-coded 6, ignoring the
            # num_classes parameter (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUG FIX: an unknown model_type previously skipped layer
            # construction silently and crashed later (AttributeError /
            # unbound attention_map in forward). Fail fast instead.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge detection branch (input: 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps:
        # two 2x2 pools -> 56x56) -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: 3-channel image batch; spatial size must match the fc layout
               (224x224 for the released checkpoints -- TODO confirm).
            edge_x: optional 1-channel edge-map batch; when provided and
               shapes line up, edge features are fused via
               ``combined_classifier``; on any failure, the main head is
               used as a best-effort fallback.
        """
        # Each stage is a fixed slice of 4 layers (Conv, BN, ReLU, Pool)
        # followed by its CBAM module; the number of stages equals
        # len(self.attention_modules) (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; its output is the
        # feature vector that the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. unexpected edge-map size), use the main head only.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention uses two bias-free 1x1 convolutions around a ReLU;
    spatial attention uses a single bias-free 7x7 convolution over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width for the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention as 1x1 convs (no bias, as stored in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, kernel_size=1, bias=False),
        )

        # Spatial attention: 7x7 conv over 2 pooled maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate from global avg- and max-pooled stats ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- Spatial attention: mask from per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        mask = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * mask


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Three variants share the same stage pattern (Conv+BN+ReLU+MaxPool, each
    stage followed by a CBAM attention module):

      * ``'f'``: 3 stages, 16 -> 32 -> 64 channels
      * ``'c'``: 3 stages, 32 -> 64 -> 128 channels
      * ``'q'``: 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (``64*28*28`` etc.) assume 224x224 input
    images (224 / 2**stages) -- TODO confirm against preprocessing.

    An optional single-channel edge-map input can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (checkpoint default: 6).

    Raises:
        ValueError: if ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the final width previously hard-coded 6, ignoring the
            # num_classes parameter (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUG FIX: an unknown model_type previously skipped layer
            # construction silently and crashed later (AttributeError /
            # unbound attention_map in forward). Fail fast instead.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge detection branch (input: 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (assumes 224x224 edge maps:
        # two 2x2 pools -> 56x56) -- TODO confirm.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: 3-channel image batch; spatial size must match the fc layout
               (224x224 for the released checkpoints -- TODO confirm).
            edge_x: optional 1-channel edge-map batch; when provided and
               shapes line up, edge features are fused via
               ``combined_classifier``; on any failure, the main head is
               used as a best-effort fallback.
        """
        # Each stage is a fixed slice of 4 layers (Conv, BN, ReLU, Pool)
        # followed by its CBAM module; the number of stages equals
        # len(self.attention_modules) (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; its output is the
        # feature vector that the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. unexpected edge-map size), use the main head only.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention uses two bias-free 1x1 convolutions around a ReLU;
    spatial attention uses a single bias-free 7x7 convolution over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width for the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Channel attention as 1x1 convs (no bias, as stored in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, kernel_size=1, bias=False),
        )

        # Spatial attention: 7x7 conv over 2 pooled maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate from global avg- and max-pooled stats ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- Spatial attention: mask from per-pixel channel mean and max ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        mask = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * mask


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier whose module layout matches the Vbai-DPA 2.4 checkpoint.

    Backbone: stacked Conv+BN+ReLU+MaxPool groups with a CBAM attention
    block after each group, flattened into a 3-layer MLP classifier.  An
    optional 1-channel edge-map branch (2 convs + FC) can be fused with the
    intermediate classifier features via ``combined_classifier``.

    Args:
        model_type: 'f', 'c' or 'q' — selects channel widths and depth.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in ('f', 'c', 'q'):
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on ``fc_input``.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  The
        # fc_input values assume a 224x224 input (halved once per pool) —
        # TODO confirm against the training pipeline.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: last FC size was hard-coded to 6, silently ignoring
            # ``num_classes``; wire the parameter through (default keeps 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q': one extra conv group / attention block
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch.  The FC size assumes a 224x224 edge map
        # pooled twice down to 56x56 — presumably; verify against callers.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear, ReLU, Dropout) x2 + final Linear.
        # forward() slices into this Sequential by index, so the exact
        # layer order here is load-bearing.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Returns:
            (logits, attention_map) — the final attended feature map is
            returned alongside the logits for visualisation/debugging.
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout): these intermediate
        # features are shared with the edge-fusion path.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: a mis-sized edge map falls back to
                # the plain classifier head instead of crashing inference.
                pass

        return self.classifier[3:](features), attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, laid out to match the
    released checkpoint's parameter names (bias-free convs).

    Channel attention: a shared two-layer 1x1-conv MLP applied to both the
    globally average- and max-pooled descriptors.  Spatial attention: one
    7x7 conv over the channel-wise mean/max maps.  Both gates are sigmoids
    multiplied into the input.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, expressed as 1x1 convolutions
        # (NO bias, exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier whose module layout matches the Vbai-DPA 2.4 checkpoint.

    Backbone: stacked Conv+BN+ReLU+MaxPool groups with a CBAM attention
    block after each group, flattened into a 3-layer MLP classifier.  An
    optional 1-channel edge-map branch (2 convs + FC) can be fused with the
    intermediate classifier features via ``combined_classifier``.

    Args:
        model_type: 'f', 'c' or 'q' — selects channel widths and depth.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in ('f', 'c', 'q'):
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on ``fc_input``.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  The
        # fc_input values assume a 224x224 input (halved once per pool) —
        # TODO confirm against the training pipeline.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: last FC size was hard-coded to 6, silently ignoring
            # ``num_classes``; wire the parameter through (default keeps 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q': one extra conv group / attention block
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch.  The FC size assumes a 224x224 edge map
        # pooled twice down to 56x56 — presumably; verify against callers.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear, ReLU, Dropout) x2 + final Linear.
        # forward() slices into this Sequential by index, so the exact
        # layer order here is load-bearing.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Returns:
            (logits, attention_map) — the final attended feature map is
            returned alongside the logits for visualisation/debugging.
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout): these intermediate
        # features are shared with the edge-fusion path.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: a mis-sized edge map falls back to
                # the plain classifier head instead of crashing inference.
                pass

        return self.classifier[3:](features), attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, laid out to match the
    released checkpoint's parameter names (bias-free convs).

    Channel attention: a shared two-layer 1x1-conv MLP applied to both the
    globally average- and max-pooled descriptors.  Spatial attention: one
    7x7 conv over the channel-wise mean/max maps.  Both gates are sigmoids
    multiplied into the input.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, expressed as 1x1 convolutions
        # (NO bias, exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier whose module layout matches the Vbai-DPA 2.4 checkpoint.

    Backbone: stacked Conv+BN+ReLU+MaxPool groups with a CBAM attention
    block after each group, flattened into a 3-layer MLP classifier.  An
    optional 1-channel edge-map branch (2 convs + FC) can be fused with the
    intermediate classifier features via ``combined_classifier``.

    Args:
        model_type: 'f', 'c' or 'q' — selects channel widths and depth.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in ('f', 'c', 'q'):
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on ``fc_input``.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  The
        # fc_input values assume a 224x224 input (halved once per pool) —
        # TODO confirm against the training pipeline.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: last FC size was hard-coded to 6, silently ignoring
            # ``num_classes``; wire the parameter through (default keeps 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q': one extra conv group / attention block
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch.  The FC size assumes a 224x224 edge map
        # pooled twice down to 56x56 — presumably; verify against callers.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear, ReLU, Dropout) x2 + final Linear.
        # forward() slices into this Sequential by index, so the exact
        # layer order here is load-bearing.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Returns:
            (logits, attention_map) — the final attended feature map is
            returned alongside the logits for visualisation/debugging.
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout): these intermediate
        # features are shared with the edge-fusion path.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: a mis-sized edge map falls back to
                # the plain classifier head instead of crashing inference.
                pass

        return self.classifier[3:](features), attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, laid out to match the
    released checkpoint's parameter names (bias-free convs).

    Channel attention: a shared two-layer 1x1-conv MLP applied to both the
    globally average- and max-pooled descriptors.  Spatial attention: one
    7x7 conv over the channel-wise mean/max maps.  Both gates are sigmoids
    multiplied into the input.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, expressed as 1x1 convolutions
        # (NO bias, exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier whose module layout matches the Vbai-DPA 2.4 checkpoint.

    Backbone: stacked Conv+BN+ReLU+MaxPool groups with a CBAM attention
    block after each group, flattened into a 3-layer MLP classifier.  An
    optional 1-channel edge-map branch (2 convs + FC) can be fused with the
    intermediate classifier features via ``combined_classifier``.

    Args:
        model_type: 'f', 'c' or 'q' — selects channel widths and depth.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in ('f', 'c', 'q'):
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on ``fc_input``.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  The
        # fc_input values assume a 224x224 input (halved once per pool) —
        # TODO confirm against the training pipeline.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: last FC size was hard-coded to 6, silently ignoring
            # ``num_classes``; wire the parameter through (default keeps 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q': one extra conv group / attention block
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch.  The FC size assumes a 224x224 edge map
        # pooled twice down to 56x56 — presumably; verify against callers.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear, ReLU, Dropout) x2 + final Linear.
        # forward() slices into this Sequential by index, so the exact
        # layer order here is load-bearing.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Returns:
            (logits, attention_map) — the final attended feature map is
            returned alongside the logits for visualisation/debugging.
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout): these intermediate
        # features are shared with the edge-fusion path.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: a mis-sized edge map falls back to
                # the plain classifier head instead of crashing inference.
                pass

        return self.classifier[3:](features), attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, laid out to match the
    released checkpoint's parameter names (bias-free convs).

    Channel attention: a shared two-layer 1x1-conv MLP applied to both the
    globally average- and max-pooled descriptors.  Spatial attention: one
    7x7 conv over the channel-wise mean/max maps.  Both gates are sigmoids
    multiplied into the input.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, expressed as 1x1 convolutions
        # (NO bias, exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier whose module layout matches the Vbai-DPA 2.4 checkpoint.

    Backbone: stacked Conv+BN+ReLU+MaxPool groups with a CBAM attention
    block after each group, flattened into a 3-layer MLP classifier.  An
    optional 1-channel edge-map branch (2 convs + FC) can be fused with the
    intermediate classifier features via ``combined_classifier``.

    Args:
        model_type: 'f', 'c' or 'q' — selects channel widths and depth.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in ('f', 'c', 'q'):
            # Fail fast: previously an unknown type surfaced later as a
            # confusing NameError on ``fc_input``.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.  The
        # fc_input values assume a 224x224 input (halved once per pool) —
        # TODO confirm against the training pipeline.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: last FC size was hard-coded to 6, silently ignoring
            # ``num_classes``; wire the parameter through (default keeps 6).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        else:  # 'q': one extra conv group / attention block
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        # Edge detection branch.  The FC size assumes a 224x224 edge map
        # pooled twice down to 56x56 — presumably; verify against callers.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: (Linear, ReLU, Dropout) x2 + final Linear.
        # forward() slices into this Sequential by index, so the exact
        # layer order here is load-bearing.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Returns:
            (logits, attention_map) — the final attended feature map is
            returned alongside the logits for visualisation/debugging.
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
        else:  # 'q'
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout): these intermediate
        # features are shared with the edge-fusion path.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: a mis-sized edge map falls back to
                # the plain classifier head instead of crashing inference.
                pass

        return self.classifier[3:](features), attention_map
Model — Python / PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout mirrors the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP applied to both pooled descriptors; never reduce below
        # one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # Spatial gate over the [avg; max] channel-wise maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from global avg- and max-pooled stats.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel mean/max over channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged Conv/BN/ReLU/Pool backbone with CBAM
    attention after every stage, plus an optional edge-map branch.

    Variants (chosen with ``model_type``):
      * ``'f'``: 3 stages, 16/32/64 channels,   flattened feature 64*28*28
      * ``'c'``: 3 stages, 32/64/128 channels,  flattened feature 128*28*28
      * ``'q'``: 4 stages, 64/128/256/512 ch.,  flattened feature 512*14*14

    The flattened sizes assume a 224x224 input (each stage halves the spatial
    resolution).  ``forward`` returns ``(logits, attention_map)``.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'`` (see above).
        num_classes: number of output logits (default 6, matching the
            released checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages.  forward()
        # consumes this Sequential in slices of 4 layers per stage, so the
        # layer order is part of the checkpoint contract -- do not reorder.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # an UnboundLocalError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge-detection branch: maps a 1-channel edge image to a 128-dim
        # feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two MaxPool2d(2, 2) steps in forward(): 224 -> 56, hence 64*56*56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] is the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image + edge fusion).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: RGB batch of shape (N, 3, 224, 224).
            edge_x: optional edge-map batch of shape (N, 1, 224, 224).

        Returns:
            ``(output, attention_map)``: logits of shape (N, num_classes)
            and the feature tensor after the final attention module.
        """
        # Each stage is 4 consecutive layers of conv_layers followed by its
        # CBAM module; iterating the ModuleList handles both the 3- and
        # 4-stage variants without duplicated branches.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[stage * 4:stage * 4 + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear -> ReLU -> Dropout) yields the
        # fc_sizes[0]-dim feature vector shared with the combined head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch at edge_fc), fall back to the image-only head
                # rather than aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout mirrors the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP applied to both pooled descriptors; never reduce below
        # one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # Spatial gate over the [avg; max] channel-wise maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from global avg- and max-pooled stats.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel mean/max over channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged Conv/BN/ReLU/Pool backbone with CBAM
    attention after every stage, plus an optional edge-map branch.

    Variants (chosen with ``model_type``):
      * ``'f'``: 3 stages, 16/32/64 channels,   flattened feature 64*28*28
      * ``'c'``: 3 stages, 32/64/128 channels,  flattened feature 128*28*28
      * ``'q'``: 4 stages, 64/128/256/512 ch.,  flattened feature 512*14*14

    The flattened sizes assume a 224x224 input (each stage halves the spatial
    resolution).  ``forward`` returns ``(logits, attention_map)``.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'`` (see above).
        num_classes: number of output logits (default 6, matching the
            released checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages.  forward()
        # consumes this Sequential in slices of 4 layers per stage, so the
        # layer order is part of the checkpoint contract -- do not reorder.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # an UnboundLocalError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge-detection branch: maps a 1-channel edge image to a 128-dim
        # feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two MaxPool2d(2, 2) steps in forward(): 224 -> 56, hence 64*56*56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] is the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image + edge fusion).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: RGB batch of shape (N, 3, 224, 224).
            edge_x: optional edge-map batch of shape (N, 1, 224, 224).

        Returns:
            ``(output, attention_map)``: logits of shape (N, num_classes)
            and the feature tensor after the final attention module.
        """
        # Each stage is 4 consecutive layers of conv_layers followed by its
        # CBAM module; iterating the ModuleList handles both the 3- and
        # 4-stage variants without duplicated branches.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[stage * 4:stage * 4 + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear -> ReLU -> Dropout) yields the
        # fc_sizes[0]-dim feature vector shared with the combined head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch at edge_fc), fall back to the image-only head
                # rather than aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout mirrors the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP applied to both pooled descriptors; never reduce below
        # one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # Spatial gate over the [avg; max] channel-wise maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from global avg- and max-pooled stats.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel mean/max over channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged Conv/BN/ReLU/Pool backbone with CBAM
    attention after every stage, plus an optional edge-map branch.

    Variants (chosen with ``model_type``):
      * ``'f'``: 3 stages, 16/32/64 channels,   flattened feature 64*28*28
      * ``'c'``: 3 stages, 32/64/128 channels,  flattened feature 128*28*28
      * ``'q'``: 4 stages, 64/128/256/512 ch.,  flattened feature 512*14*14

    The flattened sizes assume a 224x224 input (each stage halves the spatial
    resolution).  ``forward`` returns ``(logits, attention_map)``.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'`` (see above).
        num_classes: number of output logits (default 6, matching the
            released checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages.  forward()
        # consumes this Sequential in slices of 4 layers per stage, so the
        # layer order is part of the checkpoint contract -- do not reorder.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # an UnboundLocalError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge-detection branch: maps a 1-channel edge image to a 128-dim
        # feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two MaxPool2d(2, 2) steps in forward(): 224 -> 56, hence 64*56*56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] is the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image + edge fusion).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: RGB batch of shape (N, 3, 224, 224).
            edge_x: optional edge-map batch of shape (N, 1, 224, 224).

        Returns:
            ``(output, attention_map)``: logits of shape (N, num_classes)
            and the feature tensor after the final attention module.
        """
        # Each stage is 4 consecutive layers of conv_layers followed by its
        # CBAM module; iterating the ModuleList handles both the 3- and
        # 4-stage variants without duplicated branches.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[stage * 4:stage * 4 + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear -> ReLU -> Dropout) yields the
        # fc_sizes[0]-dim feature vector shared with the combined head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch at edge_fc), fall back to the image-only head
                # rather than aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout mirrors the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP applied to both pooled descriptors; never reduce below
        # one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # Spatial gate over the [avg; max] channel-wise maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from global avg- and max-pooled stats.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel mean/max over channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: staged Conv/BN/ReLU/Pool backbone with CBAM
    attention after every stage, plus an optional edge-map branch.

    Variants (chosen with ``model_type``):
      * ``'f'``: 3 stages, 16/32/64 channels,   flattened feature 64*28*28
      * ``'c'``: 3 stages, 32/64/128 channels,  flattened feature 128*28*28
      * ``'q'``: 4 stages, 64/128/256/512 ch.,  flattened feature 512*14*14

    The flattened sizes assume a 224x224 input (each stage halves the spatial
    resolution).  ``forward`` returns ``(logits, attention_map)``.

    Args:
        model_type: ``'f'``, ``'c'`` or ``'q'`` (see above).
        num_classes: number of output logits (default 6, matching the
            released checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages.  forward()
        # consumes this Sequential in slices of 4 layers per stage, so the
        # layer order is part of the checkpoint contract -- do not reorder.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # an UnboundLocalError on fc_input; fail fast instead.
            raise ValueError(
                "model_type must be one of 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Edge-detection branch: maps a 1-channel edge image to a 128-dim
        # feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two MaxPool2d(2, 2) steps in forward(): 224 -> 56, hence 64*56*56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] is the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (image + edge fusion).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: RGB batch of shape (N, 3, 224, 224).
            edge_x: optional edge-map batch of shape (N, 1, 224, 224).

        Returns:
            ``(output, attention_map)``: logits of shape (N, num_classes)
            and the feature tensor after the final attention module.
        """
        # Each stage is 4 consecutive layers of conv_layers followed by its
        # CBAM module; iterating the ModuleList handles both the 3- and
        # 4-stage variants without duplicated branches.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[stage * 4:stage * 4 + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear -> ReLU -> Dropout) yields the
        # fc_sizes[0]-dim feature vector shared with the combined head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch at edge_fc), fall back to the image-only head
                # rather than aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — python / pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout mirrors the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP applied to both pooled descriptors; never reduce below
        # one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # Spatial gate over the [avg; max] channel-wise maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate from global avg- and max-pooled stats.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: gate from per-pixel mean/max over channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAMAttentionCheckpoint module applied after each stage:
      - 'f': 3 stages (16/32/64 channels)
      - 'c': 3 stages (32/64/128 channels)
      - 'q': 4 stages (64/128/256/512 channels)
    An optional edge-detection branch can be fused with intermediate features
    before the final classifier.

    Args:
        model_type: backbone variant, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, the checkpoint size).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after 3 halvings — implies 224x224 input (TODO confirm)
            fc_input = 64 * 28 * 28
            # BUGFIX: final size previously hard-coded to 6, ignoring num_classes
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel map; 56x56 after 2 pools)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and optional edge branch), then classify.

        Args:
            x: RGB batch (B, 3, H, W); the fc sizes imply H = W = 224.
            edge_x: optional (B, 1, 224, 224) edge map; when given, its
                features are concatenated with the image features and routed
                through combined_classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            attended feature map from the last backbone stage.
        """
        # Each backbone stage is a 4-layer slice (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' (only remaining variant; __init__ validated model_type)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run classifier[0:3] (Linear, ReLU, Dropout) to get intermediate
        # features; the remaining layers produce the main-path logits.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: on any edge-branch failure (e.g. shape
                # mismatch into edge_fc) fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate followed by spatial gate).

    Both sub-attentions use bias-free convolutions so parameter names and
    shapes line up with the saved checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze-excitation style MLP built from 1x1
        # convolutions (matches checkpoint layout, no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: pool to 1x1, run shared MLP on both pools ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAMAttentionCheckpoint module applied after each stage:
      - 'f': 3 stages (16/32/64 channels)
      - 'c': 3 stages (32/64/128 channels)
      - 'q': 4 stages (64/128/256/512 channels)
    An optional edge-detection branch can be fused with intermediate features
    before the final classifier.

    Args:
        model_type: backbone variant, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, the checkpoint size).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after 3 halvings — implies 224x224 input (TODO confirm)
            fc_input = 64 * 28 * 28
            # BUGFIX: final size previously hard-coded to 6, ignoring num_classes
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel map; 56x56 after 2 pools)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and optional edge branch), then classify.

        Args:
            x: RGB batch (B, 3, H, W); the fc sizes imply H = W = 224.
            edge_x: optional (B, 1, 224, 224) edge map; when given, its
                features are concatenated with the image features and routed
                through combined_classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            attended feature map from the last backbone stage.
        """
        # Each backbone stage is a 4-layer slice (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' (only remaining variant; __init__ validated model_type)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run classifier[0:3] (Linear, ReLU, Dropout) to get intermediate
        # features; the remaining layers produce the main-path logits.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: on any edge-branch failure (e.g. shape
                # mismatch into edge_fc) fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate followed by spatial gate).

    Both sub-attentions use bias-free convolutions so parameter names and
    shapes line up with the saved checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze-excitation style MLP built from 1x1
        # convolutions (matches checkpoint layout, no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: pool to 1x1, run shared MLP on both pools ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAMAttentionCheckpoint module applied after each stage:
      - 'f': 3 stages (16/32/64 channels)
      - 'c': 3 stages (32/64/128 channels)
      - 'q': 4 stages (64/128/256/512 channels)
    An optional edge-detection branch can be fused with intermediate features
    before the final classifier.

    Args:
        model_type: backbone variant, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, the checkpoint size).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after 3 halvings — implies 224x224 input (TODO confirm)
            fc_input = 64 * 28 * 28
            # BUGFIX: final size previously hard-coded to 6, ignoring num_classes
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel map; 56x56 after 2 pools)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and optional edge branch), then classify.

        Args:
            x: RGB batch (B, 3, H, W); the fc sizes imply H = W = 224.
            edge_x: optional (B, 1, 224, 224) edge map; when given, its
                features are concatenated with the image features and routed
                through combined_classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            attended feature map from the last backbone stage.
        """
        # Each backbone stage is a 4-layer slice (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' (only remaining variant; __init__ validated model_type)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run classifier[0:3] (Linear, ReLU, Dropout) to get intermediate
        # features; the remaining layers produce the main-path logits.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: on any edge-branch failure (e.g. shape
                # mismatch into edge_fc) fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate followed by spatial gate).

    Both sub-attentions use bias-free convolutions so parameter names and
    shapes line up with the saved checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze-excitation style MLP built from 1x1
        # convolutions (matches checkpoint layout, no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: pool to 1x1, run shared MLP on both pools ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAMAttentionCheckpoint module applied after each stage:
      - 'f': 3 stages (16/32/64 channels)
      - 'c': 3 stages (32/64/128 channels)
      - 'q': 4 stages (64/128/256/512 channels)
    An optional edge-detection branch can be fused with intermediate features
    before the final classifier.

    Args:
        model_type: backbone variant, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, the checkpoint size).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after 3 halvings — implies 224x224 input (TODO confirm)
            fc_input = 64 * 28 * 28
            # BUGFIX: final size previously hard-coded to 6, ignoring num_classes
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel map; 56x56 after 2 pools)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and optional edge branch), then classify.

        Args:
            x: RGB batch (B, 3, H, W); the fc sizes imply H = W = 224.
            edge_x: optional (B, 1, 224, 224) edge map; when given, its
                features are concatenated with the image features and routed
                through combined_classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            attended feature map from the last backbone stage.
        """
        # Each backbone stage is a 4-layer slice (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' (only remaining variant; __init__ validated model_type)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run classifier[0:3] (Linear, ReLU, Dropout) to get intermediate
        # features; the remaining layers produce the main-path logits.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: on any edge-branch failure (e.g. shape
                # mismatch into edge_fc) fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate followed by spatial gate).

    Both sub-attentions use bias-free convolutions so parameter names and
    shapes line up with the saved checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze-excitation style MLP built from 1x1
        # convolutions (matches checkpoint layout, no bias terms).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the stacked [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: pool to 1x1, run shared MLP on both pools ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial gate: per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout matches the released checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAMAttentionCheckpoint module applied after each stage:
      - 'f': 3 stages (16/32/64 channels)
      - 'c': 3 stages (32/64/128 channels)
      - 'q': 4 stages (64/128/256/512 channels)
    An optional edge-detection branch can be fused with intermediate features
    before the final classifier.

    Args:
        model_type: backbone variant, one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, the checkpoint size).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            # 28x28 after 3 halvings — implies 224x224 input (TODO confirm)
            fc_input = 64 * 28 * 28
            # BUGFIX: final size previously hard-coded to 6, ignoring num_classes
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input/fc_sizes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel map; 56x56 after 2 pools)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (and optional edge branch), then classify.

        Args:
            x: RGB batch (B, 3, H, W); the fc sizes imply H = W = 224.
            edge_x: optional (B, 1, 224, 224) edge map; when given, its
                features are concatenated with the image features and routed
                through combined_classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            attended feature map from the last backbone stage.
        """
        # Each backbone stage is a 4-layer slice (Conv, BN, ReLU, Pool)
        # followed by its CBAM attention module.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' (only remaining variant; __init__ validated model_type)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # Run classifier[0:3] (Linear, ReLU, Dropout) to get intermediate
        # features; the remaining layers produce the main-path logits.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: on any edge-branch failure (e.g. shape
                # mismatch into edge_fc) fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM), checkpoint-compatible.

    Applies channel attention followed by spatial attention. Every
    convolution is bias-free and the parameter layout
    (``channel_attention.0/2``, ``spatial_attention.0``) matches the
    released checkpoints.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: Number of input/output feature channels.
            reduction: Bottleneck ratio of the channel-attention MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck MLP (no bias),
        # never squeezed below a single channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: squeeze H/W, gate per channel ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        attended = x * channel_gate

        # --- spatial attention: squeeze channels, gate per location ---
        pooled = torch.cat(
            [
                attended.mean(dim=1, keepdim=True),
                attended.amax(dim=1, keepdim=True),
            ],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return attended * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    Three size variants share one architecture pattern (Conv+BN+ReLU+MaxPool
    stages, each followed by CBAM attention):
      * 'f' (fast):    3 stages, 16/32/64 channels   -> 28x28 final maps
      * 'c' (classic): 3 stages, 32/64/128 channels  -> 28x28 final maps
      * 'q' (quality): 4 stages, 64/128/256/512 ch.  -> 14x14 final maps

    The fc input sizes imply a 224x224 input image -- TODO confirm. Module
    ordering matches the released checkpoints, so state_dicts load 1:1.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f', 'c', 'q' (see class docstring).
            num_classes: Size of the output layer (released checkpoints use 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression, classifier widths, and spatial
        # size of the final feature map. Note: the original hard-coded the
        # output size to 6; it now honors num_classes (default unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            final_spatial = 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            final_spatial = 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            final_spatial = 14
        else:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Conv backbone: Conv+BN+ReLU+Pool per stage, flattened into one
        # Sequential so checkpoint keys (conv_layers.0, conv_layers.1, ...)
        # line up with the released weights.
        stage_layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_layers.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stage_layers)

        # One CBAM module per stage, applied after that stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * final_spatial * final_spatial

        # Edge detection branch (expects a 1-channel edge map; the 64*56*56
        # fc input implies a 224x224 edge map pooled twice -- TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps indices [0:3] for the intermediate
        # features that get fused with edge features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are available (features + 128).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Args:
            x: Image batch, shape (B, 3, H, W).
            edge_x: Optional 1-channel edge-map batch; on any failure in the
                edge branch the image-only head is used instead.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended feature map (useful for visualization).
        """
        # Each stage is 4 modules (Conv, BN, ReLU, Pool) followed by CBAM;
        # iterating over attention_modules covers all variants uniformly.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear, ReLU, Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size), use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM), checkpoint-compatible.

    Applies channel attention followed by spatial attention. Every
    convolution is bias-free and the parameter layout
    (``channel_attention.0/2``, ``spatial_attention.0``) matches the
    released checkpoints.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: Number of input/output feature channels.
            reduction: Bottleneck ratio of the channel-attention MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck MLP (no bias),
        # never squeezed below a single channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: squeeze H/W, gate per channel ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        attended = x * channel_gate

        # --- spatial attention: squeeze channels, gate per location ---
        pooled = torch.cat(
            [
                attended.mean(dim=1, keepdim=True),
                attended.amax(dim=1, keepdim=True),
            ],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return attended * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    Three size variants share one architecture pattern (Conv+BN+ReLU+MaxPool
    stages, each followed by CBAM attention):
      * 'f' (fast):    3 stages, 16/32/64 channels   -> 28x28 final maps
      * 'c' (classic): 3 stages, 32/64/128 channels  -> 28x28 final maps
      * 'q' (quality): 4 stages, 64/128/256/512 ch.  -> 14x14 final maps

    The fc input sizes imply a 224x224 input image -- TODO confirm. Module
    ordering matches the released checkpoints, so state_dicts load 1:1.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f', 'c', 'q' (see class docstring).
            num_classes: Size of the output layer (released checkpoints use 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression, classifier widths, and spatial
        # size of the final feature map. Note: the original hard-coded the
        # output size to 6; it now honors num_classes (default unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            final_spatial = 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            final_spatial = 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            final_spatial = 14
        else:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Conv backbone: Conv+BN+ReLU+Pool per stage, flattened into one
        # Sequential so checkpoint keys (conv_layers.0, conv_layers.1, ...)
        # line up with the released weights.
        stage_layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_layers.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stage_layers)

        # One CBAM module per stage, applied after that stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * final_spatial * final_spatial

        # Edge detection branch (expects a 1-channel edge map; the 64*56*56
        # fc input implies a 224x224 edge map pooled twice -- TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps indices [0:3] for the intermediate
        # features that get fused with edge features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are available (features + 128).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Args:
            x: Image batch, shape (B, 3, H, W).
            edge_x: Optional 1-channel edge-map batch; on any failure in the
                edge branch the image-only head is used instead.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended feature map (useful for visualization).
        """
        # Each stage is 4 modules (Conv, BN, ReLU, Pool) followed by CBAM;
        # iterating over attention_modules covers all variants uniformly.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear, ReLU, Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size), use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM), checkpoint-compatible.

    Applies channel attention followed by spatial attention. Every
    convolution is bias-free and the parameter layout
    (``channel_attention.0/2``, ``spatial_attention.0``) matches the
    released checkpoints.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: Number of input/output feature channels.
            reduction: Bottleneck ratio of the channel-attention MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck MLP (no bias),
        # never squeezed below a single channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: squeeze H/W, gate per channel ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        attended = x * channel_gate

        # --- spatial attention: squeeze channels, gate per location ---
        pooled = torch.cat(
            [
                attended.mean(dim=1, keepdim=True),
                attended.amax(dim=1, keepdim=True),
            ],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return attended * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    Three size variants share one architecture pattern (Conv+BN+ReLU+MaxPool
    stages, each followed by CBAM attention):
      * 'f' (fast):    3 stages, 16/32/64 channels   -> 28x28 final maps
      * 'c' (classic): 3 stages, 32/64/128 channels  -> 28x28 final maps
      * 'q' (quality): 4 stages, 64/128/256/512 ch.  -> 14x14 final maps

    The fc input sizes imply a 224x224 input image -- TODO confirm. Module
    ordering matches the released checkpoints, so state_dicts load 1:1.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f', 'c', 'q' (see class docstring).
            num_classes: Size of the output layer (released checkpoints use 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression, classifier widths, and spatial
        # size of the final feature map. Note: the original hard-coded the
        # output size to 6; it now honors num_classes (default unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            final_spatial = 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            final_spatial = 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            final_spatial = 14
        else:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Conv backbone: Conv+BN+ReLU+Pool per stage, flattened into one
        # Sequential so checkpoint keys (conv_layers.0, conv_layers.1, ...)
        # line up with the released weights.
        stage_layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_layers.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stage_layers)

        # One CBAM module per stage, applied after that stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * final_spatial * final_spatial

        # Edge detection branch (expects a 1-channel edge map; the 64*56*56
        # fc input implies a 224x224 edge map pooled twice -- TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps indices [0:3] for the intermediate
        # features that get fused with edge features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are available (features + 128).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Args:
            x: Image batch, shape (B, 3, H, W).
            edge_x: Optional 1-channel edge-map batch; on any failure in the
                edge branch the image-only head is used instead.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended feature map (useful for visualization).
        """
        # Each stage is 4 modules (Conv, BN, ReLU, Pool) followed by CBAM;
        # iterating over attention_modules covers all variants uniformly.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear, ReLU, Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size), use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM), checkpoint-compatible.

    Applies channel attention followed by spatial attention. Every
    convolution is bias-free and the parameter layout
    (``channel_attention.0/2``, ``spatial_attention.0``) matches the
    released checkpoints.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: Number of input/output feature channels.
            reduction: Bottleneck ratio of the channel-attention MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck MLP (no bias),
        # never squeezed below a single channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: squeeze H/W, gate per channel ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        attended = x * channel_gate

        # --- spatial attention: squeeze channels, gate per location ---
        pooled = torch.cat(
            [
                attended.mean(dim=1, keepdim=True),
                attended.amax(dim=1, keepdim=True),
            ],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return attended * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    Three size variants share one architecture pattern (Conv+BN+ReLU+MaxPool
    stages, each followed by CBAM attention):
      * 'f' (fast):    3 stages, 16/32/64 channels   -> 28x28 final maps
      * 'c' (classic): 3 stages, 32/64/128 channels  -> 28x28 final maps
      * 'q' (quality): 4 stages, 64/128/256/512 ch.  -> 14x14 final maps

    The fc input sizes imply a 224x224 input image -- TODO confirm. Module
    ordering matches the released checkpoints, so state_dicts load 1:1.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f', 'c', 'q' (see class docstring).
            num_classes: Size of the output layer (released checkpoints use 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant channel progression, classifier widths, and spatial
        # size of the final feature map. Note: the original hard-coded the
        # output size to 6; it now honors num_classes (default unchanged).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
            final_spatial = 28
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
            final_spatial = 28
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
            final_spatial = 14
        else:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )

        # Conv backbone: Conv+BN+ReLU+Pool per stage, flattened into one
        # Sequential so checkpoint keys (conv_layers.0, conv_layers.1, ...)
        # line up with the released weights.
        stage_layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_layers.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stage_layers)

        # One CBAM module per stage, applied after that stage's pooling.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * final_spatial * final_spatial

        # Edge detection branch (expects a 1-channel edge map; the 64*56*56
        # fc input implies a 224x224 edge map pooled twice -- TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps indices [0:3] for the intermediate
        # features that get fused with edge features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when edge features are available (features + 128).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from an edge map.

        Args:
            x: Image batch, shape (B, 3, H, W).
            edge_x: Optional 1-channel edge-map batch; on any failure in the
                edge branch the image-only head is used instead.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended feature map (useful for visualization).
        """
        # Each stage is 4 modules (Conv, BN, ReLU, Pool) followed by CBAM;
        # iterating over attention_modules covers all variants uniformly.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear, ReLU, Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: if the edge branch fails
                # (e.g. wrong edge_x spatial size), use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM), checkpoint-compatible.

    Applies channel attention followed by spatial attention. Every
    convolution is bias-free and the parameter layout
    (``channel_attention.0/2``, ``spatial_attention.0``) matches the
    released checkpoints.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: Number of input/output feature channels.
            reduction: Bottleneck ratio of the channel-attention MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck MLP (no bias),
        # never squeezed below a single channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: squeeze H/W, gate per channel ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg)
            + self.channel_attention(squeezed_max)
        )
        attended = x * channel_gate

        # --- spatial attention: squeeze channels, gate per location ---
        pooled = torch.cat(
            [
                attended.mean(dim=1, keepdim=True),
                attended.amax(dim=1, keepdim=True),
            ],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return attended * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'``: 16/32/64 channels, 3 stages, FC 256-128
    * ``'c'``: 32/64/128 channels, 3 stages, FC 512-256
    * ``'q'``: 64/128/256/512 channels, 4 stages, FC 1024-512

    The hard-coded flattened sizes (``28 * 28`` / ``14 * 14`` and the edge
    branch's ``56 * 56``) assume a 224x224 input -- TODO confirm against the
    training pipeline. An optional single-channel edge branch can be fused
    in before the final classification layer.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Output layer width (default 6, as in the checkpoints).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; the index
        # layout matters because forward() slices this Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width follows num_classes instead of a
            # hard-coded 6 (default num_classes=6 keeps checkpoint parity).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: previously an unknown model_type fell through and
            # crashed later with NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools halve the
        # spatial size twice (224 -> 112 -> 56, assuming 224x224 input).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first FC
        # stage's activations can double as fusion features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and optionally fuse the edge branch.

        Args:
            x: Image batch, shape ``(B, 3, H, W)`` (224x224 assumed by the
                flattened FC sizes).
            edge_x: Optional ``(B, 1, H, W)`` edge map. When given, edge
                features are concatenated with the first-FC features and
                routed through ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            feature map after the final attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- the only other value __init__ accepts
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage only (Linear -> ReLU -> Dropout); its output is
        # reused as the fusion features for the edge path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over channel-wise
    avg/max maps). Both convolutions are bias-free, as in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: bottleneck MLP implemented with 1x1 convs.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv ("same" padding).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled stats.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # Spatial gate: 7x7 conv over [mean, max] channel summaries.
        summaries = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(summaries))

        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'``: 16/32/64 channels, 3 stages, FC 256-128
    * ``'c'``: 32/64/128 channels, 3 stages, FC 512-256
    * ``'q'``: 64/128/256/512 channels, 4 stages, FC 1024-512

    The hard-coded flattened sizes (``28 * 28`` / ``14 * 14`` and the edge
    branch's ``56 * 56``) assume a 224x224 input -- TODO confirm against the
    training pipeline. An optional single-channel edge branch can be fused
    in before the final classification layer.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Output layer width (default 6, as in the checkpoints).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; the index
        # layout matters because forward() slices this Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width follows num_classes instead of a
            # hard-coded 6 (default num_classes=6 keeps checkpoint parity).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: previously an unknown model_type fell through and
            # crashed later with NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools halve the
        # spatial size twice (224 -> 112 -> 56, assuming 224x224 input).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first FC
        # stage's activations can double as fusion features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and optionally fuse the edge branch.

        Args:
            x: Image batch, shape ``(B, 3, H, W)`` (224x224 assumed by the
                flattened FC sizes).
            edge_x: Optional ``(B, 1, H, W)`` edge map. When given, edge
                features are concatenated with the first-FC features and
                routed through ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            feature map after the final attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- the only other value __init__ accepts
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage only (Linear -> ReLU -> Dropout); its output is
        # reused as the fusion features for the edge path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over channel-wise
    avg/max maps). Both convolutions are bias-free, as in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: bottleneck MLP implemented with 1x1 convs.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv ("same" padding).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled stats.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # Spatial gate: 7x7 conv over [mean, max] channel summaries.
        summaries = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(summaries))

        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'``: 16/32/64 channels, 3 stages, FC 256-128
    * ``'c'``: 32/64/128 channels, 3 stages, FC 512-256
    * ``'q'``: 64/128/256/512 channels, 4 stages, FC 1024-512

    The hard-coded flattened sizes (``28 * 28`` / ``14 * 14`` and the edge
    branch's ``56 * 56``) assume a 224x224 input -- TODO confirm against the
    training pipeline. An optional single-channel edge branch can be fused
    in before the final classification layer.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Output layer width (default 6, as in the checkpoints).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; the index
        # layout matters because forward() slices this Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width follows num_classes instead of a
            # hard-coded 6 (default num_classes=6 keeps checkpoint parity).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: previously an unknown model_type fell through and
            # crashed later with NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools halve the
        # spatial size twice (224 -> 112 -> 56, assuming 224x224 input).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first FC
        # stage's activations can double as fusion features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and optionally fuse the edge branch.

        Args:
            x: Image batch, shape ``(B, 3, H, W)`` (224x224 assumed by the
                flattened FC sizes).
            edge_x: Optional ``(B, 1, H, W)`` edge map. When given, edge
                features are concatenated with the first-FC features and
                routed through ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            feature map after the final attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- the only other value __init__ accepts
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage only (Linear -> ReLU -> Dropout); its output is
        # reused as the fusion features for the edge path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over channel-wise
    avg/max maps). Both convolutions are bias-free, as in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: bottleneck MLP implemented with 1x1 convs.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv ("same" padding).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled stats.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # Spatial gate: 7x7 conv over [mean, max] channel summaries.
        summaries = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(summaries))

        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'``: 16/32/64 channels, 3 stages, FC 256-128
    * ``'c'``: 32/64/128 channels, 3 stages, FC 512-256
    * ``'q'``: 64/128/256/512 channels, 4 stages, FC 1024-512

    The hard-coded flattened sizes (``28 * 28`` / ``14 * 14`` and the edge
    branch's ``56 * 56``) assume a 224x224 input -- TODO confirm against the
    training pipeline. An optional single-channel edge branch can be fused
    in before the final classification layer.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Output layer width (default 6, as in the checkpoints).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; the index
        # layout matters because forward() slices this Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width follows num_classes instead of a
            # hard-coded 6 (default num_classes=6 keeps checkpoint parity).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: previously an unknown model_type fell through and
            # crashed later with NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools halve the
        # spatial size twice (224 -> 112 -> 56, assuming 224x224 input).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first FC
        # stage's activations can double as fusion features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and optionally fuse the edge branch.

        Args:
            x: Image batch, shape ``(B, 3, H, W)`` (224x224 assumed by the
                flattened FC sizes).
            edge_x: Optional ``(B, 1, H, W)`` edge map. When given, edge
                features are concatenated with the first-FC features and
                routed through ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            feature map after the final attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- the only other value __init__ accepts
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage only (Linear -> ReLU -> Dropout); its output is
        # reused as the fusion features for the edge path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Applies a channel gate (shared 1x1-conv MLP over avg- and max-pooled
    descriptors) followed by a spatial gate (7x7 conv over channel-wise
    avg/max maps). Both convolutions are bias-free, as in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: bottleneck MLP implemented with 1x1 convs.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv ("same" padding).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled stats.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # Spatial gate: 7x7 conv over [mean, max] channel summaries.
        summaries = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(summaries))

        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share a Conv+BN+ReLU+MaxPool backbone with a
    CBAM attention module applied after every pooling stage:

    * ``'f'``: 16/32/64 channels, 3 stages, FC 256-128
    * ``'c'``: 32/64/128 channels, 3 stages, FC 512-256
    * ``'q'``: 64/128/256/512 channels, 4 stages, FC 1024-512

    The hard-coded flattened sizes (``28 * 28`` / ``14 * 14`` and the edge
    branch's ``56 * 56``) assume a 224x224 input -- TODO confirm against the
    training pipeline. An optional single-channel edge branch can be fused
    in before the final classification layer.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Output layer width (default 6, as in the checkpoints).

    Raises:
        ValueError: If ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; the index
        # layout matters because forward() slices this Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width follows num_classes instead of a
            # hard-coded 6 (default num_classes=6 keeps checkpoint parity).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: previously an unknown model_type fell through and
            # crashed later with NameError on fc_input; fail fast instead.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools halve the
        # spatial size twice (224 -> 112 -> 56, assuming 224x224 input).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the first FC
        # stage's activations can double as fusion features.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and optionally fuse the edge branch.

        Args:
            x: Image batch, shape ``(B, 3, H, W)`` (224x224 assumed by the
                flattened FC sizes).
            edge_x: Optional ``(B, 1, H, W)`` edge map. When given, edge
                features are concatenated with the first-FC features and
                routed through ``combined_classifier``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            feature map after the final attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- the only other value __init__ accepts
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage only (Linear -> ReLU -> Dropout); its output is
        # reused as the fusion features for the edge path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: a channel gate followed by a spatial gate.

    Layer shapes mirror a pretrained checkpoint: the channel branch is a
    bias-free 1x1-conv bottleneck MLP and the spatial branch is a single
    bias-free 7x7 convolution over the [mean; max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width; never allowed to collapse to zero.
        hidden = max(channels // reduction, 1)
        # Shared MLP for both pooled descriptors, expressed as 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # Single bias-free 7x7 conv producing the per-pixel gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # --- channel gate: avg- and max-pooled descriptors share the MLP ---
        descriptors = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        gate = torch.sigmoid(sum(self.channel_attention(d) for d in descriptors))
        x = x * gate

        # --- spatial gate: 7x7 conv over channel-wise mean and max maps ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        stacked = torch.cat((mean_map, max_map), dim=1)
        gate = torch.sigmoid(self.spatial_attention(stacked))
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    ``model_type`` selects the backbone size:
      - 'f': 3 stages (16/32/64 ch),       classifier sizes [256, 128, num_classes]
      - 'c': 3 stages (32/64/128 ch),      classifier sizes [512, 256, num_classes]
      - 'q': 4 stages (64/128/256/512 ch), classifier sizes [1024, 512, num_classes]

    The hard-coded flattened sizes assume 224x224 inputs:
    224 / 2**3 = 28 ('f'/'c'), 224 / 2**4 = 14 ('q'), 224 / 2**2 = 56 (edge).

    Args:
        model_type: one of 'f', 'c', 'q'.
        num_classes: output width of both classifier heads (default 6, matching
            the original checkpoint; previously this argument was stored but
            unused — it now genuinely parameterizes the heads).

    Raises:
        ValueError: if ``model_type`` is not recognized.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages, exactly 4
        # modules per stage so forward() can slice the Sequential in groups
        # of 4 and insert CBAM between stages.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: previously an unknown type fell through and crashed
            # later with a NameError on `fc_input`.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Edge detection branch: 1-channel input, two conv+pool steps
        # (spatial /4, i.e. 224 -> 56), projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the output of index 2 (Dropout
        # after the first Linear+ReLU) as the feature vector for edge fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Classifier used when the 128-d edge features are fused with the
        # backbone features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for a batch.

        Args:
            x: RGB batch; spatial size must match the hard-coded classifier
               input (224x224 per the sizes in __init__).
            edge_x: optional 1-channel edge map; when given, its features are
                fused with the backbone features via combined_classifier.

        Raises:
            ValueError: if ``self.model_type`` is not 'f', 'c' or 'q'
            (previously this surfaced as an UnboundLocalError on
            ``attention_map``).
        """
        if self.model_type in ('f', 'c'):
            # Three stages of 4 modules each, CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the larger 'q' variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            raise ValueError(f"unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; the dropout output is
        # the feature vector shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. mismatched edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layer shapes mirror a pretrained checkpoint: the channel branch is a
    bias-free 1x1-conv bottleneck MLP and the spatial branch is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # Global average / max descriptors, each run through the shared MLP.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        # Per-channel gate in (0, 1).
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel gate from channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    `model_type` selects the backbone size:
      - 'f': 3 stages (16/32/64 ch),       fc_sizes [256, 128, 6]
      - 'c': 3 stages (32/64/128 ch),      fc_sizes [512, 256, 6]
      - 'q': 4 stages (64/128/256/512 ch), fc_sizes [1024, 512, 6]

    The hard-coded flattened sizes assume 224x224 inputs:
    224 / 2**3 = 28 ('f'/'c'), 224 / 2**4 = 14 ('q'), 224 / 2**2 = 56 (edge).

    NOTE(review): `num_classes` is stored but the output width is fixed at 6
    through `fc_sizes`; an unrecognized `model_type` leaves `fc_input` and
    `fc_sizes` undefined and raises NameError further down in __init__.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Each stage is exactly 4 modules, so forward() can slice the
        # Sequential in groups of 4 and insert CBAM between stages.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch: 1-channel input, two conv+pool steps
        # (spatial /4, i.e. 224 -> 56), projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the output of index 2 (Dropout
        # after the first Linear+ReLU) as the feature vector for edge fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features): takes backbone features
        # concatenated with the 128-d edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for a batch.

        Args:
            x: RGB batch; spatial size must match the hard-coded classifier
               input (224x224 per the sizes in __init__).
            edge_x: optional 1-channel edge map; when given, its features are
                fused with the backbone features via combined_classifier.

        NOTE(review): for a `model_type` other than 'f'/'c'/'q',
        `attention_map` is never assigned and the final return raises
        UnboundLocalError.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three stages of 4 modules each, CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the larger 'q' variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; the dropout output is
        # the feature vector shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                # Best-effort: any edge-branch failure silently falls back to
                # the plain classifier head (`e` is unused).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layer shapes mirror a pretrained checkpoint: the channel branch is a
    bias-free 1x1-conv bottleneck MLP and the spatial branch is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # Global average / max descriptors, each run through the shared MLP.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        # Per-channel gate in (0, 1).
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel gate from channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    `model_type` selects the backbone size:
      - 'f': 3 stages (16/32/64 ch),       fc_sizes [256, 128, 6]
      - 'c': 3 stages (32/64/128 ch),      fc_sizes [512, 256, 6]
      - 'q': 4 stages (64/128/256/512 ch), fc_sizes [1024, 512, 6]

    The hard-coded flattened sizes assume 224x224 inputs:
    224 / 2**3 = 28 ('f'/'c'), 224 / 2**4 = 14 ('q'), 224 / 2**2 = 56 (edge).

    NOTE(review): `num_classes` is stored but the output width is fixed at 6
    through `fc_sizes`; an unrecognized `model_type` leaves `fc_input` and
    `fc_sizes` undefined and raises NameError further down in __init__.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Each stage is exactly 4 modules, so forward() can slice the
        # Sequential in groups of 4 and insert CBAM between stages.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch: 1-channel input, two conv+pool steps
        # (spatial /4, i.e. 224 -> 56), projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the output of index 2 (Dropout
        # after the first Linear+ReLU) as the feature vector for edge fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features): takes backbone features
        # concatenated with the 128-d edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for a batch.

        Args:
            x: RGB batch; spatial size must match the hard-coded classifier
               input (224x224 per the sizes in __init__).
            edge_x: optional 1-channel edge map; when given, its features are
                fused with the backbone features via combined_classifier.

        NOTE(review): for a `model_type` other than 'f'/'c'/'q',
        `attention_map` is never assigned and the final return raises
        UnboundLocalError.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three stages of 4 modules each, CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the larger 'q' variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; the dropout output is
        # the feature vector shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                # Best-effort: any edge-branch failure silently falls back to
                # the plain classifier head (`e` is unused).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layer shapes mirror a pretrained checkpoint: the channel branch is a
    bias-free 1x1-conv bottleneck MLP and the spatial branch is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # Global average / max descriptors, each run through the shared MLP.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        # Per-channel gate in (0, 1).
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel gate from channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with an optional edge branch.

    `model_type` selects the backbone size:
      - 'f': 3 stages (16/32/64 ch),       fc_sizes [256, 128, 6]
      - 'c': 3 stages (32/64/128 ch),      fc_sizes [512, 256, 6]
      - 'q': 4 stages (64/128/256/512 ch), fc_sizes [1024, 512, 6]

    The hard-coded flattened sizes assume 224x224 inputs:
    224 / 2**3 = 28 ('f'/'c'), 224 / 2**4 = 14 ('q'), 224 / 2**2 = 56 (edge).

    NOTE(review): `num_classes` is stored but the output width is fixed at 6
    through `fc_sizes`; an unrecognized `model_type` leaves `fc_input` and
    `fc_sizes` undefined and raises NameError further down in __init__.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # Each stage is exactly 4 modules, so forward() can slice the
        # Sequential in groups of 4 and insert CBAM between stages.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, 6]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, 6]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, 6]

        # Edge detection branch: 1-channel input, two conv+pool steps
        # (spatial /4, i.e. 224 -> 56), projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the output of index 2 (Dropout
        # after the first Linear+ReLU) as the feature vector for edge fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features): takes backbone features
        # concatenated with the 128-d edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for a batch.

        Args:
            x: RGB batch; spatial size must match the hard-coded classifier
               input (224x224 per the sizes in __init__).
            edge_x: optional 1-channel edge map; when given, its features are
                fused with the backbone features via combined_classifier.

        NOTE(review): for a `model_type` other than 'f'/'c'/'q',
        `attention_map` is never assigned and the final return raises
        UnboundLocalError.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            # Three stages of 4 modules each, CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the larger 'q' variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout; the dropout output is
        # the feature vector shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception as e:
                # Best-effort: any edge-branch failure silently falls back to
                # the plain classifier head (`e` is unused).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layer shapes mirror a pretrained checkpoint: the channel branch is a
    bias-free 1x1-conv bottleneck MLP and the spatial branch is a single
    bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
        reduced_channels = max(channels // reduction, 1)  # never collapse to 0
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS)
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output keeps x's shape."""
        # Global average / max descriptors, each run through the shared MLP.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)

        # Per-channel gate in (0, 1).
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att

        # Per-pixel gate from channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)

        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier wired to match the released checkpoint.

    Three backbone variants ('f', 'c', 'q') of stacked Conv+BN+ReLU+MaxPool
    stages, each followed by a CBAM attention module, plus an optional
    single-channel edge branch whose 128-d features are fused with the
    image features before classification. Attribute names and Sequential
    layouts mirror the checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch)
                or 'q' (4 stages, 64-512 ch).
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if model_type is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # fc_input sizes assume 224x224 input — TODO confirm against caller.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2^3 = 28
            # Was hard-coded [..., 6]; num_classes (default 6) keeps the
            # checkpoint layout while allowing other head sizes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28  # 224 / 2^3 = 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2^4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through here and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge map -> 56x56 — TODO confirm edge_x size with caller.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the
        # fc_sizes[0]-dim intermediate features can be reused for fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128 edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify x, optionally fusing features from an edge map.

        Args:
            x: (B, 3, H, W) image batch; H = W = 224 is implied by fc_input.
            edge_x: optional (B, 1, H, W) edge map for the fusion branch.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the final attention-weighted feature map.
        """
        # Each backbone stage is 4 Sequential entries (Conv, BN, ReLU, Pool);
        # run a stage, then its matching CBAM module.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear+ReLU+Dropout) yields the shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. edge_x
                # spatial size incompatible with edge_fc), fall back to the
                # image-only head instead of crashing inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (shared 1x1-conv MLP over global avg/max
    pooled statistics) followed by spatial attention (bias-free 7x7 conv
    over per-pixel channel statistics). Output has the same shape as the
    input. Attribute names and Sequential layouts are part of the
    checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as bias-free 1x1 convs (checkpoint layout);
        # hidden width is floored at 1 so tiny channel counts still work.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return x reweighted by channel then spatial attention."""
        # --- channel attention: shared MLP on global avg- and max-pooled stats
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels
        stats = torch.cat(
            (x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values),
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier wired to match the released checkpoint.

    Three backbone variants ('f', 'c', 'q') of stacked Conv+BN+ReLU+MaxPool
    stages, each followed by a CBAM attention module, plus an optional
    single-channel edge branch whose 128-d features are fused with the
    image features before classification. Attribute names and Sequential
    layouts mirror the checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch)
                or 'q' (4 stages, 64-512 ch).
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if model_type is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # fc_input sizes assume 224x224 input — TODO confirm against caller.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2^3 = 28
            # Was hard-coded [..., 6]; num_classes (default 6) keeps the
            # checkpoint layout while allowing other head sizes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28  # 224 / 2^3 = 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2^4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through here and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge map -> 56x56 — TODO confirm edge_x size with caller.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the
        # fc_sizes[0]-dim intermediate features can be reused for fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128 edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify x, optionally fusing features from an edge map.

        Args:
            x: (B, 3, H, W) image batch; H = W = 224 is implied by fc_input.
            edge_x: optional (B, 1, H, W) edge map for the fusion branch.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the final attention-weighted feature map.
        """
        # Each backbone stage is 4 Sequential entries (Conv, BN, ReLU, Pool);
        # run a stage, then its matching CBAM module.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear+ReLU+Dropout) yields the shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. edge_x
                # spatial size incompatible with edge_fc), fall back to the
                # image-only head instead of crashing inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (shared 1x1-conv MLP over global avg/max
    pooled statistics) followed by spatial attention (bias-free 7x7 conv
    over per-pixel channel statistics). Output has the same shape as the
    input. Attribute names and Sequential layouts are part of the
    checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as bias-free 1x1 convs (checkpoint layout);
        # hidden width is floored at 1 so tiny channel counts still work.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return x reweighted by channel then spatial attention."""
        # --- channel attention: shared MLP on global avg- and max-pooled stats
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels
        stats = torch.cat(
            (x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values),
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier wired to match the released checkpoint.

    Three backbone variants ('f', 'c', 'q') of stacked Conv+BN+ReLU+MaxPool
    stages, each followed by a CBAM attention module, plus an optional
    single-channel edge branch whose 128-d features are fused with the
    image features before classification. Attribute names and Sequential
    layouts mirror the checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch)
                or 'q' (4 stages, 64-512 ch).
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if model_type is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # fc_input sizes assume 224x224 input — TODO confirm against caller.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2^3 = 28
            # Was hard-coded [..., 6]; num_classes (default 6) keeps the
            # checkpoint layout while allowing other head sizes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28  # 224 / 2^3 = 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2^4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through here and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge map -> 56x56 — TODO confirm edge_x size with caller.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the
        # fc_sizes[0]-dim intermediate features can be reused for fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128 edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify x, optionally fusing features from an edge map.

        Args:
            x: (B, 3, H, W) image batch; H = W = 224 is implied by fc_input.
            edge_x: optional (B, 1, H, W) edge map for the fusion branch.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the final attention-weighted feature map.
        """
        # Each backbone stage is 4 Sequential entries (Conv, BN, ReLU, Pool);
        # run a stage, then its matching CBAM module.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear+ReLU+Dropout) yields the shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. edge_x
                # spatial size incompatible with edge_fc), fall back to the
                # image-only head instead of crashing inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (shared 1x1-conv MLP over global avg/max
    pooled statistics) followed by spatial attention (bias-free 7x7 conv
    over per-pixel channel statistics). Output has the same shape as the
    input. Attribute names and Sequential layouts are part of the
    checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as bias-free 1x1 convs (checkpoint layout);
        # hidden width is floored at 1 so tiny channel counts still work.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return x reweighted by channel then spatial attention."""
        # --- channel attention: shared MLP on global avg- and max-pooled stats
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels
        stats = torch.cat(
            (x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values),
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier wired to match the released checkpoint.

    Three backbone variants ('f', 'c', 'q') of stacked Conv+BN+ReLU+MaxPool
    stages, each followed by a CBAM attention module, plus an optional
    single-channel edge branch whose 128-d features are fused with the
    image features before classification. Attribute names and Sequential
    layouts mirror the checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch)
                or 'q' (4 stages, 64-512 ch).
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if model_type is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # fc_input sizes assume 224x224 input — TODO confirm against caller.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2^3 = 28
            # Was hard-coded [..., 6]; num_classes (default 6) keeps the
            # checkpoint layout while allowing other head sizes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28  # 224 / 2^3 = 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2^4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through here and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge map -> 56x56 — TODO confirm edge_x size with caller.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the
        # fc_sizes[0]-dim intermediate features can be reused for fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128 edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify x, optionally fusing features from an edge map.

        Args:
            x: (B, 3, H, W) image batch; H = W = 224 is implied by fc_input.
            edge_x: optional (B, 1, H, W) edge map for the fusion branch.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the final attention-weighted feature map.
        """
        # Each backbone stage is 4 Sequential entries (Conv, BN, ReLU, Pool);
        # run a stage, then its matching CBAM module.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear+ReLU+Dropout) yields the shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. edge_x
                # spatial size incompatible with edge_fc), fall back to the
                # image-only head instead of crashing inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Applies channel attention (shared 1x1-conv MLP over global avg/max
    pooled statistics) followed by spatial attention (bias-free 7x7 conv
    over per-pixel channel statistics). Output has the same shape as the
    input. Attribute names and Sequential layouts are part of the
    checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as bias-free 1x1 convs (checkpoint layout);
        # hidden width is floored at 1 so tiny channel counts still work.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return x reweighted by channel then spatial attention."""
        # --- channel attention: shared MLP on global avg- and max-pooled stats
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels
        stats = torch.cat(
            (x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values),
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(stats))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier wired to match the released checkpoint.

    Three backbone variants ('f', 'c', 'q') of stacked Conv+BN+ReLU+MaxPool
    stages, each followed by a CBAM attention module, plus an optional
    single-channel edge branch whose 128-d features are fused with the
    image features before classification. Attribute names and Sequential
    layouts mirror the checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch)
                or 'q' (4 stages, 64-512 ch).
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if model_type is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # fc_input sizes assume 224x224 input — TODO confirm against caller.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2^3 = 28
            # Was hard-coded [..., 6]; num_classes (default 6) keeps the
            # checkpoint layout while allowing other head sizes.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28  # 224 / 2^3 = 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2^4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original fell through here and later raised a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel edge map input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge map -> 56x56 — TODO confirm edge_x size with caller.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() splits it at index 3 so the
        # fc_sizes[0]-dim intermediate features can be reused for fusion.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128 edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify x, optionally fusing features from an edge map.

        Args:
            x: (B, 3, H, W) image batch; H = W = 224 is implied by fc_input.
            edge_x: optional (B, 1, H, W) edge map for the fusion branch.

        Returns:
            (logits, attention_map): logits is (B, num_classes);
            attention_map is the final attention-weighted feature map.
        """
        # Each backbone stage is 4 Sequential entries (Conv, BN, ReLU, Pool);
        # run a stage, then its matching CBAM module.
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] (Linear+ReLU+Dropout) yields the shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. edge_x
                # spatial size incompatible with edge_fc), fall back to the
                # image-only head instead of crashing inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention laid out to match the released checkpoint.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the per-pixel channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width; never collapses below one channel.
        hidden = max(channels // reduction, 1)

        # Shared MLP expressed as 1x1 convs, bias-free to match the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # 7x7 conv over the stacked [avg, max] channel maps, bias-free.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate each channel by pooled statistics. ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- Spatial attention: gate each location by cross-channel stats. ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    The backbone is a stack of Conv+BN+ReLU+MaxPool groups, each followed by a
    CBAM attention module, feeding a fully connected classifier.  An optional
    1-channel edge branch can be fused with the first-stage FC features via a
    separate combined classifier.  Module names and layer ordering are kept
    identical to the original so checkpoint state_dict keys still match.

    Parameters
    ----------
    model_type : str
        One of ``'f'``, ``'c'`` or ``'q'`` selecting the widths/depth below.
    num_classes : int
        Number of output classes (default 6, matching the checkpoint).

    Raises
    ------
    ValueError
        If ``model_type`` is not a supported variant.
    """

    # Per-variant layout: (channel progression incl. RGB input,
    #                      hidden FC widths, side length of the final map).
    # The 28 / 14 spatial sizes are consistent with a 224x224 input halved by
    # each MaxPool -- TODO confirm the expected input resolution.
    _CONFIGS = {
        'f': ((3, 16, 32, 64), (256, 128), 28),
        'c': ((3, 32, 64, 128), (512, 256), 28),
        'q': ((3, 64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast; previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"model_type must be one of {sorted(self._CONFIGS)}, got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_hidden, feat_side = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, built as a
        # flat nn.Sequential so state_dict keys match the original checkpoint.
        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        fc_input = channels[-1] * feat_side * feat_side

        # Edge detection branch.  edge_fc assumes a flattened 64*56*56 map,
        # i.e. a 224x224 edge image after two 2x2 pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two hidden FC stages plus the output layer.  The
        # output width now honours num_classes (it was hardcoded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Parameters
        ----------
        x : torch.Tensor
            3-channel input image batch.
        edge_x : torch.Tensor, optional
            Optional 1-channel edge map; when given (and correctly sized) the
            combined classifier is used instead of the plain one.

        Returns
        -------
        tuple of torch.Tensor
            ``(logits, attention_map)`` -- class logits and the feature map
            after the final attention stage.
        """
        # Each attention module follows its own 4-layer conv group; one loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields the feature vector
        # the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Size mismatch in the edge branch (edge input not the size
                # edge_fc assumes): keep the original best-effort behaviour
                # and fall back to the plain classifier.
                pass

        return self.classifier[3:](features), attention_map
### Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention laid out to match the released checkpoint.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the per-pixel channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width; never collapses below one channel.
        hidden = max(channels // reduction, 1)

        # Shared MLP expressed as 1x1 convs, bias-free to match the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # 7x7 conv over the stacked [avg, max] channel maps, bias-free.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate each channel by pooled statistics. ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- Spatial attention: gate each location by cross-channel stats. ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    The backbone is a stack of Conv+BN+ReLU+MaxPool groups, each followed by a
    CBAM attention module, feeding a fully connected classifier.  An optional
    1-channel edge branch can be fused with the first-stage FC features via a
    separate combined classifier.  Module names and layer ordering are kept
    identical to the original so checkpoint state_dict keys still match.

    Parameters
    ----------
    model_type : str
        One of ``'f'``, ``'c'`` or ``'q'`` selecting the widths/depth below.
    num_classes : int
        Number of output classes (default 6, matching the checkpoint).

    Raises
    ------
    ValueError
        If ``model_type`` is not a supported variant.
    """

    # Per-variant layout: (channel progression incl. RGB input,
    #                      hidden FC widths, side length of the final map).
    # The 28 / 14 spatial sizes are consistent with a 224x224 input halved by
    # each MaxPool -- TODO confirm the expected input resolution.
    _CONFIGS = {
        'f': ((3, 16, 32, 64), (256, 128), 28),
        'c': ((3, 32, 64, 128), (512, 256), 28),
        'q': ((3, 64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast; previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"model_type must be one of {sorted(self._CONFIGS)}, got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_hidden, feat_side = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, built as a
        # flat nn.Sequential so state_dict keys match the original checkpoint.
        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        fc_input = channels[-1] * feat_side * feat_side

        # Edge detection branch.  edge_fc assumes a flattened 64*56*56 map,
        # i.e. a 224x224 edge image after two 2x2 pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two hidden FC stages plus the output layer.  The
        # output width now honours num_classes (it was hardcoded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Parameters
        ----------
        x : torch.Tensor
            3-channel input image batch.
        edge_x : torch.Tensor, optional
            Optional 1-channel edge map; when given (and correctly sized) the
            combined classifier is used instead of the plain one.

        Returns
        -------
        tuple of torch.Tensor
            ``(logits, attention_map)`` -- class logits and the feature map
            after the final attention stage.
        """
        # Each attention module follows its own 4-layer conv group; one loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields the feature vector
        # the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Size mismatch in the edge branch (edge input not the size
                # edge_fc assumes): keep the original best-effort behaviour
                # and fall back to the plain classifier.
                pass

        return self.classifier[3:](features), attention_map
### Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention laid out to match the released checkpoint.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the per-pixel channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width; never collapses below one channel.
        hidden = max(channels // reduction, 1)

        # Shared MLP expressed as 1x1 convs, bias-free to match the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # 7x7 conv over the stacked [avg, max] channel maps, bias-free.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate each channel by pooled statistics. ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- Spatial attention: gate each location by cross-channel stats. ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    The backbone is a stack of Conv+BN+ReLU+MaxPool groups, each followed by a
    CBAM attention module, feeding a fully connected classifier.  An optional
    1-channel edge branch can be fused with the first-stage FC features via a
    separate combined classifier.  Module names and layer ordering are kept
    identical to the original so checkpoint state_dict keys still match.

    Parameters
    ----------
    model_type : str
        One of ``'f'``, ``'c'`` or ``'q'`` selecting the widths/depth below.
    num_classes : int
        Number of output classes (default 6, matching the checkpoint).

    Raises
    ------
    ValueError
        If ``model_type`` is not a supported variant.
    """

    # Per-variant layout: (channel progression incl. RGB input,
    #                      hidden FC widths, side length of the final map).
    # The 28 / 14 spatial sizes are consistent with a 224x224 input halved by
    # each MaxPool -- TODO confirm the expected input resolution.
    _CONFIGS = {
        'f': ((3, 16, 32, 64), (256, 128), 28),
        'c': ((3, 32, 64, 128), (512, 256), 28),
        'q': ((3, 64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast; previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"model_type must be one of {sorted(self._CONFIGS)}, got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_hidden, feat_side = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, built as a
        # flat nn.Sequential so state_dict keys match the original checkpoint.
        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        fc_input = channels[-1] * feat_side * feat_side

        # Edge detection branch.  edge_fc assumes a flattened 64*56*56 map,
        # i.e. a 224x224 edge image after two 2x2 pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two hidden FC stages plus the output layer.  The
        # output width now honours num_classes (it was hardcoded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Parameters
        ----------
        x : torch.Tensor
            3-channel input image batch.
        edge_x : torch.Tensor, optional
            Optional 1-channel edge map; when given (and correctly sized) the
            combined classifier is used instead of the plain one.

        Returns
        -------
        tuple of torch.Tensor
            ``(logits, attention_map)`` -- class logits and the feature map
            after the final attention stage.
        """
        # Each attention module follows its own 4-layer conv group; one loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields the feature vector
        # the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Size mismatch in the edge branch (edge input not the size
                # edge_fc assumes): keep the original best-effort behaviour
                # and fall back to the plain classifier.
                pass

        return self.classifier[3:](features), attention_map
### Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention laid out to match the released checkpoint.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the per-pixel channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width; never collapses below one channel.
        hidden = max(channels // reduction, 1)

        # Shared MLP expressed as 1x1 convs, bias-free to match the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # 7x7 conv over the stacked [avg, max] channel maps, bias-free.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate each channel by pooled statistics. ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- Spatial attention: gate each location by cross-channel stats. ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 image classifier.

    The backbone is a stack of Conv+BN+ReLU+MaxPool groups, each followed by a
    CBAM attention module, feeding a fully connected classifier.  An optional
    1-channel edge branch can be fused with the first-stage FC features via a
    separate combined classifier.  Module names and layer ordering are kept
    identical to the original so checkpoint state_dict keys still match.

    Parameters
    ----------
    model_type : str
        One of ``'f'``, ``'c'`` or ``'q'`` selecting the widths/depth below.
    num_classes : int
        Number of output classes (default 6, matching the checkpoint).

    Raises
    ------
    ValueError
        If ``model_type`` is not a supported variant.
    """

    # Per-variant layout: (channel progression incl. RGB input,
    #                      hidden FC widths, side length of the final map).
    # The 28 / 14 spatial sizes are consistent with a 224x224 input halved by
    # each MaxPool -- TODO confirm the expected input resolution.
    _CONFIGS = {
        'f': ((3, 16, 32, 64), (256, 128), 28),
        'c': ((3, 32, 64, 128), (512, 256), 28),
        'q': ((3, 64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        if model_type not in self._CONFIGS:
            # Fail fast; previously an unknown type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"model_type must be one of {sorted(self._CONFIGS)}, got {model_type!r}"
            )

        self.model_type = model_type
        self.num_classes = num_classes

        channels, fc_hidden, feat_side = self._CONFIGS[model_type]

        # Conv backbone: one Conv+BN+ReLU+Pool group per stage, built as a
        # flat nn.Sequential so state_dict keys match the original checkpoint.
        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM module after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        fc_input = channels[-1] * feat_side * feat_side

        # Edge detection branch.  edge_fc assumes a flattened 64*56*56 map,
        # i.e. a 224x224 edge image after two 2x2 pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier: two hidden FC stages plus the output layer.  The
        # output width now honours num_classes (it was hardcoded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )

        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Parameters
        ----------
        x : torch.Tensor
            3-channel input image batch.
        edge_x : torch.Tensor, optional
            Optional 1-channel edge map; when given (and correctly sized) the
            combined classifier is used instead of the plain one.

        Returns
        -------
        tuple of torch.Tensor
            ``(logits, attention_map)`` -- class logits and the feature map
            after the final attention stage.
        """
        # Each attention module follows its own 4-layer conv group; one loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear -> ReLU -> Dropout) yields the feature vector
        # the edge branch can be fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Size mismatch in the edge branch (edge input not the size
                # edge_fc assumes): keep the original best-effort behaviour
                # and fall back to the plain classifier.
                pass

        return self.classifier[3:](features), attention_map
### Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention laid out to match the released checkpoint.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the per-pixel channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width; never collapses below one channel.
        hidden = max(channels // reduction, 1)

        # Shared MLP expressed as 1x1 convs, bias-free to match the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )

        # 7x7 conv over the stacked [avg, max] channel maps, bias-free.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: gate each channel by pooled statistics. ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- Spatial attention: gate each location by cross-channel stats. ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layer-for-layer compatible with its checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage, followed by a 3-layer MLP head.
    An optional edge-image branch can be fused with intermediate features.

    Args:
        model_type: 'f', 'c' or 'q' — selects backbone width/depth.
        num_classes: output dimension of the final layer (default 6 matches
            the original checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: 4 modules (Conv+BN+ReLU+Pool) per stage so forward()
        # can slice the Sequential stage-by-stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 3 pools -> 28x28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 4 pools: 224 -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # a NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size: assumes 224x224 edge input (two pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; indices [0:3] produce features, [3:] the logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier head.

        Args:
            x: RGB image batch; spatial size must match fc_input (224x224).
            edge_x: optional single-channel edge image batch (224x224).

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attended feature map (useful for visualization).
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) -> shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention whose module layout matches the checkpoint.

    Channel gate: shared bias-free 1x1-conv MLP over global avg/max pooled
    descriptors.  Spatial gate: one bias-free 7x7 conv over the per-pixel
    [mean, max] channel summary.  Attribute names and Sequential indices are
    kept exactly as in the checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel attention (1x1 convs, NO BIAS — checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS — checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        # Shared MLP applied to both global descriptors, then fused.
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        # Two-channel summary map: per-pixel mean and max across channels.
        mean_map = feats.mean(dim=1, keepdim=True)
        max_map = feats.max(dim=1, keepdim=True).values
        stacked = torch.cat((mean_map, max_map), dim=1)
        return torch.sigmoid(self.spatial_attention(stacked))

    def forward(self, x):
        x = x * self._channel_gate(x)
        x = x * self._spatial_gate(x)
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layer-for-layer compatible with its checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage, followed by a 3-layer MLP head.
    An optional edge-image branch can be fused with intermediate features.

    Args:
        model_type: 'f', 'c' or 'q' — selects backbone width/depth.
        num_classes: output dimension of the final layer (default 6 matches
            the original checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: 4 modules (Conv+BN+ReLU+Pool) per stage so forward()
        # can slice the Sequential stage-by-stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 3 pools -> 28x28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 4 pools: 224 -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # a NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size: assumes 224x224 edge input (two pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; indices [0:3] produce features, [3:] the logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier head.

        Args:
            x: RGB image batch; spatial size must match fc_input (224x224).
            edge_x: optional single-channel edge image batch (224x224).

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attended feature map (useful for visualization).
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) -> shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention whose module layout matches the checkpoint.

    Channel gate: shared bias-free 1x1-conv MLP over global avg/max pooled
    descriptors.  Spatial gate: one bias-free 7x7 conv over the per-pixel
    [mean, max] channel summary.  Attribute names and Sequential indices are
    kept exactly as in the checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel attention (1x1 convs, NO BIAS — checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS — checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        # Shared MLP applied to both global descriptors, then fused.
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        # Two-channel summary map: per-pixel mean and max across channels.
        mean_map = feats.mean(dim=1, keepdim=True)
        max_map = feats.max(dim=1, keepdim=True).values
        stacked = torch.cat((mean_map, max_map), dim=1)
        return torch.sigmoid(self.spatial_attention(stacked))

    def forward(self, x):
        x = x * self._channel_gate(x)
        x = x * self._spatial_gate(x)
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layer-for-layer compatible with its checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage, followed by a 3-layer MLP head.
    An optional edge-image branch can be fused with intermediate features.

    Args:
        model_type: 'f', 'c' or 'q' — selects backbone width/depth.
        num_classes: output dimension of the final layer (default 6 matches
            the original checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: 4 modules (Conv+BN+ReLU+Pool) per stage so forward()
        # can slice the Sequential stage-by-stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 3 pools -> 28x28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 4 pools: 224 -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # a NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size: assumes 224x224 edge input (two pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; indices [0:3] produce features, [3:] the logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier head.

        Args:
            x: RGB image batch; spatial size must match fc_input (224x224).
            edge_x: optional single-channel edge image batch (224x224).

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attended feature map (useful for visualization).
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) -> shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention whose module layout matches the checkpoint.

    Channel gate: shared bias-free 1x1-conv MLP over global avg/max pooled
    descriptors.  Spatial gate: one bias-free 7x7 conv over the per-pixel
    [mean, max] channel summary.  Attribute names and Sequential indices are
    kept exactly as in the checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel attention (1x1 convs, NO BIAS — checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS — checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        # Shared MLP applied to both global descriptors, then fused.
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        # Two-channel summary map: per-pixel mean and max across channels.
        mean_map = feats.mean(dim=1, keepdim=True)
        max_map = feats.max(dim=1, keepdim=True).values
        stacked = torch.cat((mean_map, max_map), dim=1)
        return torch.sigmoid(self.spatial_attention(stacked))

    def forward(self, x):
        x = x * self._channel_gate(x)
        x = x * self._spatial_gate(x)
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layer-for-layer compatible with its checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage, followed by a 3-layer MLP head.
    An optional edge-image branch can be fused with intermediate features.

    Args:
        model_type: 'f', 'c' or 'q' — selects backbone width/depth.
        num_classes: output dimension of the final layer (default 6 matches
            the original checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: 4 modules (Conv+BN+ReLU+Pool) per stage so forward()
        # can slice the Sequential stage-by-stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 3 pools -> 28x28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 4 pools: 224 -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # a NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size: assumes 224x224 edge input (two pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; indices [0:3] produce features, [3:] the logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier head.

        Args:
            x: RGB image batch; spatial size must match fc_input (224x224).
            edge_x: optional single-channel edge image batch (224x224).

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attended feature map (useful for visualization).
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) -> shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention whose module layout matches the checkpoint.

    Channel gate: shared bias-free 1x1-conv MLP over global avg/max pooled
    descriptors.  Spatial gate: one bias-free 7x7 conv over the per-pixel
    [mean, max] channel summary.  Attribute names and Sequential indices are
    kept exactly as in the checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Channel attention (1x1 convs, NO BIAS — checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS — checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        # Shared MLP applied to both global descriptors, then fused.
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        # Two-channel summary map: per-pixel mean and max across channels.
        mean_map = feats.mean(dim=1, keepdim=True)
        max_map = feats.max(dim=1, keepdim=True).values
        stacked = torch.cat((mean_map, max_map), dim=1)
        return torch.sigmoid(self.spatial_attention(stacked))

    def forward(self, x):
        x = x * self._channel_gate(x)
        x = x * self._spatial_gate(x)
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, layer-for-layer compatible with its checkpoint.

    Three backbone variants share a Conv+BN+ReLU+MaxPool stage pattern with a
    CBAM attention module after every stage, followed by a 3-layer MLP head.
    An optional edge-image branch can be fused with intermediate features.

    Args:
        model_type: 'f', 'c' or 'q' — selects backbone width/depth.
        num_classes: output dimension of the final layer (default 6 matches
            the original checkpoint; previously this argument was ignored).

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: 4 modules (Conv+BN+ReLU+Pool) per stage so forward()
        # can slice the Sequential stage-by-stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 3 pools -> 28x28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 4 pools: 224 -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Previously an unknown type fell through and crashed later with
            # a NameError on fc_input; fail fast with a clear message.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size: assumes 224x224 edge input (two pools -> 56x56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; indices [0:3] produce features, [3:] the logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier: main features (fc_sizes[0]) + 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier head.

        Args:
            x: RGB image batch; spatial size must match fc_input (224x224).
            edge_x: optional single-channel edge image batch (224x224).

        Returns:
            Tuple of (logits, attention_map), where attention_map is the
            final attended feature map (useful for visualization).
        """
        if self.model_type in ('f', 'c'):
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear+ReLU+Dropout) -> shared features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python (PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over the channel-wise mean/max maps). All convolutions are
    bias-free so the weights line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, realized as 1x1 convs
        # (NO BIAS, as in the checkpoint); reduction is clamped so the
        # bottleneck never drops below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return *x* reweighted by channel, then spatial, attention."""
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg)
            + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier in the Vbai-DPA 2.4 checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention block
    after every stage, a fully connected head, and an optional
    single-channel edge-image branch whose features are fused into a
    separate combined head.

    model_type selects capacity:
        'f' -- 3 stages, 16/32/64 channels    (fastest)
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels (largest)

    The hard-coded flatten sizes assume 224x224 RGB input ('f'/'c' end
    at 28x28 after 3 pools, 'q' at 14x14 after 4 pools) and a 224x224
    edge image (56x56 after 2 pools) -- TODO confirm against the
    released preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes. Fix: this argument
                used to be stored but ignored (both heads were
                hard-coded to 6 outputs); it is now honored. The
                default of 6 keeps checkpoint compatibility.

        Raises:
            ValueError: if model_type is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # forward() slices this Sequential in groups of 4 modules, so
        # the strict 4-modules-per-stage layout must be preserved.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown type previously surfaced later as a
            # confusing NameError on fc_input (or an UnboundLocalError
            # in forward()).
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge image leave 64 x 56 x 56 activations.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] yield
        # the fusion features, [3:] form the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify *x*, optionally fusing features from *edge_x*.

        Args:
            x: RGB batch, (N, 3, 224, 224).
            edge_x: optional edge-image batch, (N, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- __init__ guarantees model_type is valid here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout: the shared
        # feature stage reused by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails
                # (e.g. unexpected edge-image size), fall back to the
                # image-only head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over the channel-wise mean/max maps). All convolutions are
    bias-free so the weights line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, realized as 1x1 convs
        # (NO BIAS, as in the checkpoint); reduction is clamped so the
        # bottleneck never drops below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return *x* reweighted by channel, then spatial, attention."""
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg)
            + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier in the Vbai-DPA 2.4 checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention block
    after every stage, a fully connected head, and an optional
    single-channel edge-image branch whose features are fused into a
    separate combined head.

    model_type selects capacity:
        'f' -- 3 stages, 16/32/64 channels    (fastest)
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels (largest)

    The hard-coded flatten sizes assume 224x224 RGB input ('f'/'c' end
    at 28x28 after 3 pools, 'q' at 14x14 after 4 pools) and a 224x224
    edge image (56x56 after 2 pools) -- TODO confirm against the
    released preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes. Fix: this argument
                used to be stored but ignored (both heads were
                hard-coded to 6 outputs); it is now honored. The
                default of 6 keeps checkpoint compatibility.

        Raises:
            ValueError: if model_type is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # forward() slices this Sequential in groups of 4 modules, so
        # the strict 4-modules-per-stage layout must be preserved.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown type previously surfaced later as a
            # confusing NameError on fc_input (or an UnboundLocalError
            # in forward()).
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge image leave 64 x 56 x 56 activations.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] yield
        # the fusion features, [3:] form the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify *x*, optionally fusing features from *edge_x*.

        Args:
            x: RGB batch, (N, 3, 224, 224).
            edge_x: optional edge-image batch, (N, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- __init__ guarantees model_type is valid here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout: the shared
        # feature stage reused by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails
                # (e.g. unexpected edge-image size), fall back to the
                # image-only head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over the channel-wise mean/max maps). All convolutions are
    bias-free so the weights line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, realized as 1x1 convs
        # (NO BIAS, as in the checkpoint); reduction is clamped so the
        # bottleneck never drops below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return *x* reweighted by channel, then spatial, attention."""
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg)
            + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier in the Vbai-DPA 2.4 checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention block
    after every stage, a fully connected head, and an optional
    single-channel edge-image branch whose features are fused into a
    separate combined head.

    model_type selects capacity:
        'f' -- 3 stages, 16/32/64 channels    (fastest)
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels (largest)

    The hard-coded flatten sizes assume 224x224 RGB input ('f'/'c' end
    at 28x28 after 3 pools, 'q' at 14x14 after 4 pools) and a 224x224
    edge image (56x56 after 2 pools) -- TODO confirm against the
    released preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes. Fix: this argument
                used to be stored but ignored (both heads were
                hard-coded to 6 outputs); it is now honored. The
                default of 6 keeps checkpoint compatibility.

        Raises:
            ValueError: if model_type is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # forward() slices this Sequential in groups of 4 modules, so
        # the strict 4-modules-per-stage layout must be preserved.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown type previously surfaced later as a
            # confusing NameError on fc_input (or an UnboundLocalError
            # in forward()).
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge image leave 64 x 56 x 56 activations.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] yield
        # the fusion features, [3:] form the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify *x*, optionally fusing features from *edge_x*.

        Args:
            x: RGB batch, (N, 3, 224, 224).
            edge_x: optional edge-image batch, (N, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- __init__ guarantees model_type is valid here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout: the shared
        # feature stage reused by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails
                # (e.g. unexpected edge-image size), fall back to the
                # image-only head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over the channel-wise mean/max maps). All convolutions are
    bias-free so the weights line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, realized as 1x1 convs
        # (NO BIAS, as in the checkpoint); reduction is clamped so the
        # bottleneck never drops below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return *x* reweighted by channel, then spatial, attention."""
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg)
            + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier in the Vbai-DPA 2.4 checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention block
    after every stage, a fully connected head, and an optional
    single-channel edge-image branch whose features are fused into a
    separate combined head.

    model_type selects capacity:
        'f' -- 3 stages, 16/32/64 channels    (fastest)
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels (largest)

    The hard-coded flatten sizes assume 224x224 RGB input ('f'/'c' end
    at 28x28 after 3 pools, 'q' at 14x14 after 4 pools) and a 224x224
    edge image (56x56 after 2 pools) -- TODO confirm against the
    released preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: number of output classes. Fix: this argument
                used to be stored but ignored (both heads were
                hard-coded to 6 outputs); it is now honored. The
                default of 6 keeps checkpoint compatibility.

        Raises:
            ValueError: if model_type is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        # forward() slices this Sequential in groups of 4 modules, so
        # the strict 4-modules-per-stage layout must be preserved.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: an unknown type previously surfaced later as a
            # confusing NameError on fc_input (or an UnboundLocalError
            # in forward()).
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (expects a 1-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: two 2x2 pools on a
        # 224x224 edge image leave 64 x 56 x 56 activations.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] yield
        # the fusion features, [3:] form the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify *x*, optionally fusing features from *edge_x*.

        Args:
            x: RGB batch, (N, 3, 224, 224).
            edge_x: optional edge-image batch, (N, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' -- __init__ guarantees model_type is valid here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout: the shared
        # feature stage reused by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails
                # (e.g. unexpected edge-image size), fall back to the
                # image-only head instead of crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
# --- Model code example (Python / PyTorch) ---
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over the channel-wise mean/max maps). All convolutions are
    bias-free so the weights line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP for channel attention, realized as 1x1 convs
        # (NO BIAS, as in the checkpoint); reduction is clamped so the
        # bottleneck never drops below one channel.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: single bias-free 7x7 conv over 2 maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return *x* reweighted by channel, then spatial, attention."""
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg)
            + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with per-stage CBAM attention and an optional edge branch.

    ``model_type`` selects the capacity:
      * ``'f'``: 3 conv stages (16 -> 32 -> 64 ch),       head [256, 128]
      * ``'c'``: 3 conv stages (32 -> 64 -> 128 ch),      head [512, 256]
      * ``'q'``: 4 conv stages (64 -> 128 -> 256 -> 512), head [1024, 512]

    The flattened sizes (``fc_input``, ``edge_fc`` input) assume 224x224
    inputs: 224 is halved once per MaxPool stage (28x28 after 3 stages,
    14x14 after 4; the edge branch pools twice -> 56x56).

    Fixes vs. the original listing:
      * ``num_classes`` is now honoured by both heads (it was previously
        ignored and 6 was hard-coded; the default preserves old behaviour).
      * an unknown ``model_type`` raises ``ValueError`` in ``__init__``
        instead of failing later with ``UnboundLocalError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+Pool groups of 4 modules each,
        # so forward() can slice the Sequential in strides of 4.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')")

        # Edge-detection branch: two conv/pool stages on a 1-channel edge map,
        # projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head. forward() splits it at index 3: modules [0:3] produce the
        # intermediate ``features`` shared with the combined head; [3:] finish
        # classification when no edge features are used.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (features ++ 128-d edge).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Parameters
        ----------
        x : Tensor
            RGB batch; the head sizes assume 224x224 spatial input.
        edge_x : Tensor, optional
            1-channel edge-map batch; when given (and correctly sized) the
            combined head is used, otherwise the main head.
        """
        # Backbone: each group of 4 Sequential modules (Conv+BN+ReLU+Pool)
        # is followed by the matching CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # attended feature map of the last stage

        x = x.view(x.size(0), -1)

        # classifier[0:3] -> intermediate features (fc_sizes[0]-dim).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a mis-sized edge map falls back to the main
                # head instead of failing the whole forward pass.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    Layer layout (bias-free 1x1 convs for the channel MLP, one bias-free
    7x7 conv for the spatial map) mirrors the checkpoint being loaded.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP (expressed as 1x1 convs) applied to both pooled vectors.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single 7x7 conv over the [avg, max] channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on avg- and max-pooled maps ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        out = x * gate

        # --- spatial attention: 7x7 conv over per-pixel [mean, max] stats ---
        stats = torch.cat(
            [out.mean(dim=1, keepdim=True), out.max(dim=1, keepdim=True).values],
            dim=1,
        )
        out = out * torch.sigmoid(self.spatial_attention(stats))

        return out


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with per-stage CBAM attention and an optional edge branch.

    ``model_type`` selects the capacity:
      * ``'f'``: 3 conv stages (16 -> 32 -> 64 ch),       head [256, 128]
      * ``'c'``: 3 conv stages (32 -> 64 -> 128 ch),      head [512, 256]
      * ``'q'``: 4 conv stages (64 -> 128 -> 256 -> 512), head [1024, 512]

    The flattened sizes (``fc_input``, ``edge_fc`` input) assume 224x224
    inputs: 224 is halved once per MaxPool stage (28x28 after 3 stages,
    14x14 after 4; the edge branch pools twice -> 56x56).

    Fixes vs. the original listing:
      * ``num_classes`` is now honoured by both heads (it was previously
        ignored and 6 was hard-coded; the default preserves old behaviour).
      * an unknown ``model_type`` raises ``ValueError`` in ``__init__``
        instead of failing later with ``UnboundLocalError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+Pool groups of 4 modules each,
        # so forward() can slice the Sequential in strides of 4.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')")

        # Edge-detection branch: two conv/pool stages on a 1-channel edge map,
        # projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head. forward() splits it at index 3: modules [0:3] produce the
        # intermediate ``features`` shared with the combined head; [3:] finish
        # classification when no edge features are used.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (features ++ 128-d edge).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Parameters
        ----------
        x : Tensor
            RGB batch; the head sizes assume 224x224 spatial input.
        edge_x : Tensor, optional
            1-channel edge-map batch; when given (and correctly sized) the
            combined head is used, otherwise the main head.
        """
        # Backbone: each group of 4 Sequential modules (Conv+BN+ReLU+Pool)
        # is followed by the matching CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # attended feature map of the last stage

        x = x.view(x.size(0), -1)

        # classifier[0:3] -> intermediate features (fc_sizes[0]-dim).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a mis-sized edge map falls back to the main
                # head instead of failing the whole forward pass.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    Layer layout (bias-free 1x1 convs for the channel MLP, one bias-free
    7x7 conv for the spatial map) mirrors the checkpoint being loaded.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP (expressed as 1x1 convs) applied to both pooled vectors.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single 7x7 conv over the [avg, max] channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on avg- and max-pooled maps ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        out = x * gate

        # --- spatial attention: 7x7 conv over per-pixel [mean, max] stats ---
        stats = torch.cat(
            [out.mean(dim=1, keepdim=True), out.max(dim=1, keepdim=True).values],
            dim=1,
        )
        out = out * torch.sigmoid(self.spatial_attention(stats))

        return out


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with per-stage CBAM attention and an optional edge branch.

    ``model_type`` selects the capacity:
      * ``'f'``: 3 conv stages (16 -> 32 -> 64 ch),       head [256, 128]
      * ``'c'``: 3 conv stages (32 -> 64 -> 128 ch),      head [512, 256]
      * ``'q'``: 4 conv stages (64 -> 128 -> 256 -> 512), head [1024, 512]

    The flattened sizes (``fc_input``, ``edge_fc`` input) assume 224x224
    inputs: 224 is halved once per MaxPool stage (28x28 after 3 stages,
    14x14 after 4; the edge branch pools twice -> 56x56).

    Fixes vs. the original listing:
      * ``num_classes`` is now honoured by both heads (it was previously
        ignored and 6 was hard-coded; the default preserves old behaviour).
      * an unknown ``model_type`` raises ``ValueError`` in ``__init__``
        instead of failing later with ``UnboundLocalError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+Pool groups of 4 modules each,
        # so forward() can slice the Sequential in strides of 4.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')")

        # Edge-detection branch: two conv/pool stages on a 1-channel edge map,
        # projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head. forward() splits it at index 3: modules [0:3] produce the
        # intermediate ``features`` shared with the combined head; [3:] finish
        # classification when no edge features are used.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (features ++ 128-d edge).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Parameters
        ----------
        x : Tensor
            RGB batch; the head sizes assume 224x224 spatial input.
        edge_x : Tensor, optional
            1-channel edge-map batch; when given (and correctly sized) the
            combined head is used, otherwise the main head.
        """
        # Backbone: each group of 4 Sequential modules (Conv+BN+ReLU+Pool)
        # is followed by the matching CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # attended feature map of the last stage

        x = x.view(x.size(0), -1)

        # classifier[0:3] -> intermediate features (fc_sizes[0]-dim).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a mis-sized edge map falls back to the main
                # head instead of failing the whole forward pass.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    Layer layout (bias-free 1x1 convs for the channel MLP, one bias-free
    7x7 conv for the spatial map) mirrors the checkpoint being loaded.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP (expressed as 1x1 convs) applied to both pooled vectors.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single 7x7 conv over the [avg, max] channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on avg- and max-pooled maps ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        out = x * gate

        # --- spatial attention: 7x7 conv over per-pixel [mean, max] stats ---
        stats = torch.cat(
            [out.mean(dim=1, keepdim=True), out.max(dim=1, keepdim=True).values],
            dim=1,
        )
        out = out * torch.sigmoid(self.spatial_attention(stats))

        return out


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with per-stage CBAM attention and an optional edge branch.

    ``model_type`` selects the capacity:
      * ``'f'``: 3 conv stages (16 -> 32 -> 64 ch),       head [256, 128]
      * ``'c'``: 3 conv stages (32 -> 64 -> 128 ch),      head [512, 256]
      * ``'q'``: 4 conv stages (64 -> 128 -> 256 -> 512), head [1024, 512]

    The flattened sizes (``fc_input``, ``edge_fc`` input) assume 224x224
    inputs: 224 is halved once per MaxPool stage (28x28 after 3 stages,
    14x14 after 4; the edge branch pools twice -> 56x56).

    Fixes vs. the original listing:
      * ``num_classes`` is now honoured by both heads (it was previously
        ignored and 6 was hard-coded; the default preserves old behaviour).
      * an unknown ``model_type`` raises ``ValueError`` in ``__init__``
        instead of failing later with ``UnboundLocalError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+Pool groups of 4 modules each,
        # so forward() can slice the Sequential in strides of 4.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')")

        # Edge-detection branch: two conv/pool stages on a 1-channel edge map,
        # projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head. forward() splits it at index 3: modules [0:3] produce the
        # intermediate ``features`` shared with the combined head; [3:] finish
        # classification when no edge features are used.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (features ++ 128-d edge).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Parameters
        ----------
        x : Tensor
            RGB batch; the head sizes assume 224x224 spatial input.
        edge_x : Tensor, optional
            1-channel edge-map batch; when given (and correctly sized) the
            combined head is used, otherwise the main head.
        """
        # Backbone: each group of 4 Sequential modules (Conv+BN+ReLU+Pool)
        # is followed by the matching CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # attended feature map of the last stage

        x = x.view(x.size(0), -1)

        # classifier[0:3] -> intermediate features (fc_sizes[0]-dim).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a mis-sized edge map falls back to the main
                # head instead of failing the whole forward pass.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    Layer layout (bias-free 1x1 convs for the channel MLP, one bias-free
    7x7 conv for the spatial map) mirrors the checkpoint being loaded.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        hidden = max(channels // reduction, 1)
        # Shared MLP (expressed as 1x1 convs) applied to both pooled vectors.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Single 7x7 conv over the [avg, max] channel statistics.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on avg- and max-pooled maps ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        out = x * gate

        # --- spatial attention: 7x7 conv over per-pixel [mean, max] stats ---
        stats = torch.cat(
            [out.mean(dim=1, keepdim=True), out.max(dim=1, keepdim=True).values],
            dim=1,
        )
        out = out * torch.sigmoid(self.spatial_attention(stats))

        return out


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier with per-stage CBAM attention and an optional edge branch.

    ``model_type`` selects the capacity:
      * ``'f'``: 3 conv stages (16 -> 32 -> 64 ch),       head [256, 128]
      * ``'c'``: 3 conv stages (32 -> 64 -> 128 ch),      head [512, 256]
      * ``'q'``: 4 conv stages (64 -> 128 -> 256 -> 512), head [1024, 512]

    The flattened sizes (``fc_input``, ``edge_fc`` input) assume 224x224
    inputs: 224 is halved once per MaxPool stage (28x28 after 3 stages,
    14x14 after 4; the edge branch pools twice -> 56x56).

    Fixes vs. the original listing:
      * ``num_classes`` is now honoured by both heads (it was previously
        ignored and 6 was hard-coded; the default preserves old behaviour).
      * an unknown ``model_type`` raises ``ValueError`` in ``__init__``
        instead of failing later with ``UnboundLocalError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv backbone: repeated Conv+BN+ReLU+Pool groups of 4 modules each,
        # so forward() can slice the Sequential in strides of 4.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')")

        # Edge-detection branch: two conv/pool stages on a 1-channel edge map,
        # projected to a 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head. forward() splits it at index 3: modules [0:3] produce the
        # intermediate ``features`` shared with the combined head; [3:] finish
        # classification when no edge features are used.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are available (features ++ 128-d edge).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Parameters
        ----------
        x : Tensor
            RGB batch; the head sizes assume 224x224 spatial input.
        edge_x : Tensor, optional
            1-channel edge-map batch; when given (and correctly sized) the
            combined head is used, otherwise the main head.
        """
        # Backbone: each group of 4 Sequential modules (Conv+BN+ReLU+Pool)
        # is followed by the matching CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # attended feature map of the last stage

        x = x.view(x.size(0), -1)

        # classifier[0:3] -> intermediate features (fc_sizes[0]-dim).
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a mis-sized edge map falls back to the main
                # head instead of failing the whole forward pass.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention laid out to match the released
    checkpoint: a 1x1-conv channel MLP and a single 7x7 spatial conv, both
    bias-free.

    NOTE: the submodule names ``channel_attention`` / ``spatial_attention``
    are part of the checkpoint's state_dict keys — keep them as-is.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze to channels // reduction (at least 1),
        # then expand back.  Conv2d 1x1, no bias, exactly as checkpointed.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the 2-channel
        # [mean, max] map ("same" padding of 3).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP applied to global avg- and max-pooled maps.
        gate_logits = (
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial gate: 7x7 conv over per-pixel [mean, max] across channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat([mean_map, max_map], dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load the released checkpoint.

    Variants (selected via ``model_type``):
      * ``'f'``: 3 conv stages (16/32/64 ch),      fc 256->128->num_classes
      * ``'c'``: 3 conv stages (32/64/128 ch),     fc 512->256->num_classes
      * ``'q'``: 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    The fully-connected input sizes assume 224x224 RGB input.  An optional
    single-channel edge map may be passed to ``forward`` to use the fused
    image+edge classifier head.

    NOTE: attribute names and module ordering are part of the checkpoint's
    state_dict layout — do not rename or reorder them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of ``'f'``, ``'c'``, ``'q'``.
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: previously an unknown model_type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two 2x2 max-pools applied in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier (image features only)
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features fused with 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run inference.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given (and processable) the fused image+edge head is used.

        Returns:
            Tuple ``(logits, attention_map)`` where ``logits`` has shape
            (B, num_classes) and ``attention_map`` is the feature map after
            the final attention stage.

        Raises:
            ValueError: if ``self.model_type`` is not 'f', 'c' or 'q'.
        """
        if self.model_type in ('f', 'c'):
            # Three stages: slice out each Conv+BN+ReLU+Pool quartet, then
            # apply the matching CBAM module.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the highest-capacity variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Guard against an UnboundLocalError on attention_map below.
            raise ValueError(
                f"unknown model_type {self.model_type!r}; expected 'f', 'c' or 'q'"
            )

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch feeding edge_fc), fall back to the image-only head
                # instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch code example
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention laid out to match the released
    checkpoint: a 1x1-conv channel MLP and a single 7x7 spatial conv, both
    bias-free.

    NOTE: the submodule names ``channel_attention`` / ``spatial_attention``
    are part of the checkpoint's state_dict keys — keep them as-is.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze to channels // reduction (at least 1),
        # then expand back.  Conv2d 1x1, no bias, exactly as checkpointed.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the 2-channel
        # [mean, max] map ("same" padding of 3).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP applied to global avg- and max-pooled maps.
        gate_logits = (
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial gate: 7x7 conv over per-pixel [mean, max] across channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat([mean_map, max_map], dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load the released checkpoint.

    Variants (selected via ``model_type``):
      * ``'f'``: 3 conv stages (16/32/64 ch),      fc 256->128->num_classes
      * ``'c'``: 3 conv stages (32/64/128 ch),     fc 512->256->num_classes
      * ``'q'``: 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    The fully-connected input sizes assume 224x224 RGB input.  An optional
    single-channel edge map may be passed to ``forward`` to use the fused
    image+edge classifier head.

    NOTE: attribute names and module ordering are part of the checkpoint's
    state_dict layout — do not rename or reorder them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of ``'f'``, ``'c'``, ``'q'``.
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: previously an unknown model_type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two 2x2 max-pools applied in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier (image features only)
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features fused with 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run inference.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given (and processable) the fused image+edge head is used.

        Returns:
            Tuple ``(logits, attention_map)`` where ``logits`` has shape
            (B, num_classes) and ``attention_map`` is the feature map after
            the final attention stage.

        Raises:
            ValueError: if ``self.model_type`` is not 'f', 'c' or 'q'.
        """
        if self.model_type in ('f', 'c'):
            # Three stages: slice out each Conv+BN+ReLU+Pool quartet, then
            # apply the matching CBAM module.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the highest-capacity variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Guard against an UnboundLocalError on attention_map below.
            raise ValueError(
                f"unknown model_type {self.model_type!r}; expected 'f', 'c' or 'q'"
            )

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch feeding edge_fc), fall back to the image-only head
                # instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch code example
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention laid out to match the released
    checkpoint: a 1x1-conv channel MLP and a single 7x7 spatial conv, both
    bias-free.

    NOTE: the submodule names ``channel_attention`` / ``spatial_attention``
    are part of the checkpoint's state_dict keys — keep them as-is.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze to channels // reduction (at least 1),
        # then expand back.  Conv2d 1x1, no bias, exactly as checkpointed.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the 2-channel
        # [mean, max] map ("same" padding of 3).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP applied to global avg- and max-pooled maps.
        gate_logits = (
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial gate: 7x7 conv over per-pixel [mean, max] across channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat([mean_map, max_map], dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load the released checkpoint.

    Variants (selected via ``model_type``):
      * ``'f'``: 3 conv stages (16/32/64 ch),      fc 256->128->num_classes
      * ``'c'``: 3 conv stages (32/64/128 ch),     fc 512->256->num_classes
      * ``'q'``: 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    The fully-connected input sizes assume 224x224 RGB input.  An optional
    single-channel edge map may be passed to ``forward`` to use the fused
    image+edge classifier head.

    NOTE: attribute names and module ordering are part of the checkpoint's
    state_dict layout — do not rename or reorder them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of ``'f'``, ``'c'``, ``'q'``.
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: previously an unknown model_type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two 2x2 max-pools applied in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier (image features only)
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features fused with 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run inference.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given (and processable) the fused image+edge head is used.

        Returns:
            Tuple ``(logits, attention_map)`` where ``logits`` has shape
            (B, num_classes) and ``attention_map`` is the feature map after
            the final attention stage.

        Raises:
            ValueError: if ``self.model_type`` is not 'f', 'c' or 'q'.
        """
        if self.model_type in ('f', 'c'):
            # Three stages: slice out each Conv+BN+ReLU+Pool quartet, then
            # apply the matching CBAM module.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the highest-capacity variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Guard against an UnboundLocalError on attention_map below.
            raise ValueError(
                f"unknown model_type {self.model_type!r}; expected 'f', 'c' or 'q'"
            )

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch feeding edge_fc), fall back to the image-only head
                # instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch code example
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention laid out to match the released
    checkpoint: a 1x1-conv channel MLP and a single 7x7 spatial conv, both
    bias-free.

    NOTE: the submodule names ``channel_attention`` / ``spatial_attention``
    are part of the checkpoint's state_dict keys — keep them as-is.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze to channels // reduction (at least 1),
        # then expand back.  Conv2d 1x1, no bias, exactly as checkpointed.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the 2-channel
        # [mean, max] map ("same" padding of 3).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP applied to global avg- and max-pooled maps.
        gate_logits = (
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial gate: 7x7 conv over per-pixel [mean, max] across channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat([mean_map, max_map], dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, reconstructed to load the released checkpoint.

    Variants (selected via ``model_type``):
      * ``'f'``: 3 conv stages (16/32/64 ch),      fc 256->128->num_classes
      * ``'c'``: 3 conv stages (32/64/128 ch),     fc 512->256->num_classes
      * ``'q'``: 4 conv stages (64/128/256/512 ch), fc 1024->512->num_classes

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    The fully-connected input sizes assume 224x224 RGB input.  An optional
    single-channel edge map may be passed to ``forward`` to use the fused
    image+edge classifier head.

    NOTE: attribute names and module ordering are part of the checkpoint's
    state_dict layout — do not rename or reorder them.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: one of ``'f'``, ``'c'``, ``'q'``.
            num_classes: size of the output logits (checkpoint uses 6).

        Raises:
            ValueError: if ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: previously an unknown model_type surfaced later as a
            # confusing NameError on fc_input.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch (single-channel input, two conv+pool stages)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two 2x2 max-pools applied in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier (image features only)
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (image features fused with 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run inference.

        Args:
            x: image batch of shape (B, 3, 224, 224).
            edge_x: optional edge-map batch of shape (B, 1, 224, 224); when
                given (and processable) the fused image+edge head is used.

        Returns:
            Tuple ``(logits, attention_map)`` where ``logits`` has shape
            (B, num_classes) and ``attention_map`` is the feature map after
            the final attention stage.

        Raises:
            ValueError: if ``self.model_type`` is not 'f', 'c' or 'q'.
        """
        if self.model_type in ('f', 'c'):
            # Three stages: slice out each Conv+BN+ReLU+Pool quartet, then
            # apply the matching CBAM module.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            # Four stages for the highest-capacity variant.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Guard against an UnboundLocalError on attention_map below.
            raise ValueError(
                f"unknown model_type {self.model_type!r}; expected 'f', 'c' or 'q'"
            )

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. a size
                # mismatch feeding edge_fc), fall back to the image-only head
                # instead of aborting inference.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model — Python / PyTorch code example
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention laid out to match the released
    checkpoint: a 1x1-conv channel MLP and a single 7x7 spatial conv, both
    bias-free.

    NOTE: the submodule names ``channel_attention`` / ``spatial_attention``
    are part of the checkpoint's state_dict keys — keep them as-is.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: squeeze to channels // reduction (at least 1),
        # then expand back.  Conv2d 1x1, no bias, exactly as checkpointed.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over the 2-channel
        # [mean, max] map ("same" padding of 3).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP applied to global avg- and max-pooled maps.
        gate_logits = (
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * torch.sigmoid(gate_logits)

        # Spatial gate: 7x7 conv over per-pixel [mean, max] across channels.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_logits = self.spatial_attention(
            torch.cat([mean_map, max_map], dim=1)
        )
        return x * torch.sigmoid(spatial_logits)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three backbone variants:
      * ``'f'`` -- 3 stages, 16/32/64 channels (smallest)
      * ``'c'`` -- 3 stages, 32/64/128 channels
      * ``'q'`` -- 4 stages, 64/128/256/512 channels (largest)

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2) followed by a
    CBAM attention module. The flattened feature sizes assume 224x224 RGB
    input. An optional 1-channel edge-map branch can be fused into the head.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output logits (the checkpoint uses 6).

    Raises:
        ValueError: if ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths, flattened backbone output size (for a
        # 224x224 input) and classifier widths. The final width honors
        # num_classes (previously hard-coded to 6, ignoring the argument).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        # Backbone: one Conv+BN+ReLU+Pool group (4 sub-modules) per stage.
        conv_modules = []
        in_channels = 3
        for out_channels in stage_channels:
            conv_modules.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*conv_modules)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge-detection branch: two conv+pool steps on a 1-channel edge map.
        # edge_fc expects 64 x 56 x 56, i.e. a 224x224 edge map pooled twice.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: indices 0-2 produce the shared feature vector,
        # indices 3-6 finish the image-only prediction.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch, shape (B, 3, 224, 224).
            edge_x: optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the attention-weighted feature map of the last backbone stage.
        """
        # The backbone has exactly 4 sequential sub-modules per stage, with
        # attention after each stage; this loop replaces the duplicated
        # per-variant branches of the original.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features from the first Linear+ReLU+Dropout of the main head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion: a malformed edge map (e.g.
                # wrong spatial size) must not crash inference; fall back to
                # the image-only head.
                pass

        return self.classifier[3:](features), attention_map
        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention (a shared 1x1-conv bottleneck applied to global avg-
    and max-pooled descriptors) is followed by spatial attention (a single
    7x7 conv over channel-wise mean/max maps). All convolutions are
    bias-free, exactly as stored in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width of the shared channel-attention MLP (1x1 convs).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ------------------------------------------
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial attention ------------------------------------------
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True).values
        gate = self.spatial_attention(torch.cat((mean_map, peak_map), dim=1))
        return x * torch.sigmoid(gate)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three backbone variants:
      * ``'f'`` -- 3 stages, 16/32/64 channels (smallest)
      * ``'c'`` -- 3 stages, 32/64/128 channels
      * ``'q'`` -- 4 stages, 64/128/256/512 channels (largest)

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2) followed by a
    CBAM attention module. The flattened feature sizes assume 224x224 RGB
    input. An optional 1-channel edge-map branch can be fused into the head.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output logits (the checkpoint uses 6).

    Raises:
        ValueError: if ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths, flattened backbone output size (for a
        # 224x224 input) and classifier widths. The final width honors
        # num_classes (previously hard-coded to 6, ignoring the argument).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        # Backbone: one Conv+BN+ReLU+Pool group (4 sub-modules) per stage.
        conv_modules = []
        in_channels = 3
        for out_channels in stage_channels:
            conv_modules.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*conv_modules)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge-detection branch: two conv+pool steps on a 1-channel edge map.
        # edge_fc expects 64 x 56 x 56, i.e. a 224x224 edge map pooled twice.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: indices 0-2 produce the shared feature vector,
        # indices 3-6 finish the image-only prediction.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch, shape (B, 3, 224, 224).
            edge_x: optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the attention-weighted feature map of the last backbone stage.
        """
        # The backbone has exactly 4 sequential sub-modules per stage, with
        # attention after each stage; this loop replaces the duplicated
        # per-variant branches of the original.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features from the first Linear+ReLU+Dropout of the main head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion: a malformed edge map (e.g.
                # wrong spatial size) must not crash inference; fall back to
                # the image-only head.
                pass

        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention (a shared 1x1-conv bottleneck applied to global avg-
    and max-pooled descriptors) is followed by spatial attention (a single
    7x7 conv over channel-wise mean/max maps). All convolutions are
    bias-free, exactly as stored in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width of the shared channel-attention MLP (1x1 convs).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ------------------------------------------
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial attention ------------------------------------------
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True).values
        gate = self.spatial_attention(torch.cat((mean_map, peak_map), dim=1))
        return x * torch.sigmoid(gate)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three backbone variants:
      * ``'f'`` -- 3 stages, 16/32/64 channels (smallest)
      * ``'c'`` -- 3 stages, 32/64/128 channels
      * ``'q'`` -- 4 stages, 64/128/256/512 channels (largest)

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2) followed by a
    CBAM attention module. The flattened feature sizes assume 224x224 RGB
    input. An optional 1-channel edge-map branch can be fused into the head.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output logits (the checkpoint uses 6).

    Raises:
        ValueError: if ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths, flattened backbone output size (for a
        # 224x224 input) and classifier widths. The final width honors
        # num_classes (previously hard-coded to 6, ignoring the argument).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        # Backbone: one Conv+BN+ReLU+Pool group (4 sub-modules) per stage.
        conv_modules = []
        in_channels = 3
        for out_channels in stage_channels:
            conv_modules.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*conv_modules)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge-detection branch: two conv+pool steps on a 1-channel edge map.
        # edge_fc expects 64 x 56 x 56, i.e. a 224x224 edge map pooled twice.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: indices 0-2 produce the shared feature vector,
        # indices 3-6 finish the image-only prediction.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch, shape (B, 3, 224, 224).
            edge_x: optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the attention-weighted feature map of the last backbone stage.
        """
        # The backbone has exactly 4 sequential sub-modules per stage, with
        # attention after each stage; this loop replaces the duplicated
        # per-variant branches of the original.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features from the first Linear+ReLU+Dropout of the main head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion: a malformed edge map (e.g.
                # wrong spatial size) must not crash inference; fall back to
                # the image-only head.
                pass

        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention (a shared 1x1-conv bottleneck applied to global avg-
    and max-pooled descriptors) is followed by spatial attention (a single
    7x7 conv over channel-wise mean/max maps). All convolutions are
    bias-free, exactly as stored in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width of the shared channel-attention MLP (1x1 convs).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ------------------------------------------
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial attention ------------------------------------------
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True).values
        gate = self.spatial_attention(torch.cat((mean_map, peak_map), dim=1))
        return x * torch.sigmoid(gate)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three backbone variants:
      * ``'f'`` -- 3 stages, 16/32/64 channels (smallest)
      * ``'c'`` -- 3 stages, 32/64/128 channels
      * ``'q'`` -- 4 stages, 64/128/256/512 channels (largest)

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2) followed by a
    CBAM attention module. The flattened feature sizes assume 224x224 RGB
    input. An optional 1-channel edge-map branch can be fused into the head.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output logits (the checkpoint uses 6).

    Raises:
        ValueError: if ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths, flattened backbone output size (for a
        # 224x224 input) and classifier widths. The final width honors
        # num_classes (previously hard-coded to 6, ignoring the argument).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        # Backbone: one Conv+BN+ReLU+Pool group (4 sub-modules) per stage.
        conv_modules = []
        in_channels = 3
        for out_channels in stage_channels:
            conv_modules.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*conv_modules)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge-detection branch: two conv+pool steps on a 1-channel edge map.
        # edge_fc expects 64 x 56 x 56, i.e. a 224x224 edge map pooled twice.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: indices 0-2 produce the shared feature vector,
        # indices 3-6 finish the image-only prediction.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch, shape (B, 3, 224, 224).
            edge_x: optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the attention-weighted feature map of the last backbone stage.
        """
        # The backbone has exactly 4 sequential sub-modules per stage, with
        # attention after each stage; this loop replaces the duplicated
        # per-variant branches of the original.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features from the first Linear+ReLU+Dropout of the main head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion: a malformed edge map (e.g.
                # wrong spatial size) must not crash inference; fall back to
                # the image-only head.
                pass

        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's layer layout.

    Channel attention (a shared 1x1-conv bottleneck applied to global avg-
    and max-pooled descriptors) is followed by spatial attention (a single
    7x7 conv over channel-wise mean/max maps). All convolutions are
    bias-free, exactly as stored in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        # Bottleneck width of the shared channel-attention MLP (1x1 convs).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single bias-free 7x7 conv producing the spatial gate.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ------------------------------------------
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)

        # --- spatial attention ------------------------------------------
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True).values
        gate = self.spatial_attention(torch.cat((mean_map, peak_map), dim=1))
        return x * torch.sigmoid(gate)


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three backbone variants:
      * ``'f'`` -- 3 stages, 16/32/64 channels (smallest)
      * ``'c'`` -- 3 stages, 32/64/128 channels
      * ``'q'`` -- 4 stages, 64/128/256/512 channels (largest)

    Each stage is Conv3x3 -> BatchNorm -> ReLU -> MaxPool(2) followed by a
    CBAM attention module. The flattened feature sizes assume 224x224 RGB
    input. An optional 1-channel edge-map branch can be fused into the head.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output logits (the checkpoint uses 6).

    Raises:
        ValueError: if ``model_type`` is not a supported variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant stage widths, flattened backbone output size (for a
        # 224x224 input) and classifier widths. The final width honors
        # num_classes (previously hard-coded to 6, ignoring the argument).
        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}"
            )

        # Backbone: one Conv+BN+ReLU+Pool group (4 sub-modules) per stage.
        conv_modules = []
        in_channels = 3
        for out_channels in stage_channels:
            conv_modules.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*conv_modules)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        )

        # Edge-detection branch: two conv+pool steps on a 1-channel edge map.
        # edge_fc expects 64 x 56 x 56, i.e. a 224x224 edge map pooled twice.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: indices 0-2 produce the shared feature vector,
        # indices 3-6 finish the image-only prediction.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch, shape (B, 3, 224, 224).
            edge_x: optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the attention-weighted feature map of the last backbone stage.
        """
        # The backbone has exactly 4 sequential sub-modules per stage, with
        # attention after each stage; this loop replaces the duplicated
        # per-variant branches of the original.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features from the first Linear+ReLU+Dropout of the main head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion: a malformed edge map (e.g.
                # wrong spatial size) must not crash inference; fall back to
                # the image-only head.
                pass

        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention (a shared 1x1-conv bottleneck applied to avg- and
    max-pooled descriptors) is followed by spatial attention (a 7x7 conv
    over stacked channel-wise mean/max maps). All convs are bias-free so
    the parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention bottleneck via 1x1 convolutions (NO BIAS,
        # exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 convolution over the
        # 2-channel [mean, max] stack.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share a Conv+BN+ReLU+MaxPool backbone with a CBAM
    attention module after every pooled stage:

      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the backbone
    features through ``combined_classifier``.

    NOTE(review): the hard-coded fc input sizes (e.g. 64*28*28, 64*56*56)
    assume 224x224 inputs — confirm against the preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: variant selector, one of 'f', 'c', 'q'.
            num_classes: number of output classes (default 6, matching the
                shipped checkpoint).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 input, three /2 pools -> 28
            # BUGFIX: last fc size previously hard-coded 6, silently
            # ignoring ``num_classes``; default 6 keeps checkpoint compat.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 input, four /2 pools -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast on a bad variant; previously an unknown
            # model_type surfaced much later as a NameError (fc_input) or
            # an AttributeError in forward().
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel input, two conv+pool stages.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two /2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch sized to match the fc layers (224x224 assumed).
            edge_x: optional single-channel edge-map batch; fused when given.
        """
        # Each stage is 4 Sequential modules (Conv+BN+ReLU+Pool); the
        # slices mirror that layout, with CBAM after each pooled stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First three classifier modules (Linear, ReLU, Dropout) produce
        # the intermediate features reused by the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention (a shared 1x1-conv bottleneck applied to avg- and
    max-pooled descriptors) is followed by spatial attention (a 7x7 conv
    over stacked channel-wise mean/max maps). All convs are bias-free so
    the parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention bottleneck via 1x1 convolutions (NO BIAS,
        # exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 convolution over the
        # 2-channel [mean, max] stack.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share a Conv+BN+ReLU+MaxPool backbone with a CBAM
    attention module after every pooled stage:

      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the backbone
    features through ``combined_classifier``.

    NOTE(review): the hard-coded fc input sizes (e.g. 64*28*28, 64*56*56)
    assume 224x224 inputs — confirm against the preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: variant selector, one of 'f', 'c', 'q'.
            num_classes: number of output classes (default 6, matching the
                shipped checkpoint).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 input, three /2 pools -> 28
            # BUGFIX: last fc size previously hard-coded 6, silently
            # ignoring ``num_classes``; default 6 keeps checkpoint compat.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 input, four /2 pools -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast on a bad variant; previously an unknown
            # model_type surfaced much later as a NameError (fc_input) or
            # an AttributeError in forward().
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel input, two conv+pool stages.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two /2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch sized to match the fc layers (224x224 assumed).
            edge_x: optional single-channel edge-map batch; fused when given.
        """
        # Each stage is 4 Sequential modules (Conv+BN+ReLU+Pool); the
        # slices mirror that layout, with CBAM after each pooled stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First three classifier modules (Linear, ReLU, Dropout) produce
        # the intermediate features reused by the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention (a shared 1x1-conv bottleneck applied to avg- and
    max-pooled descriptors) is followed by spatial attention (a 7x7 conv
    over stacked channel-wise mean/max maps). All convs are bias-free so
    the parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention bottleneck via 1x1 convolutions (NO BIAS,
        # exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 convolution over the
        # 2-channel [mean, max] stack.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share a Conv+BN+ReLU+MaxPool backbone with a CBAM
    attention module after every pooled stage:

      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the backbone
    features through ``combined_classifier``.

    NOTE(review): the hard-coded fc input sizes (e.g. 64*28*28, 64*56*56)
    assume 224x224 inputs — confirm against the preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: variant selector, one of 'f', 'c', 'q'.
            num_classes: number of output classes (default 6, matching the
                shipped checkpoint).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 input, three /2 pools -> 28
            # BUGFIX: last fc size previously hard-coded 6, silently
            # ignoring ``num_classes``; default 6 keeps checkpoint compat.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 input, four /2 pools -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast on a bad variant; previously an unknown
            # model_type surfaced much later as a NameError (fc_input) or
            # an AttributeError in forward().
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel input, two conv+pool stages.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two /2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch sized to match the fc layers (224x224 assumed).
            edge_x: optional single-channel edge-map batch; fused when given.
        """
        # Each stage is 4 Sequential modules (Conv+BN+ReLU+Pool); the
        # slices mirror that layout, with CBAM after each pooled stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First three classifier modules (Linear, ReLU, Dropout) produce
        # the intermediate features reused by the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention (a shared 1x1-conv bottleneck applied to avg- and
    max-pooled descriptors) is followed by spatial attention (a 7x7 conv
    over stacked channel-wise mean/max maps). All convs are bias-free so
    the parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention bottleneck via 1x1 convolutions (NO BIAS,
        # exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 convolution over the
        # 2-channel [mean, max] stack.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three size variants share a Conv+BN+ReLU+MaxPool backbone with a CBAM
    attention module after every pooled stage:

      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional single-channel edge branch can be fused with the backbone
    features through ``combined_classifier``.

    NOTE(review): the hard-coded fc input sizes (e.g. 64*28*28, 64*56*56)
    assume 224x224 inputs — confirm against the preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: variant selector, one of 'f', 'c', 'q'.
            num_classes: number of output classes (default 6, matching the
                shipped checkpoint).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 input, three /2 pools -> 28
            # BUGFIX: last fc size previously hard-coded 6, silently
            # ignoring ``num_classes``; default 6 keeps checkpoint compat.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 input, four /2 pools -> 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: fail fast on a bad variant; previously an unknown
            # model_type surfaced much later as a NameError (fc_input) or
            # an AttributeError in forward().
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge detection branch: 1-channel input, two conv+pool stages.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two /2 pools: 224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch sized to match the fc layers (224x224 assumed).
            edge_x: optional single-channel edge-map batch; fused when given.
        """
        # Each stage is 4 Sequential modules (Conv+BN+ReLU+Pool); the
        # slices mirror that layout, with CBAM after each pooled stage.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        else:  # 'q' — model_type was validated in __init__
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        x = x.view(x.size(0), -1)

        # First three classifier modules (Linear, ReLU, Dropout) produce
        # the intermediate features reused by the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: if the edge branch fails (e.g. wrong
                # edge_x spatial size), fall back to the main classifier.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the checkpoint.

    Channel attention (a shared 1x1-conv bottleneck applied to avg- and
    max-pooled descriptors) is followed by spatial attention (a 7x7 conv
    over stacked channel-wise mean/max maps). All convs are bias-free so
    the parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention bottleneck via 1x1 convolutions (NO BIAS,
        # exactly as stored in the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: a single bias-free 7x7 convolution over the
        # 2-channel [mean, max] stack.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- Conv+BN+ReLU+MaxPool stages, each
    stage followed by a CBAM attention module -- and differ only in width:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes assume 224x224 RGB input (28x28 feature
    maps after three 2x2 pools, 14x14 after four -- TODO confirm against the
    published preprocessing).  An optional single-channel edge branch can be
    fused with the image features in ``forward``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv stages: repeated Conv+BN+ReLU+Pool pattern (4 modules / stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Bug fix: original hard-coded 6 here, ignoring ``num_classes``.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently left conv_layers undefined and
            # deferred the error to the first forward() call.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x2 pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main (image-only) classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; fc sizes assume (N, 3, 224, 224).
            edge_x: optional single-channel edge map (assumed 224x224).  When
                given, edge features are fused via ``combined_classifier``; on
                a shape mismatch the model falls back to the image-only head.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is the
            attended feature map of the last conv stage.
        """
        # Interleave conv stages (4 modules each) with their CBAM modules.
        # This replaces the duplicated per-model_type branches of the
        # original; the stage count is len(attention_modules).
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: fall back to the image-only head when
                # the edge branch shapes do not line up (original swallowed
                # *every* exception here, hiding real bugs).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention uses a bias-free 1x1-conv bottleneck MLP; spatial
    attention uses a single bias-free 7x7 conv over stacked avg/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (NO bias), mirroring the
        # parameter names and shapes stored in the checkpoint.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv (NO bias) over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over global avg and max pools ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- Conv+BN+ReLU+MaxPool stages, each
    stage followed by a CBAM attention module -- and differ only in width:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes assume 224x224 RGB input (28x28 feature
    maps after three 2x2 pools, 14x14 after four -- TODO confirm against the
    published preprocessing).  An optional single-channel edge branch can be
    fused with the image features in ``forward``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv stages: repeated Conv+BN+ReLU+Pool pattern (4 modules / stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Bug fix: original hard-coded 6 here, ignoring ``num_classes``.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently left conv_layers undefined and
            # deferred the error to the first forward() call.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x2 pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main (image-only) classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; fc sizes assume (N, 3, 224, 224).
            edge_x: optional single-channel edge map (assumed 224x224).  When
                given, edge features are fused via ``combined_classifier``; on
                a shape mismatch the model falls back to the image-only head.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is the
            attended feature map of the last conv stage.
        """
        # Interleave conv stages (4 modules each) with their CBAM modules.
        # This replaces the duplicated per-model_type branches of the
        # original; the stage count is len(attention_modules).
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: fall back to the image-only head when
                # the edge branch shapes do not line up (original swallowed
                # *every* exception here, hiding real bugs).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention uses a bias-free 1x1-conv bottleneck MLP; spatial
    attention uses a single bias-free 7x7 conv over stacked avg/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (NO bias), mirroring the
        # parameter names and shapes stored in the checkpoint.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv (NO bias) over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over global avg and max pools ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- Conv+BN+ReLU+MaxPool stages, each
    stage followed by a CBAM attention module -- and differ only in width:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes assume 224x224 RGB input (28x28 feature
    maps after three 2x2 pools, 14x14 after four -- TODO confirm against the
    published preprocessing).  An optional single-channel edge branch can be
    fused with the image features in ``forward``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv stages: repeated Conv+BN+ReLU+Pool pattern (4 modules / stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Bug fix: original hard-coded 6 here, ignoring ``num_classes``.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently left conv_layers undefined and
            # deferred the error to the first forward() call.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x2 pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main (image-only) classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; fc sizes assume (N, 3, 224, 224).
            edge_x: optional single-channel edge map (assumed 224x224).  When
                given, edge features are fused via ``combined_classifier``; on
                a shape mismatch the model falls back to the image-only head.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is the
            attended feature map of the last conv stage.
        """
        # Interleave conv stages (4 modules each) with their CBAM modules.
        # This replaces the duplicated per-model_type branches of the
        # original; the stage count is len(attention_modules).
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: fall back to the image-only head when
                # the edge branch shapes do not line up (original swallowed
                # *every* exception here, hiding real bugs).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention uses a bias-free 1x1-conv bottleneck MLP; spatial
    attention uses a single bias-free 7x7 conv over stacked avg/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (NO bias), mirroring the
        # parameter names and shapes stored in the checkpoint.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv (NO bias) over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over global avg and max pools ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- Conv+BN+ReLU+MaxPool stages, each
    stage followed by a CBAM attention module -- and differ only in width:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes assume 224x224 RGB input (28x28 feature
    maps after three 2x2 pools, 14x14 after four -- TODO confirm against the
    published preprocessing).  An optional single-channel edge branch can be
    fused with the image features in ``forward``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv stages: repeated Conv+BN+ReLU+Pool pattern (4 modules / stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Bug fix: original hard-coded 6 here, ignoring ``num_classes``.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently left conv_layers undefined and
            # deferred the error to the first forward() call.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x2 pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main (image-only) classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; fc sizes assume (N, 3, 224, 224).
            edge_x: optional single-channel edge map (assumed 224x224).  When
                given, edge features are fused via ``combined_classifier``; on
                a shape mismatch the model falls back to the image-only head.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is the
            attended feature map of the last conv stage.
        """
        # Interleave conv stages (4 modules each) with their CBAM modules.
        # This replaces the duplicated per-model_type branches of the
        # original; the stage count is len(attention_modules).
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: fall back to the image-only head when
                # the edge branch shapes do not line up (original swallowed
                # *every* exception here, hiding real bugs).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention uses a bias-free 1x1-conv bottleneck MLP; spatial
    attention uses a single bias-free 7x7 conv over stacked avg/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (NO bias), mirroring the
        # parameter names and shapes stored in the checkpoint.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv (NO bias) over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over global avg and max pools ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        spatial_gate = torch.sigmoid(self.spatial_attention(pooled))
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- Conv+BN+ReLU+MaxPool stages, each
    stage followed by a CBAM attention module -- and differ only in width:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes assume 224x224 RGB input (28x28 feature
    maps after three 2x2 pools, 14x14 after four -- TODO confirm against the
    published preprocessing).  An optional single-channel edge branch can be
    fused with the image features in ``forward``.

    Args:
        model_type: one of ``'f'``, ``'c'``, ``'q'``.
        num_classes: number of output classes (default 6).

    Raises:
        ValueError: if ``model_type`` is not one of the supported variants.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv stages: repeated Conv+BN+ReLU+Pool pattern (4 modules / stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # Bug fix: original hard-coded 6 here, ignoring ``num_classes``.
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently left conv_layers undefined and
            # deferred the error to the first forward() call.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )

        # Edge-detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x2 pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main (image-only) classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB batch; fc sizes assume (N, 3, 224, 224).
            edge_x: optional single-channel edge map (assumed 224x224).  When
                given, edge features are fused via ``combined_classifier``; on
                a shape mismatch the model falls back to the image-only head.

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is the
            attended feature map of the last conv stage.
        """
        # Interleave conv stages (4 modules each) with their CBAM modules.
        # This replaces the duplicated per-model_type branches of the
        # original; the stage count is len(attention_modules).
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except RuntimeError:
                # Best-effort fusion: fall back to the image-only head when
                # the edge branch shapes do not line up (original swallowed
                # *every* exception here, hiding real bugs).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout matches the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP (as 1x1 convs, NO BIAS) applied to pooled descriptors.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid(MLP(avg-pool) + MLP(max-pool)) ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate: sigmoid(conv7x7([mean_c, max_c])) ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, mirroring the released checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention module
    after each stage, followed by an MLP classifier.  An optional
    single-channel edge-map branch can be fused with the main features.

    Model types (flattened sizes like ``64*28*28`` imply 224x224 input):
        'f': 3 stages (16/32/64 ch),        fc 256-128-num_classes
        'c': 3 stages (32/64/128 ch),       fc 512-256-num_classes
        'q': 4 stages (64/128/256/512 ch),  fc 1024-512-num_classes
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f' (fast), 'c' (common) or 'q' (quality).
            num_classes: Number of output classes; defaults to 6 to match
                the released checkpoint.  (Previously this argument was
                stored but ignored — the heads hard-coded 6 outputs.)

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'
                (previously this surfaced as a confusing NameError).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Backbone: 4 layers per stage (Conv+BN+ReLU+Pool) so forward() can
        # slice self.conv_layers[4*i : 4*i+4] for stage i.  Building the
        # stages in a loop keeps the Sequential indices (and therefore the
        # state_dict keys) identical to the checkpoint.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        ])

        # Spatial size after one /2 pool per stage, assuming 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial

        # Edge-detection branch: two conv+pool steps take a 1x224x224 edge
        # map to 64x56x56 before the 128-dim projection.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the fc_sizes[0]-dim features after
        # index 2 (Linear+ReLU+Dropout) for the edge-fused path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: Input image batch, shape (B, 3, 224, 224).
            edge_x: Optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            Tuple ``(logits, attention_map)``: class logits of shape
            (B, num_classes) and the last attended feature map.
        """
        # Iterating attention_modules handles both the 3-stage and 4-stage
        # variants; attention_map is always assigned.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) produces the
        # feature vector shared by the plain and edge-fused heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. an edge map of an
            # unexpected size) fall back to the plain classifier head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch):
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout matches the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP (as 1x1 convs, NO BIAS) applied to pooled descriptors.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid(MLP(avg-pool) + MLP(max-pool)) ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate: sigmoid(conv7x7([mean_c, max_c])) ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, mirroring the released checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention module
    after each stage, followed by an MLP classifier.  An optional
    single-channel edge-map branch can be fused with the main features.

    Model types (flattened sizes like ``64*28*28`` imply 224x224 input):
        'f': 3 stages (16/32/64 ch),        fc 256-128-num_classes
        'c': 3 stages (32/64/128 ch),       fc 512-256-num_classes
        'q': 4 stages (64/128/256/512 ch),  fc 1024-512-num_classes
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f' (fast), 'c' (common) or 'q' (quality).
            num_classes: Number of output classes; defaults to 6 to match
                the released checkpoint.  (Previously this argument was
                stored but ignored — the heads hard-coded 6 outputs.)

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'
                (previously this surfaced as a confusing NameError).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Backbone: 4 layers per stage (Conv+BN+ReLU+Pool) so forward() can
        # slice self.conv_layers[4*i : 4*i+4] for stage i.  Building the
        # stages in a loop keeps the Sequential indices (and therefore the
        # state_dict keys) identical to the checkpoint.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        ])

        # Spatial size after one /2 pool per stage, assuming 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial

        # Edge-detection branch: two conv+pool steps take a 1x224x224 edge
        # map to 64x56x56 before the 128-dim projection.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the fc_sizes[0]-dim features after
        # index 2 (Linear+ReLU+Dropout) for the edge-fused path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: Input image batch, shape (B, 3, 224, 224).
            edge_x: Optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            Tuple ``(logits, attention_map)``: class logits of shape
            (B, num_classes) and the last attended feature map.
        """
        # Iterating attention_modules handles both the 3-stage and 4-stage
        # variants; attention_map is always assigned.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) produces the
        # feature vector shared by the plain and edge-fused heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. an edge map of an
            # unexpected size) fall back to the plain classifier head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch):
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout matches the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP (as 1x1 convs, NO BIAS) applied to pooled descriptors.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid(MLP(avg-pool) + MLP(max-pool)) ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate: sigmoid(conv7x7([mean_c, max_c])) ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, mirroring the released checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention module
    after each stage, followed by an MLP classifier.  An optional
    single-channel edge-map branch can be fused with the main features.

    Model types (flattened sizes like ``64*28*28`` imply 224x224 input):
        'f': 3 stages (16/32/64 ch),        fc 256-128-num_classes
        'c': 3 stages (32/64/128 ch),       fc 512-256-num_classes
        'q': 4 stages (64/128/256/512 ch),  fc 1024-512-num_classes
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f' (fast), 'c' (common) or 'q' (quality).
            num_classes: Number of output classes; defaults to 6 to match
                the released checkpoint.  (Previously this argument was
                stored but ignored — the heads hard-coded 6 outputs.)

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'
                (previously this surfaced as a confusing NameError).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Backbone: 4 layers per stage (Conv+BN+ReLU+Pool) so forward() can
        # slice self.conv_layers[4*i : 4*i+4] for stage i.  Building the
        # stages in a loop keeps the Sequential indices (and therefore the
        # state_dict keys) identical to the checkpoint.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        ])

        # Spatial size after one /2 pool per stage, assuming 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial

        # Edge-detection branch: two conv+pool steps take a 1x224x224 edge
        # map to 64x56x56 before the 128-dim projection.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the fc_sizes[0]-dim features after
        # index 2 (Linear+ReLU+Dropout) for the edge-fused path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: Input image batch, shape (B, 3, 224, 224).
            edge_x: Optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            Tuple ``(logits, attention_map)``: class logits of shape
            (B, num_classes) and the last attended feature map.
        """
        # Iterating attention_modules handles both the 3-stage and 4-stage
        # variants; attention_map is always assigned.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) produces the
        # feature vector shared by the plain and edge-fused heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. an edge map of an
            # unexpected size) fall back to the plain classifier head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch):
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout matches the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP (as 1x1 convs, NO BIAS) applied to pooled descriptors.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid(MLP(avg-pool) + MLP(max-pool)) ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate: sigmoid(conv7x7([mean_c, max_c])) ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, mirroring the released checkpoint layout.

    A staged Conv+BN+ReLU+MaxPool backbone with a CBAM attention module
    after each stage, followed by an MLP classifier.  An optional
    single-channel edge-map branch can be fused with the main features.

    Model types (flattened sizes like ``64*28*28`` imply 224x224 input):
        'f': 3 stages (16/32/64 ch),        fc 256-128-num_classes
        'c': 3 stages (32/64/128 ch),       fc 512-256-num_classes
        'q': 4 stages (64/128/256/512 ch),  fc 1024-512-num_classes
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: One of 'f' (fast), 'c' (common) or 'q' (quality).
            num_classes: Number of output classes; defaults to 6 to match
                the released checkpoint.  (Previously this argument was
                stored but ignored — the heads hard-coded 6 outputs.)

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'
                (previously this surfaced as a confusing NameError).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Backbone: 4 layers per stage (Conv+BN+ReLU+Pool) so forward() can
        # slice self.conv_layers[4*i : 4*i+4] for stage i.  Building the
        # stages in a loop keeps the Sequential indices (and therefore the
        # state_dict keys) identical to the checkpoint.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels
        ])

        # Spatial size after one /2 pool per stage, assuming 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial

        # Edge-detection branch: two conv+pool steps take a 1x224x224 edge
        # map to 64x56x56 before the 128-dim projection.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() taps the fc_sizes[0]-dim features after
        # index 2 (Linear+ReLU+Dropout) for the edge-fused path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: Input image batch, shape (B, 3, 224, 224).
            edge_x: Optional edge-map batch, shape (B, 1, 224, 224).

        Returns:
            Tuple ``(logits, attention_map)``: class logits of shape
            (B, num_classes) and the last attended feature map.
        """
        # Iterating attention_modules handles both the 3-stage and 4-stage
        # variants; attention_map is always assigned.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First classifier stage (Linear -> ReLU -> Dropout) produces the
        # feature vector shared by the plain and edge-fused heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. an edge map of an
            # unexpected size) fall back to the plain classifier head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch):
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    Layer layout matches the released checkpoint: the channel MLP is a pair
    of bias-free 1x1 convolutions and the spatial gate is a single bias-free
    7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Shared MLP (as 1x1 convs, NO BIAS) applied to pooled descriptors.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid(MLP(avg-pool) + MLP(max-pool)) ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # --- spatial gate: sigmoid(conv7x7([mean_c, max_c])) ---
        descriptor = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        x = x * torch.sigmoid(self.spatial_attention(descriptor))

        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the checkpoint layout.

    Variants (selected via ``model_type``):
      * 'f': 3 conv stages (16/32/64 channels), FC head 256/128.
      * 'c': 3 conv stages (32/64/128 channels), FC head 512/256.
      * 'q': 4 conv stages (64/128/256/512 channels), FC head 1024/512.

    Every Conv+BN+ReLU+MaxPool stage is followed by a CBAM attention module.
    An optional single-channel edge map can be fused with the first-FC
    features through a separate "combined" classifier head.

    NOTE(review): the FC input sizes assume 224x224 inputs (28x28 feature
    maps after three 2x pools, 14x14 after four), and the edge branch
    assumes a 224x224 edge map — confirm against the preprocessing code.

    Args:
        model_type: one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, as released).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: head width was hard-coded to 6 outputs; honour
            # num_classes (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (single-channel edge-map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head consuming [first-FC features | 128 edge features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch; FC sizing assumes (N, 3, 224, 224).
            edge_x: optional edge-map batch; sizing assumes (N, 1, 224, 224).

        Returns:
            (logits, attention_map) — attention_map is the final
            post-attention conv feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM module; the stage count distinguishes 'f'/'c' from 'q'.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear + ReLU + Dropout) produces the features the
        # edge branch is fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Edge fusion is best-effort: on any failure (e.g. an edge
                # map of unexpected size) fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention first (a shared 1x1-conv MLP applied to avg- and
    max-pooled descriptors), then spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free, as stored
    in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (no bias) so parameter
        # names/shapes line up with the checkpoint's state dict.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over a 2-channel map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the checkpoint layout.

    Variants (selected via ``model_type``):
      * 'f': 3 conv stages (16/32/64 channels), FC head 256/128.
      * 'c': 3 conv stages (32/64/128 channels), FC head 512/256.
      * 'q': 4 conv stages (64/128/256/512 channels), FC head 1024/512.

    Every Conv+BN+ReLU+MaxPool stage is followed by a CBAM attention module.
    An optional single-channel edge map can be fused with the first-FC
    features through a separate "combined" classifier head.

    NOTE(review): the FC input sizes assume 224x224 inputs (28x28 feature
    maps after three 2x pools, 14x14 after four), and the edge branch
    assumes a 224x224 edge map — confirm against the preprocessing code.

    Args:
        model_type: one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, as released).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: head width was hard-coded to 6 outputs; honour
            # num_classes (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (single-channel edge-map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head consuming [first-FC features | 128 edge features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch; FC sizing assumes (N, 3, 224, 224).
            edge_x: optional edge-map batch; sizing assumes (N, 1, 224, 224).

        Returns:
            (logits, attention_map) — attention_map is the final
            post-attention conv feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM module; the stage count distinguishes 'f'/'c' from 'q'.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear + ReLU + Dropout) produces the features the
        # edge branch is fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Edge fusion is best-effort: on any failure (e.g. an edge
                # map of unexpected size) fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention first (a shared 1x1-conv MLP applied to avg- and
    max-pooled descriptors), then spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free, as stored
    in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (no bias) so parameter
        # names/shapes line up with the checkpoint's state dict.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over a 2-channel map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the checkpoint layout.

    Variants (selected via ``model_type``):
      * 'f': 3 conv stages (16/32/64 channels), FC head 256/128.
      * 'c': 3 conv stages (32/64/128 channels), FC head 512/256.
      * 'q': 4 conv stages (64/128/256/512 channels), FC head 1024/512.

    Every Conv+BN+ReLU+MaxPool stage is followed by a CBAM attention module.
    An optional single-channel edge map can be fused with the first-FC
    features through a separate "combined" classifier head.

    NOTE(review): the FC input sizes assume 224x224 inputs (28x28 feature
    maps after three 2x pools, 14x14 after four), and the edge branch
    assumes a 224x224 edge map — confirm against the preprocessing code.

    Args:
        model_type: one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, as released).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: head width was hard-coded to 6 outputs; honour
            # num_classes (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (single-channel edge-map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head consuming [first-FC features | 128 edge features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch; FC sizing assumes (N, 3, 224, 224).
            edge_x: optional edge-map batch; sizing assumes (N, 1, 224, 224).

        Returns:
            (logits, attention_map) — attention_map is the final
            post-attention conv feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM module; the stage count distinguishes 'f'/'c' from 'q'.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear + ReLU + Dropout) produces the features the
        # edge branch is fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Edge fusion is best-effort: on any failure (e.g. an edge
                # map of unexpected size) fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention first (a shared 1x1-conv MLP applied to avg- and
    max-pooled descriptors), then spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free, as stored
    in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (no bias) so parameter
        # names/shapes line up with the checkpoint's state dict.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over a 2-channel map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the checkpoint layout.

    Variants (selected via ``model_type``):
      * 'f': 3 conv stages (16/32/64 channels), FC head 256/128.
      * 'c': 3 conv stages (32/64/128 channels), FC head 512/256.
      * 'q': 4 conv stages (64/128/256/512 channels), FC head 1024/512.

    Every Conv+BN+ReLU+MaxPool stage is followed by a CBAM attention module.
    An optional single-channel edge map can be fused with the first-FC
    features through a separate "combined" classifier head.

    NOTE(review): the FC input sizes assume 224x224 inputs (28x28 feature
    maps after three 2x pools, 14x14 after four), and the edge branch
    assumes a 224x224 edge map — confirm against the preprocessing code.

    Args:
        model_type: one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, as released).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: head width was hard-coded to 6 outputs; honour
            # num_classes (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (single-channel edge-map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head consuming [first-FC features | 128 edge features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch; FC sizing assumes (N, 3, 224, 224).
            edge_x: optional edge-map batch; sizing assumes (N, 1, 224, 224).

        Returns:
            (logits, attention_map) — attention_map is the final
            post-attention conv feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM module; the stage count distinguishes 'f'/'c' from 'q'.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear + ReLU + Dropout) produces the features the
        # edge branch is fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Edge fusion is best-effort: on any failure (e.g. an edge
                # map of unexpected size) fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention first (a shared 1x1-conv MLP applied to avg- and
    max-pooled descriptors), then spatial attention (a 7x7 conv over the
    channel-wise mean/max maps). All convolutions are bias-free, as stored
    in the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP as 1x1 convolutions (no bias) so parameter
        # names/shapes line up with the checkpoint's state dict.
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one bias-free 7x7 conv over a 2-channel map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention gate ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate

        # --- spatial attention gate ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the checkpoint layout.

    Variants (selected via ``model_type``):
      * 'f': 3 conv stages (16/32/64 channels), FC head 256/128.
      * 'c': 3 conv stages (32/64/128 channels), FC head 512/256.
      * 'q': 4 conv stages (64/128/256/512 channels), FC head 1024/512.

    Every Conv+BN+ReLU+MaxPool stage is followed by a CBAM attention module.
    An optional single-channel edge map can be fused with the first-FC
    features through a separate "combined" classifier head.

    NOTE(review): the FC input sizes assume 224x224 inputs (28x28 feature
    maps after three 2x pools, 14x14 after four), and the edge branch
    assumes a 224x224 edge map — confirm against the preprocessing code.

    Args:
        model_type: one of 'f', 'c', 'q'.
        num_classes: number of output classes (default 6, as released).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: head width was hard-coded to 6 outputs; honour
            # num_classes (default 6 keeps checkpoint compatibility).
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # BUGFIX: an unknown model_type previously fell through and
            # crashed later with a NameError on fc_input; fail fast instead.
            raise ValueError(
                f"model_type must be one of 'f', 'c', 'q'; got {model_type!r}"
            )

        # Edge detection branch (single-channel edge-map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 channels at 56x56 after two 2x pools of a 224x224 edge map.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined head consuming [first-FC features | 128 edge features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch; FC sizing assumes (N, 3, 224, 224).
            edge_x: optional edge-map batch; sizing assumes (N, 1, 224, 224).

        Returns:
            (logits, attention_map) — attention_map is the final
            post-attention conv feature tensor.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool) followed
        # by its CBAM module; the stage count distinguishes 'f'/'c' from 'q'.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # First FC stage (Linear + ReLU + Dropout) produces the features the
        # edge branch is fused with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Edge fusion is best-effort: on any failure (e.g. an edge
                # map of unexpected size) fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
### Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared bias-free 1x1-conv bottleneck applied to
    both pooled descriptors; spatial attention is a single bias-free 7x7
    convolution. Attribute names and module layout are kept exactly as in
    the checkpoint so the stored state dict loads unchanged.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Shared MLP (as 1x1 convs, no bias) for the channel gate.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the [avg, max] channel-pooled pair.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid of summed avg-/max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = gate * x

        # --- spatial gate over per-pixel channel statistics ---
        descriptor = torch.cat(
            (x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(descriptor))


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Variants selected via ``model_type``:
      * ``'f'`` -- 3 conv stages (16/32/64 ch), fc widths 256/128
      * ``'c'`` -- 3 conv stages (32/64/128 ch), fc widths 512/256
      * ``'q'`` -- 4 conv stages (64/128/256/512 ch), fc widths 1024/512

    NOTE(review): the hard-coded flattened sizes (e.g. ``64 * 28 * 28``,
    edge branch ``64 * 56 * 56``) imply a 224x224 input -- confirm against
    the caller's preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Size of the output logits. Previously stored but
                ignored (heads were hard-wired to 6); now honoured, with
                the old behaviour preserved by the default of 6.

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, so each
        # stage is the 4-layer slice conv_layers[4*i : 4*i + 4].
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently fell through here and later
            # crashed with UnboundLocalError on fc_input/fc_sizes.
            raise ValueError(f"Unknown model_type: {model_type!r}")

        # Edge detection branch (expects a single-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 max-pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Input batch for the conv backbone (3-channel).
            edge_x: Optional single-channel edge-map batch; when provided
                (and spatially compatible with ``edge_fc``) its features
                are fused through ``combined_classifier``.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: unreachable after __init__ validation.
            raise ValueError(f"Unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout of the classifier; ``features`` is
        # fc_sizes[0]-dimensional and feeds the optional fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the
                # edge branch fails (e.g. size mismatch with edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared bias-free 1x1-conv bottleneck applied to
    both pooled descriptors; spatial attention is a single bias-free 7x7
    convolution. Attribute names and module layout are kept exactly as in
    the checkpoint so the stored state dict loads unchanged.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Shared MLP (as 1x1 convs, no bias) for the channel gate.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the [avg, max] channel-pooled pair.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid of summed avg-/max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = gate * x

        # --- spatial gate over per-pixel channel statistics ---
        descriptor = torch.cat(
            (x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(descriptor))


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Variants selected via ``model_type``:
      * ``'f'`` -- 3 conv stages (16/32/64 ch), fc widths 256/128
      * ``'c'`` -- 3 conv stages (32/64/128 ch), fc widths 512/256
      * ``'q'`` -- 4 conv stages (64/128/256/512 ch), fc widths 1024/512

    NOTE(review): the hard-coded flattened sizes (e.g. ``64 * 28 * 28``,
    edge branch ``64 * 56 * 56``) imply a 224x224 input -- confirm against
    the caller's preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Size of the output logits. Previously stored but
                ignored (heads were hard-wired to 6); now honoured, with
                the old behaviour preserved by the default of 6.

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, so each
        # stage is the 4-layer slice conv_layers[4*i : 4*i + 4].
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently fell through here and later
            # crashed with UnboundLocalError on fc_input/fc_sizes.
            raise ValueError(f"Unknown model_type: {model_type!r}")

        # Edge detection branch (expects a single-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 max-pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Input batch for the conv backbone (3-channel).
            edge_x: Optional single-channel edge-map batch; when provided
                (and spatially compatible with ``edge_fc``) its features
                are fused through ``combined_classifier``.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: unreachable after __init__ validation.
            raise ValueError(f"Unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout of the classifier; ``features`` is
        # fc_sizes[0]-dimensional and feeds the optional fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the
                # edge branch fails (e.g. size mismatch with edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared bias-free 1x1-conv bottleneck applied to
    both pooled descriptors; spatial attention is a single bias-free 7x7
    convolution. Attribute names and module layout are kept exactly as in
    the checkpoint so the stored state dict loads unchanged.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Shared MLP (as 1x1 convs, no bias) for the channel gate.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the [avg, max] channel-pooled pair.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid of summed avg-/max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = gate * x

        # --- spatial gate over per-pixel channel statistics ---
        descriptor = torch.cat(
            (x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(descriptor))


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Variants selected via ``model_type``:
      * ``'f'`` -- 3 conv stages (16/32/64 ch), fc widths 256/128
      * ``'c'`` -- 3 conv stages (32/64/128 ch), fc widths 512/256
      * ``'q'`` -- 4 conv stages (64/128/256/512 ch), fc widths 1024/512

    NOTE(review): the hard-coded flattened sizes (e.g. ``64 * 28 * 28``,
    edge branch ``64 * 56 * 56``) imply a 224x224 input -- confirm against
    the caller's preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Size of the output logits. Previously stored but
                ignored (heads were hard-wired to 6); now honoured, with
                the old behaviour preserved by the default of 6.

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, so each
        # stage is the 4-layer slice conv_layers[4*i : 4*i + 4].
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently fell through here and later
            # crashed with UnboundLocalError on fc_input/fc_sizes.
            raise ValueError(f"Unknown model_type: {model_type!r}")

        # Edge detection branch (expects a single-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 max-pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Input batch for the conv backbone (3-channel).
            edge_x: Optional single-channel edge-map batch; when provided
                (and spatially compatible with ``edge_fc``) its features
                are fused through ``combined_classifier``.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: unreachable after __init__ validation.
            raise ValueError(f"Unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout of the classifier; ``features`` is
        # fc_sizes[0]-dimensional and feeds the optional fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the
                # edge branch fails (e.g. size mismatch with edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared bias-free 1x1-conv bottleneck applied to
    both pooled descriptors; spatial attention is a single bias-free 7x7
    convolution. Attribute names and module layout are kept exactly as in
    the checkpoint so the stored state dict loads unchanged.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Shared MLP (as 1x1 convs, no bias) for the channel gate.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the [avg, max] channel-pooled pair.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid of summed avg-/max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = gate * x

        # --- spatial gate over per-pixel channel statistics ---
        descriptor = torch.cat(
            (x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(descriptor))


class CheckpointVbaiDPA24(nn.Module):
    """Checkpoint-compatible Vbai-DPA 2.4 classifier with CBAM attention.

    Variants selected via ``model_type``:
      * ``'f'`` -- 3 conv stages (16/32/64 ch), fc widths 256/128
      * ``'c'`` -- 3 conv stages (32/64/128 ch), fc widths 512/256
      * ``'q'`` -- 4 conv stages (64/128/256/512 ch), fc widths 1024/512

    NOTE(review): the hard-coded flattened sizes (e.g. ``64 * 28 * 28``,
    edge branch ``64 * 56 * 56``) imply a 224x224 input -- confirm against
    the caller's preprocessing.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Size of the output logits. Previously stored but
                ignored (heads were hard-wired to 6); now honoured, with
                the old behaviour preserved by the default of 6.

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern, so each
        # stage is the 4-layer slice conv_layers[4*i : 4*i + 4].
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]

        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]

        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]

        else:
            # Fail fast: the original silently fell through here and later
            # crashed with UnboundLocalError on fc_input/fc_sizes.
            raise ValueError(f"Unknown model_type: {model_type!r}")

        # Edge detection branch (expects a single-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (two 2x2 max-pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier (backbone features + 128-d edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images.

        Args:
            x: Input batch for the conv backbone (3-channel).
            edge_x: Optional single-channel edge-map batch; when provided
                (and spatially compatible with ``edge_fc``) its features
                are fused through ``combined_classifier``.
        """
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x

        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)

            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)

            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)

            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x

        else:
            # Defensive: unreachable after __init__ validation.
            raise ValueError(f"Unknown model_type: {self.model_type!r}")

        x = x.view(x.size(0), -1)

        # First Linear+ReLU+Dropout of the classifier; ``features`` is
        # fc_sizes[0]-dimensional and feeds the optional fusion head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the
                # edge branch fails (e.g. size mismatch with edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the released checkpoint layout.

    Channel attention is a shared bias-free 1x1-conv bottleneck applied to
    both pooled descriptors; spatial attention is a single bias-free 7x7
    convolution. Attribute names and module layout are kept exactly as in
    the checkpoint so the stored state dict loads unchanged.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        squeezed = max(channels // reduction, 1)
        # Shared MLP (as 1x1 convs, no bias) for the channel gate.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the [avg, max] channel-pooled pair.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: sigmoid of summed avg-/max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = gate * x

        # --- spatial gate over per-pixel channel statistics ---
        descriptor = torch.cat(
            (x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(descriptor))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    Three variants share one structure of repeated
    Conv2d(3x3) -> BatchNorm -> ReLU -> MaxPool(2x2) stages, each stage
    followed by a CBAM attention block:

      * 'f': 3 stages, 16/32/64 channels,    heads [256, 128]
      * 'c': 3 stages, 32/64/128 channels,   heads [512, 256]
      * 'q': 4 stages, 64/128/256/512 ch.,   heads [1024, 512]

    The flattened fc input sizes (e.g. 64 * 28 * 28 after three 2x2 pools)
    imply a 224x224 RGB input; the edge-branch fc (64 * 56 * 56 after two
    pools) likewise implies a 224x224 single-channel edge map — TODO
    confirm against the callers' preprocessing.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output-layer width (it was accepted but hard-coded to 6; default
    behavior unchanged), and an unknown ``model_type`` raises ValueError
    immediately instead of failing later with a NameError.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant plan: (channel progression incl. the 3-ch input,
        # flattened conv-output size, classifier widths).
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128, num_classes]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256, num_classes]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512, num_classes]),
        }
        if model_type not in plans:
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,)
            )
        channels, fc_input, fc_sizes = plans[model_type]

        # Conv stages: Conv+BN+ReLU+Pool, i.e. exactly 4 modules per stage
        # so forward() can slice conv_layers[4*i : 4*(i+1)].
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block after each stage, sized to that stage's width.
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        ])

        # Edge detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge input (two 2x2 pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] finishes classification.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch (N, 3, H, W); 224x224 expected (see class doc).
            edge_x: optional (N, 1, H, W) edge map enabling the combined head.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended conv feature map.
        """
        # Each Conv/BN/ReLU/Pool stage is followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features: Linear + ReLU + Dropout (classifier[0:3]).
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: an edge map whose size does not
                # match edge_fc falls back to the main head rather than
                # aborting inference.
                pass

        return self.classifier[3:](features), attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: channel gating followed by spatial gating.

    All convolutions are bias-free 1x1 / 7x7 convs so the parameter layout
    matches the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP expressed as 1x1 convs (NO BIAS, as in checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled stats ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    Three variants share one structure of repeated
    Conv2d(3x3) -> BatchNorm -> ReLU -> MaxPool(2x2) stages, each stage
    followed by a CBAM attention block:

      * 'f': 3 stages, 16/32/64 channels,    heads [256, 128]
      * 'c': 3 stages, 32/64/128 channels,   heads [512, 256]
      * 'q': 4 stages, 64/128/256/512 ch.,   heads [1024, 512]

    The flattened fc input sizes (e.g. 64 * 28 * 28 after three 2x2 pools)
    imply a 224x224 RGB input; the edge-branch fc (64 * 56 * 56 after two
    pools) likewise implies a 224x224 single-channel edge map — TODO
    confirm against the callers' preprocessing.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output-layer width (it was accepted but hard-coded to 6; default
    behavior unchanged), and an unknown ``model_type`` raises ValueError
    immediately instead of failing later with a NameError.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant plan: (channel progression incl. the 3-ch input,
        # flattened conv-output size, classifier widths).
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128, num_classes]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256, num_classes]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512, num_classes]),
        }
        if model_type not in plans:
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,)
            )
        channels, fc_input, fc_sizes = plans[model_type]

        # Conv stages: Conv+BN+ReLU+Pool, i.e. exactly 4 modules per stage
        # so forward() can slice conv_layers[4*i : 4*(i+1)].
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block after each stage, sized to that stage's width.
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        ])

        # Edge detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge input (two 2x2 pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] finishes classification.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch (N, 3, H, W); 224x224 expected (see class doc).
            edge_x: optional (N, 1, H, W) edge map enabling the combined head.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended conv feature map.
        """
        # Each Conv/BN/ReLU/Pool stage is followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features: Linear + ReLU + Dropout (classifier[0:3]).
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: an edge map whose size does not
                # match edge_fc falls back to the main head rather than
                # aborting inference.
                pass

        return self.classifier[3:](features), attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: channel gating followed by spatial gating.

    All convolutions are bias-free 1x1 / 7x7 convs so the parameter layout
    matches the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP expressed as 1x1 convs (NO BIAS, as in checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled stats ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    Three variants share one structure of repeated
    Conv2d(3x3) -> BatchNorm -> ReLU -> MaxPool(2x2) stages, each stage
    followed by a CBAM attention block:

      * 'f': 3 stages, 16/32/64 channels,    heads [256, 128]
      * 'c': 3 stages, 32/64/128 channels,   heads [512, 256]
      * 'q': 4 stages, 64/128/256/512 ch.,   heads [1024, 512]

    The flattened fc input sizes (e.g. 64 * 28 * 28 after three 2x2 pools)
    imply a 224x224 RGB input; the edge-branch fc (64 * 56 * 56 after two
    pools) likewise implies a 224x224 single-channel edge map — TODO
    confirm against the callers' preprocessing.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output-layer width (it was accepted but hard-coded to 6; default
    behavior unchanged), and an unknown ``model_type`` raises ValueError
    immediately instead of failing later with a NameError.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant plan: (channel progression incl. the 3-ch input,
        # flattened conv-output size, classifier widths).
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128, num_classes]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256, num_classes]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512, num_classes]),
        }
        if model_type not in plans:
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,)
            )
        channels, fc_input, fc_sizes = plans[model_type]

        # Conv stages: Conv+BN+ReLU+Pool, i.e. exactly 4 modules per stage
        # so forward() can slice conv_layers[4*i : 4*(i+1)].
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block after each stage, sized to that stage's width.
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        ])

        # Edge detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge input (two 2x2 pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] finishes classification.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch (N, 3, H, W); 224x224 expected (see class doc).
            edge_x: optional (N, 1, H, W) edge map enabling the combined head.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended conv feature map.
        """
        # Each Conv/BN/ReLU/Pool stage is followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features: Linear + ReLU + Dropout (classifier[0:3]).
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: an edge map whose size does not
                # match edge_fc falls back to the main head rather than
                # aborting inference.
                pass

        return self.classifier[3:](features), attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: channel gating followed by spatial gating.

    All convolutions are bias-free 1x1 / 7x7 convs so the parameter layout
    matches the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP expressed as 1x1 convs (NO BIAS, as in checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled stats ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    Three variants share one structure of repeated
    Conv2d(3x3) -> BatchNorm -> ReLU -> MaxPool(2x2) stages, each stage
    followed by a CBAM attention block:

      * 'f': 3 stages, 16/32/64 channels,    heads [256, 128]
      * 'c': 3 stages, 32/64/128 channels,   heads [512, 256]
      * 'q': 4 stages, 64/128/256/512 ch.,   heads [1024, 512]

    The flattened fc input sizes (e.g. 64 * 28 * 28 after three 2x2 pools)
    imply a 224x224 RGB input; the edge-branch fc (64 * 56 * 56 after two
    pools) likewise implies a 224x224 single-channel edge map — TODO
    confirm against the callers' preprocessing.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output-layer width (it was accepted but hard-coded to 6; default
    behavior unchanged), and an unknown ``model_type`` raises ValueError
    immediately instead of failing later with a NameError.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant plan: (channel progression incl. the 3-ch input,
        # flattened conv-output size, classifier widths).
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128, num_classes]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256, num_classes]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512, num_classes]),
        }
        if model_type not in plans:
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,)
            )
        channels, fc_input, fc_sizes = plans[model_type]

        # Conv stages: Conv+BN+ReLU+Pool, i.e. exactly 4 modules per stage
        # so forward() can slice conv_layers[4*i : 4*(i+1)].
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block after each stage, sized to that stage's width.
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        ])

        # Edge detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge input (two 2x2 pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] finishes classification.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch (N, 3, H, W); 224x224 expected (see class doc).
            edge_x: optional (N, 1, H, W) edge map enabling the combined head.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended conv feature map.
        """
        # Each Conv/BN/ReLU/Pool stage is followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features: Linear + ReLU + Dropout (classifier[0:3]).
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: an edge map whose size does not
                # match edge_fc falls back to the main head rather than
                # aborting inference.
                pass

        return self.classifier[3:](features), attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block: channel gating followed by spatial gating.

    All convolutions are bias-free 1x1 / 7x7 convs so the parameter layout
    matches the released checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel-attention MLP expressed as 1x1 convs (NO BIAS, as in checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention: one 7x7 conv over the [avg, max] channel maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pooled stats ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * gate

        # --- spatial attention: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    Three variants share one structure of repeated
    Conv2d(3x3) -> BatchNorm -> ReLU -> MaxPool(2x2) stages, each stage
    followed by a CBAM attention block:

      * 'f': 3 stages, 16/32/64 channels,    heads [256, 128]
      * 'c': 3 stages, 32/64/128 channels,   heads [512, 256]
      * 'q': 4 stages, 64/128/256/512 ch.,   heads [1024, 512]

    The flattened fc input sizes (e.g. 64 * 28 * 28 after three 2x2 pools)
    imply a 224x224 RGB input; the edge-branch fc (64 * 56 * 56 after two
    pools) likewise implies a 224x224 single-channel edge map — TODO
    confirm against the callers' preprocessing.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output-layer width (it was accepted but hard-coded to 6; default
    behavior unchanged), and an unknown ``model_type`` raises ValueError
    immediately instead of failing later with a NameError.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant plan: (channel progression incl. the 3-ch input,
        # flattened conv-output size, classifier widths).
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128, num_classes]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256, num_classes]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512, num_classes]),
        }
        if model_type not in plans:
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,)
            )
        channels, fc_input, fc_sizes = plans[model_type]

        # Conv stages: Conv+BN+ReLU+Pool, i.e. exactly 4 modules per stage
        # so forward() can slice conv_layers[4*i : 4*(i+1)].
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend([
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block after each stage, sized to that stage's width.
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        ])

        # Edge detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 assumes a 224x224 edge input (two 2x2 pools).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier. forward() splits it at index 3: [0:3] produces
        # the shared feature vector, [3:] finishes classification.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Alternate head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: RGB batch (N, 3, H, W); 224x224 expected (see class doc).
            edge_x: optional (N, 1, H, W) edge map enabling the combined head.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is the
            final attended conv feature map.
        """
        # Each Conv/BN/ReLU/Pool stage is followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Shared features: Linear + ReLU + Dropout (classifier[0:3]).
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))

                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: an edge map whose size does not
                # match edge_fc falls back to the main head rather than
                # aborting inference.
                pass

        return self.classifier[3:](features), attention_map
Model (python, pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, laid out to match the checkpoint.

    Both sub-modules use bias-free convolutions because the released
    checkpoint stores no bias tensors for them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck (Conv2d instead of
        # Linear, matching the checkpoint's key layout).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention: single 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of the summed avg-/max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att

        # Spatial gate: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants (selected by ``model_type``):
      - ``'f'`` (fast):    3 conv stages 16/32/64,       FC 256/128
      - ``'c'`` (compact): 3 conv stages 32/64/128,      FC 512/256
      - ``'q'`` (quality): 4 conv stages 64/128/256/512, FC 1024/512

    Every stage is Conv3x3 + BatchNorm + ReLU + MaxPool2, followed by a CBAM
    attention block.  The FC input sizes assume 224x224 inputs (28x28 maps
    after three pools, 14x14 after four).  An optional single-channel edge
    map can be fused in through a small side branch; ``forward`` returns
    ``(logits, attention_map)``.
    """

    # model_type -> (stage channels, [fc0, fc1], feature-map side after pools)
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128], 28),
        'c': ([32, 64, 128], [512, 256], 28),
        'q': ([64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer width.  The original code hard-coded 6
                and ignored this argument; it now takes effect (default is
                still 6, so existing checkpoints load unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original silently
                built a half-initialised model and crashed later in forward).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        try:
            channels, fc_hidden, spatial = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"model_type must be one of {sorted(self._VARIANTS)}, "
                f"got {model_type!r}"
            ) from None

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: Conv+BN+ReLU+Pool per stage, flattened into a single
        # Sequential so state_dict keys match the checkpoint (indices 0..).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block per stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels
        )

        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]

        # Edge-detection side branch (single-channel input, two conv+pool
        # steps).  edge_fc is sized for a 224x224 edge map: 224 -> 112 -> 56.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  Indices matter: forward() uses [0:3] to obtain
        # intermediate features and [3:] as the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused with the image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge map.

        Args:
            x: (N, 3, H, W) image batch; H = W = 224 for the stock FC sizes.
            edge_x: optional (N, 1, H', W') edge map.  If the edge branch
                fails (e.g. wrong spatial size), the model falls back to the
                image-only head instead of raising.

        Returns:
            (output, attention_map): logits of shape (N, num_classes) and
            the feature map produced after the final attention stage.
        """
        # Backbone: each stage is 4 Sequential entries, then its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                output = self._fuse_edge(features, edge_x)
            except RuntimeError:
                # Shape mismatch somewhere in the edge branch -- keep the
                # original best-effort behaviour and use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map

    def _fuse_edge(self, features, edge_x):
        """Encode the edge map and classify the fused feature vector."""
        edge_x = F.relu(self.edge_conv1(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)
        edge_x = F.relu(self.edge_conv2(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)

        edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
        combined = torch.cat([features, edge_features], dim=1)
        return self.combined_classifier(combined)
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, laid out to match the checkpoint.

    Both sub-modules use bias-free convolutions because the released
    checkpoint stores no bias tensors for them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck (Conv2d instead of
        # Linear, matching the checkpoint's key layout).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention: single 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of the summed avg-/max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att

        # Spatial gate: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants (selected by ``model_type``):
      - ``'f'`` (fast):    3 conv stages 16/32/64,       FC 256/128
      - ``'c'`` (compact): 3 conv stages 32/64/128,      FC 512/256
      - ``'q'`` (quality): 4 conv stages 64/128/256/512, FC 1024/512

    Every stage is Conv3x3 + BatchNorm + ReLU + MaxPool2, followed by a CBAM
    attention block.  The FC input sizes assume 224x224 inputs (28x28 maps
    after three pools, 14x14 after four).  An optional single-channel edge
    map can be fused in through a small side branch; ``forward`` returns
    ``(logits, attention_map)``.
    """

    # model_type -> (stage channels, [fc0, fc1], feature-map side after pools)
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128], 28),
        'c': ([32, 64, 128], [512, 256], 28),
        'q': ([64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer width.  The original code hard-coded 6
                and ignored this argument; it now takes effect (default is
                still 6, so existing checkpoints load unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original silently
                built a half-initialised model and crashed later in forward).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        try:
            channels, fc_hidden, spatial = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"model_type must be one of {sorted(self._VARIANTS)}, "
                f"got {model_type!r}"
            ) from None

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: Conv+BN+ReLU+Pool per stage, flattened into a single
        # Sequential so state_dict keys match the checkpoint (indices 0..).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block per stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels
        )

        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]

        # Edge-detection side branch (single-channel input, two conv+pool
        # steps).  edge_fc is sized for a 224x224 edge map: 224 -> 112 -> 56.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  Indices matter: forward() uses [0:3] to obtain
        # intermediate features and [3:] as the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused with the image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge map.

        Args:
            x: (N, 3, H, W) image batch; H = W = 224 for the stock FC sizes.
            edge_x: optional (N, 1, H', W') edge map.  If the edge branch
                fails (e.g. wrong spatial size), the model falls back to the
                image-only head instead of raising.

        Returns:
            (output, attention_map): logits of shape (N, num_classes) and
            the feature map produced after the final attention stage.
        """
        # Backbone: each stage is 4 Sequential entries, then its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                output = self._fuse_edge(features, edge_x)
            except RuntimeError:
                # Shape mismatch somewhere in the edge branch -- keep the
                # original best-effort behaviour and use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map

    def _fuse_edge(self, features, edge_x):
        """Encode the edge map and classify the fused feature vector."""
        edge_x = F.relu(self.edge_conv1(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)
        edge_x = F.relu(self.edge_conv2(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)

        edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
        combined = torch.cat([features, edge_features], dim=1)
        return self.combined_classifier(combined)
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, laid out to match the checkpoint.

    Both sub-modules use bias-free convolutions because the released
    checkpoint stores no bias tensors for them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck (Conv2d instead of
        # Linear, matching the checkpoint's key layout).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention: single 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of the summed avg-/max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att

        # Spatial gate: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants (selected by ``model_type``):
      - ``'f'`` (fast):    3 conv stages 16/32/64,       FC 256/128
      - ``'c'`` (compact): 3 conv stages 32/64/128,      FC 512/256
      - ``'q'`` (quality): 4 conv stages 64/128/256/512, FC 1024/512

    Every stage is Conv3x3 + BatchNorm + ReLU + MaxPool2, followed by a CBAM
    attention block.  The FC input sizes assume 224x224 inputs (28x28 maps
    after three pools, 14x14 after four).  An optional single-channel edge
    map can be fused in through a small side branch; ``forward`` returns
    ``(logits, attention_map)``.
    """

    # model_type -> (stage channels, [fc0, fc1], feature-map side after pools)
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128], 28),
        'c': ([32, 64, 128], [512, 256], 28),
        'q': ([64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer width.  The original code hard-coded 6
                and ignored this argument; it now takes effect (default is
                still 6, so existing checkpoints load unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original silently
                built a half-initialised model and crashed later in forward).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        try:
            channels, fc_hidden, spatial = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"model_type must be one of {sorted(self._VARIANTS)}, "
                f"got {model_type!r}"
            ) from None

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: Conv+BN+ReLU+Pool per stage, flattened into a single
        # Sequential so state_dict keys match the checkpoint (indices 0..).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block per stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels
        )

        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]

        # Edge-detection side branch (single-channel input, two conv+pool
        # steps).  edge_fc is sized for a 224x224 edge map: 224 -> 112 -> 56.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  Indices matter: forward() uses [0:3] to obtain
        # intermediate features and [3:] as the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused with the image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge map.

        Args:
            x: (N, 3, H, W) image batch; H = W = 224 for the stock FC sizes.
            edge_x: optional (N, 1, H', W') edge map.  If the edge branch
                fails (e.g. wrong spatial size), the model falls back to the
                image-only head instead of raising.

        Returns:
            (output, attention_map): logits of shape (N, num_classes) and
            the feature map produced after the final attention stage.
        """
        # Backbone: each stage is 4 Sequential entries, then its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                output = self._fuse_edge(features, edge_x)
            except RuntimeError:
                # Shape mismatch somewhere in the edge branch -- keep the
                # original best-effort behaviour and use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map

    def _fuse_edge(self, features, edge_x):
        """Encode the edge map and classify the fused feature vector."""
        edge_x = F.relu(self.edge_conv1(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)
        edge_x = F.relu(self.edge_conv2(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)

        edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
        combined = torch.cat([features, edge_features], dim=1)
        return self.combined_classifier(combined)
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, laid out to match the checkpoint.

    Both sub-modules use bias-free convolutions because the released
    checkpoint stores no bias tensors for them.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Channel attention: shared 1x1-conv bottleneck (Conv2d instead of
        # Linear, matching the checkpoint's key layout).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )

        # Spatial attention: single 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of the summed avg-/max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att

        # Spatial gate: 7x7 conv over per-pixel channel mean and max.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants (selected by ``model_type``):
      - ``'f'`` (fast):    3 conv stages 16/32/64,       FC 256/128
      - ``'c'`` (compact): 3 conv stages 32/64/128,      FC 512/256
      - ``'q'`` (quality): 4 conv stages 64/128/256/512, FC 1024/512

    Every stage is Conv3x3 + BatchNorm + ReLU + MaxPool2, followed by a CBAM
    attention block.  The FC input sizes assume 224x224 inputs (28x28 maps
    after three pools, 14x14 after four).  An optional single-channel edge
    map can be fused in through a small side branch; ``forward`` returns
    ``(logits, attention_map)``.
    """

    # model_type -> (stage channels, [fc0, fc1], feature-map side after pools)
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128], 28),
        'c': ([32, 64, 128], [512, 256], 28),
        'q': ([64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer width.  The original code hard-coded 6
                and ignored this argument; it now takes effect (default is
                still 6, so existing checkpoints load unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original silently
                built a half-initialised model and crashed later in forward).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        try:
            channels, fc_hidden, spatial = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"model_type must be one of {sorted(self._VARIANTS)}, "
                f"got {model_type!r}"
            ) from None

        self.model_type = model_type
        self.num_classes = num_classes

        # Backbone: Conv+BN+ReLU+Pool per stage, flattened into a single
        # Sequential so state_dict keys match the checkpoint (indices 0..).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block per stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels
        )

        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]

        # Edge-detection side branch (single-channel input, two conv+pool
        # steps).  edge_fc is sized for a 224x224 edge map: 224 -> 112 -> 56.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.  Indices matter: forward() uses [0:3] to obtain
        # intermediate features and [3:] as the image-only head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when edge features are fused with the image features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge map.

        Args:
            x: (N, 3, H, W) image batch; H = W = 224 for the stock FC sizes.
            edge_x: optional (N, 1, H', W') edge map.  If the edge branch
                fails (e.g. wrong spatial size), the model falls back to the
                image-only head instead of raising.

        Returns:
            (output, attention_map): logits of shape (N, num_classes) and
            the feature map produced after the final attention stage.
        """
        # Backbone: each stage is 4 Sequential entries, then its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                output = self._fuse_edge(features, edge_x)
            except RuntimeError:
                # Shape mismatch somewhere in the edge branch -- keep the
                # original best-effort behaviour and use the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map

    def _fuse_edge(self, features, edge_x):
        """Encode the edge map and classify the fused feature vector."""
        edge_x = F.relu(self.edge_conv1(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)
        edge_x = F.relu(self.edge_conv2(edge_x))
        edge_x = F.max_pool2d(edge_x, 2, 2)

        edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
        combined = torch.cat([features, edge_features], dim=1)
        return self.combined_classifier(combined)
Model (python / pytorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module matching the checkpoint layout.

    Channel attention applies a shared, bias-free 1x1-conv bottleneck to the
    global average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv to the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()

        hidden = max(channels // reduction, 1)
        # Bias-free, since the released checkpoint stores no bias tensors.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: sigmoid of the summed avg/max pooled descriptors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate

        # Spatial gate: 7x7 conv over per-pixel channel mean and max.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True).values
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    Three capacity variants share one pattern of Conv->BN->ReLU->MaxPool
    stages, each stage followed by a CBAM attention block:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    The flattened feature sizes assume 224x224 RGB input (28x28 maps after
    3 pools, 14x14 after 4); the optional edge branch likewise assumes a
    224x224 single-channel edge map.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output logits (default 6, as shipped).

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant configuration: (stage widths, hidden FC widths).
        configs = {
            'f': ([16, 32, 64], [256, 128]),
            'c': ([32, 64, 128], [512, 256]),
            'q': ([64, 128, 256, 512], [1024, 512]),
        }
        if model_type not in configs:
            # FIX: the original silently built a broken model (NameError on
            # fc_input) for unknown types; fail fast instead.
            raise ValueError(
                f"model_type must be one of {sorted(configs)}, got {model_type!r}"
            )
        channels, hidden = configs[model_type]

        # Conv stages: 4 modules per stage so Sequential indices match the
        # original checkpoint layout exactly (Conv, BN, ReLU, Pool, ...).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )

        # Spatial side length after len(channels) halvings of 224x224 input.
        side = 224 // (2 ** len(channels))
        fc_input = channels[-1] * side * side
        # FIX: honour num_classes (the original hard-coded 6 outputs and
        # ignored the parameter).  The default of 6 keeps old behaviour.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch: two conv+pool steps on a 1x224x224 map
        # leave a 64x56x56 tensor, hence the fixed edge_fc input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when the 128-d edge embedding is concatenated onto the
        # intermediate features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch of shape ``(B, 3, 224, 224)``.
            edge_x: Optional edge-map batch of shape ``(B, 1, 224, 224)``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            CBAM-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout; its output is the
        # feature vector shared with the combined head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fallback to the main head when the edge map
                # has an unexpected size.  FIX: narrowed from a blanket
                # `except Exception as e` (with unused `e`) that hid bugs.
                pass

        # Rest of the main head: Linear -> ReLU -> Dropout -> Linear.
        return self.classifier[3:](features), attention_map
        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv bottleneck MLP applied
    to global average- and max-pooled descriptors; spatial attention is a
    single bias-free 7x7 convolution over channel-pooled statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Checkpoint layout: Sequential indices 0 (conv), 1 (relu), 2 (conv).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled global descriptors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        stats = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(stats))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    Three capacity variants share one pattern of Conv->BN->ReLU->MaxPool
    stages, each stage followed by a CBAM attention block:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    The flattened feature sizes assume 224x224 RGB input (28x28 maps after
    3 pools, 14x14 after 4); the optional edge branch likewise assumes a
    224x224 single-channel edge map.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output logits (default 6, as shipped).

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant configuration: (stage widths, hidden FC widths).
        configs = {
            'f': ([16, 32, 64], [256, 128]),
            'c': ([32, 64, 128], [512, 256]),
            'q': ([64, 128, 256, 512], [1024, 512]),
        }
        if model_type not in configs:
            # FIX: the original silently built a broken model (NameError on
            # fc_input) for unknown types; fail fast instead.
            raise ValueError(
                f"model_type must be one of {sorted(configs)}, got {model_type!r}"
            )
        channels, hidden = configs[model_type]

        # Conv stages: 4 modules per stage so Sequential indices match the
        # original checkpoint layout exactly (Conv, BN, ReLU, Pool, ...).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )

        # Spatial side length after len(channels) halvings of 224x224 input.
        side = 224 // (2 ** len(channels))
        fc_input = channels[-1] * side * side
        # FIX: honour num_classes (the original hard-coded 6 outputs and
        # ignored the parameter).  The default of 6 keeps old behaviour.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch: two conv+pool steps on a 1x224x224 map
        # leave a 64x56x56 tensor, hence the fixed edge_fc input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when the 128-d edge embedding is concatenated onto the
        # intermediate features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch of shape ``(B, 3, 224, 224)``.
            edge_x: Optional edge-map batch of shape ``(B, 1, 224, 224)``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            CBAM-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout; its output is the
        # feature vector shared with the combined head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fallback to the main head when the edge map
                # has an unexpected size.  FIX: narrowed from a blanket
                # `except Exception as e` (with unused `e`) that hid bugs.
                pass

        # Rest of the main head: Linear -> ReLU -> Dropout -> Linear.
        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv bottleneck MLP applied
    to global average- and max-pooled descriptors; spatial attention is a
    single bias-free 7x7 convolution over channel-pooled statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Checkpoint layout: Sequential indices 0 (conv), 1 (relu), 2 (conv).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled global descriptors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        stats = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(stats))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    Three capacity variants share one pattern of Conv->BN->ReLU->MaxPool
    stages, each stage followed by a CBAM attention block:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    The flattened feature sizes assume 224x224 RGB input (28x28 maps after
    3 pools, 14x14 after 4); the optional edge branch likewise assumes a
    224x224 single-channel edge map.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output logits (default 6, as shipped).

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant configuration: (stage widths, hidden FC widths).
        configs = {
            'f': ([16, 32, 64], [256, 128]),
            'c': ([32, 64, 128], [512, 256]),
            'q': ([64, 128, 256, 512], [1024, 512]),
        }
        if model_type not in configs:
            # FIX: the original silently built a broken model (NameError on
            # fc_input) for unknown types; fail fast instead.
            raise ValueError(
                f"model_type must be one of {sorted(configs)}, got {model_type!r}"
            )
        channels, hidden = configs[model_type]

        # Conv stages: 4 modules per stage so Sequential indices match the
        # original checkpoint layout exactly (Conv, BN, ReLU, Pool, ...).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )

        # Spatial side length after len(channels) halvings of 224x224 input.
        side = 224 // (2 ** len(channels))
        fc_input = channels[-1] * side * side
        # FIX: honour num_classes (the original hard-coded 6 outputs and
        # ignored the parameter).  The default of 6 keeps old behaviour.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch: two conv+pool steps on a 1x224x224 map
        # leave a 64x56x56 tensor, hence the fixed edge_fc input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when the 128-d edge embedding is concatenated onto the
        # intermediate features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch of shape ``(B, 3, 224, 224)``.
            edge_x: Optional edge-map batch of shape ``(B, 1, 224, 224)``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            CBAM-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout; its output is the
        # feature vector shared with the combined head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fallback to the main head when the edge map
                # has an unexpected size.  FIX: narrowed from a blanket
                # `except Exception as e` (with unused `e`) that hid bugs.
                pass

        # Rest of the main head: Linear -> ReLU -> Dropout -> Linear.
        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv bottleneck MLP applied
    to global average- and max-pooled descriptors; spatial attention is a
    single bias-free 7x7 convolution over channel-pooled statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Checkpoint layout: Sequential indices 0 (conv), 1 (relu), 2 (conv).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled global descriptors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        stats = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(stats))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    Three capacity variants share one pattern of Conv->BN->ReLU->MaxPool
    stages, each stage followed by a CBAM attention block:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    The flattened feature sizes assume 224x224 RGB input (28x28 maps after
    3 pools, 14x14 after 4); the optional edge branch likewise assumes a
    224x224 single-channel edge map.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output logits (default 6, as shipped).

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant configuration: (stage widths, hidden FC widths).
        configs = {
            'f': ([16, 32, 64], [256, 128]),
            'c': ([32, 64, 128], [512, 256]),
            'q': ([64, 128, 256, 512], [1024, 512]),
        }
        if model_type not in configs:
            # FIX: the original silently built a broken model (NameError on
            # fc_input) for unknown types; fail fast instead.
            raise ValueError(
                f"model_type must be one of {sorted(configs)}, got {model_type!r}"
            )
        channels, hidden = configs[model_type]

        # Conv stages: 4 modules per stage so Sequential indices match the
        # original checkpoint layout exactly (Conv, BN, ReLU, Pool, ...).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )

        # Spatial side length after len(channels) halvings of 224x224 input.
        side = 224 // (2 ** len(channels))
        fc_input = channels[-1] * side * side
        # FIX: honour num_classes (the original hard-coded 6 outputs and
        # ignored the parameter).  The default of 6 keeps old behaviour.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch: two conv+pool steps on a 1x224x224 map
        # leave a 64x56x56 tensor, hence the fixed edge_fc input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when the 128-d edge embedding is concatenated onto the
        # intermediate features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch of shape ``(B, 3, 224, 224)``.
            edge_x: Optional edge-map batch of shape ``(B, 1, 224, 224)``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            CBAM-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout; its output is the
        # feature vector shared with the combined head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fallback to the main head when the edge map
                # has an unexpected size.  FIX: narrowed from a blanket
                # `except Exception as e` (with unused `e`) that hid bugs.
                pass

        # Rest of the main head: Linear -> ReLU -> Dropout -> Linear.
        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv bottleneck MLP applied
    to global average- and max-pooled descriptors; spatial attention is a
    single bias-free 7x7 convolution over channel-pooled statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)

        # Checkpoint layout: Sequential indices 0 (conv), 1 (relu), 2 (conv).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )

        # Single 7x7 conv over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled global descriptors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate

        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        stats = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(stats))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    Three capacity variants share one pattern of Conv->BN->ReLU->MaxPool
    stages, each stage followed by a CBAM attention block:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    The flattened feature sizes assume 224x224 RGB input (28x28 maps after
    3 pools, 14x14 after 4); the optional edge branch likewise assumes a
    224x224 single-channel edge map.

    Args:
        model_type: One of ``'f'``, ``'c'`` or ``'q'``.
        num_classes: Number of output logits (default 6, as shipped).

    Raises:
        ValueError: If ``model_type`` is not a known variant.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        # Per-variant configuration: (stage widths, hidden FC widths).
        configs = {
            'f': ([16, 32, 64], [256, 128]),
            'c': ([32, 64, 128], [512, 256]),
            'q': ([64, 128, 256, 512], [1024, 512]),
        }
        if model_type not in configs:
            # FIX: the original silently built a broken model (NameError on
            # fc_input) for unknown types; fail fast instead.
            raise ValueError(
                f"model_type must be one of {sorted(configs)}, got {model_type!r}"
            )
        channels, hidden = configs[model_type]

        # Conv stages: 4 modules per stage so Sequential indices match the
        # original checkpoint layout exactly (Conv, BN, ReLU, Pool, ...).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )

        # Spatial side length after len(channels) halvings of 224x224 input.
        side = 224 // (2 ** len(channels))
        fc_input = channels[-1] * side * side
        # FIX: honour num_classes (the original hard-coded 6 outputs and
        # ignored the parameter).  The default of 6 keeps old behaviour.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch: two conv+pool steps on a 1x224x224 map
        # leave a 64x56x56 tensor, hence the fixed edge_fc input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Head used when the 128-d edge embedding is concatenated onto the
        # intermediate features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch of shape ``(B, 3, 224, 224)``.
            edge_x: Optional edge-map batch of shape ``(B, 1, 224, 224)``.

        Returns:
            Tuple ``(logits, attention_map)``; ``attention_map`` is the
            CBAM-weighted feature map of the last conv stage.
        """
        # Interleave the 4-module conv stages with their CBAM blocks.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # classifier[0:3] = Linear -> ReLU -> Dropout; its output is the
        # feature vector shared with the combined head.
        features = self.classifier[0:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fallback to the main head when the edge map
                # has an unexpected size.  FIX: narrowed from a blanket
                # `except Exception as e` (with unused `e`) that hid bugs.
                pass

        # Rest of the main head: Linear -> ReLU -> Dropout -> Linear.
        return self.classifier[3:](features), attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: a channel gate followed by a spatial gate.

    The layer layout (1x1 Conv2d MLP for the channel branch, a single 7x7
    conv for the spatial branch, both bias-free) mirrors the released
    checkpoint so its weights load one-to-one.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)

        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg)
            + self.channel_attention(pooled_max)
        )
        x = x * gate

        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier mirroring the released checkpoint layout.

    Three variants share one structure (Conv+BN+ReLU+MaxPool stages, each
    followed by a CBAM attention module):

      * 'f': 3 stages, 16 -> 32 -> 64 channels
      * 'c': 3 stages, 32 -> 64 -> 128 channels
      * 'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (28x28 / 14x14) assume 224x224 RGB inputs
    -- TODO confirm against the training pipeline.  An optional 1-channel
    edge branch can be fused with the main features.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the selected variant.

        Args:
            model_type: one of 'f', 'c', 'q'.
            num_classes: width of the output layer.  (Previously this was
                accepted but ignored — the head was hard-coded to 6; it is
                now honoured, with the default unchanged.)

        Raises:
            ValueError: for an unknown ``model_type`` (previously this fell
                through and crashed later with a NameError on ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        def _stages(widths):
            # One Conv+BN+ReLU+Pool stage per consecutive width pair, plus
            # one CBAM module per stage; preserves the checkpoint's module
            # registration order and parameter names.
            layers = []
            for c_in, c_out in zip(widths[:-1], widths[1:]):
                layers += [
                    nn.Conv2d(c_in, c_out, 3, padding=1),
                    nn.BatchNorm2d(c_out),
                    nn.ReLU(),
                    nn.MaxPool2d(2, 2),
                ]
            attention = nn.ModuleList(
                CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]
            )
            return nn.Sequential(*layers), attention

        if model_type == 'f':
            self.conv_layers, self.attention_modules = _stages([3, 16, 32, 64])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers, self.attention_modules = _stages([3, 32, 64, 128])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers, self.attention_modules = _stages(
                [3, 64, 128, 256, 512])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}")

        # Edge detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from ``edge_x``.

        Returns:
            (logits, attention_map): logits of shape (N, num_classes) and
            the feature map after the final attention stage.
        """
        # __init__ guarantees model_type is valid, so branching on depth is
        # sufficient here.
        n_stages = 4 if self.model_type == 'q' else 3
        for stage in range(n_stages):
            x = self.conv_layers[stage * 4:(stage + 1) * 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Linear -> ReLU -> Dropout; `features` has width fc_sizes[0] and
        # feeds either head below.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: any failure (e.g. edge_fc size mismatch
                # for non-224 inputs) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map
Model (Python / PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """Channel-then-spatial (CBAM-style) attention gate.

    Structure follows the released checkpoint exactly: a bias-free 1x1-conv
    MLP for channel attention and a single bias-free 7x7 conv for spatial
    attention, so state_dicts load without remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()

        # Reduced width for the channel MLP, clamped to at least 1 channel.
        squeeze = max(channels // reduction, 1)

        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeeze, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeeze, channels, 1, bias=False),
        )

        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---------------------------------------------
        # Feed both global-avg and global-max descriptors through the
        # shared MLP, sum, squash, and rescale the input channels.
        descriptors = (
            F.adaptive_avg_pool2d(x, 1),
            F.adaptive_max_pool2d(x, 1),
        )
        summed = sum(self.channel_attention(d) for d in descriptors)
        x = x * torch.sigmoid(summed)

        # --- spatial gate ---------------------------------------------
        # Per-pixel mean and max across channels, 7x7 conv, squash.
        stacked = torch.cat(
            (x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values),
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(stacked))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier mirroring the released checkpoint layout.

    Three variants share one structure (Conv+BN+ReLU+MaxPool stages, each
    followed by a CBAM attention module):

      * 'f': 3 stages, 16 -> 32 -> 64 channels
      * 'c': 3 stages, 32 -> 64 -> 128 channels
      * 'q': 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (28x28 / 14x14) assume 224x224 RGB inputs
    -- TODO confirm against the training pipeline.  An optional 1-channel
    edge branch can be fused with the main features.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the selected variant.

        Args:
            model_type: one of 'f', 'c', 'q'.
            num_classes: width of the output layer.  (Previously this was
                accepted but ignored — the head was hard-coded to 6; it is
                now honoured, with the default unchanged.)

        Raises:
            ValueError: for an unknown ``model_type`` (previously this fell
                through and crashed later with a NameError on ``fc_input``).
        """
        super(CheckpointVbaiDPA24, self).__init__()

        self.model_type = model_type
        self.num_classes = num_classes

        def _stages(widths):
            # One Conv+BN+ReLU+Pool stage per consecutive width pair, plus
            # one CBAM module per stage; preserves the checkpoint's module
            # registration order and parameter names.
            layers = []
            for c_in, c_out in zip(widths[:-1], widths[1:]):
                layers += [
                    nn.Conv2d(c_in, c_out, 3, padding=1),
                    nn.BatchNorm2d(c_out),
                    nn.ReLU(),
                    nn.MaxPool2d(2, 2),
                ]
            attention = nn.ModuleList(
                CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]
            )
            return nn.Sequential(*layers), attention

        if model_type == 'f':
            self.conv_layers, self.attention_modules = _stages([3, 16, 32, 64])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers, self.attention_modules = _stages([3, 32, 64, 128])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers, self.attention_modules = _stages(
                [3, 64, 128, 256, 512])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"model_type must be 'f', 'c' or 'q', got {model_type!r}")

        # Edge detection branch (single-channel input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size assumes a 224x224 edge map (two 2x poolings).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )

        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from ``edge_x``.

        Returns:
            (logits, attention_map): logits of shape (N, num_classes) and
            the feature map after the final attention stage.
        """
        # __init__ guarantees model_type is valid, so branching on depth is
        # sufficient here.
        n_stages = 4 if self.model_type == 'q' else 3
        for stage in range(n_stages):
            x = self.conv_layers[stage * 4:(stage + 1) * 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x

        x = x.view(x.size(0), -1)

        # Linear -> ReLU -> Dropout; `features` has width fc_sizes[0] and
        # feeds either head below.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)

                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)

                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: any failure (e.g. edge_fc size mismatch
                # for non-224 inputs) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)

        return output, attention_map

Deploy This Model

Production-ready deployment in minutes

Together.ai

Instant API access to this model

Fastest API

Production-ready inference API. Start free, scale to millions.

Try Free API

Replicate

One-click model deployment

Easiest Setup

Run models in the cloud with simple API. No DevOps required.

Deploy Now

Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.