Vbai-DPA-2.4
1
6 languages
license:cc-by-nc-sa-4.0
by
Neurazum
Image Model
OTHER
New
0 downloads
Early-stage
Edge AI:
Mobile
Laptop
Server
Unknown
Mobile
Laptop
Server
Quick Summary
AI model with specialized capabilities.
Code Examples
Model (Python, PyTorch)
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three conv backbones selected by ``model_type``: 'f' (16-32-64 channels),
    'c' (32-64-128), 'q' (64-128-256-512).  Each Conv+BN+ReLU+Pool stage is
    followed by a CBAM attention module.  An optional single-channel edge map
    can be fused with backbone features via ``combined_classifier``.

    Args:
        model_type: One of 'f', 'c', 'q'; selects backbone depth/width.
        num_classes: Number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(cin, cout):
            # One backbone stage: Conv(3x3) + BN + ReLU + 2x2 max-pool.
            return [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Channel plan and FC widths per variant.  FC input sizes assume a
        # 224x224 input ('f'/'c': 28x28 after 3 pools; 'q': 14x14 after 4).
        # BUG FIX: final FC width was hard-coded to 6; it now follows
        # num_classes (default 6 keeps checkpoint compatibility).
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        stage_layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            stage_layers.extend(_stage(cin, cout))
        self.conv_layers = nn.Sequential(*stage_layers)
        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # NOTE(review): 64*56*56 assumes a 224x224 edge map after two 2x2
        # pools — confirm against the caller's preprocessing.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() runs [0:3] to obtain intermediate
        # features and [3:] as the plain (no-edge) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge-fusion head.

        Returns:
            (logits, attention_map) — attention_map is the final
            attention-weighted feature map.
        """
        # Backbone: every stage is 4 sequential modules followed by CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fc_sizes[0] features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: an edge map of unexpected size
                # falls back to the plain head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three conv backbones selected by ``model_type``: 'f' (16-32-64 channels),
    'c' (32-64-128), 'q' (64-128-256-512).  Each Conv+BN+ReLU+Pool stage is
    followed by a CBAM attention module.  An optional single-channel edge map
    can be fused with backbone features via ``combined_classifier``.

    Args:
        model_type: One of 'f', 'c', 'q'; selects backbone depth/width.
        num_classes: Number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(cin, cout):
            # One backbone stage: Conv(3x3) + BN + ReLU + 2x2 max-pool.
            return [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Channel plan and FC widths per variant.  FC input sizes assume a
        # 224x224 input ('f'/'c': 28x28 after 3 pools; 'q': 14x14 after 4).
        # BUG FIX: final FC width was hard-coded to 6; it now follows
        # num_classes (default 6 keeps checkpoint compatibility).
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        stage_layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            stage_layers.extend(_stage(cin, cout))
        self.conv_layers = nn.Sequential(*stage_layers)
        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # NOTE(review): 64*56*56 assumes a 224x224 edge map after two 2x2
        # pools — confirm against the caller's preprocessing.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() runs [0:3] to obtain intermediate
        # features and [3:] as the plain (no-edge) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge-fusion head.

        Returns:
            (logits, attention_map) — attention_map is the final
            attention-weighted feature map.
        """
        # Backbone: every stage is 4 sequential modules followed by CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fc_sizes[0] features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: an edge map of unexpected size
                # falls back to the plain head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three conv backbones selected by ``model_type``: 'f' (16-32-64 channels),
    'c' (32-64-128), 'q' (64-128-256-512).  Each Conv+BN+ReLU+Pool stage is
    followed by a CBAM attention module.  An optional single-channel edge map
    can be fused with backbone features via ``combined_classifier``.

    Args:
        model_type: One of 'f', 'c', 'q'; selects backbone depth/width.
        num_classes: Number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(cin, cout):
            # One backbone stage: Conv(3x3) + BN + ReLU + 2x2 max-pool.
            return [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Channel plan and FC widths per variant.  FC input sizes assume a
        # 224x224 input ('f'/'c': 28x28 after 3 pools; 'q': 14x14 after 4).
        # BUG FIX: final FC width was hard-coded to 6; it now follows
        # num_classes (default 6 keeps checkpoint compatibility).
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        stage_layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            stage_layers.extend(_stage(cin, cout))
        self.conv_layers = nn.Sequential(*stage_layers)
        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # NOTE(review): 64*56*56 assumes a 224x224 edge map after two 2x2
        # pools — confirm against the caller's preprocessing.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() runs [0:3] to obtain intermediate
        # features and [3:] as the plain (no-edge) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge-fusion head.

        Returns:
            (logits, attention_map) — attention_map is the final
            attention-weighted feature map.
        """
        # Backbone: every stage is 4 sequential modules followed by CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fc_sizes[0] features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: an edge map of unexpected size
                # falls back to the plain head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three conv backbones selected by ``model_type``: 'f' (16-32-64 channels),
    'c' (32-64-128), 'q' (64-128-256-512).  Each Conv+BN+ReLU+Pool stage is
    followed by a CBAM attention module.  An optional single-channel edge map
    can be fused with backbone features via ``combined_classifier``.

    Args:
        model_type: One of 'f', 'c', 'q'; selects backbone depth/width.
        num_classes: Number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(cin, cout):
            # One backbone stage: Conv(3x3) + BN + ReLU + 2x2 max-pool.
            return [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Channel plan and FC widths per variant.  FC input sizes assume a
        # 224x224 input ('f'/'c': 28x28 after 3 pools; 'q': 14x14 after 4).
        # BUG FIX: final FC width was hard-coded to 6; it now follows
        # num_classes (default 6 keeps checkpoint compatibility).
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        stage_layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            stage_layers.extend(_stage(cin, cout))
        self.conv_layers = nn.Sequential(*stage_layers)
        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # NOTE(review): 64*56*56 assumes a 224x224 edge map after two 2x2
        # pools — confirm against the caller's preprocessing.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() runs [0:3] to obtain intermediate
        # features and [3:] as the plain (no-edge) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge-fusion head.

        Returns:
            (logits, attention_map) — attention_map is the final
            attention-weighted feature map.
        """
        # Backbone: every stage is 4 sequential modules followed by CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fc_sizes[0] features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: an edge map of unexpected size
                # falls back to the plain head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three conv backbones selected by ``model_type``: 'f' (16-32-64 channels),
    'c' (32-64-128), 'q' (64-128-256-512).  Each Conv+BN+ReLU+Pool stage is
    followed by a CBAM attention module.  An optional single-channel edge map
    can be fused with backbone features via ``combined_classifier``.

    Args:
        model_type: One of 'f', 'c', 'q'; selects backbone depth/width.
        num_classes: Number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(cin, cout):
            # One backbone stage: Conv(3x3) + BN + ReLU + 2x2 max-pool.
            return [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Channel plan and FC widths per variant.  FC input sizes assume a
        # 224x224 input ('f'/'c': 28x28 after 3 pools; 'q': 14x14 after 4).
        # BUG FIX: final FC width was hard-coded to 6; it now follows
        # num_classes (default 6 keeps checkpoint compatibility).
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        stage_layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            stage_layers.extend(_stage(cin, cout))
        self.conv_layers = nn.Sequential(*stage_layers)
        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # NOTE(review): 64*56*56 assumes a 224x224 edge map after two 2x2
        # pools — confirm against the caller's preprocessing.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() runs [0:3] to obtain intermediate
        # features and [3:] as the plain (no-edge) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge-fusion head.

        Returns:
            (logits, attention_map) — attention_map is the final
            attention-weighted feature map.
        """
        # Backbone: every stage is 4 sequential modules followed by CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fc_sizes[0] features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: an edge map of unexpected size
                # falls back to the plain head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three conv backbones selected by ``model_type``: 'f' (16-32-64 channels),
    'c' (32-64-128), 'q' (64-128-256-512).  Each Conv+BN+ReLU+Pool stage is
    followed by a CBAM attention module.  An optional single-channel edge map
    can be fused with backbone features via ``combined_classifier``.

    Args:
        model_type: One of 'f', 'c', 'q'; selects backbone depth/width.
        num_classes: Number of output classes (default 6, matching the
            released checkpoint).

    Raises:
        ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(cin, cout):
            # One backbone stage: Conv(3x3) + BN + ReLU + 2x2 max-pool.
            return [
                nn.Conv2d(cin, cout, 3, padding=1),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Channel plan and FC widths per variant.  FC input sizes assume a
        # 224x224 input ('f'/'c': 28x28 after 3 pools; 'q': 14x14 after 4).
        # BUG FIX: final FC width was hard-coded to 6; it now follows
        # num_classes (default 6 keeps checkpoint compatibility).
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        stage_layers = []
        for cin, cout in zip(widths[:-1], widths[1:]):
            stage_layers.extend(_stage(cin, cout))
        self.conv_layers = nn.Sequential(*stage_layers)
        # One CBAM block per stage, sized to that stage's output channels.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )

        # Edge detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # NOTE(review): 64*56*56 assumes a 224x224 edge map after two 2x2
        # pools — confirm against the caller's preprocessing.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier; forward() runs [0:3] to obtain intermediate
        # features and [3:] as the plain (no-edge) head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (backbone features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge-fusion head.

        Returns:
            (logits, attention_map) — attention_map is the final
            attention-weighted feature map.
        """
        # Backbone: every stage is 4 sequential modules followed by CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fc_sizes[0] features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: an edge map of unexpected size
                # falls back to the plain head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Layer layout (bias-free convolutions) deliberately mirrors the saved
    checkpoint so parameter names and shapes line up when loading weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, NO bias (checkpoint layout).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps, NO bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg- and max-pooled maps ---
        squeezed_avg = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        squeezed_max = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(squeezed_avg + squeezed_max)
        # --- spatial gate: conv over per-pixel channel mean and max ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a stack of Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after every stage, then an MLP
    classifier:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional edge branch (two convs + FC) can be fused with the main
    features via ``combined_classifier``; on any failure in that branch the
    model falls back to the main classifier head.

    NOTE(review): the FC input sizes imply a 224x224 RGB input (28x28 or
    14x14 final map) and a 224x224 single-channel edge input -- TODO confirm
    against the training pipeline.
    """

    @staticmethod
    def _stage(in_ch, out_ch):
        """Return one Conv(3x3)+BN+ReLU+MaxPool(2) stage as a flat list.

        Kept flat (not nested in a sub-Sequential) so ``conv_layers``
        indices keep matching the checkpoint's ``conv_layers.N.*`` keys.
        """
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Output-layer width (default 6, as in the released
                checkpoints).

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Per-variant stage widths, classifier widths and final map size.
        # BUGFIX: the head width now honours ``num_classes`` instead of a
        # hard-coded 6.
        if model_type == 'f':
            widths, fc_sizes, final_map = [16, 32, 64], [256, 128, num_classes], 28
        elif model_type == 'c':
            widths, fc_sizes, final_map = [32, 64, 128], [512, 256, num_classes], 28
        elif model_type == 'q':
            widths, fc_sizes, final_map = [64, 128, 256, 512], [1024, 512, num_classes], 14
        else:
            # Fail fast; previously an unknown type caused a NameError below.
            raise ValueError(
                "unknown model_type: %r (expected 'f', 'c' or 'q')" % (model_type,)
            )
        layers = []
        in_ch = 3
        for width in widths:
            layers.extend(self._stage(in_ch, width))
            in_ch = width
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block per stage, matching that stage's channel count.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(width, reduction=8) for width in widths]
        )
        fc_input = widths[-1] * final_map * final_map
        # Edge detection branch (optional second input in ``forward``).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 x 56 x 56 after two 2x2 pools -- assumes 224x224 edge input.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: two Linear+ReLU+Dropout blocks, then the head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB input batch, shape (B, 3, H, W).
            edge_x: Optional single-channel edge batch, shape (B, 1, H, W).

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the feature map after the last attention block.
        """
        # Each stage is 4 consecutive conv_layers modules followed by its
        # CBAM block; the loop covers both the 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Shared features: first Linear+ReLU+Dropout block of the classifier.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                e = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(torch.cat([features, e], dim=1))
            except Exception:
                # Deliberate best-effort: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) falls back to the main head.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, per the
    checkpoint); spatial attention is a single bias-free 7x7 convolution
    over the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: 7x7 conv over [mean, max] channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a stack of Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after every stage, then an MLP
    classifier:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional edge branch (two convs + FC) can be fused with the main
    features via ``combined_classifier``; on any failure in that branch the
    model falls back to the main classifier head.

    NOTE(review): the FC input sizes imply a 224x224 RGB input (28x28 or
    14x14 final map) and a 224x224 single-channel edge input -- TODO confirm
    against the training pipeline.
    """

    @staticmethod
    def _stage(in_ch, out_ch):
        """Return one Conv(3x3)+BN+ReLU+MaxPool(2) stage as a flat list.

        Kept flat (not nested in a sub-Sequential) so ``conv_layers``
        indices keep matching the checkpoint's ``conv_layers.N.*`` keys.
        """
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Output-layer width (default 6, as in the released
                checkpoints).

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Per-variant stage widths, classifier widths and final map size.
        # BUGFIX: the head width now honours ``num_classes`` instead of a
        # hard-coded 6.
        if model_type == 'f':
            widths, fc_sizes, final_map = [16, 32, 64], [256, 128, num_classes], 28
        elif model_type == 'c':
            widths, fc_sizes, final_map = [32, 64, 128], [512, 256, num_classes], 28
        elif model_type == 'q':
            widths, fc_sizes, final_map = [64, 128, 256, 512], [1024, 512, num_classes], 14
        else:
            # Fail fast; previously an unknown type caused a NameError below.
            raise ValueError(
                "unknown model_type: %r (expected 'f', 'c' or 'q')" % (model_type,)
            )
        layers = []
        in_ch = 3
        for width in widths:
            layers.extend(self._stage(in_ch, width))
            in_ch = width
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block per stage, matching that stage's channel count.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(width, reduction=8) for width in widths]
        )
        fc_input = widths[-1] * final_map * final_map
        # Edge detection branch (optional second input in ``forward``).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 x 56 x 56 after two 2x2 pools -- assumes 224x224 edge input.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: two Linear+ReLU+Dropout blocks, then the head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB input batch, shape (B, 3, H, W).
            edge_x: Optional single-channel edge batch, shape (B, 1, H, W).

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the feature map after the last attention block.
        """
        # Each stage is 4 consecutive conv_layers modules followed by its
        # CBAM block; the loop covers both the 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Shared features: first Linear+ReLU+Dropout block of the classifier.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                e = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(torch.cat([features, e], dim=1))
            except Exception:
                # Deliberate best-effort: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) falls back to the main head.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, per the
    checkpoint); spatial attention is a single bias-free 7x7 convolution
    over the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: 7x7 conv over [mean, max] channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a stack of Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after every stage, then an MLP
    classifier:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional edge branch (two convs + FC) can be fused with the main
    features via ``combined_classifier``; on any failure in that branch the
    model falls back to the main classifier head.

    NOTE(review): the FC input sizes imply a 224x224 RGB input (28x28 or
    14x14 final map) and a 224x224 single-channel edge input -- TODO confirm
    against the training pipeline.
    """

    @staticmethod
    def _stage(in_ch, out_ch):
        """Return one Conv(3x3)+BN+ReLU+MaxPool(2) stage as a flat list.

        Kept flat (not nested in a sub-Sequential) so ``conv_layers``
        indices keep matching the checkpoint's ``conv_layers.N.*`` keys.
        """
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Output-layer width (default 6, as in the released
                checkpoints).

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Per-variant stage widths, classifier widths and final map size.
        # BUGFIX: the head width now honours ``num_classes`` instead of a
        # hard-coded 6.
        if model_type == 'f':
            widths, fc_sizes, final_map = [16, 32, 64], [256, 128, num_classes], 28
        elif model_type == 'c':
            widths, fc_sizes, final_map = [32, 64, 128], [512, 256, num_classes], 28
        elif model_type == 'q':
            widths, fc_sizes, final_map = [64, 128, 256, 512], [1024, 512, num_classes], 14
        else:
            # Fail fast; previously an unknown type caused a NameError below.
            raise ValueError(
                "unknown model_type: %r (expected 'f', 'c' or 'q')" % (model_type,)
            )
        layers = []
        in_ch = 3
        for width in widths:
            layers.extend(self._stage(in_ch, width))
            in_ch = width
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block per stage, matching that stage's channel count.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(width, reduction=8) for width in widths]
        )
        fc_input = widths[-1] * final_map * final_map
        # Edge detection branch (optional second input in ``forward``).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 x 56 x 56 after two 2x2 pools -- assumes 224x224 edge input.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: two Linear+ReLU+Dropout blocks, then the head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB input batch, shape (B, 3, H, W).
            edge_x: Optional single-channel edge batch, shape (B, 1, H, W).

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the feature map after the last attention block.
        """
        # Each stage is 4 consecutive conv_layers modules followed by its
        # CBAM block; the loop covers both the 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Shared features: first Linear+ReLU+Dropout block of the classifier.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                e = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(torch.cat([features, e], dim=1))
            except Exception:
                # Deliberate best-effort: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) falls back to the main head.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, per the
    checkpoint); spatial attention is a single bias-free 7x7 convolution
    over the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: 7x7 conv over [mean, max] channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a stack of Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after every stage, then an MLP
    classifier:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional edge branch (two convs + FC) can be fused with the main
    features via ``combined_classifier``; on any failure in that branch the
    model falls back to the main classifier head.

    NOTE(review): the FC input sizes imply a 224x224 RGB input (28x28 or
    14x14 final map) and a 224x224 single-channel edge input -- TODO confirm
    against the training pipeline.
    """

    @staticmethod
    def _stage(in_ch, out_ch):
        """Return one Conv(3x3)+BN+ReLU+MaxPool(2) stage as a flat list.

        Kept flat (not nested in a sub-Sequential) so ``conv_layers``
        indices keep matching the checkpoint's ``conv_layers.N.*`` keys.
        """
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Output-layer width (default 6, as in the released
                checkpoints).

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Per-variant stage widths, classifier widths and final map size.
        # BUGFIX: the head width now honours ``num_classes`` instead of a
        # hard-coded 6.
        if model_type == 'f':
            widths, fc_sizes, final_map = [16, 32, 64], [256, 128, num_classes], 28
        elif model_type == 'c':
            widths, fc_sizes, final_map = [32, 64, 128], [512, 256, num_classes], 28
        elif model_type == 'q':
            widths, fc_sizes, final_map = [64, 128, 256, 512], [1024, 512, num_classes], 14
        else:
            # Fail fast; previously an unknown type caused a NameError below.
            raise ValueError(
                "unknown model_type: %r (expected 'f', 'c' or 'q')" % (model_type,)
            )
        layers = []
        in_ch = 3
        for width in widths:
            layers.extend(self._stage(in_ch, width))
            in_ch = width
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block per stage, matching that stage's channel count.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(width, reduction=8) for width in widths]
        )
        fc_input = widths[-1] * final_map * final_map
        # Edge detection branch (optional second input in ``forward``).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 x 56 x 56 after two 2x2 pools -- assumes 224x224 edge input.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: two Linear+ReLU+Dropout blocks, then the head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB input batch, shape (B, 3, H, W).
            edge_x: Optional single-channel edge batch, shape (B, 1, H, W).

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the feature map after the last attention block.
        """
        # Each stage is 4 consecutive conv_layers modules followed by its
        # CBAM block; the loop covers both the 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Shared features: first Linear+ReLU+Dropout block of the classifier.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                e = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(torch.cat([features, e], dim=1))
            except Exception:
                # Deliberate best-effort: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) falls back to the main head.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, per the
    checkpoint); spatial attention is a single bias-free 7x7 convolution
    over the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: 7x7 conv over [mean, max] channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a stack of Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after every stage, then an MLP
    classifier:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional edge branch (two convs + FC) can be fused with the main
    features via ``combined_classifier``; on any failure in that branch the
    model falls back to the main classifier head.

    NOTE(review): the FC input sizes imply a 224x224 RGB input (28x28 or
    14x14 final map) and a 224x224 single-channel edge input -- TODO confirm
    against the training pipeline.
    """

    @staticmethod
    def _stage(in_ch, out_ch):
        """Return one Conv(3x3)+BN+ReLU+MaxPool(2) stage as a flat list.

        Kept flat (not nested in a sub-Sequential) so ``conv_layers``
        indices keep matching the checkpoint's ``conv_layers.N.*`` keys.
        """
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Output-layer width (default 6, as in the released
                checkpoints).

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Per-variant stage widths, classifier widths and final map size.
        # BUGFIX: the head width now honours ``num_classes`` instead of a
        # hard-coded 6.
        if model_type == 'f':
            widths, fc_sizes, final_map = [16, 32, 64], [256, 128, num_classes], 28
        elif model_type == 'c':
            widths, fc_sizes, final_map = [32, 64, 128], [512, 256, num_classes], 28
        elif model_type == 'q':
            widths, fc_sizes, final_map = [64, 128, 256, 512], [1024, 512, num_classes], 14
        else:
            # Fail fast; previously an unknown type caused a NameError below.
            raise ValueError(
                "unknown model_type: %r (expected 'f', 'c' or 'q')" % (model_type,)
            )
        layers = []
        in_ch = 3
        for width in widths:
            layers.extend(self._stage(in_ch, width))
            in_ch = width
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block per stage, matching that stage's channel count.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(width, reduction=8) for width in widths]
        )
        fc_input = widths[-1] * final_map * final_map
        # Edge detection branch (optional second input in ``forward``).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 x 56 x 56 after two 2x2 pools -- assumes 224x224 edge input.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: two Linear+ReLU+Dropout blocks, then the head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB input batch, shape (B, 3, H, W).
            edge_x: Optional single-channel edge batch, shape (B, 1, H, W).

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the feature map after the last attention block.
        """
        # Each stage is 4 consecutive conv_layers modules followed by its
        # CBAM block; the loop covers both the 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Shared features: first Linear+ReLU+Dropout block of the classifier.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                e = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(torch.cat([features, e], dim=1))
            except Exception:
                # Deliberate best-effort: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) falls back to the main head.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, per the
    checkpoint); spatial attention is a single bias-free 7x7 convolution
    over the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: 7x7 conv over [mean, max] channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a stack of Conv+BN+ReLU+MaxPool
    stages with a CBAM attention block after every stage, then an MLP
    classifier:

    * ``'f'``: 3 stages, 16/32/64 channels
    * ``'c'``: 3 stages, 32/64/128 channels
    * ``'q'``: 4 stages, 64/128/256/512 channels

    An optional edge branch (two convs + FC) can be fused with the main
    features via ``combined_classifier``; on any failure in that branch the
    model falls back to the main classifier head.

    NOTE(review): the FC input sizes imply a 224x224 RGB input (28x28 or
    14x14 final map) and a 224x224 single-channel edge input -- TODO confirm
    against the training pipeline.
    """

    @staticmethod
    def _stage(in_ch, out_ch):
        """Return one Conv(3x3)+BN+ReLU+MaxPool(2) stage as a flat list.

        Kept flat (not nested in a sub-Sequential) so ``conv_layers``
        indices keep matching the checkpoint's ``conv_layers.N.*`` keys.
        """
        return [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: One of ``'f'``, ``'c'`` or ``'q'``.
            num_classes: Output-layer width (default 6, as in the released
                checkpoints).

        Raises:
            ValueError: If ``model_type`` is not a known variant.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Per-variant stage widths, classifier widths and final map size.
        # BUGFIX: the head width now honours ``num_classes`` instead of a
        # hard-coded 6.
        if model_type == 'f':
            widths, fc_sizes, final_map = [16, 32, 64], [256, 128, num_classes], 28
        elif model_type == 'c':
            widths, fc_sizes, final_map = [32, 64, 128], [512, 256, num_classes], 28
        elif model_type == 'q':
            widths, fc_sizes, final_map = [64, 128, 256, 512], [1024, 512, num_classes], 14
        else:
            # Fail fast; previously an unknown type caused a NameError below.
            raise ValueError(
                "unknown model_type: %r (expected 'f', 'c' or 'q')" % (model_type,)
            )
        layers = []
        in_ch = 3
        for width in widths:
            layers.extend(self._stage(in_ch, width))
            in_ch = width
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block per stage, matching that stage's channel count.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(width, reduction=8) for width in widths]
        )
        fc_input = widths[-1] * final_map * final_map
        # Edge detection branch (optional second input in ``forward``).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 x 56 x 56 after two 2x2 pools -- assumes 224x224 edge input.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: two Linear+ReLU+Dropout blocks, then the head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: RGB input batch, shape (B, 3, H, W).
            edge_x: Optional single-channel edge batch, shape (B, 1, H, W).

        Returns:
            Tuple ``(logits, attention_map)``, where ``attention_map`` is
            the feature map after the last attention block.
        """
        # Each stage is 4 consecutive conv_layers modules followed by its
        # CBAM block; the loop covers both the 3- and 4-stage variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Shared features: first Linear+ReLU+Dropout block of the classifier.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                e = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(torch.cat([features, e], dim=1))
            except Exception:
                # Deliberate best-effort: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) falls back to the main head.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, per the
    checkpoint); spatial attention is a single bias-free 7x7 convolution
    over the channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel attention, then spatial attention, to ``x``."""
        # --- channel attention: shared MLP over avg- and max-pooled maps ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: 7x7 conv over [mean, max] channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the conv backbone (CBAM after each conv group) and a classifier head.

    Args:
        x: RGB batch; expected 224x224 spatially so the FC input sizes line up
           (assumption from fc_input = C * 28 * 28 / C * 14 * 14 — TODO confirm).
        edge_x: optional single-channel edge map routed through the
            edge-fusion head when provided.

    Returns:
        (logits, attention_map): attention_map is the final attended feature map.

    Raises:
        ValueError: if self.model_type is not one of 'f', 'c', 'q'.
    """
    if self.model_type == 'f' or self.model_type == 'c':
        # Three Conv+BN+ReLU+Pool groups, each followed by its CBAM block.
        x = self.conv_layers[0:4](x)
        x = self.attention_modules[0](x)
        x = self.conv_layers[4:8](x)
        x = self.attention_modules[1](x)
        x = self.conv_layers[8:12](x)
        x = self.attention_modules[2](x)
        attention_map = x
    elif self.model_type == 'q':
        # Four groups for the larger 'q' variant.
        x = self.conv_layers[0:4](x)
        x = self.attention_modules[0](x)
        x = self.conv_layers[4:8](x)
        x = self.attention_modules[1](x)
        x = self.conv_layers[8:12](x)
        x = self.attention_modules[2](x)
        x = self.conv_layers[12:16](x)
        x = self.attention_modules[3](x)
        attention_map = x
    else:
        # The original fell through and raised NameError on attention_map below.
        raise ValueError(f"unknown model_type {self.model_type!r}; expected 'f', 'c' or 'q'")
    x = x.view(x.size(0), -1)
    # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)
    if edge_x is not None:
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = edge_x.view(edge_x.size(0), -1)
            edge_features = self.edge_fc(edge_features)
            combined = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(combined)
        except Exception:
            # Best-effort fusion: if the edge branch fails (e.g. unexpected
            # edge_x size for edge_fc), fall back to the plain head.
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial), matching the checkpoint layout.

    Channel attention uses a shared two-layer 1x1-conv MLP (no bias) applied to
    both the average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv over the per-pixel channel mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared MLP for channel attention (Conv2d 1x1, NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS, as in checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate from pooled descriptors run through the shared MLP.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate from per-pixel channel statistics.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants 'f', 'c', 'q' differ only in conv width/depth and FC widths.
    forward() returns (logits, attention_map); an optional single-channel
    edge image routes the head through the edge-fusion classifier.
    """

    # Per-variant config: conv channel progression, hidden FC widths, and the
    # spatial side of the final feature map (assumes 224x224 input — TODO confirm).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._VARIANTS:
            # Fail fast: the original left fc_input/fc_sizes unbound and
            # crashed later with a NameError.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, side = self._VARIANTS[model_type]
        # Conv stem: repeated Conv+BN+ReLU+MaxPool groups of 4 layers. Building
        # it in a loop keeps nn.Sequential indices (and thus state_dict keys)
        # identical to the checkpoint's hand-written layout.
        stem = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stem += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stem)
        # One CBAM block after each conv group.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * side * side
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 56x56 feature map feeds edge_fc.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier. The final width now honours num_classes — the
        # original hard-coded 6 and silently ignored the argument
        # (default 6 keeps checkpoint compatibility).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+CBAM after each group) and a classifier head.

        Args:
            x: RGB batch, expected (N, 3, 224, 224) so FC sizes line up.
            edge_x: optional (N, 1, 224, 224) edge map for the fusion head.

        Returns:
            (logits, attention_map): attention_map is the final attended
            feature map.
        """
        # Each group of 4 sequential layers (Conv+BN+ReLU+Pool) is followed by
        # its matching CBAM block; the loop unifies the 'f'/'c'/'q' paths.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. unexpected edge_x spatial size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial), matching the checkpoint layout.

    Channel attention uses a shared two-layer 1x1-conv MLP (no bias) applied to
    both the average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv over the per-pixel channel mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared MLP for channel attention (Conv2d 1x1, NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS, as in checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate from pooled descriptors run through the shared MLP.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate from per-pixel channel statistics.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants 'f', 'c', 'q' differ only in conv width/depth and FC widths.
    forward() returns (logits, attention_map); an optional single-channel
    edge image routes the head through the edge-fusion classifier.
    """

    # Per-variant config: conv channel progression, hidden FC widths, and the
    # spatial side of the final feature map (assumes 224x224 input — TODO confirm).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._VARIANTS:
            # Fail fast: the original left fc_input/fc_sizes unbound and
            # crashed later with a NameError.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, side = self._VARIANTS[model_type]
        # Conv stem: repeated Conv+BN+ReLU+MaxPool groups of 4 layers. Building
        # it in a loop keeps nn.Sequential indices (and thus state_dict keys)
        # identical to the checkpoint's hand-written layout.
        stem = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stem += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stem)
        # One CBAM block after each conv group.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * side * side
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 56x56 feature map feeds edge_fc.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier. The final width now honours num_classes — the
        # original hard-coded 6 and silently ignored the argument
        # (default 6 keeps checkpoint compatibility).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+CBAM after each group) and a classifier head.

        Args:
            x: RGB batch, expected (N, 3, 224, 224) so FC sizes line up.
            edge_x: optional (N, 1, 224, 224) edge map for the fusion head.

        Returns:
            (logits, attention_map): attention_map is the final attended
            feature map.
        """
        # Each group of 4 sequential layers (Conv+BN+ReLU+Pool) is followed by
        # its matching CBAM block; the loop unifies the 'f'/'c'/'q' paths.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. unexpected edge_x spatial size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial), matching the checkpoint layout.

    Channel attention uses a shared two-layer 1x1-conv MLP (no bias) applied to
    both the average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv over the per-pixel channel mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared MLP for channel attention (Conv2d 1x1, NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS, as in checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate from pooled descriptors run through the shared MLP.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate from per-pixel channel statistics.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants 'f', 'c', 'q' differ only in conv width/depth and FC widths.
    forward() returns (logits, attention_map); an optional single-channel
    edge image routes the head through the edge-fusion classifier.
    """

    # Per-variant config: conv channel progression, hidden FC widths, and the
    # spatial side of the final feature map (assumes 224x224 input — TODO confirm).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._VARIANTS:
            # Fail fast: the original left fc_input/fc_sizes unbound and
            # crashed later with a NameError.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, side = self._VARIANTS[model_type]
        # Conv stem: repeated Conv+BN+ReLU+MaxPool groups of 4 layers. Building
        # it in a loop keeps nn.Sequential indices (and thus state_dict keys)
        # identical to the checkpoint's hand-written layout.
        stem = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stem += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stem)
        # One CBAM block after each conv group.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * side * side
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 56x56 feature map feeds edge_fc.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier. The final width now honours num_classes — the
        # original hard-coded 6 and silently ignored the argument
        # (default 6 keeps checkpoint compatibility).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+CBAM after each group) and a classifier head.

        Args:
            x: RGB batch, expected (N, 3, 224, 224) so FC sizes line up.
            edge_x: optional (N, 1, 224, 224) edge map for the fusion head.

        Returns:
            (logits, attention_map): attention_map is the final attended
            feature map.
        """
        # Each group of 4 sequential layers (Conv+BN+ReLU+Pool) is followed by
        # its matching CBAM block; the loop unifies the 'f'/'c'/'q' paths.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. unexpected edge_x spatial size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial), matching the checkpoint layout.

    Channel attention uses a shared two-layer 1x1-conv MLP (no bias) applied to
    both the average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv over the per-pixel channel mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared MLP for channel attention (Conv2d 1x1, NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS, as in checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate from pooled descriptors run through the shared MLP.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate from per-pixel channel statistics.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants 'f', 'c', 'q' differ only in conv width/depth and FC widths.
    forward() returns (logits, attention_map); an optional single-channel
    edge image routes the head through the edge-fusion classifier.
    """

    # Per-variant config: conv channel progression, hidden FC widths, and the
    # spatial side of the final feature map (assumes 224x224 input — TODO confirm).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._VARIANTS:
            # Fail fast: the original left fc_input/fc_sizes unbound and
            # crashed later with a NameError.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, side = self._VARIANTS[model_type]
        # Conv stem: repeated Conv+BN+ReLU+MaxPool groups of 4 layers. Building
        # it in a loop keeps nn.Sequential indices (and thus state_dict keys)
        # identical to the checkpoint's hand-written layout.
        stem = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stem += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stem)
        # One CBAM block after each conv group.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * side * side
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 56x56 feature map feeds edge_fc.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier. The final width now honours num_classes — the
        # original hard-coded 6 and silently ignored the argument
        # (default 6 keeps checkpoint compatibility).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+CBAM after each group) and a classifier head.

        Args:
            x: RGB batch, expected (N, 3, 224, 224) so FC sizes line up.
            edge_x: optional (N, 1, 224, 224) edge map for the fusion head.

        Returns:
            (logits, attention_map): attention_map is the final attended
            feature map.
        """
        # Each group of 4 sequential layers (Conv+BN+ReLU+Pool) is followed by
        # its matching CBAM block; the loop unifies the 'f'/'c'/'q' paths.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. unexpected edge_x spatial size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial), matching the checkpoint layout.

    Channel attention uses a shared two-layer 1x1-conv MLP (no bias) applied to
    both the average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv over the per-pixel channel mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared MLP for channel attention (Conv2d 1x1, NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS, as in checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate from pooled descriptors run through the shared MLP.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate from per-pixel channel statistics.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants 'f', 'c', 'q' differ only in conv width/depth and FC widths.
    forward() returns (logits, attention_map); an optional single-channel
    edge image routes the head through the edge-fusion classifier.
    """

    # Per-variant config: conv channel progression, hidden FC widths, and the
    # spatial side of the final feature map (assumes 224x224 input — TODO confirm).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._VARIANTS:
            # Fail fast: the original left fc_input/fc_sizes unbound and
            # crashed later with a NameError.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, side = self._VARIANTS[model_type]
        # Conv stem: repeated Conv+BN+ReLU+MaxPool groups of 4 layers. Building
        # it in a loop keeps nn.Sequential indices (and thus state_dict keys)
        # identical to the checkpoint's hand-written layout.
        stem = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stem += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stem)
        # One CBAM block after each conv group.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * side * side
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 56x56 feature map feeds edge_fc.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier. The final width now honours num_classes — the
        # original hard-coded 6 and silently ignored the argument
        # (default 6 keeps checkpoint compatibility).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+CBAM after each group) and a classifier head.

        Args:
            x: RGB batch, expected (N, 3, 224, 224) so FC sizes line up.
            edge_x: optional (N, 1, 224, 224) edge map for the fusion head.

        Returns:
            (logits, attention_map): attention_map is the final attended
            feature map.
        """
        # Each group of 4 sequential layers (Conv+BN+ReLU+Pool) is followed by
        # its matching CBAM block; the loop unifies the 'f'/'c'/'q' paths.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. unexpected edge_x spatial size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial), matching the checkpoint layout.

    Channel attention uses a shared two-layer 1x1-conv MLP (no bias) applied to
    both the average- and max-pooled descriptors; spatial attention applies a
    single bias-free 7x7 conv over the per-pixel channel mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared MLP for channel attention (Conv2d 1x1, NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv, NO BIAS, as in checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate from pooled descriptors run through the shared MLP.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate from per-pixel channel statistics.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier reconstructed to match the released checkpoint.

    Variants 'f', 'c', 'q' differ only in conv width/depth and FC widths.
    forward() returns (logits, attention_map); an optional single-channel
    edge image routes the head through the edge-fusion classifier.
    """

    # Per-variant config: conv channel progression, hidden FC widths, and the
    # spatial side of the final feature map (assumes 224x224 input — TODO confirm).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._VARIANTS:
            # Fail fast: the original left fc_input/fc_sizes unbound and
            # crashed later with a NameError.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of 'f', 'c', 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, side = self._VARIANTS[model_type]
        # Conv stem: repeated Conv+BN+ReLU+MaxPool groups of 4 layers. Building
        # it in a loop keeps nn.Sequential indices (and thus state_dict keys)
        # identical to the checkpoint's hand-written layout.
        stem = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stem += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stem)
        # One CBAM block after each conv group.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * side * side
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 56x56 feature map feeds edge_fc.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier. The final width now honours num_classes — the
        # original hard-coded 6 and silently ignored the argument
        # (default 6 keeps checkpoint compatibility).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+CBAM after each group) and a classifier head.

        Args:
            x: RGB batch, expected (N, 3, 224, 224) so FC sizes line up.
            edge_x: optional (N, 1, 224, 224) edge map for the fusion head.

        Returns:
            (logits, attention_map): attention_map is the final attended
            feature map.
        """
        # Each group of 4 sequential layers (Conv+BN+ReLU+Pool) is followed by
        # its matching CBAM block; the loop unifies the 'f'/'c'/'q' paths.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # branch fails (e.g. unexpected edge_x spatial size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with optional edge branch.

    Variants (selected by ``model_type``):
      'f': 3 stages, 16/32/64 channels,  fc widths 256/128
      'c': 3 stages, 32/64/128 channels, fc widths 512/256
      'q': 4 stages, 64/128/256/512 channels, fc widths 1024/512
    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2) followed by a CBAM
    attention block. The flattened sizes (28x28 for 'f'/'c', 14x14 for 'q',
    56x56 for the edge branch) imply a 224x224 input -- assumed from the
    layer sizes, confirm against the caller.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the backbone, attention blocks, edge branch and heads.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Size of the output logit vector (default 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential of Conv+BN+ReLU+Pool stages (4 modules each)
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: the original fell through and crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        # Edge detection branch (expects a 1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features fused with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier.

        Args:
            x: RGB batch, shape (N, 3, H, W); layer sizes imply H = W = 224.
            edge_x: Optional edge-map batch, shape (N, 1, H, W).

        Returns:
            (logits, attention_map): class logits of shape (N, num_classes)
            and the post-attention feature map of the last backbone stage.
        """
        # Backbone: run each 4-module Conv+BN+ReLU+Pool stage, then its CBAM.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the head; its output doubles as the
        # feature vector fed to the combined (edge-fused) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g.
                # unexpected edge_x size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, checkpoint-compatible layout.

    Channel attention is a bias-free 1x1-conv bottleneck MLP applied to the
    global average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the channel-wise mean/max maps. Both gates are
    sigmoids, so the output is the input scaled element-wise by factors in
    (0, 1) -- the tensor shape is preserved.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width; floor at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) to match the serialized
        # checkpoint's parameter names and shapes.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # --- spatial gate ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with optional edge branch.

    Variants (selected by ``model_type``):
      'f': 3 stages, 16/32/64 channels,  fc widths 256/128
      'c': 3 stages, 32/64/128 channels, fc widths 512/256
      'q': 4 stages, 64/128/256/512 channels, fc widths 1024/512
    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2) followed by a CBAM
    attention block. The flattened sizes (28x28 for 'f'/'c', 14x14 for 'q',
    56x56 for the edge branch) imply a 224x224 input -- assumed from the
    layer sizes, confirm against the caller.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the backbone, attention blocks, edge branch and heads.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Size of the output logit vector (default 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential of Conv+BN+ReLU+Pool stages (4 modules each)
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: the original fell through and crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        # Edge detection branch (expects a 1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features fused with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier.

        Args:
            x: RGB batch, shape (N, 3, H, W); layer sizes imply H = W = 224.
            edge_x: Optional edge-map batch, shape (N, 1, H, W).

        Returns:
            (logits, attention_map): class logits of shape (N, num_classes)
            and the post-attention feature map of the last backbone stage.
        """
        # Backbone: run each 4-module Conv+BN+ReLU+Pool stage, then its CBAM.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the head; its output doubles as the
        # feature vector fed to the combined (edge-fused) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g.
                # unexpected edge_x size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, checkpoint-compatible layout.

    Channel attention is a bias-free 1x1-conv bottleneck MLP applied to the
    global average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the channel-wise mean/max maps. Both gates are
    sigmoids, so the output is the input scaled element-wise by factors in
    (0, 1) -- the tensor shape is preserved.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width; floor at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) to match the serialized
        # checkpoint's parameter names and shapes.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # --- spatial gate ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with optional edge branch.

    Variants (selected by ``model_type``):
      'f': 3 stages, 16/32/64 channels,  fc widths 256/128
      'c': 3 stages, 32/64/128 channels, fc widths 512/256
      'q': 4 stages, 64/128/256/512 channels, fc widths 1024/512
    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2) followed by a CBAM
    attention block. The flattened sizes (28x28 for 'f'/'c', 14x14 for 'q',
    56x56 for the edge branch) imply a 224x224 input -- assumed from the
    layer sizes, confirm against the caller.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the backbone, attention blocks, edge branch and heads.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Size of the output logit vector (default 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential of Conv+BN+ReLU+Pool stages (4 modules each)
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: the original fell through and crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        # Edge detection branch (expects a 1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features fused with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier.

        Args:
            x: RGB batch, shape (N, 3, H, W); layer sizes imply H = W = 224.
            edge_x: Optional edge-map batch, shape (N, 1, H, W).

        Returns:
            (logits, attention_map): class logits of shape (N, num_classes)
            and the post-attention feature map of the last backbone stage.
        """
        # Backbone: run each 4-module Conv+BN+ReLU+Pool stage, then its CBAM.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the head; its output doubles as the
        # feature vector fed to the combined (edge-fused) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g.
                # unexpected edge_x size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, checkpoint-compatible layout.

    Channel attention is a bias-free 1x1-conv bottleneck MLP applied to the
    global average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the channel-wise mean/max maps. Both gates are
    sigmoids, so the output is the input scaled element-wise by factors in
    (0, 1) -- the tensor shape is preserved.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width; floor at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) to match the serialized
        # checkpoint's parameter names and shapes.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # --- spatial gate ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with optional edge branch.

    Variants (selected by ``model_type``):
      'f': 3 stages, 16/32/64 channels,  fc widths 256/128
      'c': 3 stages, 32/64/128 channels, fc widths 512/256
      'q': 4 stages, 64/128/256/512 channels, fc widths 1024/512
    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2) followed by a CBAM
    attention block. The flattened sizes (28x28 for 'f'/'c', 14x14 for 'q',
    56x56 for the edge branch) imply a 224x224 input -- assumed from the
    layer sizes, confirm against the caller.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the backbone, attention blocks, edge branch and heads.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Size of the output logit vector (default 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential of Conv+BN+ReLU+Pool stages (4 modules each)
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: the original fell through and crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        # Edge detection branch (expects a 1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features fused with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier.

        Args:
            x: RGB batch, shape (N, 3, H, W); layer sizes imply H = W = 224.
            edge_x: Optional edge-map batch, shape (N, 1, H, W).

        Returns:
            (logits, attention_map): class logits of shape (N, num_classes)
            and the post-attention feature map of the last backbone stage.
        """
        # Backbone: run each 4-module Conv+BN+ReLU+Pool stage, then its CBAM.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the head; its output doubles as the
        # feature vector fed to the combined (edge-fused) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g.
                # unexpected edge_x size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, checkpoint-compatible layout.

    Channel attention is a bias-free 1x1-conv bottleneck MLP applied to the
    global average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the channel-wise mean/max maps. Both gates are
    sigmoids, so the output is the input scaled element-wise by factors in
    (0, 1) -- the tensor shape is preserved.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width; floor at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) to match the serialized
        # checkpoint's parameter names and shapes.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # --- spatial gate ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with optional edge branch.

    Variants (selected by ``model_type``):
      'f': 3 stages, 16/32/64 channels,  fc widths 256/128
      'c': 3 stages, 32/64/128 channels, fc widths 512/256
      'q': 4 stages, 64/128/256/512 channels, fc widths 1024/512
    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2) followed by a CBAM
    attention block. The flattened sizes (28x28 for 'f'/'c', 14x14 for 'q',
    56x56 for the edge branch) imply a 224x224 input -- assumed from the
    layer sizes, confirm against the caller.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the backbone, attention blocks, edge branch and heads.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Size of the output logit vector (default 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential of Conv+BN+ReLU+Pool stages (4 modules each)
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: the original fell through and crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        # Edge detection branch (expects a 1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features fused with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier.

        Args:
            x: RGB batch, shape (N, 3, H, W); layer sizes imply H = W = 224.
            edge_x: Optional edge-map batch, shape (N, 1, H, W).

        Returns:
            (logits, attention_map): class logits of shape (N, num_classes)
            and the post-attention feature map of the last backbone stage.
        """
        # Backbone: run each 4-module Conv+BN+ReLU+Pool stage, then its CBAM.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the head; its output doubles as the
        # feature vector fed to the combined (edge-fused) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g.
                # unexpected edge_x size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, checkpoint-compatible layout.

    Channel attention is a bias-free 1x1-conv bottleneck MLP applied to the
    global average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the channel-wise mean/max maps. Both gates are
    sigmoids, so the output is the input scaled element-wise by factors in
    (0, 1) -- the tensor shape is preserved.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width; floor at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) to match the serialized
        # checkpoint's parameter names and shapes.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # --- spatial gate ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: CBAM-augmented CNN with optional edge branch.

    Variants (selected by ``model_type``):
      'f': 3 stages, 16/32/64 channels,  fc widths 256/128
      'c': 3 stages, 32/64/128 channels, fc widths 512/256
      'q': 4 stages, 64/128/256/512 channels, fc widths 1024/512
    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2) followed by a CBAM
    attention block. The flattened sizes (28x28 for 'f'/'c', 14x14 for 'q',
    56x56 for the edge branch) imply a 224x224 input -- assumed from the
    layer sizes, confirm against the caller.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the backbone, attention blocks, edge branch and heads.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: Size of the output logit vector (default 6).

        Raises:
            ValueError: If ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential of Conv+BN+ReLU+Pool stages (4 modules each)
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: the original fell through and crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        # Edge detection branch (expects a 1-channel input)
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward()
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features fused with edge features)
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and classifier.

        Args:
            x: RGB batch, shape (N, 3, H, W); layer sizes imply H = W = 224.
            edge_x: Optional edge-map batch, shape (N, 1, H, W).

        Returns:
            (logits, attention_map): class logits of shape (N, num_classes)
            and the post-attention feature map of the last backbone stage.
        """
        # Backbone: run each 4-module Conv+BN+ReLU+Pool stage, then its CBAM.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the head; its output doubles as the
        # feature vector fed to the combined (edge-fused) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: if the edge branch fails (e.g.
                # unexpected edge_x size), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM (channel + spatial) attention, checkpoint-compatible layout.

    Channel attention is a bias-free 1x1-conv bottleneck MLP applied to the
    global average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the channel-wise mean/max maps. Both gates are
    sigmoids, so the output is the input scaled element-wise by factors in
    (0, 1) -- the tensor shape is preserved.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width; floor at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) to match the serialized
        # checkpoint's parameter names and shapes.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # --- spatial gate ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the backbone, attention modules and classifier head.

    Args:
        x: RGB input batch; the flattened-feature sizes assume 224x224
           input — TODO confirm against the training pipeline.
        edge_x: optional single-channel edge map (presumably 224x224).
            When given, an auxiliary edge branch is fused into the
            prediction; on any edge-branch failure the main head is used
            instead (best-effort, as in the original).

    Returns:
        (logits, attention_map) where attention_map is the feature
        tensor produced after the last attention module.
    """
    if self.model_type in ('f', 'c'):
        stage_count = 3
    elif self.model_type == 'q':
        stage_count = 4
    else:
        # The original code left `attention_map` unbound here and later
        # crashed with a confusing NameError; fail fast instead.
        raise ValueError(f"Unknown model_type: {self.model_type!r}")

    # Each backbone stage is conv_layers[4i:4i+4] (Conv+BN+ReLU+Pool)
    # followed by its attention module.
    for i in range(stage_count):
        x = self.conv_layers[4 * i:4 * (i + 1)](x)
        x = self.attention_modules[i](x)
    attention_map = x

    x = x.view(x.size(0), -1)
    # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
    features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

    if edge_x is not None:
        try:
            edge = F.relu(self.edge_conv1(edge_x))
            edge = F.max_pool2d(edge, 2, 2)
            edge = F.relu(self.edge_conv2(edge))
            edge = F.max_pool2d(edge, 2, 2)
            edge_features = self.edge_fc(edge.view(edge.size(0), -1))
            combined = torch.cat([features, edge_features], dim=1)
            return self.combined_classifier(combined), attention_map
        except Exception:
            # Deliberate best-effort fallback (e.g. unexpected edge-map
            # resolution); previously swallowed with an unused `as e`.
            pass
    # classifier[3:] maps shared features to class logits.
    return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to match the released checkpoint.

    Three size variants share one layout; each backbone stage is
    Conv -> BatchNorm -> ReLU -> MaxPool followed by a CBAM attention
    module:

      * 'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      * 'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      * 'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (e.g. 64 * 28 * 28) assume 224x224 RGB
    input; the optional edge branch likewise assumes a 224x224
    single-channel edge map — TODO confirm against the data pipeline.

    ``forward`` returns ``(logits, attention_map)`` where attention_map
    is the feature tensor after the last attention module.
    """

    # Per-variant: channel progression, flattened feature size, hidden FC widths.
    _VARIANTS = {
        'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: size of the output logits. The original code
                hard-coded 6 and ignored this argument; it is now
                honored (default unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original code
                failed later with a confusing NameError on ``fc_input``).
        """
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        try:
            channels, fc_input, hidden = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            ) from None
        fc_sizes = [*hidden, num_classes]

        # One Conv+BN+ReLU+Pool stage per channel transition, kept as a
        # single flat Sequential so checkpoint keys (conv_layers.N.*)
        # line up with the stored weights.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (two 2x2 pools -> 56x56 spatial size
        # feeding edge_fc, i.e. a 224x224 edge map is assumed).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: two hidden Linear+ReLU+Dropout layers, then logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fused head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of RGB images.

        Args:
            x: RGB batch; flattened-feature sizes assume 224x224 input.
            edge_x: optional single-channel edge map (presumably
                224x224). When given, edge features are fused via
                ``combined_classifier``; on any edge-branch failure the
                main head is used instead (best-effort, as in the
                original).
        """
        # Each stage is conv_layers[4i:4i+4] followed by its attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback (e.g. unexpected
                # edge-map resolution); previously swallowed silently
                # with an unused `as e`.
                pass
        # classifier[3:] maps shared features to class logits.
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to match the released checkpoint.

    Three size variants share one layout; each backbone stage is
    Conv -> BatchNorm -> ReLU -> MaxPool followed by a CBAM attention
    module:

      * 'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      * 'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      * 'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (e.g. 64 * 28 * 28) assume 224x224 RGB
    input; the optional edge branch likewise assumes a 224x224
    single-channel edge map — TODO confirm against the data pipeline.

    ``forward`` returns ``(logits, attention_map)`` where attention_map
    is the feature tensor after the last attention module.
    """

    # Per-variant: channel progression, flattened feature size, hidden FC widths.
    _VARIANTS = {
        'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: size of the output logits. The original code
                hard-coded 6 and ignored this argument; it is now
                honored (default unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original code
                failed later with a confusing NameError on ``fc_input``).
        """
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        try:
            channels, fc_input, hidden = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            ) from None
        fc_sizes = [*hidden, num_classes]

        # One Conv+BN+ReLU+Pool stage per channel transition, kept as a
        # single flat Sequential so checkpoint keys (conv_layers.N.*)
        # line up with the stored weights.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (two 2x2 pools -> 56x56 spatial size
        # feeding edge_fc, i.e. a 224x224 edge map is assumed).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: two hidden Linear+ReLU+Dropout layers, then logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fused head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of RGB images.

        Args:
            x: RGB batch; flattened-feature sizes assume 224x224 input.
            edge_x: optional single-channel edge map (presumably
                224x224). When given, edge features are fused via
                ``combined_classifier``; on any edge-branch failure the
                main head is used instead (best-effort, as in the
                original).
        """
        # Each stage is conv_layers[4i:4i+4] followed by its attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback (e.g. unexpected
                # edge-map resolution); previously swallowed silently
                # with an unused `as e`.
                pass
        # classifier[3:] maps shared features to class logits.
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to match the released checkpoint.

    Three size variants share one layout; each backbone stage is
    Conv -> BatchNorm -> ReLU -> MaxPool followed by a CBAM attention
    module:

      * 'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      * 'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      * 'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (e.g. 64 * 28 * 28) assume 224x224 RGB
    input; the optional edge branch likewise assumes a 224x224
    single-channel edge map — TODO confirm against the data pipeline.

    ``forward`` returns ``(logits, attention_map)`` where attention_map
    is the feature tensor after the last attention module.
    """

    # Per-variant: channel progression, flattened feature size, hidden FC widths.
    _VARIANTS = {
        'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: size of the output logits. The original code
                hard-coded 6 and ignored this argument; it is now
                honored (default unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original code
                failed later with a confusing NameError on ``fc_input``).
        """
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        try:
            channels, fc_input, hidden = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            ) from None
        fc_sizes = [*hidden, num_classes]

        # One Conv+BN+ReLU+Pool stage per channel transition, kept as a
        # single flat Sequential so checkpoint keys (conv_layers.N.*)
        # line up with the stored weights.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (two 2x2 pools -> 56x56 spatial size
        # feeding edge_fc, i.e. a 224x224 edge map is assumed).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: two hidden Linear+ReLU+Dropout layers, then logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fused head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of RGB images.

        Args:
            x: RGB batch; flattened-feature sizes assume 224x224 input.
            edge_x: optional single-channel edge map (presumably
                224x224). When given, edge features are fused via
                ``combined_classifier``; on any edge-branch failure the
                main head is used instead (best-effort, as in the
                original).
        """
        # Each stage is conv_layers[4i:4i+4] followed by its attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback (e.g. unexpected
                # edge-map resolution); previously swallowed silently
                # with an unused `as e`.
                pass
        # classifier[3:] maps shared features to class logits.
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to match the released checkpoint.

    Three size variants share one layout; each backbone stage is
    Conv -> BatchNorm -> ReLU -> MaxPool followed by a CBAM attention
    module:

      * 'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      * 'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      * 'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (e.g. 64 * 28 * 28) assume 224x224 RGB
    input; the optional edge branch likewise assumes a 224x224
    single-channel edge map — TODO confirm against the data pipeline.

    ``forward`` returns ``(logits, attention_map)`` where attention_map
    is the feature tensor after the last attention module.
    """

    # Per-variant: channel progression, flattened feature size, hidden FC widths.
    _VARIANTS = {
        'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: size of the output logits. The original code
                hard-coded 6 and ignored this argument; it is now
                honored (default unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original code
                failed later with a confusing NameError on ``fc_input``).
        """
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        try:
            channels, fc_input, hidden = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            ) from None
        fc_sizes = [*hidden, num_classes]

        # One Conv+BN+ReLU+Pool stage per channel transition, kept as a
        # single flat Sequential so checkpoint keys (conv_layers.N.*)
        # line up with the stored weights.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (two 2x2 pools -> 56x56 spatial size
        # feeding edge_fc, i.e. a 224x224 edge map is assumed).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: two hidden Linear+ReLU+Dropout layers, then logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fused head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of RGB images.

        Args:
            x: RGB batch; flattened-feature sizes assume 224x224 input.
            edge_x: optional single-channel edge map (presumably
                224x224). When given, edge features are fused via
                ``combined_classifier``; on any edge-branch failure the
                main head is used instead (best-effort, as in the
                original).
        """
        # Each stage is conv_layers[4i:4i+4] followed by its attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback (e.g. unexpected
                # edge-map resolution); previously swallowed silently
                # with an unused `as e`.
                pass
        # classifier[3:] maps shared features to class logits.
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to match the released checkpoint.

    Three size variants share one layout; each backbone stage is
    Conv -> BatchNorm -> ReLU -> MaxPool followed by a CBAM attention
    module:

      * 'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      * 'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      * 'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (e.g. 64 * 28 * 28) assume 224x224 RGB
    input; the optional edge branch likewise assumes a 224x224
    single-channel edge map — TODO confirm against the data pipeline.

    ``forward`` returns ``(logits, attention_map)`` where attention_map
    is the feature tensor after the last attention module.
    """

    # Per-variant: channel progression, flattened feature size, hidden FC widths.
    _VARIANTS = {
        'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: size of the output logits. The original code
                hard-coded 6 and ignored this argument; it is now
                honored (default unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original code
                failed later with a confusing NameError on ``fc_input``).
        """
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        try:
            channels, fc_input, hidden = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            ) from None
        fc_sizes = [*hidden, num_classes]

        # One Conv+BN+ReLU+Pool stage per channel transition, kept as a
        # single flat Sequential so checkpoint keys (conv_layers.N.*)
        # line up with the stored weights.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (two 2x2 pools -> 56x56 spatial size
        # feeding edge_fc, i.e. a 224x224 edge map is assumed).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: two hidden Linear+ReLU+Dropout layers, then logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fused head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of RGB images.

        Args:
            x: RGB batch; flattened-feature sizes assume 224x224 input.
            edge_x: optional single-channel edge map (presumably
                224x224). When given, edge features are fused via
                ``combined_classifier``; on any edge-branch failure the
                main head is used instead (best-effort, as in the
                original).
        """
        # Each stage is conv_layers[4i:4i+4] followed by its attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback (e.g. unexpected
                # edge-map resolution); previously swallowed silently
                # with an unused `as e`.
                pass
        # classifier[3:] maps shared features to class logits.
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to match the released checkpoint.

    Three size variants share one layout; each backbone stage is
    Conv -> BatchNorm -> ReLU -> MaxPool followed by a CBAM attention
    module:

      * 'f' (fast):    3 stages, 16 -> 32 -> 64 channels
      * 'c' (compact): 3 stages, 32 -> 64 -> 128 channels
      * 'q' (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    The flattened feature sizes (e.g. 64 * 28 * 28) assume 224x224 RGB
    input; the optional edge branch likewise assumes a 224x224
    single-channel edge map — TODO confirm against the data pipeline.

    ``forward`` returns ``(logits, attention_map)`` where attention_map
    is the feature tensor after the last attention module.
    """

    # Per-variant: channel progression, flattened feature size, hidden FC widths.
    _VARIANTS = {
        'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: 'f', 'c' or 'q' (see class docstring).
            num_classes: size of the output logits. The original code
                hard-coded 6 and ignored this argument; it is now
                honored (default unchanged).

        Raises:
            ValueError: for an unknown ``model_type`` (the original code
                failed later with a confusing NameError on ``fc_input``).
        """
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        try:
            channels, fc_input, hidden = self._VARIANTS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            ) from None
        fc_sizes = [*hidden, num_classes]

        # One Conv+BN+ReLU+Pool stage per channel transition, kept as a
        # single flat Sequential so checkpoint keys (conv_layers.N.*)
        # line up with the stored weights.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM module after each stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (two 2x2 pools -> 56x56 spatial size
        # feeding edge_fc, i.e. a 224x224 edge map is assumed).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head: two hidden Linear+ReLU+Dropout layers, then logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fused head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of RGB images.

        Args:
            x: RGB batch; flattened-feature sizes assume 224x224 input.
            edge_x: optional single-channel edge map (presumably
                224x224). When given, edge features are fused via
                ``combined_classifier``; on any edge-branch failure the
                main head is used instead (best-effort, as in the
                original).
        """
        # Each stage is conv_layers[4i:4i+4] followed by its attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear -> ReLU -> Dropout: shared features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fallback (e.g. unexpected
                # edge-map resolution); previously swallowed silently
                # with an unused `as e`.
                pass
        # classifier[3:] maps shared features to class logits.
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Applies channel attention (a shared 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a 7x7 conv
    over channel-pooled maps). All convolutions are bias-free so the
    parameter names and shapes line up with the stored checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        # Channel-attention MLP as two 1x1 convolutions (no bias).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 convolution over [avg; max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_mapModelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel, then spatial) matching the
    checkpoint's layer layout: bias-free 1x1 convs for the channel MLP and a
    bias-free 7x7 conv for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention implemented with Conv2d 1x1 (NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the 2-channel [avg, max] map (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: shared MLP over global avg/max descriptors ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over channel-wise mean/max maps ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with per-stage CBAM attention and an optional
    edge-map branch.

    model_type 'f' and 'c' use three Conv-BN-ReLU-Pool stages; 'q' uses four.
    num_classes sets the size of the final Linear layer (default 6; previously
    this parameter was stored but ignored and 6 was hard-coded).
    Raises ValueError for an unknown model_type (previously this fell through
    to a confusing NameError on fc_input).
    NOTE(review): the fc_input values assume 224x224 RGB input, and the edge
    branch assumes a 224x224 single-channel edge map -- confirm with callers.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Conv+BN+ReLU+Pool stages (4 modules per stage, so the
        # forward pass can slice conv_layers[i*4:(i+1)*4] per stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge-detection branch (independent of model_type).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools halve 224 -> 56, hence 64 * 56 * 56 input features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; classifier[0:3] produce the shared 'features' used
        # by the combined (edge) head, classifier[3:] finish the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier consuming [features, edge_features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the last CBAM output."""
        # Run each Conv-BN-ReLU-Pool stage followed by its CBAM block. The
        # number of stages is implied by the attention list, which unifies the
        # previously duplicated 'f'/'c' and 'q' branches.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[i * 4:(i + 1) * 4](x)
            x = self.attention_modules[i](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared 'features'.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the plain head when the edge branch
                # fails (e.g. an edge map whose size doesn't match edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel, then spatial) matching the
    checkpoint's layer layout: bias-free 1x1 convs for the channel MLP and a
    bias-free 7x7 conv for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention implemented with Conv2d 1x1 (NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the 2-channel [avg, max] map (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: shared MLP over global avg/max descriptors ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over channel-wise mean/max maps ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with per-stage CBAM attention and an optional
    edge-map branch.

    model_type 'f' and 'c' use three Conv-BN-ReLU-Pool stages; 'q' uses four.
    num_classes sets the size of the final Linear layer (default 6; previously
    this parameter was stored but ignored and 6 was hard-coded).
    Raises ValueError for an unknown model_type (previously this fell through
    to a confusing NameError on fc_input).
    NOTE(review): the fc_input values assume 224x224 RGB input, and the edge
    branch assumes a 224x224 single-channel edge map -- confirm with callers.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Conv+BN+ReLU+Pool stages (4 modules per stage, so the
        # forward pass can slice conv_layers[i*4:(i+1)*4] per stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge-detection branch (independent of model_type).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools halve 224 -> 56, hence 64 * 56 * 56 input features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; classifier[0:3] produce the shared 'features' used
        # by the combined (edge) head, classifier[3:] finish the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier consuming [features, edge_features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the last CBAM output."""
        # Run each Conv-BN-ReLU-Pool stage followed by its CBAM block. The
        # number of stages is implied by the attention list, which unifies the
        # previously duplicated 'f'/'c' and 'q' branches.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[i * 4:(i + 1) * 4](x)
            x = self.attention_modules[i](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared 'features'.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the plain head when the edge branch
                # fails (e.g. an edge map whose size doesn't match edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel, then spatial) matching the
    checkpoint's layer layout: bias-free 1x1 convs for the channel MLP and a
    bias-free 7x7 conv for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention implemented with Conv2d 1x1 (NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the 2-channel [avg, max] map (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: shared MLP over global avg/max descriptors ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over channel-wise mean/max maps ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with per-stage CBAM attention and an optional
    edge-map branch.

    model_type 'f' and 'c' use three Conv-BN-ReLU-Pool stages; 'q' uses four.
    num_classes sets the size of the final Linear layer (default 6; previously
    this parameter was stored but ignored and 6 was hard-coded).
    Raises ValueError for an unknown model_type (previously this fell through
    to a confusing NameError on fc_input).
    NOTE(review): the fc_input values assume 224x224 RGB input, and the edge
    branch assumes a 224x224 single-channel edge map -- confirm with callers.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Conv+BN+ReLU+Pool stages (4 modules per stage, so the
        # forward pass can slice conv_layers[i*4:(i+1)*4] per stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge-detection branch (independent of model_type).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools halve 224 -> 56, hence 64 * 56 * 56 input features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; classifier[0:3] produce the shared 'features' used
        # by the combined (edge) head, classifier[3:] finish the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier consuming [features, edge_features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the last CBAM output."""
        # Run each Conv-BN-ReLU-Pool stage followed by its CBAM block. The
        # number of stages is implied by the attention list, which unifies the
        # previously duplicated 'f'/'c' and 'q' branches.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[i * 4:(i + 1) * 4](x)
            x = self.attention_modules[i](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared 'features'.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the plain head when the edge branch
                # fails (e.g. an edge map whose size doesn't match edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel, then spatial) matching the
    checkpoint's layer layout: bias-free 1x1 convs for the channel MLP and a
    bias-free 7x7 conv for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention implemented with Conv2d 1x1 (NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the 2-channel [avg, max] map (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: shared MLP over global avg/max descriptors ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over channel-wise mean/max maps ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with per-stage CBAM attention and an optional
    edge-map branch.

    model_type 'f' and 'c' use three Conv-BN-ReLU-Pool stages; 'q' uses four.
    num_classes sets the size of the final Linear layer (default 6; previously
    this parameter was stored but ignored and 6 was hard-coded).
    Raises ValueError for an unknown model_type (previously this fell through
    to a confusing NameError on fc_input).
    NOTE(review): the fc_input values assume 224x224 RGB input, and the edge
    branch assumes a 224x224 single-channel edge map -- confirm with callers.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Conv+BN+ReLU+Pool stages (4 modules per stage, so the
        # forward pass can slice conv_layers[i*4:(i+1)*4] per stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge-detection branch (independent of model_type).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools halve 224 -> 56, hence 64 * 56 * 56 input features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; classifier[0:3] produce the shared 'features' used
        # by the combined (edge) head, classifier[3:] finish the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier consuming [features, edge_features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the last CBAM output."""
        # Run each Conv-BN-ReLU-Pool stage followed by its CBAM block. The
        # number of stages is implied by the attention list, which unifies the
        # previously duplicated 'f'/'c' and 'q' branches.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[i * 4:(i + 1) * 4](x)
            x = self.attention_modules[i](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared 'features'.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the plain head when the edge branch
                # fails (e.g. an edge map whose size doesn't match edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel, then spatial) matching the
    checkpoint's layer layout: bias-free 1x1 convs for the channel MLP and a
    bias-free 7x7 conv for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention implemented with Conv2d 1x1 (NO BIAS, as in checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the 2-channel [avg, max] map (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- Channel attention: shared MLP over global avg/max descriptors ---
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over channel-wise mean/max maps ---
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with per-stage CBAM attention and an optional
    edge-map branch.

    model_type 'f' and 'c' use three Conv-BN-ReLU-Pool stages; 'q' uses four.
    num_classes sets the size of the final Linear layer (default 6; previously
    this parameter was stored but ignored and 6 was hard-coded).
    Raises ValueError for an unknown model_type (previously this fell through
    to a confusing NameError on fc_input).
    NOTE(review): the fc_input values assume 224x224 RGB input, and the edge
    branch assumes a 224x224 single-channel edge map -- confirm with callers.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Conv+BN+ReLU+Pool stages (4 modules per stage, so the
        # forward pass can slice conv_layers[i*4:(i+1)*4] per stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge-detection branch (independent of model_type).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools halve 224 -> 56, hence 64 * 56 * 56 input features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; classifier[0:3] produce the shared 'features' used
        # by the combined (edge) head, classifier[3:] finish the plain head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier consuming [features, edge_features].
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the last CBAM output."""
        # Run each Conv-BN-ReLU-Pool stage followed by its CBAM block. The
        # number of stages is implied by the attention list, which unifies the
        # previously duplicated 'f'/'c' and 'q' branches.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[i * 4:(i + 1) * 4](x)
            x = self.attention_modules[i](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> shared 'features'.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the plain head when the edge branch
                # fails (e.g. an edge map whose size doesn't match edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the backbone and a classifier head; returns (logits, attention_map)."""
    # Each backbone stage is 4 sequential modules (Conv/BN/ReLU/Pool);
    # interleave the stages with their matching attention blocks.  Driving
    # this off len(attention_modules) covers both the 3-stage ('f'/'c')
    # and 4-stage ('q') variants without duplicated branches.
    for i, attention in enumerate(self.attention_modules):
        x = self.conv_layers[4 * i:4 * (i + 1)](x)
        x = attention(x)
    attention_map = x
    x = x.view(x.size(0), -1)
    # First hidden layer (Linear -> ReLU -> Dropout) yields the features
    # shared by both classifier heads.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)
    if edge_x is not None:
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
            combined = torch.cat([features, edge_features], dim=1)
            return self.combined_classifier(combined), attention_map
        except Exception:
            # Best-effort fusion: fall back to the main head when the edge
            # branch fails (e.g. unexpected edge-map size for edge_fc).
            pass
    # BUGFIX: the original line ended in a scrape artifact
    # ("attention_mapModelpythonpytorch"), a NameError at runtime.
    return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a Conv/BN/ReLU/MaxPool backbone
    with a CBAM attention block after each stage, an optional edge-feature
    branch, and an MLP classifier head:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    ``forward`` returns ``(logits, attention_map)``, where ``attention_map``
    is the feature map after the final attention stage.

    NOTE(review): the flattened fc sizes assume 224x224 inputs
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    edge map (pooled twice to 56x56) -- confirm against the training code.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer size (default 6, matching checkpoints).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # One backbone stage: Conv -> BN -> ReLU -> MaxPool (halves H/W).
            # Returned flat so nn.Sequential keeps the exact module indices
            # the checkpoint was saved with.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant plan: stage channel widths, flattened feature size,
        # and hidden-layer sizes of the classifier head.
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in plans:
            # BUGFIX: the original silently fell through and crashed later
            # with a NameError; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        channels, fc_input, hidden = plans[model_type]

        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )
        # BUGFIX: the original hard-coded 6 outputs, ignoring num_classes.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch (optional single-channel second input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: two hidden layers with dropout.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available: consumes the
        # first hidden layer's features concatenated with the 128-d edge
        # embedding.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: image batch (see the 224x224 note on the class).
            edge_x: optional single-channel edge-map batch; when given, the
                edge branch is fused through ``combined_classifier``, falling
                back to the main head if the branch fails.

        Returns:
            Tuple of (logits, attention_map).
        """
        # Each backbone stage is 4 sequential modules; interleave stages
        # with their attention blocks (handles 3- and 4-stage variants).
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First hidden layer (Linear -> ReLU -> Dropout) yields the features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        # BUGFIX: the original return line ended in a scrape artifact
        # ("attention_mapModelpythonpytorch").
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a Conv/BN/ReLU/MaxPool backbone
    with a CBAM attention block after each stage, an optional edge-feature
    branch, and an MLP classifier head:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    ``forward`` returns ``(logits, attention_map)``, where ``attention_map``
    is the feature map after the final attention stage.

    NOTE(review): the flattened fc sizes assume 224x224 inputs
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    edge map (pooled twice to 56x56) -- confirm against the training code.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer size (default 6, matching checkpoints).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # One backbone stage: Conv -> BN -> ReLU -> MaxPool (halves H/W).
            # Returned flat so nn.Sequential keeps the exact module indices
            # the checkpoint was saved with.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant plan: stage channel widths, flattened feature size,
        # and hidden-layer sizes of the classifier head.
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in plans:
            # BUGFIX: the original silently fell through and crashed later
            # with a NameError; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        channels, fc_input, hidden = plans[model_type]

        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )
        # BUGFIX: the original hard-coded 6 outputs, ignoring num_classes.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch (optional single-channel second input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: two hidden layers with dropout.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available: consumes the
        # first hidden layer's features concatenated with the 128-d edge
        # embedding.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: image batch (see the 224x224 note on the class).
            edge_x: optional single-channel edge-map batch; when given, the
                edge branch is fused through ``combined_classifier``, falling
                back to the main head if the branch fails.

        Returns:
            Tuple of (logits, attention_map).
        """
        # Each backbone stage is 4 sequential modules; interleave stages
        # with their attention blocks (handles 3- and 4-stage variants).
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First hidden layer (Linear -> ReLU -> Dropout) yields the features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        # BUGFIX: the original return line ended in a scrape artifact
        # ("attention_mapModelpythonpytorch").
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a Conv/BN/ReLU/MaxPool backbone
    with a CBAM attention block after each stage, an optional edge-feature
    branch, and an MLP classifier head:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    ``forward`` returns ``(logits, attention_map)``, where ``attention_map``
    is the feature map after the final attention stage.

    NOTE(review): the flattened fc sizes assume 224x224 inputs
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    edge map (pooled twice to 56x56) -- confirm against the training code.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer size (default 6, matching checkpoints).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # One backbone stage: Conv -> BN -> ReLU -> MaxPool (halves H/W).
            # Returned flat so nn.Sequential keeps the exact module indices
            # the checkpoint was saved with.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant plan: stage channel widths, flattened feature size,
        # and hidden-layer sizes of the classifier head.
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in plans:
            # BUGFIX: the original silently fell through and crashed later
            # with a NameError; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        channels, fc_input, hidden = plans[model_type]

        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )
        # BUGFIX: the original hard-coded 6 outputs, ignoring num_classes.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch (optional single-channel second input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: two hidden layers with dropout.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available: consumes the
        # first hidden layer's features concatenated with the 128-d edge
        # embedding.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: image batch (see the 224x224 note on the class).
            edge_x: optional single-channel edge-map batch; when given, the
                edge branch is fused through ``combined_classifier``, falling
                back to the main head if the branch fails.

        Returns:
            Tuple of (logits, attention_map).
        """
        # Each backbone stage is 4 sequential modules; interleave stages
        # with their attention blocks (handles 3- and 4-stage variants).
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First hidden layer (Linear -> ReLU -> Dropout) yields the features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        # BUGFIX: the original return line ended in a scrape artifact
        # ("attention_mapModelpythonpytorch").
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a Conv/BN/ReLU/MaxPool backbone
    with a CBAM attention block after each stage, an optional edge-feature
    branch, and an MLP classifier head:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    ``forward`` returns ``(logits, attention_map)``, where ``attention_map``
    is the feature map after the final attention stage.

    NOTE(review): the flattened fc sizes assume 224x224 inputs
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    edge map (pooled twice to 56x56) -- confirm against the training code.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer size (default 6, matching checkpoints).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # One backbone stage: Conv -> BN -> ReLU -> MaxPool (halves H/W).
            # Returned flat so nn.Sequential keeps the exact module indices
            # the checkpoint was saved with.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant plan: stage channel widths, flattened feature size,
        # and hidden-layer sizes of the classifier head.
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in plans:
            # BUGFIX: the original silently fell through and crashed later
            # with a NameError; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        channels, fc_input, hidden = plans[model_type]

        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )
        # BUGFIX: the original hard-coded 6 outputs, ignoring num_classes.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch (optional single-channel second input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: two hidden layers with dropout.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available: consumes the
        # first hidden layer's features concatenated with the 128-d edge
        # embedding.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: image batch (see the 224x224 note on the class).
            edge_x: optional single-channel edge-map batch; when given, the
                edge branch is fused through ``combined_classifier``, falling
                back to the main head if the branch fails.

        Returns:
            Tuple of (logits, attention_map).
        """
        # Each backbone stage is 4 sequential modules; interleave stages
        # with their attention blocks (handles 3- and 4-stage variants).
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First hidden layer (Linear -> ReLU -> Dropout) yields the features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        # BUGFIX: the original return line ended in a scrape artifact
        # ("attention_mapModelpythonpytorch").
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a Conv/BN/ReLU/MaxPool backbone
    with a CBAM attention block after each stage, an optional edge-feature
    branch, and an MLP classifier head:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    ``forward`` returns ``(logits, attention_map)``, where ``attention_map``
    is the feature map after the final attention stage.

    NOTE(review): the flattened fc sizes assume 224x224 inputs
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    edge map (pooled twice to 56x56) -- confirm against the training code.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer size (default 6, matching checkpoints).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # One backbone stage: Conv -> BN -> ReLU -> MaxPool (halves H/W).
            # Returned flat so nn.Sequential keeps the exact module indices
            # the checkpoint was saved with.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant plan: stage channel widths, flattened feature size,
        # and hidden-layer sizes of the classifier head.
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in plans:
            # BUGFIX: the original silently fell through and crashed later
            # with a NameError; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        channels, fc_input, hidden = plans[model_type]

        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )
        # BUGFIX: the original hard-coded 6 outputs, ignoring num_classes.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch (optional single-channel second input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: two hidden layers with dropout.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available: consumes the
        # first hidden layer's features concatenated with the 128-d edge
        # embedding.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: image batch (see the 224x224 note on the class).
            edge_x: optional single-channel edge-map batch; when given, the
                edge branch is fused through ``combined_classifier``, falling
                back to the main head if the branch fails.

        Returns:
            Tuple of (logits, attention_map).
        """
        # Each backbone stage is 4 sequential modules; interleave stages
        # with their attention blocks (handles 3- and 4-stage variants).
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First hidden layer (Linear -> ReLU -> Dropout) yields the features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        # BUGFIX: the original return line ended in a scrape artifact
        # ("attention_mapModelpythonpytorch").
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three variants share one structure -- a Conv/BN/ReLU/MaxPool backbone
    with a CBAM attention block after each stage, an optional edge-feature
    branch, and an MLP classifier head:

    * ``'f'`` (fast):    3 stages, 16/32/64 channels
    * ``'c'`` (compact): 3 stages, 32/64/128 channels
    * ``'q'`` (quality): 4 stages, 64/128/256/512 channels

    ``forward`` returns ``(logits, attention_map)``, where ``attention_map``
    is the feature map after the final attention stage.

    NOTE(review): the flattened fc sizes assume 224x224 inputs
    (224 / 2**3 = 28 for 'f'/'c', 224 / 2**4 = 14 for 'q') and a 224x224
    edge map (pooled twice to 56x56) -- confirm against the training code.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: output layer size (default 6, matching checkpoints).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # One backbone stage: Conv -> BN -> ReLU -> MaxPool (halves H/W).
            # Returned flat so nn.Sequential keeps the exact module indices
            # the checkpoint was saved with.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant plan: stage channel widths, flattened feature size,
        # and hidden-layer sizes of the classifier head.
        plans = {
            'f': ([3, 16, 32, 64], 64 * 28 * 28, [256, 128]),
            'c': ([3, 32, 64, 128], 128 * 28 * 28, [512, 256]),
            'q': ([3, 64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
        }
        if model_type not in plans:
            # BUGFIX: the original silently fell through and crashed later
            # with a NameError; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        channels, fc_input, hidden = plans[model_type]

        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )
        # BUGFIX: the original hard-coded 6 outputs, ignoring num_classes.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch (optional single-channel second input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head: two hidden layers with dropout.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available: consumes the
        # first hidden layer's features concatenated with the 128-d edge
        # embedding.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: image batch (see the 224x224 note on the class).
            edge_x: optional single-channel edge-map batch; when given, the
                edge branch is fused through ``combined_classifier``, falling
                back to the main head if the branch fails.

        Returns:
            Tuple of (logits, attention_map).
        """
        # Each backbone stage is 4 sequential modules; interleave stages
        # with their attention blocks (handles 3- and 4-stage variants).
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First hidden layer (Linear -> ReLU -> Dropout) yields the features
        # shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        # BUGFIX: the original return line ended in a scrape artifact
        # ("attention_mapModelpythonpytorch").
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint layout.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free
    7x7 conv over channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        squeezed = max(channels // reduction, 1)
        # 1x1 convs act as a shared two-layer MLP (no bias, as stored
        # in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv over the 2-channel avg/max map (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        peak_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, peak_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint's layout.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages interleaved with
    CBAM attention blocks; an optional 1-channel edge-map branch can be fused
    into the classifier head.

    Args:
        model_type: backbone size — 'f' (16/32/64 channels), 'c' (32/64/128)
            or 'q' (64/128/256/512 with a fourth stage).
        num_classes: number of output classes (checkpoint default: 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with a Conv+BN+ReLU+Pool pattern. Each stage
        # is exactly 4 modules so forward() can slice the Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # four pooling stages: 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type fell through and crashed later with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: 64 * 56 * 56 assumes a
        # 224x224 edge map pooled twice.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier (image features only).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch (spatial size fixed by fc_input).
            edge_x: optional (B, 1, 224, 224) edge map.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            final attention-weighted feature map.
        """
        # Interleave conv stages (slices of 4 modules) with CBAM attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout produce the features used for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map breaks edge_fc;
                # fall back to the image-only head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block matching the serialized checkpoint's layer layout.

    Applies channel attention followed by spatial attention; the output has
    the same shape as the input. Both branches use bias-free convolutions so
    parameter names/shapes line up with the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input/output feature channels.
            reduction: channel-reduction ratio of the bottleneck MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: shared 1x1-conv MLP applied to pooled descriptors
        # (Conv2d instead of Linear, as in checkpoint, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return ``x`` reweighted by channel then spatial attention."""
        # --- Channel attention: shared MLP over avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over per-pixel [mean, max] channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint's layout.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages interleaved with
    CBAM attention blocks; an optional 1-channel edge-map branch can be fused
    into the classifier head.

    Args:
        model_type: backbone size — 'f' (16/32/64 channels), 'c' (32/64/128)
            or 'q' (64/128/256/512 with a fourth stage).
        num_classes: number of output classes (checkpoint default: 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with a Conv+BN+ReLU+Pool pattern. Each stage
        # is exactly 4 modules so forward() can slice the Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # four pooling stages: 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type fell through and crashed later with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: 64 * 56 * 56 assumes a
        # 224x224 edge map pooled twice.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier (image features only).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch (spatial size fixed by fc_input).
            edge_x: optional (B, 1, 224, 224) edge map.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            final attention-weighted feature map.
        """
        # Interleave conv stages (slices of 4 modules) with CBAM attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout produce the features used for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map breaks edge_fc;
                # fall back to the image-only head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block matching the serialized checkpoint's layer layout.

    Applies channel attention followed by spatial attention; the output has
    the same shape as the input. Both branches use bias-free convolutions so
    parameter names/shapes line up with the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input/output feature channels.
            reduction: channel-reduction ratio of the bottleneck MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: shared 1x1-conv MLP applied to pooled descriptors
        # (Conv2d instead of Linear, as in checkpoint, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return ``x`` reweighted by channel then spatial attention."""
        # --- Channel attention: shared MLP over avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over per-pixel [mean, max] channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint's layout.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages interleaved with
    CBAM attention blocks; an optional 1-channel edge-map branch can be fused
    into the classifier head.

    Args:
        model_type: backbone size — 'f' (16/32/64 channels), 'c' (32/64/128)
            or 'q' (64/128/256/512 with a fourth stage).
        num_classes: number of output classes (checkpoint default: 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with a Conv+BN+ReLU+Pool pattern. Each stage
        # is exactly 4 modules so forward() can slice the Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # four pooling stages: 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type fell through and crashed later with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: 64 * 56 * 56 assumes a
        # 224x224 edge map pooled twice.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier (image features only).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch (spatial size fixed by fc_input).
            edge_x: optional (B, 1, 224, 224) edge map.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            final attention-weighted feature map.
        """
        # Interleave conv stages (slices of 4 modules) with CBAM attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout produce the features used for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map breaks edge_fc;
                # fall back to the image-only head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block matching the serialized checkpoint's layer layout.

    Applies channel attention followed by spatial attention; the output has
    the same shape as the input. Both branches use bias-free convolutions so
    parameter names/shapes line up with the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input/output feature channels.
            reduction: channel-reduction ratio of the bottleneck MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: shared 1x1-conv MLP applied to pooled descriptors
        # (Conv2d instead of Linear, as in checkpoint, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return ``x`` reweighted by channel then spatial attention."""
        # --- Channel attention: shared MLP over avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over per-pixel [mean, max] channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint's layout.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages interleaved with
    CBAM attention blocks; an optional 1-channel edge-map branch can be fused
    into the classifier head.

    Args:
        model_type: backbone size — 'f' (16/32/64 channels), 'c' (32/64/128)
            or 'q' (64/128/256/512 with a fourth stage).
        num_classes: number of output classes (checkpoint default: 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with a Conv+BN+ReLU+Pool pattern. Each stage
        # is exactly 4 modules so forward() can slice the Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # four pooling stages: 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type fell through and crashed later with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: 64 * 56 * 56 assumes a
        # 224x224 edge map pooled twice.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier (image features only).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch (spatial size fixed by fc_input).
            edge_x: optional (B, 1, 224, 224) edge map.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            final attention-weighted feature map.
        """
        # Interleave conv stages (slices of 4 modules) with CBAM attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout produce the features used for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map breaks edge_fc;
                # fall back to the image-only head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block matching the serialized checkpoint's layer layout.

    Applies channel attention followed by spatial attention; the output has
    the same shape as the input. Both branches use bias-free convolutions so
    parameter names/shapes line up with the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input/output feature channels.
            reduction: channel-reduction ratio of the bottleneck MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: shared 1x1-conv MLP applied to pooled descriptors
        # (Conv2d instead of Linear, as in checkpoint, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return ``x`` reweighted by channel then spatial attention."""
        # --- Channel attention: shared MLP over avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over per-pixel [mean, max] channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint's layout.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages interleaved with
    CBAM attention blocks; an optional 1-channel edge-map branch can be fused
    into the classifier head.

    Args:
        model_type: backbone size — 'f' (16/32/64 channels), 'c' (32/64/128)
            or 'q' (64/128/256/512 with a fourth stage).
        num_classes: number of output classes (checkpoint default: 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with a Conv+BN+ReLU+Pool pattern. Each stage
        # is exactly 4 modules so forward() can slice the Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # four pooling stages: 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type fell through and crashed later with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: 64 * 56 * 56 assumes a
        # 224x224 edge map pooled twice.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier (image features only).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch (spatial size fixed by fc_input).
            edge_x: optional (B, 1, 224, 224) edge map.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            final attention-weighted feature map.
        """
        # Interleave conv stages (slices of 4 modules) with CBAM attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout produce the features used for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map breaks edge_fc;
                # fall back to the image-only head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block matching the serialized checkpoint's layer layout.

    Applies channel attention followed by spatial attention; the output has
    the same shape as the input. Both branches use bias-free convolutions so
    parameter names/shapes line up with the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input/output feature channels.
            reduction: channel-reduction ratio of the bottleneck MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: shared 1x1-conv MLP applied to pooled descriptors
        # (Conv2d instead of Linear, as in checkpoint, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return ``x`` reweighted by channel then spatial attention."""
        # --- Channel attention: shared MLP over avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over per-pixel [mean, max] channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint's layout.

    The backbone is a stack of Conv+BN+ReLU+MaxPool stages interleaved with
    CBAM attention blocks; an optional 1-channel edge-map branch can be fused
    into the classifier head.

    Args:
        model_type: backbone size — 'f' (16/32/64 channels), 'c' (32/64/128)
            or 'q' (64/128/256/512 with a fourth stage).
        num_classes: number of output classes (checkpoint default: 6).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with a Conv+BN+ReLU+Pool pattern. Each stage
        # is exactly 4 modules so forward() can slice the Sequential in
        # groups of four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # assumes 224x224 input: 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # four pooling stages: 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type fell through and crashed later with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge detection branch: 1-channel edge map -> 128-d feature vector.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling: 64 * 56 * 56 assumes a
        # 224x224 edge map pooled twice.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier (image features only).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (image features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify a batch of images, optionally fusing an edge-map branch.

        Args:
            x: (B, 3, 224, 224) image batch (spatial size fixed by fc_input).
            edge_x: optional (B, 1, 224, 224) edge map.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and the
            final attention-weighted feature map.
        """
        # Interleave conv stages (slices of 4 modules) with CBAM attention.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout produce the features used for fusion.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: a wrong-sized edge map breaks edge_fc;
                # fall back to the image-only head rather than crashing.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block matching the serialized checkpoint's layer layout.

    Applies channel attention followed by spatial attention; the output has
    the same shape as the input. Both branches use bias-free convolutions so
    parameter names/shapes line up with the checkpoint.
    """

    def __init__(self, channels, reduction=8):
        """
        Args:
            channels: number of input/output feature channels.
            reduction: channel-reduction ratio of the bottleneck MLP.
        """
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: shared 1x1-conv MLP applied to pooled descriptors
        # (Conv2d instead of Linear, as in checkpoint, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Return ``x`` reweighted by channel then spatial attention."""
        # --- Channel attention: shared MLP over avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # --- Spatial attention: 7x7 conv over per-pixel [mean, max] channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layout stored in the checkpoint.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, as in the
    checkpoint); spatial attention is a single bias-free 7x7 conv over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width for the channel-attention MLP (at least 1 channel).
        squeezed = max(channels // reduction, 1)
        # Channel attention implemented with Conv2d 1x1, NO BIAS (checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv, NO BIAS (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # --- spatial attention: conv over stacked mean/max channel maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier reconstructed from the checkpoint layout.

    Variants:
        'f': 3 conv stages (16/32/64 ch),    classifier 256 -> 128 -> num_classes
        'c': 3 conv stages (32/64/128 ch),   classifier 512 -> 256 -> num_classes
        'q': 4 conv stages (64/128/256/512), classifier 1024 -> 512 -> num_classes

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module. An optional edge branch (two convs + fc) can be fused
    with the intermediate classifier features via ``combined_classifier``.

    The flattened sizes (64*28*28, 128*28*28, 512*14*14, 64*56*56) assume
    224x224 inputs — TODO confirm against the training pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential with repeated Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: honour num_classes (was hard-coded to 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: an unknown type previously left fc_input/fc_sizes
            # undefined and crashed below with UnboundLocalError.
            raise ValueError(
                "unknown model_type: {!r} (expected 'f', 'c' or 'q')".format(model_type)
            )
        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward().
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (intermediate features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch, shape (B, 3, H, W); the fc sizing assumes
               H = W = 224 — TODO confirm.
            edge_x: optional single-channel edge map batch, (B, 1, H, W).
        """
        # Each stage is 4 sequential modules (Conv, BN, ReLU, MaxPool)
        # followed by its CBAM block; the number of stages is given by
        # the attention-module list (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) produces the intermediate
        # features shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: use the plain classifier when the
                # edge branch fails (e.g. unexpected edge_x spatial size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layout stored in the checkpoint.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, as in the
    checkpoint); spatial attention is a single bias-free 7x7 conv over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width for the channel-attention MLP (at least 1 channel).
        squeezed = max(channels // reduction, 1)
        # Channel attention implemented with Conv2d 1x1, NO BIAS (checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv, NO BIAS (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # --- spatial attention: conv over stacked mean/max channel maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier reconstructed from the checkpoint layout.

    Variants:
        'f': 3 conv stages (16/32/64 ch),    classifier 256 -> 128 -> num_classes
        'c': 3 conv stages (32/64/128 ch),   classifier 512 -> 256 -> num_classes
        'q': 4 conv stages (64/128/256/512), classifier 1024 -> 512 -> num_classes

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module. An optional edge branch (two convs + fc) can be fused
    with the intermediate classifier features via ``combined_classifier``.

    The flattened sizes (64*28*28, 128*28*28, 512*14*14, 64*56*56) assume
    224x224 inputs — TODO confirm against the training pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential with repeated Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: honour num_classes (was hard-coded to 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: an unknown type previously left fc_input/fc_sizes
            # undefined and crashed below with UnboundLocalError.
            raise ValueError(
                "unknown model_type: {!r} (expected 'f', 'c' or 'q')".format(model_type)
            )
        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward().
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (intermediate features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch, shape (B, 3, H, W); the fc sizing assumes
               H = W = 224 — TODO confirm.
            edge_x: optional single-channel edge map batch, (B, 1, H, W).
        """
        # Each stage is 4 sequential modules (Conv, BN, ReLU, MaxPool)
        # followed by its CBAM block; the number of stages is given by
        # the attention-module list (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) produces the intermediate
        # features shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: use the plain classifier when the
                # edge branch fails (e.g. unexpected edge_x spatial size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layout stored in the checkpoint.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, as in the
    checkpoint); spatial attention is a single bias-free 7x7 conv over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width for the channel-attention MLP (at least 1 channel).
        squeezed = max(channels // reduction, 1)
        # Channel attention implemented with Conv2d 1x1, NO BIAS (checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv, NO BIAS (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # --- spatial attention: conv over stacked mean/max channel maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier reconstructed from the checkpoint layout.

    Variants:
        'f': 3 conv stages (16/32/64 ch),    classifier 256 -> 128 -> num_classes
        'c': 3 conv stages (32/64/128 ch),   classifier 512 -> 256 -> num_classes
        'q': 4 conv stages (64/128/256/512), classifier 1024 -> 512 -> num_classes

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module. An optional edge branch (two convs + fc) can be fused
    with the intermediate classifier features via ``combined_classifier``.

    The flattened sizes (64*28*28, 128*28*28, 512*14*14, 64*56*56) assume
    224x224 inputs — TODO confirm against the training pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential with repeated Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: honour num_classes (was hard-coded to 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: an unknown type previously left fc_input/fc_sizes
            # undefined and crashed below with UnboundLocalError.
            raise ValueError(
                "unknown model_type: {!r} (expected 'f', 'c' or 'q')".format(model_type)
            )
        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward().
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (intermediate features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch, shape (B, 3, H, W); the fc sizing assumes
               H = W = 224 — TODO confirm.
            edge_x: optional single-channel edge map batch, (B, 1, H, W).
        """
        # Each stage is 4 sequential modules (Conv, BN, ReLU, MaxPool)
        # followed by its CBAM block; the number of stages is given by
        # the attention-module list (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) produces the intermediate
        # features shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: use the plain classifier when the
                # edge branch fails (e.g. unexpected edge_x spatial size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layout stored in the checkpoint.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, as in the
    checkpoint); spatial attention is a single bias-free 7x7 conv over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width for the channel-attention MLP (at least 1 channel).
        squeezed = max(channels // reduction, 1)
        # Channel attention implemented with Conv2d 1x1, NO BIAS (checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv, NO BIAS (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # --- spatial attention: conv over stacked mean/max channel maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier reconstructed from the checkpoint layout.

    Variants:
        'f': 3 conv stages (16/32/64 ch),    classifier 256 -> 128 -> num_classes
        'c': 3 conv stages (32/64/128 ch),   classifier 512 -> 256 -> num_classes
        'q': 4 conv stages (64/128/256/512), classifier 1024 -> 512 -> num_classes

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module. An optional edge branch (two convs + fc) can be fused
    with the intermediate classifier features via ``combined_classifier``.

    The flattened sizes (64*28*28, 128*28*28, 512*14*14, 64*56*56) assume
    224x224 inputs — TODO confirm against the training pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential with repeated Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: honour num_classes (was hard-coded to 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: an unknown type previously left fc_input/fc_sizes
            # undefined and crashed below with UnboundLocalError.
            raise ValueError(
                "unknown model_type: {!r} (expected 'f', 'c' or 'q')".format(model_type)
            )
        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward().
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (intermediate features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch, shape (B, 3, H, W); the fc sizing assumes
               H = W = 224 — TODO confirm.
            edge_x: optional single-channel edge map batch, (B, 1, H, W).
        """
        # Each stage is 4 sequential modules (Conv, BN, ReLU, MaxPool)
        # followed by its CBAM block; the number of stages is given by
        # the attention-module list (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) produces the intermediate
        # features shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: use the plain classifier when the
                # edge branch fails (e.g. unexpected edge_x spatial size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layout stored in the checkpoint.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, as in the
    checkpoint); spatial attention is a single bias-free 7x7 conv over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width for the channel-attention MLP (at least 1 channel).
        squeezed = max(channels // reduction, 1)
        # Channel attention implemented with Conv2d 1x1, NO BIAS (checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv, NO BIAS (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # --- spatial attention: conv over stacked mean/max channel maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier reconstructed from the checkpoint layout.

    Variants:
        'f': 3 conv stages (16/32/64 ch),    classifier 256 -> 128 -> num_classes
        'c': 3 conv stages (32/64/128 ch),   classifier 512 -> 256 -> num_classes
        'q': 4 conv stages (64/128/256/512), classifier 1024 -> 512 -> num_classes

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module. An optional edge branch (two convs + fc) can be fused
    with the intermediate classifier features via ``combined_classifier``.

    The flattened sizes (64*28*28, 128*28*28, 512*14*14, 64*56*56) assume
    224x224 inputs — TODO confirm against the training pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential with repeated Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: honour num_classes (was hard-coded to 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: an unknown type previously left fc_input/fc_sizes
            # undefined and crashed below with UnboundLocalError.
            raise ValueError(
                "unknown model_type: {!r} (expected 'f', 'c' or 'q')".format(model_type)
            )
        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward().
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (intermediate features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch, shape (B, 3, H, W); the fc sizing assumes
               H = W = 224 — TODO confirm.
            edge_x: optional single-channel edge map batch, (B, 1, H, W).
        """
        # Each stage is 4 sequential modules (Conv, BN, ReLU, MaxPool)
        # followed by its CBAM block; the number of stages is given by
        # the attention-module list (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) produces the intermediate
        # features shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: use the plain classifier when the
                # edge branch fails (e.g. unexpected edge_x spatial size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layout stored in the checkpoint.

    Channel attention is a two-layer 1x1-conv MLP (bias-free, as in the
    checkpoint); spatial attention is a single bias-free 7x7 conv over the
    per-pixel [mean, max] channel statistics.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width for the channel-attention MLP (at least 1 channel).
        squeezed = max(channels // reduction, 1)
        # Channel attention implemented with Conv2d 1x1, NO BIAS (checkpoint layout).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv, NO BIAS (checkpoint layout).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # --- spatial attention: conv over stacked mean/max channel maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 image classifier reconstructed from the checkpoint layout.

    Variants:
        'f': 3 conv stages (16/32/64 ch),    classifier 256 -> 128 -> num_classes
        'c': 3 conv stages (32/64/128 ch),   classifier 512 -> 256 -> num_classes
        'q': 4 conv stages (64/128/256/512), classifier 1024 -> 512 -> num_classes

    Each stage is Conv3x3 + BatchNorm + ReLU + MaxPool(2), followed by a CBAM
    attention module. An optional edge branch (two convs + fc) can be fused
    with the intermediate classifier features via ``combined_classifier``.

    The flattened sizes (64*28*28, 128*28*28, 512*14*14, 64*56*56) assume
    224x224 inputs — TODO confirm against the training pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential with repeated Conv+BN+ReLU+Pool pattern.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: honour num_classes (was hard-coded to 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: an unknown type previously left fc_input/fc_sizes
            # undefined and crashed below with UnboundLocalError.
            raise ValueError(
                "unknown model_type: {!r} (expected 'f', 'c' or 'q')".format(model_type)
            )
        # Edge detection branch (single-channel edge map input).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on the two 2x2 poolings in forward().
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (intermediate features + 128-d edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: RGB batch, shape (B, 3, H, W); the fc sizing assumes
               H = W = 224 — TODO confirm.
            edge_x: optional single-channel edge map batch, (B, 1, H, W).
        """
        # Each stage is 4 sequential modules (Conv, BN, ReLU, MaxPool)
        # followed by its CBAM block; the number of stages is given by
        # the attention-module list (3 for 'f'/'c', 4 for 'q').
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) produces the intermediate
        # features shared with the edge-fusion path.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: use the plain classifier when the
                # edge branch fails (e.g. unexpected edge_x spatial size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier (checkpoint-compatible layout).

    Backbone: repeated Conv+BN+ReLU+MaxPool stages, each followed by a
    CBAMAttentionCheckpoint block. An optional single-channel edge-map
    branch (two convs + a linear projection) can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: 'f' (16/32/64 ch), 'c' (32/64/128 ch) or
            'q' (64/128/256/512 ch). Any other value raises ValueError
            (previously it fell through and crashed later with a NameError).
        num_classes: size of the output logits layer (default 6). The
            original hardcoded 6 and ignored this argument.

    The fc input sizes (e.g. 64*28*28 for 'f' after three 2x2 pools) imply
    a 224x224 RGB input -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of deferring to a NameError on fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Backbone: one Conv+BN+ReLU+MaxPool group (4 modules) per stage.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )

        # Edge-detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge map after two 2x2 pools --
        # TODO confirm the expected edge input size.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: RGB input batch (N, 3, H, W).
            edge_x: optional 1-channel edge map; when given and the edge
                branch succeeds, the combined head produces the logits.

        Returns:
            (logits, attention_map) where attention_map is the feature map
            produced by the last attention block.
        """
        # Backbone: each stage is 4 sequential modules, then its CBAM block.
        # This single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') variants.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        # First Linear+ReLU+Dropout of the classifier produces the shared
        # feature vector consumed by either head.
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. mismatched edge-map
            # size) fall back to the main classifier instead of crashing.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Fixed: the scraped original returned the undefined identifier
        # `attention_mapModelpythonpytorch` (page artifact fused into code).
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free 7x7
    conv over the channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) so parameter names and
        # shapes line up with the checkpoint's state dict.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv (NO BIAS) over the 2-channel
        # (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier (checkpoint-compatible layout).

    Backbone: repeated Conv+BN+ReLU+MaxPool stages, each followed by a
    CBAMAttentionCheckpoint block. An optional single-channel edge-map
    branch (two convs + a linear projection) can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: 'f' (16/32/64 ch), 'c' (32/64/128 ch) or
            'q' (64/128/256/512 ch). Any other value raises ValueError
            (previously it fell through and crashed later with a NameError).
        num_classes: size of the output logits layer (default 6). The
            original hardcoded 6 and ignored this argument.

    The fc input sizes (e.g. 64*28*28 for 'f' after three 2x2 pools) imply
    a 224x224 RGB input -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of deferring to a NameError on fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Backbone: one Conv+BN+ReLU+MaxPool group (4 modules) per stage.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )

        # Edge-detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge map after two 2x2 pools --
        # TODO confirm the expected edge input size.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: RGB input batch (N, 3, H, W).
            edge_x: optional 1-channel edge map; when given and the edge
                branch succeeds, the combined head produces the logits.

        Returns:
            (logits, attention_map) where attention_map is the feature map
            produced by the last attention block.
        """
        # Backbone: each stage is 4 sequential modules, then its CBAM block.
        # This single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') variants.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        # First Linear+ReLU+Dropout of the classifier produces the shared
        # feature vector consumed by either head.
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. mismatched edge-map
            # size) fall back to the main classifier instead of crashing.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Fixed: the scraped original returned the undefined identifier
        # `attention_mapModelpythonpytorch` (page artifact fused into code).
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free 7x7
    conv over the channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) so parameter names and
        # shapes line up with the checkpoint's state dict.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv (NO BIAS) over the 2-channel
        # (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier (checkpoint-compatible layout).

    Backbone: repeated Conv+BN+ReLU+MaxPool stages, each followed by a
    CBAMAttentionCheckpoint block. An optional single-channel edge-map
    branch (two convs + a linear projection) can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: 'f' (16/32/64 ch), 'c' (32/64/128 ch) or
            'q' (64/128/256/512 ch). Any other value raises ValueError
            (previously it fell through and crashed later with a NameError).
        num_classes: size of the output logits layer (default 6). The
            original hardcoded 6 and ignored this argument.

    The fc input sizes (e.g. 64*28*28 for 'f' after three 2x2 pools) imply
    a 224x224 RGB input -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of deferring to a NameError on fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Backbone: one Conv+BN+ReLU+MaxPool group (4 modules) per stage.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )

        # Edge-detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge map after two 2x2 pools --
        # TODO confirm the expected edge input size.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: RGB input batch (N, 3, H, W).
            edge_x: optional 1-channel edge map; when given and the edge
                branch succeeds, the combined head produces the logits.

        Returns:
            (logits, attention_map) where attention_map is the feature map
            produced by the last attention block.
        """
        # Backbone: each stage is 4 sequential modules, then its CBAM block.
        # This single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') variants.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        # First Linear+ReLU+Dropout of the classifier produces the shared
        # feature vector consumed by either head.
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. mismatched edge-map
            # size) fall back to the main classifier instead of crashing.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Fixed: the scraped original returned the undefined identifier
        # `attention_mapModelpythonpytorch` (page artifact fused into code).
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free 7x7
    conv over the channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) so parameter names and
        # shapes line up with the checkpoint's state dict.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv (NO BIAS) over the 2-channel
        # (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier (checkpoint-compatible layout).

    Backbone: repeated Conv+BN+ReLU+MaxPool stages, each followed by a
    CBAMAttentionCheckpoint block. An optional single-channel edge-map
    branch (two convs + a linear projection) can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: 'f' (16/32/64 ch), 'c' (32/64/128 ch) or
            'q' (64/128/256/512 ch). Any other value raises ValueError
            (previously it fell through and crashed later with a NameError).
        num_classes: size of the output logits layer (default 6). The
            original hardcoded 6 and ignored this argument.

    The fc input sizes (e.g. 64*28*28 for 'f' after three 2x2 pools) imply
    a 224x224 RGB input -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of deferring to a NameError on fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Backbone: one Conv+BN+ReLU+MaxPool group (4 modules) per stage.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )

        # Edge-detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge map after two 2x2 pools --
        # TODO confirm the expected edge input size.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: RGB input batch (N, 3, H, W).
            edge_x: optional 1-channel edge map; when given and the edge
                branch succeeds, the combined head produces the logits.

        Returns:
            (logits, attention_map) where attention_map is the feature map
            produced by the last attention block.
        """
        # Backbone: each stage is 4 sequential modules, then its CBAM block.
        # This single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') variants.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        # First Linear+ReLU+Dropout of the classifier produces the shared
        # feature vector consumed by either head.
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. mismatched edge-map
            # size) fall back to the main classifier instead of crashing.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Fixed: the scraped original returned the undefined identifier
        # `attention_mapModelpythonpytorch` (page artifact fused into code).
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free 7x7
    conv over the channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) so parameter names and
        # shapes line up with the checkpoint's state dict.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv (NO BIAS) over the 2-channel
        # (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier (checkpoint-compatible layout).

    Backbone: repeated Conv+BN+ReLU+MaxPool stages, each followed by a
    CBAMAttentionCheckpoint block. An optional single-channel edge-map
    branch (two convs + a linear projection) can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: 'f' (16/32/64 ch), 'c' (32/64/128 ch) or
            'q' (64/128/256/512 ch). Any other value raises ValueError
            (previously it fell through and crashed later with a NameError).
        num_classes: size of the output logits layer (default 6). The
            original hardcoded 6 and ignored this argument.

    The fc input sizes (e.g. 64*28*28 for 'f' after three 2x2 pools) imply
    a 224x224 RGB input -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of deferring to a NameError on fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Backbone: one Conv+BN+ReLU+MaxPool group (4 modules) per stage.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )

        # Edge-detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge map after two 2x2 pools --
        # TODO confirm the expected edge input size.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: RGB input batch (N, 3, H, W).
            edge_x: optional 1-channel edge map; when given and the edge
                branch succeeds, the combined head produces the logits.

        Returns:
            (logits, attention_map) where attention_map is the feature map
            produced by the last attention block.
        """
        # Backbone: each stage is 4 sequential modules, then its CBAM block.
        # This single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') variants.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        # First Linear+ReLU+Dropout of the classifier produces the shared
        # feature vector consumed by either head.
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. mismatched edge-map
            # size) fall back to the main classifier instead of crashing.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Fixed: the scraped original returned the undefined identifier
        # `attention_mapModelpythonpytorch` (page artifact fused into code).
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free 7x7
    conv over the channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) so parameter names and
        # shapes line up with the checkpoint's state dict.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv (NO BIAS) over the 2-channel
        # (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier (checkpoint-compatible layout).

    Backbone: repeated Conv+BN+ReLU+MaxPool stages, each followed by a
    CBAMAttentionCheckpoint block. An optional single-channel edge-map
    branch (two convs + a linear projection) can be fused with the main
    features through ``combined_classifier``.

    Args:
        model_type: 'f' (16/32/64 ch), 'c' (32/64/128 ch) or
            'q' (64/128/256/512 ch). Any other value raises ValueError
            (previously it fell through and crashed later with a NameError).
        num_classes: size of the output logits layer (default 6). The
            original hardcoded 6 and ignored this argument.

    The fc input sizes (e.g. 64*28*28 for 'f' after three 2x2 pools) imply
    a 224x224 RGB input -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        if model_type == 'f':
            stage_channels = [16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            stage_channels = [32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            stage_channels = [64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of deferring to a NameError on fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        # Backbone: one Conv+BN+ReLU+MaxPool group (4 modules) per stage.
        layers = []
        in_ch = 3
        for out_ch in stage_channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)

        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )

        # Edge-detection branch (expects a 1-channel edge map).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64 * 56 * 56 implies a 224x224 edge map after two 2x2 pools --
        # TODO confirm the expected edge input size.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and a classifier head.

        Args:
            x: RGB input batch (N, 3, H, W).
            edge_x: optional 1-channel edge map; when given and the edge
                branch succeeds, the combined head produces the logits.

        Returns:
            (logits, attention_map) where attention_map is the feature map
            produced by the last attention block.
        """
        # Backbone: each stage is 4 sequential modules, then its CBAM block.
        # This single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') variants.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        # First Linear+ReLU+Dropout of the classifier produces the shared
        # feature vector consumed by either head.
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort fusion: on any failure (e.g. mismatched edge-map
            # size) fall back to the main classifier instead of crashing.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Fixed: the scraped original returned the undefined identifier
        # `attention_mapModelpythonpytorch` (page artifact fused into code).
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention (a shared bias-free 1x1-conv MLP over avg- and
    max-pooled descriptors) followed by spatial attention (a bias-free 7x7
    conv over the channel-wise avg/max maps).
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; never below 1.
        hidden = max(channels // reduction, 1)
        # Channel attention as 1x1 convs (NO BIAS) so parameter names and
        # shapes line up with the checkpoint's state dict.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv (NO BIAS) over the 2-channel
        # (avg, max) descriptor map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by channel statistics ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Return ``(logits, attention_map)``; optionally fuse an edge-image branch.

    Each backbone stage is 4 sequential modules (Conv, BN, ReLU, Pool)
    followed by its CBAM attention module; 'f'/'c' have 3 stages, 'q' has 4.
    """
    if self.model_type in ('f', 'c'):
        for stage in range(3):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
    elif self.model_type == 'q':
        for stage in range(4):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
    flat = x.view(x.size(0), -1)
    # Linear -> ReLU -> Dropout: the shared feature vector for both heads.
    feats = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
    if edge_x is not None:
        try:
            e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
            e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
            e = self.edge_fc(e.view(e.size(0), -1))
            output = self.combined_classifier(torch.cat([feats, e], dim=1))
        except Exception:
            # Best-effort fusion: fall back to the plain head on any failure
            # (e.g. an edge_x spatial size that does not match edge_fc).
            output = self.classifier[3:](feats)
    else:
        output = self.classifier[3:](feats)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN backbone with CBAM attention and an optional edge-image branch,
    laid out to match the Vbai-DPA-2.4 checkpoint ('f'/'c'/'q' variants).

    Args:
        model_type: 'f' (3 narrow stages), 'c' (3 wider stages) or
            'q' (4 stages). Selects conv widths and classifier sizes.
        num_classes: number of output classes (default 6, as trained).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone stages: each stage is Conv+BN+ReLU+MaxPool (4 modules).
        # fc_input values assume a 224x224 input image — TODO confirm.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 -> 28 after three 2x pools
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 -> 14 after four 2x pools
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: single-channel edge map -> 128-d feature.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # After two 2x pools a 224x224 edge map becomes 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head. BUG FIX: the final layer width now follows
        # num_classes instead of a hardcoded 6 (default keeps old behavior).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: image batch; spatial size must match fc_input (224x224 assumed).
            edge_x: optional edge-map batch (N, 1, H, W). If given, the edge
                branch is fused; on any failure it falls back to the main head.
        """
        # Conv stages (4 modules each) interleaved with CBAM attention.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # Linear -> ReLU -> Dropout: shared feature vector for both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main head if the edge
                # branch fails (e.g. wrong edge_x spatial size for edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN backbone with CBAM attention and an optional edge-image branch,
    laid out to match the Vbai-DPA-2.4 checkpoint ('f'/'c'/'q' variants).

    Args:
        model_type: 'f' (3 narrow stages), 'c' (3 wider stages) or
            'q' (4 stages). Selects conv widths and classifier sizes.
        num_classes: number of output classes (default 6, as trained).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone stages: each stage is Conv+BN+ReLU+MaxPool (4 modules).
        # fc_input values assume a 224x224 input image — TODO confirm.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 -> 28 after three 2x pools
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 -> 14 after four 2x pools
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: single-channel edge map -> 128-d feature.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # After two 2x pools a 224x224 edge map becomes 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head. BUG FIX: the final layer width now follows
        # num_classes instead of a hardcoded 6 (default keeps old behavior).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: image batch; spatial size must match fc_input (224x224 assumed).
            edge_x: optional edge-map batch (N, 1, H, W). If given, the edge
                branch is fused; on any failure it falls back to the main head.
        """
        # Conv stages (4 modules each) interleaved with CBAM attention.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # Linear -> ReLU -> Dropout: shared feature vector for both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main head if the edge
                # branch fails (e.g. wrong edge_x spatial size for edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN backbone with CBAM attention and an optional edge-image branch,
    laid out to match the Vbai-DPA-2.4 checkpoint ('f'/'c'/'q' variants).

    Args:
        model_type: 'f' (3 narrow stages), 'c' (3 wider stages) or
            'q' (4 stages). Selects conv widths and classifier sizes.
        num_classes: number of output classes (default 6, as trained).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone stages: each stage is Conv+BN+ReLU+MaxPool (4 modules).
        # fc_input values assume a 224x224 input image — TODO confirm.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 -> 28 after three 2x pools
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 -> 14 after four 2x pools
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: single-channel edge map -> 128-d feature.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # After two 2x pools a 224x224 edge map becomes 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head. BUG FIX: the final layer width now follows
        # num_classes instead of a hardcoded 6 (default keeps old behavior).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: image batch; spatial size must match fc_input (224x224 assumed).
            edge_x: optional edge-map batch (N, 1, H, W). If given, the edge
                branch is fused; on any failure it falls back to the main head.
        """
        # Conv stages (4 modules each) interleaved with CBAM attention.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # Linear -> ReLU -> Dropout: shared feature vector for both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main head if the edge
                # branch fails (e.g. wrong edge_x spatial size for edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN backbone with CBAM attention and an optional edge-image branch,
    laid out to match the Vbai-DPA-2.4 checkpoint ('f'/'c'/'q' variants).

    Args:
        model_type: 'f' (3 narrow stages), 'c' (3 wider stages) or
            'q' (4 stages). Selects conv widths and classifier sizes.
        num_classes: number of output classes (default 6, as trained).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone stages: each stage is Conv+BN+ReLU+MaxPool (4 modules).
        # fc_input values assume a 224x224 input image — TODO confirm.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 -> 28 after three 2x pools
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 -> 14 after four 2x pools
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: single-channel edge map -> 128-d feature.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # After two 2x pools a 224x224 edge map becomes 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head. BUG FIX: the final layer width now follows
        # num_classes instead of a hardcoded 6 (default keeps old behavior).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: image batch; spatial size must match fc_input (224x224 assumed).
            edge_x: optional edge-map batch (N, 1, H, W). If given, the edge
                branch is fused; on any failure it falls back to the main head.
        """
        # Conv stages (4 modules each) interleaved with CBAM attention.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # Linear -> ReLU -> Dropout: shared feature vector for both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main head if the edge
                # branch fails (e.g. wrong edge_x spatial size for edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN backbone with CBAM attention and an optional edge-image branch,
    laid out to match the Vbai-DPA-2.4 checkpoint ('f'/'c'/'q' variants).

    Args:
        model_type: 'f' (3 narrow stages), 'c' (3 wider stages) or
            'q' (4 stages). Selects conv widths and classifier sizes.
        num_classes: number of output classes (default 6, as trained).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone stages: each stage is Conv+BN+ReLU+MaxPool (4 modules).
        # fc_input values assume a 224x224 input image — TODO confirm.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 -> 28 after three 2x pools
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 -> 14 after four 2x pools
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: single-channel edge map -> 128-d feature.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # After two 2x pools a 224x224 edge map becomes 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head. BUG FIX: the final layer width now follows
        # num_classes instead of a hardcoded 6 (default keeps old behavior).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: image batch; spatial size must match fc_input (224x224 assumed).
            edge_x: optional edge-map batch (N, 1, H, W). If given, the edge
                branch is fused; on any failure it falls back to the main head.
        """
        # Conv stages (4 modules each) interleaved with CBAM attention.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # Linear -> ReLU -> Dropout: shared feature vector for both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main head if the edge
                # branch fails (e.g. wrong edge_x spatial size for edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN backbone with CBAM attention and an optional edge-image branch,
    laid out to match the Vbai-DPA-2.4 checkpoint ('f'/'c'/'q' variants).

    Args:
        model_type: 'f' (3 narrow stages), 'c' (3 wider stages) or
            'q' (4 stages). Selects conv widths and classifier sizes.
        num_classes: number of output classes (default 6, as trained).

    Raises:
        ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone stages: each stage is Conv+BN+ReLU+MaxPool (4 modules).
        # fc_input values assume a 224x224 input image — TODO confirm.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 -> 28 after three 2x pools
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 -> 14 after four 2x pools
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast; previously an unknown type crashed later with a
            # confusing NameError on fc_input.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: single-channel edge map -> 128-d feature.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # After two 2x pools a 224x224 edge map becomes 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head. BUG FIX: the final layer width now follows
        # num_classes instead of a hardcoded 6 (default keeps old behavior).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``.

        Args:
            x: image batch; spatial size must match fc_input (224x224 assumed).
            edge_x: optional edge-map batch (N, 1, H, W). If given, the edge
                branch is fused; on any failure it falls back to the main head.
        """
        # Conv stages (4 modules each) interleaved with CBAM attention.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' — __init__ guarantees no other value reaches here
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # Linear -> ReLU -> Dropout: shared feature vector for both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main head if the edge
                # branch fails (e.g. wrong edge_x spatial size for edge_fc).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the layer layout of a saved checkpoint.

    Channel attention is a two-layer bias-free 1x1-conv bottleneck applied to
    global avg- and max-pooled statistics; spatial attention is a single
    bias-free 7x7 conv over the per-pixel [mean, max] channel maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP (never below 1).
        squeezed = max(channels // reduction, 1)
        # 1x1 convs with no bias, mirroring the checkpoint's state_dict keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Single 7x7 conv (no bias) over the stacked pooled channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial attention: gate each pixel by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        stacked = torch.cat([mean_map, max_map], dim=1)
        x = x * torch.sigmoid(self.spatial_attention(stacked))
        return x
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Three variants share the same structure and differ only in width/depth:
    'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch) and
    'q' (4 stages, 64->512 ch).  Each stage is Conv->BN->ReLU->MaxPool
    followed by a CBAM attention module.  An optional edge-detection branch
    can be fused in before the final classification layers.

    Module names and indices are identical to the original hand-written
    version, so existing checkpoints load unchanged.
    """

    # model_type -> (stage output channels, flattened feature size, hidden FC sizes).
    # Flattened sizes assume a 224x224 input (28 = 224/2**3, 14 = 224/2**4).
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Original silently fell through for unknown model_type and crashed
        # later with UnboundLocalError; fail fast instead.
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, hidden = self._CONFIGS[model_type]
        # Bug fix: the output size was hard-coded to 6 and num_classes was
        # ignored; honour it (default 6 keeps checkpoint compatibility).
        fc_sizes = hidden + [num_classes]
        self.conv_layers = self._make_conv_stack(channels)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Edge detection branch (1-channel input; edge_fc size assumes a
        # 224x224 edge map -> 56x56 after two 2x2 pools).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: (Linear, ReLU, Dropout) x2, then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    @staticmethod
    def _make_conv_stack(channels):
        """Build the Conv->BN->ReLU->MaxPool stages as one nn.Sequential.

        Four modules per stage, so layer indices (and therefore state_dict
        keys) are identical to the original hand-written stack.
        """
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        return nn.Sequential(*layers)

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, H, W) image batch -- the flattened feature size assumes
           H = W = 224; other sizes fail at the first Linear.
        edge_x: optional (B, 1, H, W) edge map fused via the combined
           classifier; on any failure the main classifier is used instead.
        """
        # Each conv stage is 4 sequential modules, followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: fall back to the main head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Bug fix: the original line was fused with scrape junk
        # ("attention_mapModelpythonpytorch"), raising NameError.
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layout matches the checkpoint exactly: the channel MLP is realised as
    two bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution over the [avg, max] channel-pooled maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convs (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one bias-free 7x7 conv over a 2-channel pooled map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: global avg and max pools share the same MLP; their
        # sum is squashed to (0, 1) and scales every channel.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv -> sigmoid.
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(pooled))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Variants: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch),
    'q' (4 stages, 64->512 ch).  Each stage is Conv->BN->ReLU->MaxPool
    followed by a CBAM attention module; an optional edge branch can be
    fused in before the final classification layers.  Module names and
    indices are identical to the original, so checkpoints load unchanged.
    """

    # model_type -> (stage output channels, flattened feature size, hidden FC sizes).
    # Flattened sizes assume a 224x224 input (28 = 224/2**3, 14 = 224/2**4).
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Original silently fell through for unknown model_type and crashed
        # later with UnboundLocalError; fail fast instead.
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, hidden = self._CONFIGS[model_type]
        # Bug fix: the output size was hard-coded to 6 and num_classes was
        # ignored; honour it (default 6 keeps checkpoint compatibility).
        fc_sizes = hidden + [num_classes]
        self.conv_layers = self._make_conv_stack(channels)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Edge detection branch (1-channel input; edge_fc size assumes a
        # 224x224 edge map -> 56x56 after two 2x2 pools).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: (Linear, ReLU, Dropout) x2, then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    @staticmethod
    def _make_conv_stack(channels):
        """Build the Conv->BN->ReLU->MaxPool stages as one nn.Sequential.

        Four modules per stage, so layer indices (and therefore state_dict
        keys) are identical to the original hand-written stack.
        """
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        return nn.Sequential(*layers)

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, H, W) image batch -- the flattened feature size assumes
           H = W = 224; other sizes fail at the first Linear.
        edge_x: optional (B, 1, H, W) edge map fused via the combined
           classifier; on any failure the main classifier is used instead.
        """
        # Each conv stage is 4 sequential modules, followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: fall back to the main head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Bug fix: the original line was fused with scrape junk
        # ("attention_mapModelpythonpytorch"), raising NameError.
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layout matches the checkpoint exactly: the channel MLP is realised as
    two bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution over the [avg, max] channel-pooled maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convs (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one bias-free 7x7 conv over a 2-channel pooled map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: global avg and max pools share the same MLP; their
        # sum is squashed to (0, 1) and scales every channel.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv -> sigmoid.
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(pooled))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Variants: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch),
    'q' (4 stages, 64->512 ch).  Each stage is Conv->BN->ReLU->MaxPool
    followed by a CBAM attention module; an optional edge branch can be
    fused in before the final classification layers.  Module names and
    indices are identical to the original, so checkpoints load unchanged.
    """

    # model_type -> (stage output channels, flattened feature size, hidden FC sizes).
    # Flattened sizes assume a 224x224 input (28 = 224/2**3, 14 = 224/2**4).
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Original silently fell through for unknown model_type and crashed
        # later with UnboundLocalError; fail fast instead.
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, hidden = self._CONFIGS[model_type]
        # Bug fix: the output size was hard-coded to 6 and num_classes was
        # ignored; honour it (default 6 keeps checkpoint compatibility).
        fc_sizes = hidden + [num_classes]
        self.conv_layers = self._make_conv_stack(channels)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Edge detection branch (1-channel input; edge_fc size assumes a
        # 224x224 edge map -> 56x56 after two 2x2 pools).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: (Linear, ReLU, Dropout) x2, then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    @staticmethod
    def _make_conv_stack(channels):
        """Build the Conv->BN->ReLU->MaxPool stages as one nn.Sequential.

        Four modules per stage, so layer indices (and therefore state_dict
        keys) are identical to the original hand-written stack.
        """
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        return nn.Sequential(*layers)

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, H, W) image batch -- the flattened feature size assumes
           H = W = 224; other sizes fail at the first Linear.
        edge_x: optional (B, 1, H, W) edge map fused via the combined
           classifier; on any failure the main classifier is used instead.
        """
        # Each conv stage is 4 sequential modules, followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: fall back to the main head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Bug fix: the original line was fused with scrape junk
        # ("attention_mapModelpythonpytorch"), raising NameError.
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layout matches the checkpoint exactly: the channel MLP is realised as
    two bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution over the [avg, max] channel-pooled maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convs (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one bias-free 7x7 conv over a 2-channel pooled map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: global avg and max pools share the same MLP; their
        # sum is squashed to (0, 1) and scales every channel.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv -> sigmoid.
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(pooled))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Variants: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch),
    'q' (4 stages, 64->512 ch).  Each stage is Conv->BN->ReLU->MaxPool
    followed by a CBAM attention module; an optional edge branch can be
    fused in before the final classification layers.  Module names and
    indices are identical to the original, so checkpoints load unchanged.
    """

    # model_type -> (stage output channels, flattened feature size, hidden FC sizes).
    # Flattened sizes assume a 224x224 input (28 = 224/2**3, 14 = 224/2**4).
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Original silently fell through for unknown model_type and crashed
        # later with UnboundLocalError; fail fast instead.
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, hidden = self._CONFIGS[model_type]
        # Bug fix: the output size was hard-coded to 6 and num_classes was
        # ignored; honour it (default 6 keeps checkpoint compatibility).
        fc_sizes = hidden + [num_classes]
        self.conv_layers = self._make_conv_stack(channels)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Edge detection branch (1-channel input; edge_fc size assumes a
        # 224x224 edge map -> 56x56 after two 2x2 pools).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: (Linear, ReLU, Dropout) x2, then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    @staticmethod
    def _make_conv_stack(channels):
        """Build the Conv->BN->ReLU->MaxPool stages as one nn.Sequential.

        Four modules per stage, so layer indices (and therefore state_dict
        keys) are identical to the original hand-written stack.
        """
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        return nn.Sequential(*layers)

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, H, W) image batch -- the flattened feature size assumes
           H = W = 224; other sizes fail at the first Linear.
        edge_x: optional (B, 1, H, W) edge map fused via the combined
           classifier; on any failure the main classifier is used instead.
        """
        # Each conv stage is 4 sequential modules, followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: fall back to the main head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Bug fix: the original line was fused with scrape junk
        # ("attention_mapModelpythonpytorch"), raising NameError.
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layout matches the checkpoint exactly: the channel MLP is realised as
    two bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution over the [avg, max] channel-pooled maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convs (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one bias-free 7x7 conv over a 2-channel pooled map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: global avg and max pools share the same MLP; their
        # sum is squashed to (0, 1) and scales every channel.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv -> sigmoid.
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(pooled))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Variants: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch),
    'q' (4 stages, 64->512 ch).  Each stage is Conv->BN->ReLU->MaxPool
    followed by a CBAM attention module; an optional edge branch can be
    fused in before the final classification layers.  Module names and
    indices are identical to the original, so checkpoints load unchanged.
    """

    # model_type -> (stage output channels, flattened feature size, hidden FC sizes).
    # Flattened sizes assume a 224x224 input (28 = 224/2**3, 14 = 224/2**4).
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Original silently fell through for unknown model_type and crashed
        # later with UnboundLocalError; fail fast instead.
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, hidden = self._CONFIGS[model_type]
        # Bug fix: the output size was hard-coded to 6 and num_classes was
        # ignored; honour it (default 6 keeps checkpoint compatibility).
        fc_sizes = hidden + [num_classes]
        self.conv_layers = self._make_conv_stack(channels)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Edge detection branch (1-channel input; edge_fc size assumes a
        # 224x224 edge map -> 56x56 after two 2x2 pools).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: (Linear, ReLU, Dropout) x2, then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    @staticmethod
    def _make_conv_stack(channels):
        """Build the Conv->BN->ReLU->MaxPool stages as one nn.Sequential.

        Four modules per stage, so layer indices (and therefore state_dict
        keys) are identical to the original hand-written stack.
        """
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        return nn.Sequential(*layers)

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, H, W) image batch -- the flattened feature size assumes
           H = W = 224; other sizes fail at the first Linear.
        edge_x: optional (B, 1, H, W) edge map fused via the combined
           classifier; on any failure the main classifier is used instead.
        """
        # Each conv stage is 4 sequential modules, followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: fall back to the main head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Bug fix: the original line was fused with scrape junk
        # ("attention_mapModelpythonpytorch"), raising NameError.
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block (channel gate, then spatial gate).

    Layout matches the checkpoint exactly: the channel MLP is realised as
    two bias-free 1x1 convolutions and the spatial gate is a single
    bias-free 7x7 convolution over the [avg, max] channel-pooled maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convs (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one bias-free 7x7 conv over a 2-channel pooled map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: global avg and max pools share the same MLP; their
        # sum is squashed to (0, 1) and scales every channel.
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        # Spatial gate: per-pixel avg/max across channels -> 7x7 conv -> sigmoid.
        pooled = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(pooled))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Variants: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch),
    'q' (4 stages, 64->512 ch).  Each stage is Conv->BN->ReLU->MaxPool
    followed by a CBAM attention module; an optional edge branch can be
    fused in before the final classification layers.  Module names and
    indices are identical to the original, so checkpoints load unchanged.
    """

    # model_type -> (stage output channels, flattened feature size, hidden FC sizes).
    # Flattened sizes assume a 224x224 input (28 = 224/2**3, 14 = 224/2**4).
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Original silently fell through for unknown model_type and crashed
        # later with UnboundLocalError; fail fast instead.
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, hidden = self._CONFIGS[model_type]
        # Bug fix: the output size was hard-coded to 6 and num_classes was
        # ignored; honour it (default 6 keeps checkpoint compatibility).
        fc_sizes = hidden + [num_classes]
        self.conv_layers = self._make_conv_stack(channels)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Edge detection branch (1-channel input; edge_fc size assumes a
        # 224x224 edge map -> 56x56 after two 2x2 pools).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: (Linear, ReLU, Dropout) x2, then the output Linear.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    @staticmethod
    def _make_conv_stack(channels):
        """Build the Conv->BN->ReLU->MaxPool stages as one nn.Sequential.

        Four modules per stage, so layer indices (and therefore state_dict
        keys) are identical to the original hand-written stack.
        """
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        return nn.Sequential(*layers)

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, H, W) image batch -- the flattened feature size assumes
           H = W = 224; other sizes fail at the first Linear.
        edge_x: optional (B, 1, H, W) edge map fused via the combined
           classifier; on any failure the main classifier is used instead.
        """
        # Each conv stage is 4 sequential modules, followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: fall back to the main head
                # if the edge branch fails (e.g. unexpected edge_x size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        # Bug fix: the original line was fused with scrape junk
        # ("attention_mapModelpythonpytorch"), raising NameError.
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Return (logits, attention_map).

    attention_map is the final attention-weighted feature map, kept for
    visualization. If edge_x is given, an edge-feature branch is fused into
    a combined head; on any failure there it falls back to the main head.
    """
    if self.model_type == 'f' or self.model_type == 'c':
        # Three Conv+BN+ReLU+Pool stages, CBAM attention after each.
        x = self.conv_layers[0:4](x)
        x = self.attention_modules[0](x)
        x = self.conv_layers[4:8](x)
        x = self.attention_modules[1](x)
        x = self.conv_layers[8:12](x)
        x = self.attention_modules[2](x)
        attention_map = x
    elif self.model_type == 'q':
        # Four stages for the 'q' variant.
        x = self.conv_layers[0:4](x)
        x = self.attention_modules[0](x)
        x = self.conv_layers[4:8](x)
        x = self.attention_modules[1](x)
        x = self.conv_layers[8:12](x)
        x = self.attention_modules[2](x)
        x = self.conv_layers[12:16](x)
        x = self.attention_modules[3](x)
        attention_map = x
    x = x.view(x.size(0), -1)
    # First Linear+ReLU+Dropout of the main head -> shared feature vector.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)
    if edge_x is not None:
        try:
            # Edge branch: two conv+pool stages, then project to 128-d.
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = edge_x.view(edge_x.size(0), -1)
            edge_features = self.edge_fc(edge_features)
            combined = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(combined)
        except Exception:
            # Deliberate best-effort: fall back to the main head if the edge
            # branch fails (e.g. unexpected edge input size).
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate) matching the checkpoint
    layout: bias-free 1x1 convs for the channel MLP and a bias-free 7x7 conv
    for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convolutions (no bias, as stored in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over stacked mean/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global average- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    model_type selects the backbone:
      'f' - 3 stages (16/32/64 ch),   224x224 input -> 28x28 features
      'c' - 3 stages (32/64/128 ch),  224x224 input -> 28x28 features
      'q' - 4 stages (64..512 ch),    224x224 input -> 14x14 features

    num_classes sets the output width (fix: it was previously ignored and
    hard-coded to 6; default unchanged).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages; CBAM follows each stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 halved 3 times -> 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 halved 4 times -> 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError("model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 64 channels at 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map, kept for visualization."""
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor guarantees model_type is valid)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main head if the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate) matching the checkpoint
    layout: bias-free 1x1 convs for the channel MLP and a bias-free 7x7 conv
    for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convolutions (no bias, as stored in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over stacked mean/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global average- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    model_type selects the backbone:
      'f' - 3 stages (16/32/64 ch),   224x224 input -> 28x28 features
      'c' - 3 stages (32/64/128 ch),  224x224 input -> 28x28 features
      'q' - 4 stages (64..512 ch),    224x224 input -> 14x14 features

    num_classes sets the output width (fix: it was previously ignored and
    hard-coded to 6; default unchanged).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages; CBAM follows each stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 halved 3 times -> 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 halved 4 times -> 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError("model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 64 channels at 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map, kept for visualization."""
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor guarantees model_type is valid)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main head if the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate) matching the checkpoint
    layout: bias-free 1x1 convs for the channel MLP and a bias-free 7x7 conv
    for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convolutions (no bias, as stored in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over stacked mean/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global average- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    model_type selects the backbone:
      'f' - 3 stages (16/32/64 ch),   224x224 input -> 28x28 features
      'c' - 3 stages (32/64/128 ch),  224x224 input -> 28x28 features
      'q' - 4 stages (64..512 ch),    224x224 input -> 14x14 features

    num_classes sets the output width (fix: it was previously ignored and
    hard-coded to 6; default unchanged).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages; CBAM follows each stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 halved 3 times -> 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 halved 4 times -> 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError("model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 64 channels at 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map, kept for visualization."""
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor guarantees model_type is valid)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main head if the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate) matching the checkpoint
    layout: bias-free 1x1 convs for the channel MLP and a bias-free 7x7 conv
    for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convolutions (no bias, as stored in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over stacked mean/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global average- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    model_type selects the backbone:
      'f' - 3 stages (16/32/64 ch),   224x224 input -> 28x28 features
      'c' - 3 stages (32/64/128 ch),  224x224 input -> 28x28 features
      'q' - 4 stages (64..512 ch),    224x224 input -> 14x14 features

    num_classes sets the output width (fix: it was previously ignored and
    hard-coded to 6; default unchanged).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages; CBAM follows each stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 halved 3 times -> 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 halved 4 times -> 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError("model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 64 channels at 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map, kept for visualization."""
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor guarantees model_type is valid)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main head if the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate) matching the checkpoint
    layout: bias-free 1x1 convs for the channel MLP and a bias-free 7x7 conv
    for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convolutions (no bias, as stored in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over stacked mean/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global average- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    model_type selects the backbone:
      'f' - 3 stages (16/32/64 ch),   224x224 input -> 28x28 features
      'c' - 3 stages (32/64/128 ch),  224x224 input -> 28x28 features
      'q' - 4 stages (64..512 ch),    224x224 input -> 14x14 features

    num_classes sets the output width (fix: it was previously ignored and
    hard-coded to 6; default unchanged).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages; CBAM follows each stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 halved 3 times -> 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 halved 4 times -> 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError("model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 64 channels at 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map, kept for visualization."""
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor guarantees model_type is valid)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main head if the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate) matching the checkpoint
    layout: bias-free 1x1 convs for the channel MLP and a bias-free 7x7 conv
    for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convolutions (no bias, as stored in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over stacked mean/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global average- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention and an optional edge branch.

    model_type selects the backbone:
      'f' - 3 stages (16/32/64 ch),   224x224 input -> 28x28 features
      'c' - 3 stages (32/64/128 ch),  224x224 input -> 28x28 features
      'q' - 4 stages (64..512 ch),    224x224 input -> 14x14 features

    num_classes sets the output width (fix: it was previously ignored and
    hard-coded to 6; default unchanged).
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages; CBAM follows each stage.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 halved 3 times -> 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 halved 4 times -> 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of hitting a NameError on fc_input below.
            raise ValueError("model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge-detection branch (single-channel input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge input pooled twice -> 64 channels at 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); attention_map is the final
        attention-weighted feature map, kept for visualization."""
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        else:  # 'q' (the constructor guarantees model_type is valid)
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> shared feature vector.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort: fall back to the main head if the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate then spatial gate), matching
    the checkpoint's layout: bias-free 1x1 convs for the channel MLP and a
    bias-free 7x7 conv for the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention MLP expressed as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked mean/max maps (no bias, per checkpoint).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over global avg/max descriptors ---
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over channel-wise mean/max maps ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        stacked = torch.cat((mean_map, max_map), dim=1)
        return x * torch.sigmoid(self.spatial_attention(stacked))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier with per-stage CBAM attention.

    Three capacity variants share one layout so the released checkpoints
    load directly: 'f' (16/32/64 channels), 'c' (32/64/128) and
    'q' (64/128/256/512). The main input is assumed to be a 224x224 RGB
    image (the flattened FC sizes encode that — TODO confirm against the
    training pipeline); the optional edge input is a 224x224
    single-channel map.
    """

    # model_type -> (backbone channel progression, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast: the original fell through and crashed later with
            # an UnboundLocalError on fc_input.
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,))
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._CONFIGS[model_type]
        # Backbone: one Conv(3x3)+BN+ReLU+MaxPool(2) stage per channel
        # step, i.e. conv_layers[4*i : 4*i + 4] is stage i.
        stage_modules = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stage_modules)
        # One CBAM gate after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        n_stages = len(channels) - 1
        spatial = 224 // (2 ** n_stages)  # each stage halves H and W
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages, 224 -> 56 spatial.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main head. forward() splits it: [0:3] produces the fusion
        # features, [3:] finishes classification when no edge input.
        # The output width now honors num_classes (was hard-coded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given (and
                usable) its features are fused before classification.

        Returns:
            (logits, attention_map), attention_map being the last
            attended backbone feature map.
        """
        # The duplicated per-variant branches of the original collapse
        # into one loop: backbone stage i, then CBAM gate i.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge = self.edge_fc(edge.view(edge.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge], dim=1))
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input
                # degrades to the image-only head instead of raising.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) gate.

    The layer layout (a bias-free two-layer 1x1-conv channel MLP and a
    single bias-free 7x7 spatial conv, each wrapped in nn.Sequential)
    mirrors the released checkpoint so its state_dict loads without any
    key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel MLP; never collapses below 1.
        squeezed = max(channels // reduction, 1)
        # Channel attention: shared two-layer 1x1-conv MLP, no biases.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        """Per-channel sigmoid gate of shape (N, C, 1, 1)."""
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        """Per-position sigmoid gate of shape (N, 1, H, W)."""
        mean_map = torch.mean(feats, dim=1, keepdim=True)
        max_map, _ = torch.max(feats, dim=1, keepdim=True)
        descriptor = torch.cat([mean_map, max_map], dim=1)
        return torch.sigmoid(self.spatial_attention(descriptor))

    def forward(self, x):
        """Apply channel attention, then spatial attention; return gated x."""
        x = x * self._channel_gate(x)
        return x * self._spatial_gate(x)
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier with per-stage CBAM attention.

    Three capacity variants share one layout so the released checkpoints
    load directly: 'f' (16/32/64 channels), 'c' (32/64/128) and
    'q' (64/128/256/512). The main input is assumed to be a 224x224 RGB
    image (the flattened FC sizes encode that — TODO confirm against the
    training pipeline); the optional edge input is a 224x224
    single-channel map.
    """

    # model_type -> (backbone channel progression, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast: the original fell through and crashed later with
            # an UnboundLocalError on fc_input.
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,))
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._CONFIGS[model_type]
        # Backbone: one Conv(3x3)+BN+ReLU+MaxPool(2) stage per channel
        # step, i.e. conv_layers[4*i : 4*i + 4] is stage i.
        stage_modules = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stage_modules)
        # One CBAM gate after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        n_stages = len(channels) - 1
        spatial = 224 // (2 ** n_stages)  # each stage halves H and W
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages, 224 -> 56 spatial.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main head. forward() splits it: [0:3] produces the fusion
        # features, [3:] finishes classification when no edge input.
        # The output width now honors num_classes (was hard-coded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given (and
                usable) its features are fused before classification.

        Returns:
            (logits, attention_map), attention_map being the last
            attended backbone feature map.
        """
        # The duplicated per-variant branches of the original collapse
        # into one loop: backbone stage i, then CBAM gate i.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge = self.edge_fc(edge.view(edge.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge], dim=1))
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input
                # degrades to the image-only head instead of raising.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) gate.

    The layer layout (a bias-free two-layer 1x1-conv channel MLP and a
    single bias-free 7x7 spatial conv, each wrapped in nn.Sequential)
    mirrors the released checkpoint so its state_dict loads without any
    key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel MLP; never collapses below 1.
        squeezed = max(channels // reduction, 1)
        # Channel attention: shared two-layer 1x1-conv MLP, no biases.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        """Per-channel sigmoid gate of shape (N, C, 1, 1)."""
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        """Per-position sigmoid gate of shape (N, 1, H, W)."""
        mean_map = torch.mean(feats, dim=1, keepdim=True)
        max_map, _ = torch.max(feats, dim=1, keepdim=True)
        descriptor = torch.cat([mean_map, max_map], dim=1)
        return torch.sigmoid(self.spatial_attention(descriptor))

    def forward(self, x):
        """Apply channel attention, then spatial attention; return gated x."""
        x = x * self._channel_gate(x)
        return x * self._spatial_gate(x)
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier with per-stage CBAM attention.

    Three capacity variants share one layout so the released checkpoints
    load directly: 'f' (16/32/64 channels), 'c' (32/64/128) and
    'q' (64/128/256/512). The main input is assumed to be a 224x224 RGB
    image (the flattened FC sizes encode that — TODO confirm against the
    training pipeline); the optional edge input is a 224x224
    single-channel map.
    """

    # model_type -> (backbone channel progression, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast: the original fell through and crashed later with
            # an UnboundLocalError on fc_input.
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,))
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._CONFIGS[model_type]
        # Backbone: one Conv(3x3)+BN+ReLU+MaxPool(2) stage per channel
        # step, i.e. conv_layers[4*i : 4*i + 4] is stage i.
        stage_modules = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stage_modules)
        # One CBAM gate after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        n_stages = len(channels) - 1
        spatial = 224 // (2 ** n_stages)  # each stage halves H and W
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages, 224 -> 56 spatial.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main head. forward() splits it: [0:3] produces the fusion
        # features, [3:] finishes classification when no edge input.
        # The output width now honors num_classes (was hard-coded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given (and
                usable) its features are fused before classification.

        Returns:
            (logits, attention_map), attention_map being the last
            attended backbone feature map.
        """
        # The duplicated per-variant branches of the original collapse
        # into one loop: backbone stage i, then CBAM gate i.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge = self.edge_fc(edge.view(edge.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge], dim=1))
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input
                # degrades to the image-only head instead of raising.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) gate.

    The layer layout (a bias-free two-layer 1x1-conv channel MLP and a
    single bias-free 7x7 spatial conv, each wrapped in nn.Sequential)
    mirrors the released checkpoint so its state_dict loads without any
    key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel MLP; never collapses below 1.
        squeezed = max(channels // reduction, 1)
        # Channel attention: shared two-layer 1x1-conv MLP, no biases.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        """Per-channel sigmoid gate of shape (N, C, 1, 1)."""
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        """Per-position sigmoid gate of shape (N, 1, H, W)."""
        mean_map = torch.mean(feats, dim=1, keepdim=True)
        max_map, _ = torch.max(feats, dim=1, keepdim=True)
        descriptor = torch.cat([mean_map, max_map], dim=1)
        return torch.sigmoid(self.spatial_attention(descriptor))

    def forward(self, x):
        """Apply channel attention, then spatial attention; return gated x."""
        x = x * self._channel_gate(x)
        return x * self._spatial_gate(x)
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier with per-stage CBAM attention.

    Three capacity variants share one layout so the released checkpoints
    load directly: 'f' (16/32/64 channels), 'c' (32/64/128) and
    'q' (64/128/256/512). The main input is assumed to be a 224x224 RGB
    image (the flattened FC sizes encode that — TODO confirm against the
    training pipeline); the optional edge input is a 224x224
    single-channel map.
    """

    # model_type -> (backbone channel progression, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast: the original fell through and crashed later with
            # an UnboundLocalError on fc_input.
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,))
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._CONFIGS[model_type]
        # Backbone: one Conv(3x3)+BN+ReLU+MaxPool(2) stage per channel
        # step, i.e. conv_layers[4*i : 4*i + 4] is stage i.
        stage_modules = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stage_modules)
        # One CBAM gate after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        n_stages = len(channels) - 1
        spatial = 224 // (2 ** n_stages)  # each stage halves H and W
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages, 224 -> 56 spatial.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main head. forward() splits it: [0:3] produces the fusion
        # features, [3:] finishes classification when no edge input.
        # The output width now honors num_classes (was hard-coded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given (and
                usable) its features are fused before classification.

        Returns:
            (logits, attention_map), attention_map being the last
            attended backbone feature map.
        """
        # The duplicated per-variant branches of the original collapse
        # into one loop: backbone stage i, then CBAM gate i.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge = self.edge_fc(edge.view(edge.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge], dim=1))
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input
                # degrades to the image-only head instead of raising.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) gate.

    The layer layout (a bias-free two-layer 1x1-conv channel MLP and a
    single bias-free 7x7 spatial conv, each wrapped in nn.Sequential)
    mirrors the released checkpoint so its state_dict loads without any
    key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel MLP; never collapses below 1.
        squeezed = max(channels // reduction, 1)
        # Channel attention: shared two-layer 1x1-conv MLP, no biases.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        """Per-channel sigmoid gate of shape (N, C, 1, 1)."""
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        """Per-position sigmoid gate of shape (N, 1, H, W)."""
        mean_map = torch.mean(feats, dim=1, keepdim=True)
        max_map, _ = torch.max(feats, dim=1, keepdim=True)
        descriptor = torch.cat([mean_map, max_map], dim=1)
        return torch.sigmoid(self.spatial_attention(descriptor))

    def forward(self, x):
        """Apply channel attention, then spatial attention; return gated x."""
        x = x * self._channel_gate(x)
        return x * self._spatial_gate(x)
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier with per-stage CBAM attention.

    Three capacity variants share one layout so the released checkpoints
    load directly: 'f' (16/32/64 channels), 'c' (32/64/128) and
    'q' (64/128/256/512). The main input is assumed to be a 224x224 RGB
    image (the flattened FC sizes encode that — TODO confirm against the
    training pipeline); the optional edge input is a 224x224
    single-channel map.
    """

    # model_type -> (backbone channel progression, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast: the original fell through and crashed later with
            # an UnboundLocalError on fc_input.
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,))
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._CONFIGS[model_type]
        # Backbone: one Conv(3x3)+BN+ReLU+MaxPool(2) stage per channel
        # step, i.e. conv_layers[4*i : 4*i + 4] is stage i.
        stage_modules = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stage_modules)
        # One CBAM gate after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        n_stages = len(channels) - 1
        spatial = 224 // (2 ** n_stages)  # each stage halves H and W
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages, 224 -> 56 spatial.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main head. forward() splits it: [0:3] produces the fusion
        # features, [3:] finishes classification when no edge input.
        # The output width now honors num_classes (was hard-coded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given (and
                usable) its features are fused before classification.

        Returns:
            (logits, attention_map), attention_map being the last
            attended backbone feature map.
        """
        # The duplicated per-variant branches of the original collapse
        # into one loop: backbone stage i, then CBAM gate i.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge = self.edge_fc(edge.view(edge.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge], dim=1))
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input
                # degrades to the image-only head instead of raising.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) gate.

    The layer layout (a bias-free two-layer 1x1-conv channel MLP and a
    single bias-free 7x7 spatial conv, each wrapped in nn.Sequential)
    mirrors the released checkpoint so its state_dict loads without any
    key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel MLP; never collapses below 1.
        squeezed = max(channels // reduction, 1)
        # Channel attention: shared two-layer 1x1-conv MLP, no biases.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        """Per-channel sigmoid gate of shape (N, C, 1, 1)."""
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        """Per-position sigmoid gate of shape (N, 1, H, W)."""
        mean_map = torch.mean(feats, dim=1, keepdim=True)
        max_map, _ = torch.max(feats, dim=1, keepdim=True)
        descriptor = torch.cat([mean_map, max_map], dim=1)
        return torch.sigmoid(self.spatial_attention(descriptor))

    def forward(self, x):
        """Apply channel attention, then spatial attention; return gated x."""
        x = x * self._channel_gate(x)
        return x * self._spatial_gate(x)
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 CNN classifier with per-stage CBAM attention.

    Three capacity variants share one layout so the released checkpoints
    load directly: 'f' (16/32/64 channels), 'c' (32/64/128) and
    'q' (64/128/256/512). The main input is assumed to be a 224x224 RGB
    image (the flattened FC sizes encode that — TODO confirm against the
    training pipeline); the optional edge input is a 224x224
    single-channel map.
    """

    # model_type -> (backbone channel progression, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128]),
        'c': ([3, 32, 64, 128], [512, 256]),
        'q': ([3, 64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast: the original fell through and crashed later with
            # an UnboundLocalError on fc_input.
            raise ValueError(
                "unknown model_type %r; expected 'f', 'c' or 'q'" % (model_type,))
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._CONFIGS[model_type]
        # Backbone: one Conv(3x3)+BN+ReLU+MaxPool(2) stage per channel
        # step, i.e. conv_layers[4*i : 4*i + 4] is stage i.
        stage_modules = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stage_modules)
        # One CBAM gate after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        n_stages = len(channels) - 1
        spatial = 224 // (2 ** n_stages)  # each stage halves H and W
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages, 224 -> 56 spatial.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main head. forward() splits it: [0:3] produces the fusion
        # features, [3:] finishes classification when no edge input.
        # The output width now honors num_classes (was hard-coded to 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge map; when given (and
                usable) its features are fused before classification.

        Returns:
            (logits, attention_map), attention_map being the last
            attended backbone feature map.
        """
        # The duplicated per-variant branches of the original collapse
        # into one loop: backbone stage i, then CBAM gate i.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> fusion features.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        output = None
        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge = self.edge_fc(edge.view(edge.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge], dim=1))
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input
                # degrades to the image-only head instead of raising.
                output = None
        if output is None:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) gate.

    The layer layout (a bias-free two-layer 1x1-conv channel MLP and a
    single bias-free 7x7 spatial conv, each wrapped in nn.Sequential)
    mirrors the released checkpoint so its state_dict loads without any
    key remapping.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel MLP; never collapses below 1.
        squeezed = max(channels // reduction, 1)
        # Channel attention: shared two-layer 1x1-conv MLP, no biases.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [mean, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def _channel_gate(self, feats):
        """Per-channel sigmoid gate of shape (N, C, 1, 1)."""
        pooled_avg = F.adaptive_avg_pool2d(feats, 1)
        pooled_max = F.adaptive_max_pool2d(feats, 1)
        logits = self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        return torch.sigmoid(logits)

    def _spatial_gate(self, feats):
        """Per-position sigmoid gate of shape (N, 1, H, W)."""
        mean_map = torch.mean(feats, dim=1, keepdim=True)
        max_map, _ = torch.max(feats, dim=1, keepdim=True)
        descriptor = torch.cat([mean_map, max_map], dim=1)
        return torch.sigmoid(self.spatial_attention(descriptor))

    def forward(self, x):
        """Apply channel attention, then spatial attention; return gated x."""
        x = x * self._channel_gate(x)
        return x * self._spatial_gate(x)
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the classifier; returns (logits, last-stage attention map)."""
    # Every variant alternates one conv stage (4 Sequential entries:
    # Conv, BN, ReLU, Pool) with its CBAM module, so a single loop covers
    # both 'f'/'c' (3 stages) and 'q' (4 stages) — the original duplicated
    # the two branches by hand.
    for i, attention in enumerate(self.attention_modules):
        x = self.conv_layers[4 * i:4 * i + 4](x)
        x = attention(x)
    attention_map = x

    x = x.view(x.size(0), -1)
    # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)

    if edge_x is not None:
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
            combined = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(combined)
        except Exception:
            # Best-effort fusion: fall back to the main classifier when the
            # edge branch fails (e.g. unexpected edge-image size).
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share one layout: a stack of Conv-BN-ReLU-MaxPool stages
    interleaved with CBAM attention, a flatten + MLP classifier, and an
    optional edge-image branch whose 128 features are fused into a combined
    classifier.

    Args:
        model_type: 'f', 'c' or 'q' — selects per-stage channel widths and
            classifier hidden sizes.
        num_classes: size of the output layer (checkpoint uses 6).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q' (previously a
            confusing NameError).
    """

    # Per-variant conv-stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _FC_HIDDEN = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        # Same Sequential indices (0..4*stages-1) as the hand-written
        # version, so checkpoint state-dict keys are unchanged.
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Input is assumed 224x224; each stage halves the spatial size
        # (28x28 for f/c with 3 stages, 14x14 for q with 4).
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        hidden = self._FC_HIDDEN[model_type]
        # Fix: honour num_classes (was hard-coded to 6; default 6 keeps
        # checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge image pooled twice -> 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the post-attention feature map of the last conv stage.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool)
        # followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main classifier when
                # the edge branch fails (e.g. unexpected edge-image size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share one layout: a stack of Conv-BN-ReLU-MaxPool stages
    interleaved with CBAM attention, a flatten + MLP classifier, and an
    optional edge-image branch whose 128 features are fused into a combined
    classifier.

    Args:
        model_type: 'f', 'c' or 'q' — selects per-stage channel widths and
            classifier hidden sizes.
        num_classes: size of the output layer (checkpoint uses 6).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q' (previously a
            confusing NameError).
    """

    # Per-variant conv-stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _FC_HIDDEN = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        # Same Sequential indices (0..4*stages-1) as the hand-written
        # version, so checkpoint state-dict keys are unchanged.
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Input is assumed 224x224; each stage halves the spatial size
        # (28x28 for f/c with 3 stages, 14x14 for q with 4).
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        hidden = self._FC_HIDDEN[model_type]
        # Fix: honour num_classes (was hard-coded to 6; default 6 keeps
        # checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge image pooled twice -> 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the post-attention feature map of the last conv stage.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool)
        # followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main classifier when
                # the edge branch fails (e.g. unexpected edge-image size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share one layout: a stack of Conv-BN-ReLU-MaxPool stages
    interleaved with CBAM attention, a flatten + MLP classifier, and an
    optional edge-image branch whose 128 features are fused into a combined
    classifier.

    Args:
        model_type: 'f', 'c' or 'q' — selects per-stage channel widths and
            classifier hidden sizes.
        num_classes: size of the output layer (checkpoint uses 6).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q' (previously a
            confusing NameError).
    """

    # Per-variant conv-stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _FC_HIDDEN = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        # Same Sequential indices (0..4*stages-1) as the hand-written
        # version, so checkpoint state-dict keys are unchanged.
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Input is assumed 224x224; each stage halves the spatial size
        # (28x28 for f/c with 3 stages, 14x14 for q with 4).
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        hidden = self._FC_HIDDEN[model_type]
        # Fix: honour num_classes (was hard-coded to 6; default 6 keeps
        # checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge image pooled twice -> 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the post-attention feature map of the last conv stage.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool)
        # followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main classifier when
                # the edge branch fails (e.g. unexpected edge-image size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share one layout: a stack of Conv-BN-ReLU-MaxPool stages
    interleaved with CBAM attention, a flatten + MLP classifier, and an
    optional edge-image branch whose 128 features are fused into a combined
    classifier.

    Args:
        model_type: 'f', 'c' or 'q' — selects per-stage channel widths and
            classifier hidden sizes.
        num_classes: size of the output layer (checkpoint uses 6).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q' (previously a
            confusing NameError).
    """

    # Per-variant conv-stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _FC_HIDDEN = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        # Same Sequential indices (0..4*stages-1) as the hand-written
        # version, so checkpoint state-dict keys are unchanged.
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Input is assumed 224x224; each stage halves the spatial size
        # (28x28 for f/c with 3 stages, 14x14 for q with 4).
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        hidden = self._FC_HIDDEN[model_type]
        # Fix: honour num_classes (was hard-coded to 6; default 6 keeps
        # checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge image pooled twice -> 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the post-attention feature map of the last conv stage.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool)
        # followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main classifier when
                # the edge branch fails (e.g. unexpected edge-image size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share one layout: a stack of Conv-BN-ReLU-MaxPool stages
    interleaved with CBAM attention, a flatten + MLP classifier, and an
    optional edge-image branch whose 128 features are fused into a combined
    classifier.

    Args:
        model_type: 'f', 'c' or 'q' — selects per-stage channel widths and
            classifier hidden sizes.
        num_classes: size of the output layer (checkpoint uses 6).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q' (previously a
            confusing NameError).
    """

    # Per-variant conv-stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _FC_HIDDEN = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        # Same Sequential indices (0..4*stages-1) as the hand-written
        # version, so checkpoint state-dict keys are unchanged.
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Input is assumed 224x224; each stage halves the spatial size
        # (28x28 for f/c with 3 stages, 14x14 for q with 4).
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        hidden = self._FC_HIDDEN[model_type]
        # Fix: honour num_classes (was hard-coded to 6; default 6 keeps
        # checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge image pooled twice -> 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the post-attention feature map of the last conv stage.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool)
        # followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main classifier when
                # the edge branch fails (e.g. unexpected edge-image size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    All variants share one layout: a stack of Conv-BN-ReLU-MaxPool stages
    interleaved with CBAM attention, a flatten + MLP classifier, and an
    optional edge-image branch whose 128 features are fused into a combined
    classifier.

    Args:
        model_type: 'f', 'c' or 'q' — selects per-stage channel widths and
            classifier hidden sizes.
        num_classes: size of the output layer (checkpoint uses 6).

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q' (previously a
            confusing NameError).
    """

    # Per-variant conv-stage output channels and classifier hidden sizes.
    _STAGE_CHANNELS = {
        'f': (16, 32, 64),
        'c': (32, 64, 128),
        'q': (64, 128, 256, 512),
    }
    _FC_HIDDEN = {
        'f': (256, 128),
        'c': (512, 256),
        'q': (1024, 512),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._STAGE_CHANNELS:
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels = self._STAGE_CHANNELS[model_type]
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        # Same Sequential indices (0..4*stages-1) as the hand-written
        # version, so checkpoint state-dict keys are unchanged.
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )

        # Input is assumed 224x224; each stage halves the spatial size
        # (28x28 for f/c with 3 stages, 14x14 for q with 4).
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        hidden = self._FC_HIDDEN[model_type]
        # Fix: honour num_classes (was hard-coded to 6; default 6 keeps
        # checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]

        # Edge detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224x224 edge image pooled twice -> 56x56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Returns:
            (logits, attention_map): logits of shape (B, num_classes) and
            the post-attention feature map of the last conv stage.
        """
        # Each stage is 4 Sequential entries (Conv, BN, ReLU, Pool)
        # followed by its CBAM module.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the main classifier when
                # the edge branch fails (e.g. unexpected edge-image size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention matching the checkpoint's parameter layout.

    Channel attention is a bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors; spatial attention is a single
    bias-free 7x7 conv over the stacked channel-wise mean/max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Checkpoint stores both gates as bias-free Conv2d layers.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck over avg- and max-pooled vectors.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        refined = x * gate
        # Spatial gate: 7x7 conv over per-pixel mean and max across channels.
        descriptor = torch.cat(
            (refined.mean(dim=1, keepdim=True), refined.amax(dim=1, keepdim=True)),
            dim=1,
        )
        return refined * torch.sigmoid(self.spatial_attention(descriptor))
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention after each conv stage and
    an optional edge-map branch fused into the classifier head.

    model_type selects the capacity variant:
      'f': 3 stages, 16/32/64 channels
      'c': 3 stages, 32/64/128 channels
      'q': 4 stages, 64/128/256/512 channels
    The fully-connected sizes assume a 224x224 input (each stage halves the
    spatial resolution) — TODO confirm against the training pipeline.
    """

    # model_type -> (per-stage channel widths, hidden FC sizes)
    _CONFIGS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Previously an unknown type failed later with a NameError on
            # fc_input; fail fast with a clear message instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        stage_channels, hidden = self._CONFIGS[model_type]
        # Conv stages: Conv + BN + ReLU + MaxPool (4 modules per stage).
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM block after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )
        # Flattened feature size for a 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two pools, hence 64 * 56 * 56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; the first three modules (Linear+ReLU+Dropout)
        # produce the features that the edge branch is fused with.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),  # was hard-coded to 6
        )
        # Head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),  # was hard-coded to 6
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, 224, 224) image batch — TODO confirm input size.
        edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
        edge branch the model falls back to the image-only head.
        """
        # Interleave conv stages (4 modules each) with their CBAM blocks;
        # this replaces the duplicated 'f'/'c' vs 'q' branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion (kept from the original behavior):
                # a mis-sized edge input falls through to the main head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel then spatial), with the exact module layout of
    the released checkpoint: bias-free 1x1 convs for the channel path and a
    single bias-free 7x7 conv for the spatial path."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as Conv2d 1x1 (no bias), matching checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over stacked avg/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention from global average/max descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention from per-pixel channel mean/max.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention after each conv stage and
    an optional edge-map branch fused into the classifier head.

    model_type selects the capacity variant:
      'f': 3 stages, 16/32/64 channels
      'c': 3 stages, 32/64/128 channels
      'q': 4 stages, 64/128/256/512 channels
    The fully-connected sizes assume a 224x224 input (each stage halves the
    spatial resolution) — TODO confirm against the training pipeline.
    """

    # model_type -> (per-stage channel widths, hidden FC sizes)
    _CONFIGS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Previously an unknown type failed later with a NameError on
            # fc_input; fail fast with a clear message instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        stage_channels, hidden = self._CONFIGS[model_type]
        # Conv stages: Conv + BN + ReLU + MaxPool (4 modules per stage).
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM block after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )
        # Flattened feature size for a 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two pools, hence 64 * 56 * 56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; the first three modules (Linear+ReLU+Dropout)
        # produce the features that the edge branch is fused with.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),  # was hard-coded to 6
        )
        # Head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),  # was hard-coded to 6
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, 224, 224) image batch — TODO confirm input size.
        edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
        edge branch the model falls back to the image-only head.
        """
        # Interleave conv stages (4 modules each) with their CBAM blocks;
        # this replaces the duplicated 'f'/'c' vs 'q' branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion (kept from the original behavior):
                # a mis-sized edge input falls through to the main head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel then spatial), with the exact module layout of
    the released checkpoint: bias-free 1x1 convs for the channel path and a
    single bias-free 7x7 conv for the spatial path."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as Conv2d 1x1 (no bias), matching checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over stacked avg/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention from global average/max descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention from per-pixel channel mean/max.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention after each conv stage and
    an optional edge-map branch fused into the classifier head.

    model_type selects the capacity variant:
      'f': 3 stages, 16/32/64 channels
      'c': 3 stages, 32/64/128 channels
      'q': 4 stages, 64/128/256/512 channels
    The fully-connected sizes assume a 224x224 input (each stage halves the
    spatial resolution) — TODO confirm against the training pipeline.
    """

    # model_type -> (per-stage channel widths, hidden FC sizes)
    _CONFIGS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Previously an unknown type failed later with a NameError on
            # fc_input; fail fast with a clear message instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        stage_channels, hidden = self._CONFIGS[model_type]
        # Conv stages: Conv + BN + ReLU + MaxPool (4 modules per stage).
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM block after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )
        # Flattened feature size for a 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two pools, hence 64 * 56 * 56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; the first three modules (Linear+ReLU+Dropout)
        # produce the features that the edge branch is fused with.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),  # was hard-coded to 6
        )
        # Head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),  # was hard-coded to 6
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, 224, 224) image batch — TODO confirm input size.
        edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
        edge branch the model falls back to the image-only head.
        """
        # Interleave conv stages (4 modules each) with their CBAM blocks;
        # this replaces the duplicated 'f'/'c' vs 'q' branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion (kept from the original behavior):
                # a mis-sized edge input falls through to the main head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel then spatial), with the exact module layout of
    the released checkpoint: bias-free 1x1 convs for the channel path and a
    single bias-free 7x7 conv for the spatial path."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as Conv2d 1x1 (no bias), matching checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over stacked avg/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention from global average/max descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention from per-pixel channel mean/max.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention after each conv stage and
    an optional edge-map branch fused into the classifier head.

    model_type selects the capacity variant:
      'f': 3 stages, 16/32/64 channels
      'c': 3 stages, 32/64/128 channels
      'q': 4 stages, 64/128/256/512 channels
    The fully-connected sizes assume a 224x224 input (each stage halves the
    spatial resolution) — TODO confirm against the training pipeline.
    """

    # model_type -> (per-stage channel widths, hidden FC sizes)
    _CONFIGS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Previously an unknown type failed later with a NameError on
            # fc_input; fail fast with a clear message instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        stage_channels, hidden = self._CONFIGS[model_type]
        # Conv stages: Conv + BN + ReLU + MaxPool (4 modules per stage).
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM block after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )
        # Flattened feature size for a 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two pools, hence 64 * 56 * 56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; the first three modules (Linear+ReLU+Dropout)
        # produce the features that the edge branch is fused with.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),  # was hard-coded to 6
        )
        # Head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),  # was hard-coded to 6
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, 224, 224) image batch — TODO confirm input size.
        edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
        edge branch the model falls back to the image-only head.
        """
        # Interleave conv stages (4 modules each) with their CBAM blocks;
        # this replaces the duplicated 'f'/'c' vs 'q' branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion (kept from the original behavior):
                # a mis-sized edge input falls through to the main head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel then spatial), with the exact module layout of
    the released checkpoint: bias-free 1x1 convs for the channel path and a
    single bias-free 7x7 conv for the spatial path."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as Conv2d 1x1 (no bias), matching checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over stacked avg/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention from global average/max descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention from per-pixel channel mean/max.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention after each conv stage and
    an optional edge-map branch fused into the classifier head.

    model_type selects the capacity variant:
      'f': 3 stages, 16/32/64 channels
      'c': 3 stages, 32/64/128 channels
      'q': 4 stages, 64/128/256/512 channels
    The fully-connected sizes assume a 224x224 input (each stage halves the
    spatial resolution) — TODO confirm against the training pipeline.
    """

    # model_type -> (per-stage channel widths, hidden FC sizes)
    _CONFIGS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Previously an unknown type failed later with a NameError on
            # fc_input; fail fast with a clear message instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        stage_channels, hidden = self._CONFIGS[model_type]
        # Conv stages: Conv + BN + ReLU + MaxPool (4 modules per stage).
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM block after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )
        # Flattened feature size for a 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two pools, hence 64 * 56 * 56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; the first three modules (Linear+ReLU+Dropout)
        # produce the features that the edge branch is fused with.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),  # was hard-coded to 6
        )
        # Head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),  # was hard-coded to 6
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, 224, 224) image batch — TODO confirm input size.
        edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
        edge branch the model falls back to the image-only head.
        """
        # Interleave conv stages (4 modules each) with their CBAM blocks;
        # this replaces the duplicated 'f'/'c' vs 'q' branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion (kept from the original behavior):
                # a mis-sized edge input falls through to the main head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel then spatial), with the exact module layout of
    the released checkpoint: bias-free 1x1 convs for the channel path and a
    single bias-free 7x7 conv for the spatial path."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced_channels = max(channels // reduction, 1)
        # Channel attention as Conv2d 1x1 (no bias), matching checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over stacked avg/max maps (no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention from global average/max descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention from per-pixel channel mean/max.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier with CBAM attention after each conv stage and
    an optional edge-map branch fused into the classifier head.

    model_type selects the capacity variant:
      'f': 3 stages, 16/32/64 channels
      'c': 3 stages, 32/64/128 channels
      'q': 4 stages, 64/128/256/512 channels
    The fully-connected sizes assume a 224x224 input (each stage halves the
    spatial resolution) — TODO confirm against the training pipeline.
    """

    # model_type -> (per-stage channel widths, hidden FC sizes)
    _CONFIGS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Previously an unknown type failed later with a NameError on
            # fc_input; fail fast with a clear message instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        stage_channels, hidden = self._CONFIGS[model_type]
        # Conv stages: Conv + BN + ReLU + MaxPool (4 modules per stage).
        blocks = []
        in_ch = 3
        for out_ch in stage_channels:
            blocks += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM block after each conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in stage_channels]
        )
        # Flattened feature size for a 224x224 input.
        spatial = 224 // (2 ** len(stage_channels))
        fc_input = stage_channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool stages on a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after the two pools, hence 64 * 56 * 56.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier; the first three modules (Linear+ReLU+Dropout)
        # produce the features that the edge branch is fused with.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),  # was hard-coded to 6
        )
        # Head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),  # was hard-coded to 6
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x: (B, 3, 224, 224) image batch — TODO confirm input size.
        edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
        edge branch the model falls back to the image-only head.
        """
        # Interleave conv stages (4 modules each) with their CBAM blocks;
        # this replaces the duplicated 'f'/'c' vs 'q' branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # Linear + ReLU + Dropout -> intermediate features.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort fusion (kept from the original behavior):
                # a mis-sized edge input falls through to the main head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Return ``(logits, attention_map)`` for a batch of images.

    ``x`` is the RGB input batch; ``edge_x`` is an optional 1-channel edge
    image.  When the edge branch fails (e.g. a size mismatch), the model
    falls back to the edge-free classifier head.
    """
    # Backbone: run each 4-layer conv stage, then its attention block.
    # ('f'/'c' variants have 3 stages, 'q' has 4 -- the length of
    # self.attention_modules encodes this, so one loop covers all variants.)
    for i, attention in enumerate(self.attention_modules):
        x = self.conv_layers[4 * i:4 * (i + 1)](x)
        x = attention(x)
    attention_map = x  # last attended feature map, returned to the caller

    x = x.view(x.size(0), -1)
    # Feature vector = output of the first Linear + ReLU + Dropout triple
    # of the main classifier head.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)

    if edge_x is not None:
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
            combined = torch.cat([features, edge_features], dim=1)
            return self.combined_classifier(combined), attention_map
        except Exception:
            # Best-effort by design: on any edge-branch failure, fall
            # through to the edge-free head below.
            pass
    return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier whose layer layout matches the checkpoint.

    Args:
        model_type: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch)
            or 'q' (4 stages, 64->512 ch).
        num_classes: size of the output logits (the checkpoint uses 6).

    ``forward(x, edge_x=None)`` returns ``(logits, attention_map)``.
    Spatial sizes assume 224x224 RGB input (28x28 or 14x14 feature maps
    after pooling) -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # The repeated backbone unit: Conv -> BN -> ReLU -> 2x2 max-pool.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant channel progression and classifier widths.  The head's
        # final width uses num_classes (it was hard-coded to 6, leaving the
        # constructor argument dead).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of crashing later with an undefined fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (expects a 1-channel 224x224 edge image;
        # two pools bring it to 56x56 before the projection to 128 features).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.  forward() taps the output of the first
        # Linear+ReLU+Dropout triple as the feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are concatenated in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images."""
        # Backbone: run each 4-layer stage, then its CBAM attention block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned to the caller

        x = x.view(x.size(0), -1)
        # Feature vector = first Linear + ReLU + Dropout of the main head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort by design: on any edge-branch failure (e.g.
                # edge input not 224x224), fall through to the edge-free head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier whose layer layout matches the checkpoint.

    Args:
        model_type: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch)
            or 'q' (4 stages, 64->512 ch).
        num_classes: size of the output logits (the checkpoint uses 6).

    ``forward(x, edge_x=None)`` returns ``(logits, attention_map)``.
    Spatial sizes assume 224x224 RGB input (28x28 or 14x14 feature maps
    after pooling) -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # The repeated backbone unit: Conv -> BN -> ReLU -> 2x2 max-pool.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant channel progression and classifier widths.  The head's
        # final width uses num_classes (it was hard-coded to 6, leaving the
        # constructor argument dead).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of crashing later with an undefined fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (expects a 1-channel 224x224 edge image;
        # two pools bring it to 56x56 before the projection to 128 features).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.  forward() taps the output of the first
        # Linear+ReLU+Dropout triple as the feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are concatenated in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images."""
        # Backbone: run each 4-layer stage, then its CBAM attention block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned to the caller

        x = x.view(x.size(0), -1)
        # Feature vector = first Linear + ReLU + Dropout of the main head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort by design: on any edge-branch failure (e.g.
                # edge input not 224x224), fall through to the edge-free head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier whose layer layout matches the checkpoint.

    Args:
        model_type: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch)
            or 'q' (4 stages, 64->512 ch).
        num_classes: size of the output logits (the checkpoint uses 6).

    ``forward(x, edge_x=None)`` returns ``(logits, attention_map)``.
    Spatial sizes assume 224x224 RGB input (28x28 or 14x14 feature maps
    after pooling) -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # The repeated backbone unit: Conv -> BN -> ReLU -> 2x2 max-pool.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant channel progression and classifier widths.  The head's
        # final width uses num_classes (it was hard-coded to 6, leaving the
        # constructor argument dead).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of crashing later with an undefined fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (expects a 1-channel 224x224 edge image;
        # two pools bring it to 56x56 before the projection to 128 features).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.  forward() taps the output of the first
        # Linear+ReLU+Dropout triple as the feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are concatenated in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images."""
        # Backbone: run each 4-layer stage, then its CBAM attention block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned to the caller

        x = x.view(x.size(0), -1)
        # Feature vector = first Linear + ReLU + Dropout of the main head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort by design: on any edge-branch failure (e.g.
                # edge input not 224x224), fall through to the edge-free head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier whose layer layout matches the checkpoint.

    Args:
        model_type: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch)
            or 'q' (4 stages, 64->512 ch).
        num_classes: size of the output logits (the checkpoint uses 6).

    ``forward(x, edge_x=None)`` returns ``(logits, attention_map)``.
    Spatial sizes assume 224x224 RGB input (28x28 or 14x14 feature maps
    after pooling) -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # The repeated backbone unit: Conv -> BN -> ReLU -> 2x2 max-pool.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant channel progression and classifier widths.  The head's
        # final width uses num_classes (it was hard-coded to 6, leaving the
        # constructor argument dead).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of crashing later with an undefined fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (expects a 1-channel 224x224 edge image;
        # two pools bring it to 56x56 before the projection to 128 features).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.  forward() taps the output of the first
        # Linear+ReLU+Dropout triple as the feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are concatenated in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images."""
        # Backbone: run each 4-layer stage, then its CBAM attention block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned to the caller

        x = x.view(x.size(0), -1)
        # Feature vector = first Linear + ReLU + Dropout of the main head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort by design: on any edge-branch failure (e.g.
                # edge input not 224x224), fall through to the edge-free head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier whose layer layout matches the checkpoint.

    Args:
        model_type: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch)
            or 'q' (4 stages, 64->512 ch).
        num_classes: size of the output logits (the checkpoint uses 6).

    ``forward(x, edge_x=None)`` returns ``(logits, attention_map)``.
    Spatial sizes assume 224x224 RGB input (28x28 or 14x14 feature maps
    after pooling) -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # The repeated backbone unit: Conv -> BN -> ReLU -> 2x2 max-pool.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant channel progression and classifier widths.  The head's
        # final width uses num_classes (it was hard-coded to 6, leaving the
        # constructor argument dead).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of crashing later with an undefined fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (expects a 1-channel 224x224 edge image;
        # two pools bring it to 56x56 before the projection to 128 features).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.  forward() taps the output of the first
        # Linear+ReLU+Dropout triple as the feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are concatenated in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images."""
        # Backbone: run each 4-layer stage, then its CBAM attention block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned to the caller

        x = x.view(x.size(0), -1)
        # Feature vector = first Linear + ReLU + Dropout of the main head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort by design: on any edge-branch failure (e.g.
                # edge input not 224x224), fall through to the edge-free head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier whose layer layout matches the checkpoint.

    Args:
        model_type: 'f' (3 stages, 16->64 ch), 'c' (3 stages, 32->128 ch)
            or 'q' (4 stages, 64->512 ch).
        num_classes: size of the output logits (the checkpoint uses 6).

    ``forward(x, edge_x=None)`` returns ``(logits, attention_map)``.
    Spatial sizes assume 224x224 RGB input (28x28 or 14x14 feature maps
    after pooling) -- TODO confirm against the training pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(c_in, c_out):
            # The repeated backbone unit: Conv -> BN -> ReLU -> 2x2 max-pool.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        # Per-variant channel progression and classifier widths.  The head's
        # final width uses num_classes (it was hard-coded to 6, leaving the
        # constructor argument dead).
        if model_type == 'f':
            channels = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channels = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channels = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast instead of crashing later with an undefined fc_input.
            raise ValueError(f"unknown model_type: {model_type!r}")

        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages.extend(_stage(c_in, c_out))
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch (expects a 1-channel 224x224 edge image;
        # two pools bring it to 56x56 before the projection to 128 features).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.  forward() taps the output of the first
        # Linear+ReLU+Dropout triple as the feature vector.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are concatenated in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` for a batch of images."""
        # Backbone: run each 4-layer stage, then its CBAM attention block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned to the caller

        x = x.view(x.size(0), -1)
        # Feature vector = first Linear + ReLU + Dropout of the main head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Best-effort by design: on any edge-branch failure (e.g.
                # edge input not 224x224), fall through to the edge-free head.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention, then spatial attention).

    The layer layout mirrors the released checkpoint: the channel-attention
    MLP is two bias-free 1x1 convolutions and the spatial attention is a
    single bias-free 7x7 convolution.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: squeeze/excite MLP as 1x1 convs (NO BIAS, as in
        # the checkpoint).  Clamp the bottleneck to at least one channel.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP over global avg- and max-pools ---
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # --- spatial attention: conv over per-pixel channel mean and max ---
        avg_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_map, max_map], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier reproducing the Vbai-DPA 2.4 checkpoint architecture.

    Three capacity variants are selected by ``model_type``:
      * ``'f'``: 3 conv stages (16/32/64 channels)
      * ``'c'``: 3 conv stages (32/64/128 channels)
      * ``'q'``: 4 conv stages (64/128/256/512 channels)
    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional single-channel edge-map branch can be fused with the main
    features before classification.

    NOTE(review): the fully-connected input sizes (64*28*28 / 128*28*28 /
    512*14*14, and 64*56*56 for the edge branch) assume 224x224 inputs —
    confirm against the preprocessing pipeline.
    """

    def __init__(self, model_type='f', num_classes=6):
        """
        Args:
            model_type: one of 'f', 'c', 'q' (see class docstring).
            num_classes: number of output classes. The default (6) matches
                the released checkpoints; other values change the final
                layer shapes and therefore require retraining.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv backbone: Sequential of Conv+BN+ReLU+Pool stages (4 modules per stage).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUGFIX: final width now honors num_classes (was hard-coded 6).
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUGFIX: fail fast. The original fell through silently and later
            # crashed with UnboundLocalError / missing attributes.
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection branch: two conv stages over a 1-channel edge map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 64*56*56 assumes a 224x224 edge map pooled twice (224 -> 56).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused with the main features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and (optionally) the edge branch.

        Args:
            x: RGB batch; FC sizes assume (N, 3, 224, 224).
            edge_x: optional edge-map batch; edge_fc assumes (N, 1, 224, 224).

        Returns:
            (logits, attention_map) — attention_map is the final
            post-attention feature map, useful for visualization.
        """
        # Interleave each Conv+BN+ReLU+Pool stage (4 modules) with its CBAM
        # module; this covers both the 3-stage ('f'/'c') and 4-stage ('q')
        # variants since model_type was validated in __init__.
        for stage, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First classifier stage (Linear+ReLU+Dropout) yields the feature
        # vector shared by both heads.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort: fall back to the main head when the
                # edge branch fails (e.g. unexpected edge-map size).
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_map
Modelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_mapModelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share one structure:
      * ``'f'`` (fast):    3 stages, 16 -> 32 -> 64 channels
      * ``'c'`` (compact): 3 stages, 32 -> 64 -> 128 channels
      * ``'q'`` (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional grayscale edge branch can be fused into the head.  Spatial
    sizes assume 224x224 input (28x28 after 3 pools, 14x14 after 4) —
    TODO confirm against the preprocessing used by callers.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output layer (was hard-coded to 6; default 6 keeps checkpoints loadable),
    and an unknown ``model_type`` raises ``ValueError`` instead of a
    confusing ``NameError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(in_ch, out_ch):
            # One Conv+BN+ReLU+Pool stage (4 modules), so stage i can be
            # addressed as conv_layers[4*i : 4*i + 4] in forward().
            return [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously this fell through to a NameError below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Flat Sequential keeps the original state-dict keys
        # (conv_layers.0, conv_layers.1, ...).
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend(_stage(in_ch, out_ch))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in widths[1:]]
        )

        # Edge detection branch (1-channel input; edge_fc sizing assumes a
        # 224x224 edge map -> 56x56 after two pools — TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head.

        Returns ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (useful for visualization).
        """
        # One attention module per conv stage; iterate stage by stage.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: if the edge branch fails (e.g. wrong edge-map
                # size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share one structure:
      * ``'f'`` (fast):    3 stages, 16 -> 32 -> 64 channels
      * ``'c'`` (compact): 3 stages, 32 -> 64 -> 128 channels
      * ``'q'`` (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional grayscale edge branch can be fused into the head.  Spatial
    sizes assume 224x224 input (28x28 after 3 pools, 14x14 after 4) —
    TODO confirm against the preprocessing used by callers.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output layer (was hard-coded to 6; default 6 keeps checkpoints loadable),
    and an unknown ``model_type`` raises ``ValueError`` instead of a
    confusing ``NameError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(in_ch, out_ch):
            # One Conv+BN+ReLU+Pool stage (4 modules), so stage i can be
            # addressed as conv_layers[4*i : 4*i + 4] in forward().
            return [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously this fell through to a NameError below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Flat Sequential keeps the original state-dict keys
        # (conv_layers.0, conv_layers.1, ...).
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend(_stage(in_ch, out_ch))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in widths[1:]]
        )

        # Edge detection branch (1-channel input; edge_fc sizing assumes a
        # 224x224 edge map -> 56x56 after two pools — TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head.

        Returns ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (useful for visualization).
        """
        # One attention module per conv stage; iterate stage by stage.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: if the edge branch fails (e.g. wrong edge-map
                # size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share one structure:
      * ``'f'`` (fast):    3 stages, 16 -> 32 -> 64 channels
      * ``'c'`` (compact): 3 stages, 32 -> 64 -> 128 channels
      * ``'q'`` (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional grayscale edge branch can be fused into the head.  Spatial
    sizes assume 224x224 input (28x28 after 3 pools, 14x14 after 4) —
    TODO confirm against the preprocessing used by callers.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output layer (was hard-coded to 6; default 6 keeps checkpoints loadable),
    and an unknown ``model_type`` raises ``ValueError`` instead of a
    confusing ``NameError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(in_ch, out_ch):
            # One Conv+BN+ReLU+Pool stage (4 modules), so stage i can be
            # addressed as conv_layers[4*i : 4*i + 4] in forward().
            return [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously this fell through to a NameError below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Flat Sequential keeps the original state-dict keys
        # (conv_layers.0, conv_layers.1, ...).
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend(_stage(in_ch, out_ch))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in widths[1:]]
        )

        # Edge detection branch (1-channel input; edge_fc sizing assumes a
        # 224x224 edge map -> 56x56 after two pools — TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head.

        Returns ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (useful for visualization).
        """
        # One attention module per conv stage; iterate stage by stage.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: if the edge branch fails (e.g. wrong edge-map
                # size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share one structure:
      * ``'f'`` (fast):    3 stages, 16 -> 32 -> 64 channels
      * ``'c'`` (compact): 3 stages, 32 -> 64 -> 128 channels
      * ``'q'`` (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional grayscale edge branch can be fused into the head.  Spatial
    sizes assume 224x224 input (28x28 after 3 pools, 14x14 after 4) —
    TODO confirm against the preprocessing used by callers.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output layer (was hard-coded to 6; default 6 keeps checkpoints loadable),
    and an unknown ``model_type`` raises ``ValueError`` instead of a
    confusing ``NameError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(in_ch, out_ch):
            # One Conv+BN+ReLU+Pool stage (4 modules), so stage i can be
            # addressed as conv_layers[4*i : 4*i + 4] in forward().
            return [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously this fell through to a NameError below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Flat Sequential keeps the original state-dict keys
        # (conv_layers.0, conv_layers.1, ...).
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend(_stage(in_ch, out_ch))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in widths[1:]]
        )

        # Edge detection branch (1-channel input; edge_fc sizing assumes a
        # 224x224 edge map -> 56x56 after two pools — TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head.

        Returns ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (useful for visualization).
        """
        # One attention module per conv stage; iterate stage by stage.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: if the edge branch fails (e.g. wrong edge-map
                # size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share one structure:
      * ``'f'`` (fast):    3 stages, 16 -> 32 -> 64 channels
      * ``'c'`` (compact): 3 stages, 32 -> 64 -> 128 channels
      * ``'q'`` (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional grayscale edge branch can be fused into the head.  Spatial
    sizes assume 224x224 input (28x28 after 3 pools, 14x14 after 4) —
    TODO confirm against the preprocessing used by callers.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output layer (was hard-coded to 6; default 6 keeps checkpoints loadable),
    and an unknown ``model_type`` raises ``ValueError`` instead of a
    confusing ``NameError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(in_ch, out_ch):
            # One Conv+BN+ReLU+Pool stage (4 modules), so stage i can be
            # addressed as conv_layers[4*i : 4*i + 4] in forward().
            return [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously this fell through to a NameError below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Flat Sequential keeps the original state-dict keys
        # (conv_layers.0, conv_layers.1, ...).
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend(_stage(in_ch, out_ch))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in widths[1:]]
        )

        # Edge detection branch (1-channel input; edge_fc sizing assumes a
        # 224x224 edge map -> 56x56 after two pools — TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head.

        Returns ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (useful for visualization).
        """
        # One attention module per conv stage; iterate stage by stage.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: if the edge branch fails (e.g. wrong edge-map
                # size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    Three capacity variants share one structure:
      * ``'f'`` (fast):    3 stages, 16 -> 32 -> 64 channels
      * ``'c'`` (compact): 3 stages, 32 -> 64 -> 128 channels
      * ``'q'`` (quality): 4 stages, 64 -> 128 -> 256 -> 512 channels

    Each stage is Conv+BN+ReLU+MaxPool followed by a CBAM attention module.
    An optional grayscale edge branch can be fused into the head.  Spatial
    sizes assume 224x224 input (28x28 after 3 pools, 14x14 after 4) —
    TODO confirm against the preprocessing used by callers.

    Fixes vs. the original: ``num_classes`` is now actually used for the
    output layer (was hard-coded to 6; default 6 keeps checkpoints loadable),
    and an unknown ``model_type`` raises ``ValueError`` instead of a
    confusing ``NameError``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        self.model_type = model_type
        self.num_classes = num_classes

        def _stage(in_ch, out_ch):
            # One Conv+BN+ReLU+Pool stage (4 modules), so stage i can be
            # addressed as conv_layers[4*i : 4*i + 4] in forward().
            return [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast: previously this fell through to a NameError below.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )

        # Flat Sequential keeps the original state-dict keys
        # (conv_layers.0, conv_layers.1, ...).
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend(_stage(in_ch, out_ch))
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in widths[1:]]
        )

        # Edge detection branch (1-channel input; edge_fc sizing assumes a
        # 224x224 edge map -> 56x56 after two pools — TODO confirm).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and head.

        Returns ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (useful for visualization).
        """
        # One attention module per conv stage; iterate stage by stage.
        for i in range(len(self.attention_modules)):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = self.attention_modules[i](x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: if the edge branch fails (e.g. wrong edge-map
                # size for edge_fc), fall back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel gate, then spatial gate).

    The layer layout mirrors the released checkpoint: both gates are built
    from bias-free convolutions wrapped in ``nn.Sequential`` so parameter
    names and shapes match the stored weights exactly.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention: 1x1 conv bottleneck (no bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention: single 7x7 conv over the two pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled descriptors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        attn = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * attn
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width:
      'f' (fast):    3 conv stages, 16->32->64 channels
      'c' (compact): 3 conv stages, 32->64->128 channels
      'q' (quality): 4 conv stages, 64->128->256->512 channels

    Spatial sizes assume 224x224 RGB input (and a 224x224 single-channel
    edge map for the optional edge branch) — consistent with the hard-coded
    fc sizes (28 = 224/2^3, 14 = 224/2^4, 56 = 224/2^2); confirm with callers.

    forward returns (logits, attention_map), where attention_map is the
    feature map after the last CBAM block.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone plan: repeated Conv -> BN -> ReLU -> MaxPool stages, with
        # one CBAM module after each stage. The final head size now honours
        # num_classes (previously it was hard-coded to 6, silently ignoring
        # the parameter; the default keeps the old behavior).
        if model_type == 'f':
            channel_plan = [(3, 16), (16, 32), (32, 64)]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channel_plan = [(3, 32), (32, 64), (64, 128)]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512)]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type silently produced a half-built model
            # that crashed later with an unbound-variable error; fail fast.
            raise ValueError(f"unknown model_type: {model_type!r}")
        stages = []
        for in_ch, out_ch in channel_plan:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(out_ch, reduction=8)
            for _, out_ch in channel_plan
        ])
        # Edge-detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head; the first three slots (Linear/ReLU/Dropout)
        # produce the intermediate features shared with the combined head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge-map batch.

        Returns:
            Tuple (logits, attention_map).
        """
        # Each stage occupies four Sequential slots (Conv, BN, ReLU, Pool);
        # this single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') layouts, so attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: if the edge input does not match the
                # expected layout, classify from the main features alone.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    The layer layout mirrors the released checkpoint exactly (1x1 convs for
    the channel MLP and a single bias-free 7x7 conv for the spatial map), so
    state_dict keys line up when loading pretrained weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared channel MLP applied to both pooled descriptors (no bias,
        # matching the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv turning the [avg; max] maps into one attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width:
      'f' (fast):    3 conv stages, 16->32->64 channels
      'c' (compact): 3 conv stages, 32->64->128 channels
      'q' (quality): 4 conv stages, 64->128->256->512 channels

    Spatial sizes assume 224x224 RGB input (and a 224x224 single-channel
    edge map for the optional edge branch) — consistent with the hard-coded
    fc sizes (28 = 224/2^3, 14 = 224/2^4, 56 = 224/2^2); confirm with callers.

    forward returns (logits, attention_map), where attention_map is the
    feature map after the last CBAM block.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone plan: repeated Conv -> BN -> ReLU -> MaxPool stages, with
        # one CBAM module after each stage. The final head size now honours
        # num_classes (previously it was hard-coded to 6, silently ignoring
        # the parameter; the default keeps the old behavior).
        if model_type == 'f':
            channel_plan = [(3, 16), (16, 32), (32, 64)]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channel_plan = [(3, 32), (32, 64), (64, 128)]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512)]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type silently produced a half-built model
            # that crashed later with an unbound-variable error; fail fast.
            raise ValueError(f"unknown model_type: {model_type!r}")
        stages = []
        for in_ch, out_ch in channel_plan:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(out_ch, reduction=8)
            for _, out_ch in channel_plan
        ])
        # Edge-detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head; the first three slots (Linear/ReLU/Dropout)
        # produce the intermediate features shared with the combined head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge-map batch.

        Returns:
            Tuple (logits, attention_map).
        """
        # Each stage occupies four Sequential slots (Conv, BN, ReLU, Pool);
        # this single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') layouts, so attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: if the edge input does not match the
                # expected layout, classify from the main features alone.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    The layer layout mirrors the released checkpoint exactly (1x1 convs for
    the channel MLP and a single bias-free 7x7 conv for the spatial map), so
    state_dict keys line up when loading pretrained weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared channel MLP applied to both pooled descriptors (no bias,
        # matching the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv turning the [avg; max] maps into one attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width:
      'f' (fast):    3 conv stages, 16->32->64 channels
      'c' (compact): 3 conv stages, 32->64->128 channels
      'q' (quality): 4 conv stages, 64->128->256->512 channels

    Spatial sizes assume 224x224 RGB input (and a 224x224 single-channel
    edge map for the optional edge branch) — consistent with the hard-coded
    fc sizes (28 = 224/2^3, 14 = 224/2^4, 56 = 224/2^2); confirm with callers.

    forward returns (logits, attention_map), where attention_map is the
    feature map after the last CBAM block.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone plan: repeated Conv -> BN -> ReLU -> MaxPool stages, with
        # one CBAM module after each stage. The final head size now honours
        # num_classes (previously it was hard-coded to 6, silently ignoring
        # the parameter; the default keeps the old behavior).
        if model_type == 'f':
            channel_plan = [(3, 16), (16, 32), (32, 64)]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channel_plan = [(3, 32), (32, 64), (64, 128)]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512)]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type silently produced a half-built model
            # that crashed later with an unbound-variable error; fail fast.
            raise ValueError(f"unknown model_type: {model_type!r}")
        stages = []
        for in_ch, out_ch in channel_plan:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(out_ch, reduction=8)
            for _, out_ch in channel_plan
        ])
        # Edge-detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head; the first three slots (Linear/ReLU/Dropout)
        # produce the intermediate features shared with the combined head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge-map batch.

        Returns:
            Tuple (logits, attention_map).
        """
        # Each stage occupies four Sequential slots (Conv, BN, ReLU, Pool);
        # this single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') layouts, so attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: if the edge input does not match the
                # expected layout, classify from the main features alone.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    The layer layout mirrors the released checkpoint exactly (1x1 convs for
    the channel MLP and a single bias-free 7x7 conv for the spatial map), so
    state_dict keys line up when loading pretrained weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared channel MLP applied to both pooled descriptors (no bias,
        # matching the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv turning the [avg; max] maps into one attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width:
      'f' (fast):    3 conv stages, 16->32->64 channels
      'c' (compact): 3 conv stages, 32->64->128 channels
      'q' (quality): 4 conv stages, 64->128->256->512 channels

    Spatial sizes assume 224x224 RGB input (and a 224x224 single-channel
    edge map for the optional edge branch) — consistent with the hard-coded
    fc sizes (28 = 224/2^3, 14 = 224/2^4, 56 = 224/2^2); confirm with callers.

    forward returns (logits, attention_map), where attention_map is the
    feature map after the last CBAM block.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone plan: repeated Conv -> BN -> ReLU -> MaxPool stages, with
        # one CBAM module after each stage. The final head size now honours
        # num_classes (previously it was hard-coded to 6, silently ignoring
        # the parameter; the default keeps the old behavior).
        if model_type == 'f':
            channel_plan = [(3, 16), (16, 32), (32, 64)]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channel_plan = [(3, 32), (32, 64), (64, 128)]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512)]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type silently produced a half-built model
            # that crashed later with an unbound-variable error; fail fast.
            raise ValueError(f"unknown model_type: {model_type!r}")
        stages = []
        for in_ch, out_ch in channel_plan:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(out_ch, reduction=8)
            for _, out_ch in channel_plan
        ])
        # Edge-detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head; the first three slots (Linear/ReLU/Dropout)
        # produce the intermediate features shared with the combined head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge-map batch.

        Returns:
            Tuple (logits, attention_map).
        """
        # Each stage occupies four Sequential slots (Conv, BN, ReLU, Pool);
        # this single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') layouts, so attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: if the edge input does not match the
                # expected layout, classify from the main features alone.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    The layer layout mirrors the released checkpoint exactly (1x1 convs for
    the channel MLP and a single bias-free 7x7 conv for the spatial map), so
    state_dict keys line up when loading pretrained weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared channel MLP applied to both pooled descriptors (no bias,
        # matching the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv turning the [avg; max] maps into one attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width:
      'f' (fast):    3 conv stages, 16->32->64 channels
      'c' (compact): 3 conv stages, 32->64->128 channels
      'q' (quality): 4 conv stages, 64->128->256->512 channels

    Spatial sizes assume 224x224 RGB input (and a 224x224 single-channel
    edge map for the optional edge branch) — consistent with the hard-coded
    fc sizes (28 = 224/2^3, 14 = 224/2^4, 56 = 224/2^2); confirm with callers.

    forward returns (logits, attention_map), where attention_map is the
    feature map after the last CBAM block.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone plan: repeated Conv -> BN -> ReLU -> MaxPool stages, with
        # one CBAM module after each stage. The final head size now honours
        # num_classes (previously it was hard-coded to 6, silently ignoring
        # the parameter; the default keeps the old behavior).
        if model_type == 'f':
            channel_plan = [(3, 16), (16, 32), (32, 64)]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channel_plan = [(3, 32), (32, 64), (64, 128)]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512)]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type silently produced a half-built model
            # that crashed later with an unbound-variable error; fail fast.
            raise ValueError(f"unknown model_type: {model_type!r}")
        stages = []
        for in_ch, out_ch in channel_plan:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(out_ch, reduction=8)
            for _, out_ch in channel_plan
        ])
        # Edge-detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head; the first three slots (Linear/ReLU/Dropout)
        # produce the intermediate features shared with the combined head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge-map batch.

        Returns:
            Tuple (logits, attention_map).
        """
        # Each stage occupies four Sequential slots (Conv, BN, ReLU, Pool);
        # this single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') layouts, so attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: if the edge input does not match the
                # expected layout, classify from the main features alone.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    The layer layout mirrors the released checkpoint exactly (1x1 convs for
    the channel MLP and a single bias-free 7x7 conv for the spatial map), so
    state_dict keys line up when loading pretrained weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared channel MLP applied to both pooled descriptors (no bias,
        # matching the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv turning the [avg; max] maps into one attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width:
      'f' (fast):    3 conv stages, 16->32->64 channels
      'c' (compact): 3 conv stages, 32->64->128 channels
      'q' (quality): 4 conv stages, 64->128->256->512 channels

    Spatial sizes assume 224x224 RGB input (and a 224x224 single-channel
    edge map for the optional edge branch) — consistent with the hard-coded
    fc sizes (28 = 224/2^3, 14 = 224/2^4, 56 = 224/2^2); confirm with callers.

    forward returns (logits, attention_map), where attention_map is the
    feature map after the last CBAM block.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone plan: repeated Conv -> BN -> ReLU -> MaxPool stages, with
        # one CBAM module after each stage. The final head size now honours
        # num_classes (previously it was hard-coded to 6, silently ignoring
        # the parameter; the default keeps the old behavior).
        if model_type == 'f':
            channel_plan = [(3, 16), (16, 32), (32, 64)]
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            channel_plan = [(3, 32), (32, 64), (64, 128)]
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512)]
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Previously an unknown type silently produced a half-built model
            # that crashed later with an unbound-variable error; fail fast.
            raise ValueError(f"unknown model_type: {model_type!r}")
        stages = []
        for in_ch, out_ch in channel_plan:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList([
            CBAMAttentionCheckpoint(out_ch, reduction=8)
            for _, out_ch in channel_plan
        ])
        # Edge-detection branch (single-channel edge image).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Two 2x2 pools: 224 -> 112 -> 56, hence 64 * 56 * 56 inputs.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head; the first three slots (Linear/ReLU/Dropout)
        # produce the intermediate features shared with the combined head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available (concatenated input).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone and, optionally, the edge branch.

        Args:
            x: (N, 3, 224, 224) image batch.
            edge_x: optional (N, 1, 224, 224) edge-map batch.

        Returns:
            Tuple (logits, attention_map).
        """
        # Each stage occupies four Sequential slots (Conv, BN, ReLU, Pool);
        # this single loop covers both the 3-stage ('f'/'c') and 4-stage
        # ('q') layouts, so attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: if the edge input does not match the
                # expected layout, classify from the main features alone.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: channel gating followed by spatial gating.

    The layer layout mirrors the released checkpoint exactly (1x1 convs for
    the channel MLP and a single bias-free 7x7 conv for the spatial map), so
    state_dict keys line up when loading pretrained weights.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Shared channel MLP applied to both pooled descriptors (no bias,
        # matching the checkpoint).
        hidden = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 conv turning the [avg; max] maps into one attention map.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled statistics ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial attention: gate each location by cross-channel stats ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Return (logits, attention_map); ``edge_x`` is an optional 1-channel edge map."""
    # Interleave each 4-module backbone stage (Conv-BN-ReLU-Pool) with its
    # attention block; the loop covers both the 3-stage ('f'/'c') and the
    # 4-stage ('q') variants, replacing the duplicated per-variant branches.
    for i, attention in enumerate(self.attention_modules):
        x = self.conv_layers[4 * i:4 * i + 4](x)
        x = attention(x)
    attention_map = x
    x = x.view(x.size(0), -1)
    # The first Linear+ReLU+Dropout of the main head doubles as the feature
    # extractor feeding the combined (edge) head.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)
    if edge_x is not None:
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
            combined = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(combined)
        except Exception:
            # Best-effort: a wrong-sized edge map (edge_fc expects 64*56*56)
            # must not break plain classification.
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to load the released checkpoint.

    ``model_type`` picks the variant: 'f' (stages 16-32-64), 'c' (32-64-128)
    or 'q' (64-128-256-512).  ``forward`` returns ``(logits, attention_map)``.
    The hard-coded feature-map sides (28/14) and the edge branch's 56x56
    assume a 224x224 input -- TODO confirm against the released preprocessing.
    """

    # model_type -> (stage channel widths, hidden fc sizes, final map side).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128), 28),
        'c': ((32, 64, 128), (512, 256), 28),
        'q': ((64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        try:
            channels, hidden, side = self._CONFIGS[model_type]
        except KeyError:
            # The original fell through silently and later died with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )
        # Backbone: one Conv-BN-ReLU-MaxPool stage per entry in ``channels``
        # (module indices match the checkpoint's flat Sequential layout).
        stages = []
        in_ch = 3
        for out_ch in channels:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        fc_input = channels[-1] * side * side
        # The last size was hard-coded to 6; use num_classes (the default of 6
        # keeps checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]
        # Edge-detection branch: grayscale input, two conv+pool steps.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternate head used when edge features are present.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); ``edge_x`` is an optional 1-channel map."""
        # Interleave each 4-module backbone stage with its attention block;
        # the loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # The first Linear+ReLU+Dropout of the main head doubles as the
        # feature extractor feeding the combined (edge) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a wrong-sized edge map (edge_fc expects
                # 64*56*56) must not break plain classification.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to load the released checkpoint.

    ``model_type`` picks the variant: 'f' (stages 16-32-64), 'c' (32-64-128)
    or 'q' (64-128-256-512).  ``forward`` returns ``(logits, attention_map)``.
    The hard-coded feature-map sides (28/14) and the edge branch's 56x56
    assume a 224x224 input -- TODO confirm against the released preprocessing.
    """

    # model_type -> (stage channel widths, hidden fc sizes, final map side).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128), 28),
        'c': ((32, 64, 128), (512, 256), 28),
        'q': ((64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        try:
            channels, hidden, side = self._CONFIGS[model_type]
        except KeyError:
            # The original fell through silently and later died with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )
        # Backbone: one Conv-BN-ReLU-MaxPool stage per entry in ``channels``
        # (module indices match the checkpoint's flat Sequential layout).
        stages = []
        in_ch = 3
        for out_ch in channels:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        fc_input = channels[-1] * side * side
        # The last size was hard-coded to 6; use num_classes (the default of 6
        # keeps checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]
        # Edge-detection branch: grayscale input, two conv+pool steps.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternate head used when edge features are present.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); ``edge_x`` is an optional 1-channel map."""
        # Interleave each 4-module backbone stage with its attention block;
        # the loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # The first Linear+ReLU+Dropout of the main head doubles as the
        # feature extractor feeding the combined (edge) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a wrong-sized edge map (edge_fc expects
                # 64*56*56) must not break plain classification.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to load the released checkpoint.

    ``model_type`` picks the variant: 'f' (stages 16-32-64), 'c' (32-64-128)
    or 'q' (64-128-256-512).  ``forward`` returns ``(logits, attention_map)``.
    The hard-coded feature-map sides (28/14) and the edge branch's 56x56
    assume a 224x224 input -- TODO confirm against the released preprocessing.
    """

    # model_type -> (stage channel widths, hidden fc sizes, final map side).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128), 28),
        'c': ((32, 64, 128), (512, 256), 28),
        'q': ((64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        try:
            channels, hidden, side = self._CONFIGS[model_type]
        except KeyError:
            # The original fell through silently and later died with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )
        # Backbone: one Conv-BN-ReLU-MaxPool stage per entry in ``channels``
        # (module indices match the checkpoint's flat Sequential layout).
        stages = []
        in_ch = 3
        for out_ch in channels:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        fc_input = channels[-1] * side * side
        # The last size was hard-coded to 6; use num_classes (the default of 6
        # keeps checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]
        # Edge-detection branch: grayscale input, two conv+pool steps.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternate head used when edge features are present.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); ``edge_x`` is an optional 1-channel map."""
        # Interleave each 4-module backbone stage with its attention block;
        # the loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # The first Linear+ReLU+Dropout of the main head doubles as the
        # feature extractor feeding the combined (edge) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a wrong-sized edge map (edge_fc expects
                # 64*56*56) must not break plain classification.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to load the released checkpoint.

    ``model_type`` picks the variant: 'f' (stages 16-32-64), 'c' (32-64-128)
    or 'q' (64-128-256-512).  ``forward`` returns ``(logits, attention_map)``.
    The hard-coded feature-map sides (28/14) and the edge branch's 56x56
    assume a 224x224 input -- TODO confirm against the released preprocessing.
    """

    # model_type -> (stage channel widths, hidden fc sizes, final map side).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128), 28),
        'c': ((32, 64, 128), (512, 256), 28),
        'q': ((64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        try:
            channels, hidden, side = self._CONFIGS[model_type]
        except KeyError:
            # The original fell through silently and later died with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )
        # Backbone: one Conv-BN-ReLU-MaxPool stage per entry in ``channels``
        # (module indices match the checkpoint's flat Sequential layout).
        stages = []
        in_ch = 3
        for out_ch in channels:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        fc_input = channels[-1] * side * side
        # The last size was hard-coded to 6; use num_classes (the default of 6
        # keeps checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]
        # Edge-detection branch: grayscale input, two conv+pool steps.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternate head used when edge features are present.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); ``edge_x`` is an optional 1-channel map."""
        # Interleave each 4-module backbone stage with its attention block;
        # the loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # The first Linear+ReLU+Dropout of the main head doubles as the
        # feature extractor feeding the combined (edge) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a wrong-sized edge map (edge_fc expects
                # 64*56*56) must not break plain classification.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to load the released checkpoint.

    ``model_type`` picks the variant: 'f' (stages 16-32-64), 'c' (32-64-128)
    or 'q' (64-128-256-512).  ``forward`` returns ``(logits, attention_map)``.
    The hard-coded feature-map sides (28/14) and the edge branch's 56x56
    assume a 224x224 input -- TODO confirm against the released preprocessing.
    """

    # model_type -> (stage channel widths, hidden fc sizes, final map side).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128), 28),
        'c': ((32, 64, 128), (512, 256), 28),
        'q': ((64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        try:
            channels, hidden, side = self._CONFIGS[model_type]
        except KeyError:
            # The original fell through silently and later died with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )
        # Backbone: one Conv-BN-ReLU-MaxPool stage per entry in ``channels``
        # (module indices match the checkpoint's flat Sequential layout).
        stages = []
        in_ch = 3
        for out_ch in channels:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        fc_input = channels[-1] * side * side
        # The last size was hard-coded to 6; use num_classes (the default of 6
        # keeps checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]
        # Edge-detection branch: grayscale input, two conv+pool steps.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternate head used when edge features are present.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); ``edge_x`` is an optional 1-channel map."""
        # Interleave each 4-module backbone stage with its attention block;
        # the loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # The first Linear+ReLU+Dropout of the main head doubles as the
        # feature extractor feeding the combined (edge) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a wrong-sized edge map (edge_fc expects
                # 64*56*56) must not break plain classification.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier rebuilt to load the released checkpoint.

    ``model_type`` picks the variant: 'f' (stages 16-32-64), 'c' (32-64-128)
    or 'q' (64-128-256-512).  ``forward`` returns ``(logits, attention_map)``.
    The hard-coded feature-map sides (28/14) and the edge branch's 56x56
    assume a 224x224 input -- TODO confirm against the released preprocessing.
    """

    # model_type -> (stage channel widths, hidden fc sizes, final map side).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128), 28),
        'c': ((32, 64, 128), (512, 256), 28),
        'q': ((64, 128, 256, 512), (1024, 512), 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        try:
            channels, hidden, side = self._CONFIGS[model_type]
        except KeyError:
            # The original fell through silently and later died with a
            # NameError on fc_input; fail fast with a clear message instead.
            raise ValueError(
                "model_type must be one of 'f', 'c', 'q', got %r" % (model_type,)
            )
        # Backbone: one Conv-BN-ReLU-MaxPool stage per entry in ``channels``
        # (module indices match the checkpoint's flat Sequential layout).
        stages = []
        in_ch = 3
        for out_ch in channels:
            stages.extend([
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ])
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        fc_input = channels[-1] * side * side
        # The last size was hard-coded to 6; use num_classes (the default of 6
        # keeps checkpoint compatibility).
        fc_sizes = [hidden[0], hidden[1], num_classes]
        # Edge-detection branch: grayscale input, two conv+pool steps.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternate head used when edge features are present.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); ``edge_x`` is an optional 1-channel map."""
        # Interleave each 4-module backbone stage with its attention block;
        # the loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * i + 4](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # The first Linear+ReLU+Dropout of the main head doubles as the
        # feature extractor feeding the combined (edge) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a wrong-sized edge map (edge_fc expects
                # 64*56*56) must not break plain classification.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style channel + spatial attention, layer shapes matching the checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: 1x1-conv bottleneck, no bias (as in the checkpoint).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over stacked [avg, max] maps, no bias.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared bottleneck on avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: channel-wise mean/max stacked into a 2-channel input.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([mean_map, max_map], dim=1)
        return x * torch.sigmoid(self.spatial_attention(spatial_in))
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_mapModelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the Vbai-DPA-2.4 checkpoint.

    Channel attention is a shared two-layer 1x1-conv MLP (no bias) and
    spatial attention is a single 7x7 conv (no bias), exactly as stored in
    the released checkpoint so state_dict keys and shapes line up.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Clamp so very small channel counts never yield a 0-channel layer.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output has x's shape."""
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 7x7 conv over per-pixel [mean, max] of channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 CNN classifier mirroring the released checkpoint layout.

    The backbone is a flat ``nn.Sequential`` of Conv+BN+ReLU+MaxPool stages
    (kept flat so ``conv_layers.N.*`` checkpoint keys line up) with a CBAM
    block applied after each stage, followed by a fully-connected head.  An
    optional single-channel edge image can be fused into the head.

    Args:
        model_type: checkpoint variant, one of ``'f'``, ``'c'`` or ``'q'``;
            selects per-stage channel widths and classifier sizes.
        num_classes: number of output classes (default 6, matching the
            released checkpoint).
    """

    # Per-variant (stage channels, hidden FC sizes, final feature-map side).
    # 'f'/'c' pool 3 times (224 -> 28); 'q' pools 4 times (224 -> 14).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            # Fail fast instead of the NameError a silent fall-through caused.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, feat_side = self._VARIANTS[model_type]

        # Backbone stages: Conv+BN+ReLU+Pool, four modules per stage.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block per stage, applied between stages in forward().
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * feat_side * feat_side
        # Final layer honors num_classes (previously hard-coded to 6).
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch; 64 * 56 * 56 assumes a 224x224 one-channel
        # edge map pooled twice -- TODO confirm expected edge input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head.  Index boundaries matter in forward(): [:3] yields the
        # 'features' used for edge fusion, [3:] completes the plain path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse an edge map ``edge_x``.

        Returns:
            (logits, attention_map) where attention_map is the last
            pre-flatten backbone activation, useful for visualization.
        """
        # Run each 4-module backbone stage, then its CBAM block.  The loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> fusion features.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) reverts to the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the Vbai-DPA-2.4 checkpoint.

    Channel attention is a shared two-layer 1x1-conv MLP (no bias) and
    spatial attention is a single 7x7 conv (no bias), exactly as stored in
    the released checkpoint so state_dict keys and shapes line up.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Clamp so very small channel counts never yield a 0-channel layer.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output has x's shape."""
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 7x7 conv over per-pixel [mean, max] of channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 CNN classifier mirroring the released checkpoint layout.

    The backbone is a flat ``nn.Sequential`` of Conv+BN+ReLU+MaxPool stages
    (kept flat so ``conv_layers.N.*`` checkpoint keys line up) with a CBAM
    block applied after each stage, followed by a fully-connected head.  An
    optional single-channel edge image can be fused into the head.

    Args:
        model_type: checkpoint variant, one of ``'f'``, ``'c'`` or ``'q'``;
            selects per-stage channel widths and classifier sizes.
        num_classes: number of output classes (default 6, matching the
            released checkpoint).
    """

    # Per-variant (stage channels, hidden FC sizes, final feature-map side).
    # 'f'/'c' pool 3 times (224 -> 28); 'q' pools 4 times (224 -> 14).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            # Fail fast instead of the NameError a silent fall-through caused.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, feat_side = self._VARIANTS[model_type]

        # Backbone stages: Conv+BN+ReLU+Pool, four modules per stage.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block per stage, applied between stages in forward().
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * feat_side * feat_side
        # Final layer honors num_classes (previously hard-coded to 6).
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch; 64 * 56 * 56 assumes a 224x224 one-channel
        # edge map pooled twice -- TODO confirm expected edge input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head.  Index boundaries matter in forward(): [:3] yields the
        # 'features' used for edge fusion, [3:] completes the plain path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse an edge map ``edge_x``.

        Returns:
            (logits, attention_map) where attention_map is the last
            pre-flatten backbone activation, useful for visualization.
        """
        # Run each 4-module backbone stage, then its CBAM block.  The loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> fusion features.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) reverts to the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the Vbai-DPA-2.4 checkpoint.

    Channel attention is a shared two-layer 1x1-conv MLP (no bias) and
    spatial attention is a single 7x7 conv (no bias), exactly as stored in
    the released checkpoint so state_dict keys and shapes line up.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Clamp so very small channel counts never yield a 0-channel layer.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output has x's shape."""
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 7x7 conv over per-pixel [mean, max] of channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 CNN classifier mirroring the released checkpoint layout.

    The backbone is a flat ``nn.Sequential`` of Conv+BN+ReLU+MaxPool stages
    (kept flat so ``conv_layers.N.*`` checkpoint keys line up) with a CBAM
    block applied after each stage, followed by a fully-connected head.  An
    optional single-channel edge image can be fused into the head.

    Args:
        model_type: checkpoint variant, one of ``'f'``, ``'c'`` or ``'q'``;
            selects per-stage channel widths and classifier sizes.
        num_classes: number of output classes (default 6, matching the
            released checkpoint).
    """

    # Per-variant (stage channels, hidden FC sizes, final feature-map side).
    # 'f'/'c' pool 3 times (224 -> 28); 'q' pools 4 times (224 -> 14).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            # Fail fast instead of the NameError a silent fall-through caused.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, feat_side = self._VARIANTS[model_type]

        # Backbone stages: Conv+BN+ReLU+Pool, four modules per stage.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block per stage, applied between stages in forward().
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * feat_side * feat_side
        # Final layer honors num_classes (previously hard-coded to 6).
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch; 64 * 56 * 56 assumes a 224x224 one-channel
        # edge map pooled twice -- TODO confirm expected edge input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head.  Index boundaries matter in forward(): [:3] yields the
        # 'features' used for edge fusion, [3:] completes the plain path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse an edge map ``edge_x``.

        Returns:
            (logits, attention_map) where attention_map is the last
            pre-flatten backbone activation, useful for visualization.
        """
        # Run each 4-module backbone stage, then its CBAM block.  The loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> fusion features.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) reverts to the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the Vbai-DPA-2.4 checkpoint.

    Channel attention is a shared two-layer 1x1-conv MLP (no bias) and
    spatial attention is a single 7x7 conv (no bias), exactly as stored in
    the released checkpoint so state_dict keys and shapes line up.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Clamp so very small channel counts never yield a 0-channel layer.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output has x's shape."""
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 7x7 conv over per-pixel [mean, max] of channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 CNN classifier mirroring the released checkpoint layout.

    The backbone is a flat ``nn.Sequential`` of Conv+BN+ReLU+MaxPool stages
    (kept flat so ``conv_layers.N.*`` checkpoint keys line up) with a CBAM
    block applied after each stage, followed by a fully-connected head.  An
    optional single-channel edge image can be fused into the head.

    Args:
        model_type: checkpoint variant, one of ``'f'``, ``'c'`` or ``'q'``;
            selects per-stage channel widths and classifier sizes.
        num_classes: number of output classes (default 6, matching the
            released checkpoint).
    """

    # Per-variant (stage channels, hidden FC sizes, final feature-map side).
    # 'f'/'c' pool 3 times (224 -> 28); 'q' pools 4 times (224 -> 14).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            # Fail fast instead of the NameError a silent fall-through caused.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, feat_side = self._VARIANTS[model_type]

        # Backbone stages: Conv+BN+ReLU+Pool, four modules per stage.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block per stage, applied between stages in forward().
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * feat_side * feat_side
        # Final layer honors num_classes (previously hard-coded to 6).
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch; 64 * 56 * 56 assumes a 224x224 one-channel
        # edge map pooled twice -- TODO confirm expected edge input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head.  Index boundaries matter in forward(): [:3] yields the
        # 'features' used for edge fusion, [3:] completes the plain path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse an edge map ``edge_x``.

        Returns:
            (logits, attention_map) where attention_map is the last
            pre-flatten backbone activation, useful for visualization.
        """
        # Run each 4-module backbone stage, then its CBAM block.  The loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> fusion features.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) reverts to the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block laid out to match the Vbai-DPA-2.4 checkpoint.

    Channel attention is a shared two-layer 1x1-conv MLP (no bias) and
    spatial attention is a single 7x7 conv (no bias), exactly as stored in
    the released checkpoint so state_dict keys and shapes line up.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Clamp so very small channel counts never yield a 0-channel layer.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention over the [avg, max] channel-pooled maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention; output has x's shape."""
        # Channel attention: shared MLP over global avg- and max-pooled maps.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 7x7 conv over per-pixel [mean, max] of channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 CNN classifier mirroring the released checkpoint layout.

    The backbone is a flat ``nn.Sequential`` of Conv+BN+ReLU+MaxPool stages
    (kept flat so ``conv_layers.N.*`` checkpoint keys line up) with a CBAM
    block applied after each stage, followed by a fully-connected head.  An
    optional single-channel edge image can be fused into the head.

    Args:
        model_type: checkpoint variant, one of ``'f'``, ``'c'`` or ``'q'``;
            selects per-stage channel widths and classifier sizes.
        num_classes: number of output classes (default 6, matching the
            released checkpoint).
    """

    # Per-variant (stage channels, hidden FC sizes, final feature-map side).
    # 'f'/'c' pool 3 times (224 -> 28); 'q' pools 4 times (224 -> 14).
    _VARIANTS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super().__init__()
        if model_type not in self._VARIANTS:
            # Fail fast instead of the NameError a silent fall-through caused.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden, feat_side = self._VARIANTS[model_type]

        # Backbone stages: Conv+BN+ReLU+Pool, four modules per stage.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block per stage, applied between stages in forward().
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        fc_input = channels[-1] * feat_side * feat_side
        # Final layer honors num_classes (previously hard-coded to 6).
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch; 64 * 56 * 56 assumes a 224x224 one-channel
        # edge map pooled twice -- TODO confirm expected edge input size.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head.  Index boundaries matter in forward(): [:3] yields the
        # 'features' used for edge fusion, [3:] completes the plain path.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Fusion head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse an edge map ``edge_x``.

        Returns:
            (logits, attention_map) where attention_map is the last
            pre-flatten backbone activation, useful for visualization.
        """
        # Run each 4-module backbone stage, then its CBAM block.  The loop
        # covers both the 3-stage ('f'/'c') and 4-stage ('q') variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head -> fusion features.
        features = self.classifier[:3](x)

        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fallback: a malformed edge input (e.g. wrong
                # spatial size for edge_fc) reverts to the image-only head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the backbone (conv stages interleaved with CBAM attention),
    then the classifier head; returns (logits, attention_map)."""
    # Each stage is 4 sequential modules (Conv+BN+ReLU+Pool) followed by
    # its CBAM block; 'f'/'c' have 3 stages, 'q' has 4.
    if self.model_type == 'f' or self.model_type == 'c':
        for stage in range(3):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
    elif self.model_type == 'q':
        for stage in range(4):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
    flat = x.view(x.size(0), -1)
    # First Linear+ReLU+Dropout of the main head yields the feature vector
    # shared with the combined (edge-aware) head.
    features = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
    if edge_x is not None:
        try:
            e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
            e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
            edge_features = self.edge_fc(e.view(e.size(0), -1))
            output = self.combined_classifier(
                torch.cat([features, edge_features], dim=1)
            )
        except Exception:
            # Best-effort: fall back to the main head if the edge branch
            # fails (e.g. unexpected edge input size for edge_fc).
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel gate, then spatial gate).

    Both branches use bias-free convolutions so parameter names and shapes
    line up with the released Vbai-DPA 2.4 checkpoint.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention: squeeze-excite MLP built from 1x1 convs.
        # max(..., 1) keeps the bottleneck non-empty for small channel counts.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: a single 7x7 conv over the [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over per-pixel [mean, max] across channels.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matched to the released checkpoint layout.

    ``model_type`` selects the capacity variant: ``'f'`` (3 narrow conv
    stages), ``'c'`` (3 wider stages) or ``'q'`` (4 stages). The flattened
    sizes below assume a 3x224x224 main input; the optional edge branch
    expects a 1x224x224 input.
    """

    def __init__(self, model_type='f', num_classes=6):
        """Build the network.

        Args:
            model_type: 'f', 'c' or 'q' — width/depth variant.
            num_classes: output layer size (default 6, matching the
                released checkpoint; previously this argument was stored
                but ignored and 6 was hard-coded).

        Raises:
            ValueError: if ``model_type`` is not one of 'f', 'c', 'q'.
        """
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv+BN+ReLU+MaxPool stages. Spatial sizes:
        # 224 / 2**3 = 28 for three stages, 224 / 2**4 = 14 for four.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of the NameError on
            # fc_input that an unknown variant used to trigger below.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        # Edge-detection side branch (grayscale input; two pools in
        # forward(): 224 -> 112 -> 56, hence the edge_fc input size).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are available: the first hidden
        # activation (fc_sizes[0]) is concatenated with 128 edge features.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the classifier.

        Args:
            x: main input, shape (B, 3, 224, 224).
            edge_x: optional edge map, shape (B, 1, 224, 224).

        Returns:
            (logits, attention_map): logits has shape (B, num_classes);
            attention_map is the last attention-weighted feature map.
        """
        # Each stage is 4 sequential modules (Conv+BN+ReLU+Pool) followed
        # by the matching CBAM block.
        if self.model_type == 'f' or self.model_type == 'c':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head yields the feature
        # vector shared with the combined (edge-aware) head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: fall back to the main head if the edge
                # branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate); bias-free convs
    keep the parameter layout identical to the released checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Channel gate: squeeze-excite style MLP built from 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        gate_mlp = self.channel_attention
        channel_gate = torch.sigmoid(
            gate_mlp(F.adaptive_avg_pool2d(x, 1))
            + gate_mlp(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier ('f'/'c'/'q' capacity variants) with CBAM
    attention after each conv stage and an optional edge-input branch.
    Flattened sizes assume 3x224x224 main input, 1x224x224 edge input."""

    # variant -> (stage out-channels, flattened conv size, head widths)
    _PLANS = {
        'f': ((16, 32, 64), 64 * 28 * 28, (256, 128, 6)),
        'c': ((32, 64, 128), 128 * 28 * 28, (512, 256, 6)),
        'q': ((64, 128, 256, 512), 512 * 14 * 14, (1024, 512, 6)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        widths, fc_input, fc_sizes = self._PLANS[model_type]
        # Backbone: Conv+BN+ReLU+MaxPool per stage, module order identical
        # to the checkpoint.
        stages = []
        c_in = 3
        for c_out in widths:
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            c_in = c_out
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )
        # Edge-detection side branch (two pools in forward(): 224 -> 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are concatenated onto the first
        # hidden activation (fc_sizes[0] + 128 inputs).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map)."""
        if self.model_type in ('f', 'c'):
            stage_count = 3
        elif self.model_type == 'q':
            stage_count = 4
        # Interleave each 4-module conv stage with its CBAM block.
        for stage in range(stage_count):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        flat = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head produces the shared
        # feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fallback to the main head on edge failure.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate); bias-free convs
    keep the parameter layout identical to the released checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Channel gate: squeeze-excite style MLP built from 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        gate_mlp = self.channel_attention
        channel_gate = torch.sigmoid(
            gate_mlp(F.adaptive_avg_pool2d(x, 1))
            + gate_mlp(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier ('f'/'c'/'q' capacity variants) with CBAM
    attention after each conv stage and an optional edge-input branch.
    Flattened sizes assume 3x224x224 main input, 1x224x224 edge input."""

    # variant -> (stage out-channels, flattened conv size, head widths)
    _PLANS = {
        'f': ((16, 32, 64), 64 * 28 * 28, (256, 128, 6)),
        'c': ((32, 64, 128), 128 * 28 * 28, (512, 256, 6)),
        'q': ((64, 128, 256, 512), 512 * 14 * 14, (1024, 512, 6)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        widths, fc_input, fc_sizes = self._PLANS[model_type]
        # Backbone: Conv+BN+ReLU+MaxPool per stage, module order identical
        # to the checkpoint.
        stages = []
        c_in = 3
        for c_out in widths:
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            c_in = c_out
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )
        # Edge-detection side branch (two pools in forward(): 224 -> 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are concatenated onto the first
        # hidden activation (fc_sizes[0] + 128 inputs).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map)."""
        if self.model_type in ('f', 'c'):
            stage_count = 3
        elif self.model_type == 'q':
            stage_count = 4
        # Interleave each 4-module conv stage with its CBAM block.
        for stage in range(stage_count):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        flat = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head produces the shared
        # feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fallback to the main head on edge failure.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate); bias-free convs
    keep the parameter layout identical to the released checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Channel gate: squeeze-excite style MLP built from 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        gate_mlp = self.channel_attention
        channel_gate = torch.sigmoid(
            gate_mlp(F.adaptive_avg_pool2d(x, 1))
            + gate_mlp(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier ('f'/'c'/'q' capacity variants) with CBAM
    attention after each conv stage and an optional edge-input branch.
    Flattened sizes assume 3x224x224 main input, 1x224x224 edge input."""

    # variant -> (stage out-channels, flattened conv size, head widths)
    _PLANS = {
        'f': ((16, 32, 64), 64 * 28 * 28, (256, 128, 6)),
        'c': ((32, 64, 128), 128 * 28 * 28, (512, 256, 6)),
        'q': ((64, 128, 256, 512), 512 * 14 * 14, (1024, 512, 6)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        widths, fc_input, fc_sizes = self._PLANS[model_type]
        # Backbone: Conv+BN+ReLU+MaxPool per stage, module order identical
        # to the checkpoint.
        stages = []
        c_in = 3
        for c_out in widths:
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            c_in = c_out
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )
        # Edge-detection side branch (two pools in forward(): 224 -> 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are concatenated onto the first
        # hidden activation (fc_sizes[0] + 128 inputs).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map)."""
        if self.model_type in ('f', 'c'):
            stage_count = 3
        elif self.model_type == 'q':
            stage_count = 4
        # Interleave each 4-module conv stage with its CBAM block.
        for stage in range(stage_count):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        flat = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head produces the shared
        # feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fallback to the main head on edge failure.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate); bias-free convs
    keep the parameter layout identical to the released checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Channel gate: squeeze-excite style MLP built from 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        gate_mlp = self.channel_attention
        channel_gate = torch.sigmoid(
            gate_mlp(F.adaptive_avg_pool2d(x, 1))
            + gate_mlp(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier ('f'/'c'/'q' capacity variants) with CBAM
    attention after each conv stage and an optional edge-input branch.
    Flattened sizes assume 3x224x224 main input, 1x224x224 edge input."""

    # variant -> (stage out-channels, flattened conv size, head widths)
    _PLANS = {
        'f': ((16, 32, 64), 64 * 28 * 28, (256, 128, 6)),
        'c': ((32, 64, 128), 128 * 28 * 28, (512, 256, 6)),
        'q': ((64, 128, 256, 512), 512 * 14 * 14, (1024, 512, 6)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        widths, fc_input, fc_sizes = self._PLANS[model_type]
        # Backbone: Conv+BN+ReLU+MaxPool per stage, module order identical
        # to the checkpoint.
        stages = []
        c_in = 3
        for c_out in widths:
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            c_in = c_out
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )
        # Edge-detection side branch (two pools in forward(): 224 -> 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are concatenated onto the first
        # hidden activation (fc_sizes[0] + 128 inputs).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map)."""
        if self.model_type in ('f', 'c'):
            stage_count = 3
        elif self.model_type == 'q':
            stage_count = 4
        # Interleave each 4-module conv stage with its CBAM block.
        for stage in range(stage_count):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        flat = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head produces the shared
        # feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fallback to the main head on edge failure.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate); bias-free convs
    keep the parameter layout identical to the released checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Channel gate: squeeze-excite style MLP built from 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        gate_mlp = self.channel_attention
        channel_gate = torch.sigmoid(
            gate_mlp(F.adaptive_avg_pool2d(x, 1))
            + gate_mlp(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier ('f'/'c'/'q' capacity variants) with CBAM
    attention after each conv stage and an optional edge-input branch.
    Flattened sizes assume 3x224x224 main input, 1x224x224 edge input."""

    # variant -> (stage out-channels, flattened conv size, head widths)
    _PLANS = {
        'f': ((16, 32, 64), 64 * 28 * 28, (256, 128, 6)),
        'c': ((32, 64, 128), 128 * 28 * 28, (512, 256, 6)),
        'q': ((64, 128, 256, 512), 512 * 14 * 14, (1024, 512, 6)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        widths, fc_input, fc_sizes = self._PLANS[model_type]
        # Backbone: Conv+BN+ReLU+MaxPool per stage, module order identical
        # to the checkpoint.
        stages = []
        c_in = 3
        for c_out in widths:
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            c_in = c_out
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )
        # Edge-detection side branch (two pools in forward(): 224 -> 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are concatenated onto the first
        # hidden activation (fc_sizes[0] + 128 inputs).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map)."""
        if self.model_type in ('f', 'c'):
            stage_count = 3
        elif self.model_type == 'q':
            stage_count = 4
        # Interleave each 4-module conv stage with its CBAM block.
        for stage in range(stage_count):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        flat = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head produces the shared
        # feature vector.
        features = self.classifier[2](self.classifier[1](self.classifier[0](flat)))
        if edge_x is not None:
            try:
                e = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                e = F.max_pool2d(F.relu(self.edge_conv2(e)), 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                output = self.combined_classifier(
                    torch.cat([features, edge_features], dim=1)
                )
            except Exception:
                # Best-effort fallback to the main head on edge failure.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate then spatial gate); bias-free convs
    keep the parameter layout identical to the released checkpoint."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        squeezed = max(channels // reduction, 1)
        # Channel gate: squeeze-excite style MLP built from 1x1 convs.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial gate: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        gate_mlp = self.channel_attention
        channel_gate = torch.sigmoid(
            gate_mlp(F.adaptive_avg_pool2d(x, 1))
            + gate_mlp(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map = torch.max(x, dim=1, keepdim=True)[0]
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, rebuilt to match the released checkpoint layout.

    All variants share one structure: stacked Conv-BN-ReLU-MaxPool stages, a
    CBAM module after every stage, a flatten + MLP classifier, and an optional
    edge branch whose 128-d features are fused before the final head.

    Args:
        model_type: 'f', 'c' or 'q' variant (see ``_CONFIGS``).
        num_classes: number of output classes. Previously this argument was
            silently ignored (output hard-coded to 6); it is now honoured, with
            the default 6 keeping the layout checkpoint-identical.

    Forward returns ``(logits, attention_map)`` where ``attention_map`` is the
    activation after the last attention stage.
    """

    # Per-variant layout: (channel progression, hidden fc sizes, final map side).
    # Spatial sizes assume 224x224 RGB input (224 / 2**n_stages) — TODO confirm.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Originally an unknown type left fc_input/fc_sizes unbound and
            # crashed later with a NameError; fail fast instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_hidden, spatial = self._CONFIGS[model_type]
        # Build Conv+BN+ReLU+Pool stages; flat nn.Sequential numbering (4 modules
        # per stage) must match the checkpoint's keys exactly.
        stage_modules = []
        attention = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            attention.append(CBAMAttentionCheckpoint(c_out, reduction=8))
        self.conv_layers = nn.Sequential(*stage_modules)
        self.attention_modules = nn.ModuleList(attention)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]
        # Edge detection branch (fc sized for a 224x224 single-channel edge map
        # after two 2x2 poolings -> 56x56 — TODO confirm against caller).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: flatten -> fc -> fc -> logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 consecutive Sequential modules followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout; its output is the fusion feature.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            # Best-effort fusion: any failure in the edge branch (e.g. an
            # unexpected input size) deliberately falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv MLP applied to both the
    average- and max-pooled descriptors; spatial attention is a single bias-free
    7x7 conv over the stacked channel-wise mean/max maps. Attribute names
    (``channel_attention``, ``spatial_attention``) must not change: they are the
    checkpoint's state_dict keys.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )
        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by cross-channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, rebuilt to match the released checkpoint layout.

    All variants share one structure: stacked Conv-BN-ReLU-MaxPool stages, a
    CBAM module after every stage, a flatten + MLP classifier, and an optional
    edge branch whose 128-d features are fused before the final head.

    Args:
        model_type: 'f', 'c' or 'q' variant (see ``_CONFIGS``).
        num_classes: number of output classes. Previously this argument was
            silently ignored (output hard-coded to 6); it is now honoured, with
            the default 6 keeping the layout checkpoint-identical.

    Forward returns ``(logits, attention_map)`` where ``attention_map`` is the
    activation after the last attention stage.
    """

    # Per-variant layout: (channel progression, hidden fc sizes, final map side).
    # Spatial sizes assume 224x224 RGB input (224 / 2**n_stages) — TODO confirm.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Originally an unknown type left fc_input/fc_sizes unbound and
            # crashed later with a NameError; fail fast instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_hidden, spatial = self._CONFIGS[model_type]
        # Build Conv+BN+ReLU+Pool stages; flat nn.Sequential numbering (4 modules
        # per stage) must match the checkpoint's keys exactly.
        stage_modules = []
        attention = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            attention.append(CBAMAttentionCheckpoint(c_out, reduction=8))
        self.conv_layers = nn.Sequential(*stage_modules)
        self.attention_modules = nn.ModuleList(attention)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]
        # Edge detection branch (fc sized for a 224x224 single-channel edge map
        # after two 2x2 poolings -> 56x56 — TODO confirm against caller).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: flatten -> fc -> fc -> logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 consecutive Sequential modules followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout; its output is the fusion feature.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            # Best-effort fusion: any failure in the edge branch (e.g. an
            # unexpected input size) deliberately falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv MLP applied to both the
    average- and max-pooled descriptors; spatial attention is a single bias-free
    7x7 conv over the stacked channel-wise mean/max maps. Attribute names
    (``channel_attention``, ``spatial_attention``) must not change: they are the
    checkpoint's state_dict keys.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )
        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by cross-channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, rebuilt to match the released checkpoint layout.

    All variants share one structure: stacked Conv-BN-ReLU-MaxPool stages, a
    CBAM module after every stage, a flatten + MLP classifier, and an optional
    edge branch whose 128-d features are fused before the final head.

    Args:
        model_type: 'f', 'c' or 'q' variant (see ``_CONFIGS``).
        num_classes: number of output classes. Previously this argument was
            silently ignored (output hard-coded to 6); it is now honoured, with
            the default 6 keeping the layout checkpoint-identical.

    Forward returns ``(logits, attention_map)`` where ``attention_map`` is the
    activation after the last attention stage.
    """

    # Per-variant layout: (channel progression, hidden fc sizes, final map side).
    # Spatial sizes assume 224x224 RGB input (224 / 2**n_stages) — TODO confirm.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Originally an unknown type left fc_input/fc_sizes unbound and
            # crashed later with a NameError; fail fast instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_hidden, spatial = self._CONFIGS[model_type]
        # Build Conv+BN+ReLU+Pool stages; flat nn.Sequential numbering (4 modules
        # per stage) must match the checkpoint's keys exactly.
        stage_modules = []
        attention = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            attention.append(CBAMAttentionCheckpoint(c_out, reduction=8))
        self.conv_layers = nn.Sequential(*stage_modules)
        self.attention_modules = nn.ModuleList(attention)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]
        # Edge detection branch (fc sized for a 224x224 single-channel edge map
        # after two 2x2 poolings -> 56x56 — TODO confirm against caller).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: flatten -> fc -> fc -> logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 consecutive Sequential modules followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout; its output is the fusion feature.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            # Best-effort fusion: any failure in the edge branch (e.g. an
            # unexpected input size) deliberately falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv MLP applied to both the
    average- and max-pooled descriptors; spatial attention is a single bias-free
    7x7 conv over the stacked channel-wise mean/max maps. Attribute names
    (``channel_attention``, ``spatial_attention``) must not change: they are the
    checkpoint's state_dict keys.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )
        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by cross-channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, rebuilt to match the released checkpoint layout.

    All variants share one structure: stacked Conv-BN-ReLU-MaxPool stages, a
    CBAM module after every stage, a flatten + MLP classifier, and an optional
    edge branch whose 128-d features are fused before the final head.

    Args:
        model_type: 'f', 'c' or 'q' variant (see ``_CONFIGS``).
        num_classes: number of output classes. Previously this argument was
            silently ignored (output hard-coded to 6); it is now honoured, with
            the default 6 keeping the layout checkpoint-identical.

    Forward returns ``(logits, attention_map)`` where ``attention_map`` is the
    activation after the last attention stage.
    """

    # Per-variant layout: (channel progression, hidden fc sizes, final map side).
    # Spatial sizes assume 224x224 RGB input (224 / 2**n_stages) — TODO confirm.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Originally an unknown type left fc_input/fc_sizes unbound and
            # crashed later with a NameError; fail fast instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_hidden, spatial = self._CONFIGS[model_type]
        # Build Conv+BN+ReLU+Pool stages; flat nn.Sequential numbering (4 modules
        # per stage) must match the checkpoint's keys exactly.
        stage_modules = []
        attention = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            attention.append(CBAMAttentionCheckpoint(c_out, reduction=8))
        self.conv_layers = nn.Sequential(*stage_modules)
        self.attention_modules = nn.ModuleList(attention)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]
        # Edge detection branch (fc sized for a 224x224 single-channel edge map
        # after two 2x2 poolings -> 56x56 — TODO confirm against caller).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: flatten -> fc -> fc -> logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 consecutive Sequential modules followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout; its output is the fusion feature.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            # Best-effort fusion: any failure in the edge branch (e.g. an
            # unexpected input size) deliberately falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv MLP applied to both the
    average- and max-pooled descriptors; spatial attention is a single bias-free
    7x7 conv over the stacked channel-wise mean/max maps. Attribute names
    (``channel_attention``, ``spatial_attention``) must not change: they are the
    checkpoint's state_dict keys.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )
        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by cross-channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, rebuilt to match the released checkpoint layout.

    All variants share one structure: stacked Conv-BN-ReLU-MaxPool stages, a
    CBAM module after every stage, a flatten + MLP classifier, and an optional
    edge branch whose 128-d features are fused before the final head.

    Args:
        model_type: 'f', 'c' or 'q' variant (see ``_CONFIGS``).
        num_classes: number of output classes. Previously this argument was
            silently ignored (output hard-coded to 6); it is now honoured, with
            the default 6 keeping the layout checkpoint-identical.

    Forward returns ``(logits, attention_map)`` where ``attention_map`` is the
    activation after the last attention stage.
    """

    # Per-variant layout: (channel progression, hidden fc sizes, final map side).
    # Spatial sizes assume 224x224 RGB input (224 / 2**n_stages) — TODO confirm.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Originally an unknown type left fc_input/fc_sizes unbound and
            # crashed later with a NameError; fail fast instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_hidden, spatial = self._CONFIGS[model_type]
        # Build Conv+BN+ReLU+Pool stages; flat nn.Sequential numbering (4 modules
        # per stage) must match the checkpoint's keys exactly.
        stage_modules = []
        attention = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            attention.append(CBAMAttentionCheckpoint(c_out, reduction=8))
        self.conv_layers = nn.Sequential(*stage_modules)
        self.attention_modules = nn.ModuleList(attention)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]
        # Edge detection branch (fc sized for a 224x224 single-channel edge map
        # after two 2x2 poolings -> 56x56 — TODO confirm against caller).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: flatten -> fc -> fc -> logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 consecutive Sequential modules followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout; its output is the fusion feature.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            # Best-effort fusion: any failure in the edge branch (e.g. an
            # unexpected input size) deliberately falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv MLP applied to both the
    average- and max-pooled descriptors; spatial attention is a single bias-free
    7x7 conv over the stacked channel-wise mean/max maps. Attribute names
    (``channel_attention``, ``spatial_attention``) must not change: they are the
    checkpoint's state_dict keys.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )
        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by cross-channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, rebuilt to match the released checkpoint layout.

    All variants share one structure: stacked Conv-BN-ReLU-MaxPool stages, a
    CBAM module after every stage, a flatten + MLP classifier, and an optional
    edge branch whose 128-d features are fused before the final head.

    Args:
        model_type: 'f', 'c' or 'q' variant (see ``_CONFIGS``).
        num_classes: number of output classes. Previously this argument was
            silently ignored (output hard-coded to 6); it is now honoured, with
            the default 6 keeping the layout checkpoint-identical.

    Forward returns ``(logits, attention_map)`` where ``attention_map`` is the
    activation after the last attention stage.
    """

    # Per-variant layout: (channel progression, hidden fc sizes, final map side).
    # Spatial sizes assume 224x224 RGB input (224 / 2**n_stages) — TODO confirm.
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Originally an unknown type left fc_input/fc_sizes unbound and
            # crashed later with a NameError; fail fast instead.
            raise ValueError(
                f"unknown model_type {model_type!r}; expected one of {sorted(self._CONFIGS)}"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_hidden, spatial = self._CONFIGS[model_type]
        # Build Conv+BN+ReLU+Pool stages; flat nn.Sequential numbering (4 modules
        # per stage) must match the checkpoint's keys exactly.
        stage_modules = []
        attention = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stage_modules += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            attention.append(CBAMAttentionCheckpoint(c_out, reduction=8))
        self.conv_layers = nn.Sequential(*stage_modules)
        self.attention_modules = nn.ModuleList(attention)
        fc_input = channels[-1] * spatial * spatial
        fc_sizes = fc_hidden + [num_classes]
        # Edge detection branch (fc sized for a 224x224 single-channel edge map
        # after two 2x2 poolings -> 56x56 — TODO confirm against caller).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: flatten -> fc -> fc -> logits.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 consecutive Sequential modules followed by its CBAM block.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear+ReLU+Dropout; its output is the fusion feature.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            # Best-effort fusion: any failure in the edge branch (e.g. an
            # unexpected input size) deliberately falls back to the plain head.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module (CBAM) laid out to match the released checkpoint.

    Channel attention is a shared bias-free 1x1-conv MLP applied to both the
    average- and max-pooled descriptors; spatial attention is a single bias-free
    7x7 conv over the stacked channel-wise mean/max maps. Attribute names
    (``channel_attention``, ``spatial_attention``) must not change: they are the
    checkpoint's state_dict keys.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        hidden = max(channels // reduction, 1)
        # Shared channel-attention MLP as 1x1 convolutions (no bias, per checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, kernel_size=1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, kernel_size=1, bias=False),
        )
        # Spatial attention: one bias-free 7x7 conv over [mean, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: gate each channel by pooled descriptors ---
        squeezed_avg = F.adaptive_avg_pool2d(x, 1)
        squeezed_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(squeezed_avg) + self.channel_attention(squeezed_max)
        )
        x = x * channel_gate
        # --- spatial attention: gate each location by cross-channel statistics ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.amax(dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the backbone (+ optional edge branch); return (logits, attention_map).

    attention_map is the final attention-weighted feature map, kept so
    callers can visualize it.
    """
    # Every variant is the same Conv/BN/ReLU/Pool stage (4 modules in
    # self.conv_layers) followed by one CBAM block, so iterate over the
    # attention modules instead of branching on model_type.  This also
    # removes the path where attention_map was left unbound for an
    # unknown model_type (UnboundLocalError at the return).
    for stage_idx in range(len(self.attention_modules)):
        x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
        x = self.attention_modules[stage_idx](x)
    attention_map = x
    x = x.view(x.size(0), -1)
    # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)
    if edge_x is not None:
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = edge_x.view(edge_x.size(0), -1)
            edge_features = self.edge_fc(edge_features)
            combined = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(combined)
        except Exception:
            # Best-effort fusion: if the edge map has an unexpected size
            # (self.edge_fc expects a fixed flattened length), fall back to
            # the plain head instead of crashing.  Unused `as e` removed.
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width/depth:
      'f' - 3 stages, 16/32/64 channels
      'c' - 3 stages, 32/64/128 channels
      'q' - 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input; the optional edge
    branch assumes a 224x224 single-channel edge map.

    Fixes vs. the original: num_classes is honored (it was hard-coded to 6,
    silently ignoring the argument; the default keeps old behavior), an
    unknown model_type raises ValueError up front instead of a later
    NameError, and forward() no longer leaves attention_map unbound.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(f"unknown model_type: {model_type!r}")
        # Backbone: Conv+BN+ReLU+MaxPool -- exactly 4 modules per stage so
        # forward() can slice self.conv_layers in groups of four; building it
        # as one flat Sequential preserves the checkpoint's state_dict keys.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )
        n_stages = len(widths) - 1
        # Flattened size after the stages; each stage halves 224x224 input.
        spatial = 224 // (2 ** n_stages)
        fc_input = widths[-1] * spatial * spatial
        # Edge-detection branch: 1x224x224 -> 64x56x56 after two conv+pool.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 conv_layers modules followed by one CBAM block.
        for stage_idx in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
            x = self.attention_modules[stage_idx](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # map has an unexpected size (edge_fc expects 64*56*56).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width/depth:
      'f' - 3 stages, 16/32/64 channels
      'c' - 3 stages, 32/64/128 channels
      'q' - 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input; the optional edge
    branch assumes a 224x224 single-channel edge map.

    Fixes vs. the original: num_classes is honored (it was hard-coded to 6,
    silently ignoring the argument; the default keeps old behavior), an
    unknown model_type raises ValueError up front instead of a later
    NameError, and forward() no longer leaves attention_map unbound.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(f"unknown model_type: {model_type!r}")
        # Backbone: Conv+BN+ReLU+MaxPool -- exactly 4 modules per stage so
        # forward() can slice self.conv_layers in groups of four; building it
        # as one flat Sequential preserves the checkpoint's state_dict keys.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )
        n_stages = len(widths) - 1
        # Flattened size after the stages; each stage halves 224x224 input.
        spatial = 224 // (2 ** n_stages)
        fc_input = widths[-1] * spatial * spatial
        # Edge-detection branch: 1x224x224 -> 64x56x56 after two conv+pool.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 conv_layers modules followed by one CBAM block.
        for stage_idx in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
            x = self.attention_modules[stage_idx](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # map has an unexpected size (edge_fc expects 64*56*56).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width/depth:
      'f' - 3 stages, 16/32/64 channels
      'c' - 3 stages, 32/64/128 channels
      'q' - 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input; the optional edge
    branch assumes a 224x224 single-channel edge map.

    Fixes vs. the original: num_classes is honored (it was hard-coded to 6,
    silently ignoring the argument; the default keeps old behavior), an
    unknown model_type raises ValueError up front instead of a later
    NameError, and forward() no longer leaves attention_map unbound.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(f"unknown model_type: {model_type!r}")
        # Backbone: Conv+BN+ReLU+MaxPool -- exactly 4 modules per stage so
        # forward() can slice self.conv_layers in groups of four; building it
        # as one flat Sequential preserves the checkpoint's state_dict keys.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )
        n_stages = len(widths) - 1
        # Flattened size after the stages; each stage halves 224x224 input.
        spatial = 224 // (2 ** n_stages)
        fc_input = widths[-1] * spatial * spatial
        # Edge-detection branch: 1x224x224 -> 64x56x56 after two conv+pool.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 conv_layers modules followed by one CBAM block.
        for stage_idx in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
            x = self.attention_modules[stage_idx](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # map has an unexpected size (edge_fc expects 64*56*56).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width/depth:
      'f' - 3 stages, 16/32/64 channels
      'c' - 3 stages, 32/64/128 channels
      'q' - 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input; the optional edge
    branch assumes a 224x224 single-channel edge map.

    Fixes vs. the original: num_classes is honored (it was hard-coded to 6,
    silently ignoring the argument; the default keeps old behavior), an
    unknown model_type raises ValueError up front instead of a later
    NameError, and forward() no longer leaves attention_map unbound.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(f"unknown model_type: {model_type!r}")
        # Backbone: Conv+BN+ReLU+MaxPool -- exactly 4 modules per stage so
        # forward() can slice self.conv_layers in groups of four; building it
        # as one flat Sequential preserves the checkpoint's state_dict keys.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )
        n_stages = len(widths) - 1
        # Flattened size after the stages; each stage halves 224x224 input.
        spatial = 224 // (2 ** n_stages)
        fc_input = widths[-1] * spatial * spatial
        # Edge-detection branch: 1x224x224 -> 64x56x56 after two conv+pool.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 conv_layers modules followed by one CBAM block.
        for stage_idx in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
            x = self.attention_modules[stage_idx](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # map has an unexpected size (edge_fc expects 64*56*56).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width/depth:
      'f' - 3 stages, 16/32/64 channels
      'c' - 3 stages, 32/64/128 channels
      'q' - 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input; the optional edge
    branch assumes a 224x224 single-channel edge map.

    Fixes vs. the original: num_classes is honored (it was hard-coded to 6,
    silently ignoring the argument; the default keeps old behavior), an
    unknown model_type raises ValueError up front instead of a later
    NameError, and forward() no longer leaves attention_map unbound.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(f"unknown model_type: {model_type!r}")
        # Backbone: Conv+BN+ReLU+MaxPool -- exactly 4 modules per stage so
        # forward() can slice self.conv_layers in groups of four; building it
        # as one flat Sequential preserves the checkpoint's state_dict keys.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )
        n_stages = len(widths) - 1
        # Flattened size after the stages; each stage halves 224x224 input.
        spatial = 224 // (2 ** n_stages)
        fc_input = widths[-1] * spatial * spatial
        # Edge-detection branch: 1x224x224 -> 64x56x56 after two conv+pool.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 conv_layers modules followed by one CBAM block.
        for stage_idx in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
            x = self.attention_modules[stage_idx](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # map has an unexpected size (edge_fc expects 64*56*56).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier matching the released checkpoint layout.

    model_type selects the backbone width/depth:
      'f' - 3 stages, 16/32/64 channels
      'c' - 3 stages, 32/64/128 channels
      'q' - 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input; the optional edge
    branch assumes a 224x224 single-channel edge map.

    Fixes vs. the original: num_classes is honored (it was hard-coded to 6,
    silently ignoring the argument; the default keeps old behavior), an
    unknown model_type raises ValueError up front instead of a later
    NameError, and forward() no longer leaves attention_map unbound.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        if model_type == 'f':
            widths = [3, 16, 32, 64]
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            widths = [3, 32, 64, 128]
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            widths = [3, 64, 128, 256, 512]
            fc_sizes = [1024, 512, num_classes]
        else:
            raise ValueError(f"unknown model_type: {model_type!r}")
        # Backbone: Conv+BN+ReLU+MaxPool -- exactly 4 modules per stage so
        # forward() can slice self.conv_layers in groups of four; building it
        # as one flat Sequential preserves the checkpoint's state_dict keys.
        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in widths[1:]]
        )
        n_stages = len(widths) - 1
        # Flattened size after the stages; each stage halves 224x224 input.
        spatial = 224 // (2 ** n_stages)
        fc_input = widths[-1] * spatial * spatial
        # Edge-detection branch: 1x224x224 -> 64x56x56 after two conv+pool.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the backbone (+ optional edge branch); return (logits, attention_map)."""
        # Each stage is 4 conv_layers modules followed by one CBAM block.
        for stage_idx in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage_idx:4 * (stage_idx + 1)](x)
            x = self.attention_modules[stage_idx](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head if the edge
                # map has an unexpected size (edge_fc expects 64*56*56).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention block: a channel gate followed by a spatial gate.

    The layer layout (bias-free 1x1 convs for the channel branch, a single
    bias-free 7x7 conv for the spatial branch) mirrors the released
    checkpoint so state_dict keys line up.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel-attention MLP implemented as 1x1 convolutions.
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention over the stacked [avg, max] channel summaries.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over global avg- and max-pooled vectors.
        pooled_avg = F.adaptive_avg_pool2d(x, 1)
        pooled_max = F.adaptive_max_pool2d(x, 1)
        channel_gate = torch.sigmoid(
            self.channel_attention(pooled_avg) + self.channel_attention(pooled_max)
        )
        x = x * channel_gate
        # Spatial gate: 7x7 conv over per-pixel mean/max across channels.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Args:
        model_type: 'f' (fast), 'c' (compact) or 'q' (quality); selects the
            depth/width of the conv stack. Any other value raises ValueError
            (previously this surfaced as a confusing NameError).
        num_classes: number of output classes. Previously this argument was
            accepted but ignored (the heads hard-coded 6 outputs); it is now
            honoured, with the default (6) reproducing the original layout.

    The module indices of ``conv_layers`` and ``attention_modules`` are
    identical to the original per-variant literal definitions, so checkpoint
    ``state_dict`` keys are unchanged.

    Spatial sizes below assume a 3x224x224 input; the optional ``edge_x``
    branch assumes a 1x224x224 edge map.
    """

    # Per-variant (channel progression, final feature-map side, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], 28, [256, 128]),
        'c': ([3, 32, 64, 128], 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type: {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, spatial, fc_hidden = self._CONFIGS[model_type]
        # Conv stack: one Conv+BN+ReLU+MaxPool stage (4 modules) per channel
        # step, built in the exact order the checkpoint expects.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM attention module after each conv stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection side branch (grayscale input, two pooled conv stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 pools, hence 64*56*56 features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)`` where ``attention_map`` is the
        final attended feature map (before flattening)."""
        # Stage i occupies conv_layers[4*i:4*i+4]; attention follows each
        # stage. This single loop replaces the duplicated 'f'/'c' vs 'q'
        # branches of the original.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] (Linear+ReLU+Dropout) yields the intermediate
        # feature vector shared by both heads.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion (as in the original): fall
                # back to the main head when the edge input does not match
                # the expected 1x224x224 shape.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) mirroring the checkpoint
    layout: bias-free 1x1 convs for the channel MLP, bias-free 7x7 conv for
    the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced = max(channels // reduction, 1)
        # Channel-attention MLP as 1x1 convolutions (NO bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [mean, max] descriptor (NO bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg/max statistics.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # Spatial attention over channel-wise mean and max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Fixes over the original: ``num_classes`` is now honoured (it was accepted
    but ignored; the heads hard-coded 6 outputs — default 6 reproduces the
    original layout), and an unknown ``model_type`` raises ValueError instead
    of a confusing NameError. Module indices of ``conv_layers`` and
    ``attention_modules`` are unchanged, so checkpoint state_dict keys match.
    Spatial sizes assume a 3x224x224 input; ``edge_x`` assumes 1x224x224.
    """

    # Per-variant (channel progression, final feature-map side, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], 28, [256, 128]),
        'c': ([3, 32, 64, 128], 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type: {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, spatial, fc_hidden = self._CONFIGS[model_type]
        # Conv stack: one Conv+BN+ReLU+MaxPool stage (4 modules) per channel
        # step, built in the exact order the checkpoint expects.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM attention module after each conv stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection side branch (grayscale input, two pooled conv stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 pools, hence 64*56*56 features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``; ``attention_map`` is the final
        attended feature map (pre-flatten)."""
        # Stage i occupies conv_layers[4*i:4*i+4]; attention follows each stage.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] yields the feature vector shared by both heads.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion (as in the original): fall back
                # to the main head when the edge input has an unexpected shape.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) mirroring the checkpoint
    layout: bias-free 1x1 convs for the channel MLP, bias-free 7x7 conv for
    the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced = max(channels // reduction, 1)
        # Channel-attention MLP as 1x1 convolutions (NO bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [mean, max] descriptor (NO bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg/max statistics.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # Spatial attention over channel-wise mean and max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Fixes over the original: ``num_classes`` is now honoured (it was accepted
    but ignored; the heads hard-coded 6 outputs — default 6 reproduces the
    original layout), and an unknown ``model_type`` raises ValueError instead
    of a confusing NameError. Module indices of ``conv_layers`` and
    ``attention_modules`` are unchanged, so checkpoint state_dict keys match.
    Spatial sizes assume a 3x224x224 input; ``edge_x`` assumes 1x224x224.
    """

    # Per-variant (channel progression, final feature-map side, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], 28, [256, 128]),
        'c': ([3, 32, 64, 128], 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type: {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, spatial, fc_hidden = self._CONFIGS[model_type]
        # Conv stack: one Conv+BN+ReLU+MaxPool stage (4 modules) per channel
        # step, built in the exact order the checkpoint expects.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM attention module after each conv stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection side branch (grayscale input, two pooled conv stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 pools, hence 64*56*56 features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``; ``attention_map`` is the final
        attended feature map (pre-flatten)."""
        # Stage i occupies conv_layers[4*i:4*i+4]; attention follows each stage.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] yields the feature vector shared by both heads.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion (as in the original): fall back
                # to the main head when the edge input has an unexpected shape.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) mirroring the checkpoint
    layout: bias-free 1x1 convs for the channel MLP, bias-free 7x7 conv for
    the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced = max(channels // reduction, 1)
        # Channel-attention MLP as 1x1 convolutions (NO bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [mean, max] descriptor (NO bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg/max statistics.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # Spatial attention over channel-wise mean and max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Fixes over the original: ``num_classes`` is now honoured (it was accepted
    but ignored; the heads hard-coded 6 outputs — default 6 reproduces the
    original layout), and an unknown ``model_type`` raises ValueError instead
    of a confusing NameError. Module indices of ``conv_layers`` and
    ``attention_modules`` are unchanged, so checkpoint state_dict keys match.
    Spatial sizes assume a 3x224x224 input; ``edge_x`` assumes 1x224x224.
    """

    # Per-variant (channel progression, final feature-map side, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], 28, [256, 128]),
        'c': ([3, 32, 64, 128], 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type: {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, spatial, fc_hidden = self._CONFIGS[model_type]
        # Conv stack: one Conv+BN+ReLU+MaxPool stage (4 modules) per channel
        # step, built in the exact order the checkpoint expects.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM attention module after each conv stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection side branch (grayscale input, two pooled conv stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 pools, hence 64*56*56 features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``; ``attention_map`` is the final
        attended feature map (pre-flatten)."""
        # Stage i occupies conv_layers[4*i:4*i+4]; attention follows each stage.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] yields the feature vector shared by both heads.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion (as in the original): fall back
                # to the main head when the edge input has an unexpected shape.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) mirroring the checkpoint
    layout: bias-free 1x1 convs for the channel MLP, bias-free 7x7 conv for
    the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced = max(channels // reduction, 1)
        # Channel-attention MLP as 1x1 convolutions (NO bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [mean, max] descriptor (NO bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg/max statistics.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # Spatial attention over channel-wise mean and max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Fixes over the original: ``num_classes`` is now honoured (it was accepted
    but ignored; the heads hard-coded 6 outputs — default 6 reproduces the
    original layout), and an unknown ``model_type`` raises ValueError instead
    of a confusing NameError. Module indices of ``conv_layers`` and
    ``attention_modules`` are unchanged, so checkpoint state_dict keys match.
    Spatial sizes assume a 3x224x224 input; ``edge_x`` assumes 1x224x224.
    """

    # Per-variant (channel progression, final feature-map side, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], 28, [256, 128]),
        'c': ([3, 32, 64, 128], 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type: {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, spatial, fc_hidden = self._CONFIGS[model_type]
        # Conv stack: one Conv+BN+ReLU+MaxPool stage (4 modules) per channel
        # step, built in the exact order the checkpoint expects.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM attention module after each conv stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection side branch (grayscale input, two pooled conv stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 pools, hence 64*56*56 features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``; ``attention_map`` is the final
        attended feature map (pre-flatten)."""
        # Stage i occupies conv_layers[4*i:4*i+4]; attention follows each stage.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] yields the feature vector shared by both heads.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion (as in the original): fall back
                # to the main head when the edge input has an unexpected shape.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) mirroring the checkpoint
    layout: bias-free 1x1 convs for the channel MLP, bias-free 7x7 conv for
    the spatial map."""

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        reduced = max(channels // reduction, 1)
        # Channel-attention MLP as 1x1 convolutions (NO bias, as in checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced, channels, 1, bias=False),
        )
        # Spatial attention: 7x7 conv over the [mean, max] descriptor (NO bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP over global avg/max statistics.
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # Spatial attention over channel-wise mean and max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier matching the released checkpoint layout.

    Fixes over the original: ``num_classes`` is now honoured (it was accepted
    but ignored; the heads hard-coded 6 outputs — default 6 reproduces the
    original layout), and an unknown ``model_type`` raises ValueError instead
    of a confusing NameError. Module indices of ``conv_layers`` and
    ``attention_modules`` are unchanged, so checkpoint state_dict keys match.
    Spatial sizes assume a 3x224x224 input; ``edge_x`` assumes 1x224x224.
    """

    # Per-variant (channel progression, final feature-map side, hidden FC widths).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], 28, [256, 128]),
        'c': ([3, 32, 64, 128], 28, [512, 256]),
        'q': ([3, 64, 128, 256, 512], 14, [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type: {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, spatial, fc_hidden = self._CONFIGS[model_type]
        # Conv stack: one Conv+BN+ReLU+MaxPool stage (4 modules) per channel
        # step, built in the exact order the checkpoint expects.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM attention module after each conv stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection side branch (grayscale input, two pooled conv stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 pools, hence 64*56*56 features.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], fc_hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[1], num_classes),
        )
        # Head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_hidden[0] + 128, fc_hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return ``(logits, attention_map)``; ``attention_map`` is the final
        attended feature map (pre-flatten)."""
        # Stage i occupies conv_layers[4*i:4*i+4]; attention follows each stage.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # classifier[0:3] yields the feature vector shared by both heads.
        features = self.classifier[2](self.classifier[1](self.classifier[0](x)))
        if edge_x is not None:
            try:
                e = F.relu(self.edge_conv1(edge_x))
                e = F.max_pool2d(e, 2, 2)
                e = F.relu(self.edge_conv2(e))
                e = F.max_pool2d(e, 2, 2)
                edge_features = self.edge_fc(e.view(e.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except Exception:
                # Deliberate best-effort fusion (as in the original): fall back
                # to the main head when the edge input has an unexpected shape.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the backbone with interleaved CBAM attention, then classify.

    Returns ``(logits, attention_map)``; ``attention_map`` is the feature
    map produced after the final attention stage.
    """
    # Backbone: the conv Sequential is consumed in groups of four
    # (Conv+BN+ReLU+Pool), each followed by its matching attention module.
    if self.model_type == 'f' or self.model_type == 'c':
        for stage in range(3):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
    elif self.model_type == 'q':
        for stage in range(4):
            x = self.conv_layers[4 * stage:4 * (stage + 1)](x)
            x = self.attention_modules[stage](x)
        attention_map = x
    flat = x.view(x.size(0), -1)
    # First classifier stage (Linear + ReLU + Dropout) yields the features
    # shared by the plain head and the edge-fused head.
    hidden = self.classifier[0](flat)
    hidden = self.classifier[1](hidden)
    features = self.classifier[2](hidden)
    if edge_x is not None:
        try:
            edges = F.relu(self.edge_conv1(edge_x))
            edges = F.max_pool2d(edges, 2, 2)
            edges = F.relu(self.edge_conv2(edges))
            edges = F.max_pool2d(edges, 2, 2)
            edge_features = self.edge_fc(edges.view(edges.size(0), -1))
            fused = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(fused)
        except Exception:
            # Best-effort fallback: a malformed edge input falls back to the
            # plain classifier head instead of raising.
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is implemented with 1x1 convolutions (no bias) and
    spatial attention with a single 7x7 convolution (no bias), mirroring the
    parameter shapes stored in the checkpoint file.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        # max(..., 1) guards against channels < reduction collapsing to zero.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention to ``x`` of shape (N, C, H, W)."""
        # Channel gate: shared bottleneck over avg- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over the channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier: CNN backbone + CBAM attention + optional edge branch.

    ``model_type`` selects the backbone width/depth:
      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes (28*28 / 14*14 feature maps; 56*56 for the
    edge branch) assume 224x224 inputs -- TODO confirm against the training
    pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; forward()
        # slices this Sequential in groups of four to interleave attention.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the head width list previously hard-coded 6 outputs,
            # silently ignoring the ``num_classes`` argument.
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUG FIX: an unknown model_type previously fell through all
            # branches and later raised a confusing NameError on ``fc_input``.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (56 = 224 / 2**2).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from a grayscale ``edge_x``.

        Args:
            x: RGB batch of shape (N, 3, H, W); H = W = 224 for the stock heads.
            edge_x: optional single-channel edge-map batch of shape (N, 1, H, W).

        Returns:
            ``(logits, attention_map)`` -- logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        # Backbone: consume the conv Sequential in groups of four
        # (Conv+BN+ReLU+Pool), each followed by its matching CBAM module.
        for stage in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear + ReLU + Dropout) yields the features
        # shared by the plain head and the edge-fused head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input must
                # not break classification, so use the main head instead.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is implemented with 1x1 convolutions (no bias) and
    spatial attention with a single 7x7 convolution (no bias), mirroring the
    parameter shapes stored in the checkpoint file.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        # max(..., 1) guards against channels < reduction collapsing to zero.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention to ``x`` of shape (N, C, H, W)."""
        # Channel gate: shared bottleneck over avg- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over the channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier: CNN backbone + CBAM attention + optional edge branch.

    ``model_type`` selects the backbone width/depth:
      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes (28*28 / 14*14 feature maps; 56*56 for the
    edge branch) assume 224x224 inputs -- TODO confirm against the training
    pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; forward()
        # slices this Sequential in groups of four to interleave attention.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the head width list previously hard-coded 6 outputs,
            # silently ignoring the ``num_classes`` argument.
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUG FIX: an unknown model_type previously fell through all
            # branches and later raised a confusing NameError on ``fc_input``.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (56 = 224 / 2**2).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from a grayscale ``edge_x``.

        Args:
            x: RGB batch of shape (N, 3, H, W); H = W = 224 for the stock heads.
            edge_x: optional single-channel edge-map batch of shape (N, 1, H, W).

        Returns:
            ``(logits, attention_map)`` -- logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        # Backbone: consume the conv Sequential in groups of four
        # (Conv+BN+ReLU+Pool), each followed by its matching CBAM module.
        for stage in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear + ReLU + Dropout) yields the features
        # shared by the plain head and the edge-fused head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input must
                # not break classification, so use the main head instead.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is implemented with 1x1 convolutions (no bias) and
    spatial attention with a single 7x7 convolution (no bias), mirroring the
    parameter shapes stored in the checkpoint file.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        # max(..., 1) guards against channels < reduction collapsing to zero.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention to ``x`` of shape (N, C, H, W)."""
        # Channel gate: shared bottleneck over avg- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over the channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier: CNN backbone + CBAM attention + optional edge branch.

    ``model_type`` selects the backbone width/depth:
      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes (28*28 / 14*14 feature maps; 56*56 for the
    edge branch) assume 224x224 inputs -- TODO confirm against the training
    pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; forward()
        # slices this Sequential in groups of four to interleave attention.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the head width list previously hard-coded 6 outputs,
            # silently ignoring the ``num_classes`` argument.
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUG FIX: an unknown model_type previously fell through all
            # branches and later raised a confusing NameError on ``fc_input``.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (56 = 224 / 2**2).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from a grayscale ``edge_x``.

        Args:
            x: RGB batch of shape (N, 3, H, W); H = W = 224 for the stock heads.
            edge_x: optional single-channel edge-map batch of shape (N, 1, H, W).

        Returns:
            ``(logits, attention_map)`` -- logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        # Backbone: consume the conv Sequential in groups of four
        # (Conv+BN+ReLU+Pool), each followed by its matching CBAM module.
        for stage in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear + ReLU + Dropout) yields the features
        # shared by the plain head and the edge-fused head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input must
                # not break classification, so use the main head instead.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is implemented with 1x1 convolutions (no bias) and
    spatial attention with a single 7x7 convolution (no bias), mirroring the
    parameter shapes stored in the checkpoint file.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        # max(..., 1) guards against channels < reduction collapsing to zero.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention to ``x`` of shape (N, C, H, W)."""
        # Channel gate: shared bottleneck over avg- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over the channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier: CNN backbone + CBAM attention + optional edge branch.

    ``model_type`` selects the backbone width/depth:
      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes (28*28 / 14*14 feature maps; 56*56 for the
    edge branch) assume 224x224 inputs -- TODO confirm against the training
    pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; forward()
        # slices this Sequential in groups of four to interleave attention.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the head width list previously hard-coded 6 outputs,
            # silently ignoring the ``num_classes`` argument.
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUG FIX: an unknown model_type previously fell through all
            # branches and later raised a confusing NameError on ``fc_input``.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (56 = 224 / 2**2).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from a grayscale ``edge_x``.

        Args:
            x: RGB batch of shape (N, 3, H, W); H = W = 224 for the stock heads.
            edge_x: optional single-channel edge-map batch of shape (N, 1, H, W).

        Returns:
            ``(logits, attention_map)`` -- logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        # Backbone: consume the conv Sequential in groups of four
        # (Conv+BN+ReLU+Pool), each followed by its matching CBAM module.
        for stage in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear + ReLU + Dropout) yields the features
        # shared by the plain head and the edge-fused head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input must
                # not break classification, so use the main head instead.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is implemented with 1x1 convolutions (no bias) and
    spatial attention with a single 7x7 convolution (no bias), mirroring the
    parameter shapes stored in the checkpoint file.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        # max(..., 1) guards against channels < reduction collapsing to zero.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention to ``x`` of shape (N, C, H, W)."""
        # Channel gate: shared bottleneck over avg- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over the channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier: CNN backbone + CBAM attention + optional edge branch.

    ``model_type`` selects the backbone width/depth:
      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes (28*28 / 14*14 feature maps; 56*56 for the
    edge branch) assume 224x224 inputs -- TODO confirm against the training
    pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; forward()
        # slices this Sequential in groups of four to interleave attention.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the head width list previously hard-coded 6 outputs,
            # silently ignoring the ``num_classes`` argument.
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUG FIX: an unknown model_type previously fell through all
            # branches and later raised a confusing NameError on ``fc_input``.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (56 = 224 / 2**2).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from a grayscale ``edge_x``.

        Args:
            x: RGB batch of shape (N, 3, H, W); H = W = 224 for the stock heads.
            edge_x: optional single-channel edge-map batch of shape (N, 1, H, W).

        Returns:
            ``(logits, attention_map)`` -- logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        # Backbone: consume the conv Sequential in groups of four
        # (Conv+BN+ReLU+Pool), each followed by its matching CBAM module.
        for stage in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear + ReLU + Dropout) yields the features
        # shared by the plain head and the edge-fused head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input must
                # not break classification, so use the main head instead.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block laid out to match the released checkpoint.

    Channel attention is implemented with 1x1 convolutions (no bias) and
    spatial attention with a single 7x7 convolution (no bias), mirroring the
    parameter shapes stored in the checkpoint file.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        # max(..., 1) guards against channels < reduction collapsing to zero.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Apply channel then spatial attention to ``x`` of shape (N, C, H, W)."""
        # Channel gate: shared bottleneck over avg- and max-pooled descriptors.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        avg_out = self.channel_attention(avg_pool)
        max_out = self.channel_attention(max_pool)
        channel_att = torch.sigmoid(avg_out + max_out)
        x = x * channel_att
        # Spatial gate: 7x7 conv over the channel-wise mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_in = torch.cat([avg_out, max_out], dim=1)
        spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
        x = x * spatial_att
        return x


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA-2.4 classifier: CNN backbone + CBAM attention + optional edge branch.

    ``model_type`` selects the backbone width/depth:
      * ``'f'``: 3 stages, 16/32/64 channels
      * ``'c'``: 3 stages, 32/64/128 channels
      * ``'q'``: 4 stages, 64/128/256/512 channels

    The fully-connected input sizes (28*28 / 14*14 feature maps; 56*56 for the
    edge branch) assume 224x224 inputs -- TODO confirm against the training
    pipeline.

    Raises:
        ValueError: if ``model_type`` is not one of ``'f'``, ``'c'``, ``'q'``.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Conv layers: Sequential with Conv+BN+ReLU+Pool pattern; forward()
        # slices this Sequential in groups of four to interleave attention.
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28
            # BUG FIX: the head width list previously hard-coded 6 outputs,
            # silently ignoring the ``num_classes`` argument.
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # BUG FIX: an unknown model_type previously fell through all
            # branches and later raised a confusing NameError on ``fc_input``.
            raise ValueError(
                f"unknown model_type: {model_type!r} (expected 'f', 'c' or 'q')"
            )
        # Edge detection branch (grayscale input, two conv+pool stages).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # Edge fc input size depends on pooling (56 = 224 / 2**2).
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier (main features + 128 edge features).
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Classify ``x``; optionally fuse features from a grayscale ``edge_x``.

        Args:
            x: RGB batch of shape (N, 3, H, W); H = W = 224 for the stock heads.
            edge_x: optional single-channel edge-map batch of shape (N, 1, H, W).

        Returns:
            ``(logits, attention_map)`` -- logits of shape (N, num_classes)
            and the feature map after the last attention stage.
        """
        # Backbone: consume the conv Sequential in groups of four
        # (Conv+BN+ReLU+Pool), each followed by its matching CBAM module.
        for stage in range(len(self.attention_modules)):
            x = self.conv_layers[4 * stage:4 * stage + 4](x)
            x = self.attention_modules[stage](x)
        attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear + ReLU + Dropout) yields the features
        # shared by the plain head and the edge-fused head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fallback: a malformed edge input must
                # not break classification, so use the main head instead.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """Convolutional Block Attention Module matching the checkpoint layout.

    Channel gate: a shared, bias-free 1x1-conv bottleneck applied to both the
    average- and max-pooled descriptors. Spatial gate: one bias-free 7x7
    convolution over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # Channel attention (1x1 convolutions, no bias, as in the checkpoint).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Spatial attention (single 7x7 convolution, no bias).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        """Gate ``x`` (N, C, H, W) by channel attention, then spatial attention."""
        # --- channel gate ---
        descriptors = [F.adaptive_avg_pool2d(x, 1), F.adaptive_max_pool2d(x, 1)]
        logits = sum(self.channel_attention(d) for d in descriptors)
        x = x * torch.sigmoid(logits)
        # --- spatial gate ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        gate = self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        return x * torch.sigmoid(gate)
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
if self.model_type == 'f' or self.model_type == 'c':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
attention_map = x
elif self.model_type == 'q':
x = self.conv_layers[0:4](x)
x = self.attention_modules[0](x)
x = self.conv_layers[4:8](x)
x = self.attention_modules[1](x)
x = self.conv_layers[8:12](x)
x = self.attention_modules[2](x)
x = self.conv_layers[12:16](x)
x = self.attention_modules[3](x)
attention_map = x
x = x.view(x.size(0), -1)
x = self.classifier[0](x)
x = self.classifier[1](x)
features = self.classifier[2](x)
if edge_x is not None:
try:
edge_x = F.relu(self.edge_conv1(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_x = F.relu(self.edge_conv2(edge_x))
edge_x = F.max_pool2d(edge_x, 2, 2)
edge_features = edge_x.view(edge_x.size(0), -1)
edge_features = self.edge_fc(edge_features)
combined = torch.cat([features, edge_features], dim=1)
output = self.combined_classifier(combined)
except Exception as e:
output = self.classifier[3:](features)
else:
output = self.classifier[3:](features)
return output, attention_mapModelpythonpytorch
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint layout.

    Channel attention is a shared 1x1-conv bottleneck MLP (no bias) applied to
    global average- and max-pooled descriptors; spatial attention is a single
    7x7 conv (no bias) over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width is floored at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # 1x1 convs (not Linear) and bias=False to match the checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on global avg/max descriptors ---
        avg_desc = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_desc = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_desc + max_desc)
        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: conv stages + CBAM attention + optional edge branch.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input (28x28 after three
    2x pools, 14x14 after four); the edge branch assumes a 224x224
    single-channel edge map.

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    # (stage channel progression, hidden fc widths) per variant.
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Fail fast on unknown variants; the original fell through all elifs
        # and crashed later with a NameError on fc_input.
        if model_type not in self._VARIANTS:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._VARIANTS[model_type]
        # Conv stages: Conv -> BN -> ReLU -> MaxPool(2) per stage. Built in a
        # loop so all variants share one code path; module indices match the
        # original hand-written Sequential (checkpoint compatible).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block after every conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Spatial side after len(channels) halvings of a 224x224 input.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)  # 224 -> 112 -> 56
        # Main classifier head. The output width now honours num_classes;
        # it was hard-coded to 6 despite the constructor parameter.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are available (concat at hidden[0]).
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
                edge branch the model falls back to the image-only head.
        """
        # Run each conv stage (4 modules) followed by its CBAM block. The
        # loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants
        # and guarantees attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned for inspection
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a malformed edge input (e.g. wrong spatial
                # size) falls back to the image-only head instead of raising.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint layout.

    Channel attention is a shared 1x1-conv bottleneck MLP (no bias) applied to
    global average- and max-pooled descriptors; spatial attention is a single
    7x7 conv (no bias) over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width is floored at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # 1x1 convs (not Linear) and bias=False to match the checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on global avg/max descriptors ---
        avg_desc = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_desc = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_desc + max_desc)
        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: conv stages + CBAM attention + optional edge branch.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input (28x28 after three
    2x pools, 14x14 after four); the edge branch assumes a 224x224
    single-channel edge map.

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    # (stage channel progression, hidden fc widths) per variant.
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Fail fast on unknown variants; the original fell through all elifs
        # and crashed later with a NameError on fc_input.
        if model_type not in self._VARIANTS:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._VARIANTS[model_type]
        # Conv stages: Conv -> BN -> ReLU -> MaxPool(2) per stage. Built in a
        # loop so all variants share one code path; module indices match the
        # original hand-written Sequential (checkpoint compatible).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block after every conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Spatial side after len(channels) halvings of a 224x224 input.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)  # 224 -> 112 -> 56
        # Main classifier head. The output width now honours num_classes;
        # it was hard-coded to 6 despite the constructor parameter.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are available (concat at hidden[0]).
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
                edge branch the model falls back to the image-only head.
        """
        # Run each conv stage (4 modules) followed by its CBAM block. The
        # loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants
        # and guarantees attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned for inspection
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a malformed edge input (e.g. wrong spatial
                # size) falls back to the image-only head instead of raising.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint layout.

    Channel attention is a shared 1x1-conv bottleneck MLP (no bias) applied to
    global average- and max-pooled descriptors; spatial attention is a single
    7x7 conv (no bias) over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width is floored at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # 1x1 convs (not Linear) and bias=False to match the checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on global avg/max descriptors ---
        avg_desc = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_desc = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_desc + max_desc)
        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: conv stages + CBAM attention + optional edge branch.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input (28x28 after three
    2x pools, 14x14 after four); the edge branch assumes a 224x224
    single-channel edge map.

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    # (stage channel progression, hidden fc widths) per variant.
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Fail fast on unknown variants; the original fell through all elifs
        # and crashed later with a NameError on fc_input.
        if model_type not in self._VARIANTS:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._VARIANTS[model_type]
        # Conv stages: Conv -> BN -> ReLU -> MaxPool(2) per stage. Built in a
        # loop so all variants share one code path; module indices match the
        # original hand-written Sequential (checkpoint compatible).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block after every conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Spatial side after len(channels) halvings of a 224x224 input.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)  # 224 -> 112 -> 56
        # Main classifier head. The output width now honours num_classes;
        # it was hard-coded to 6 despite the constructor parameter.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are available (concat at hidden[0]).
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
                edge branch the model falls back to the image-only head.
        """
        # Run each conv stage (4 modules) followed by its CBAM block. The
        # loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants
        # and guarantees attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned for inspection
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a malformed edge input (e.g. wrong spatial
                # size) falls back to the image-only head instead of raising.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint layout.

    Channel attention is a shared 1x1-conv bottleneck MLP (no bias) applied to
    global average- and max-pooled descriptors; spatial attention is a single
    7x7 conv (no bias) over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width is floored at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # 1x1 convs (not Linear) and bias=False to match the checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on global avg/max descriptors ---
        avg_desc = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_desc = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_desc + max_desc)
        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: conv stages + CBAM attention + optional edge branch.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input (28x28 after three
    2x pools, 14x14 after four); the edge branch assumes a 224x224
    single-channel edge map.

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    # (stage channel progression, hidden fc widths) per variant.
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Fail fast on unknown variants; the original fell through all elifs
        # and crashed later with a NameError on fc_input.
        if model_type not in self._VARIANTS:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._VARIANTS[model_type]
        # Conv stages: Conv -> BN -> ReLU -> MaxPool(2) per stage. Built in a
        # loop so all variants share one code path; module indices match the
        # original hand-written Sequential (checkpoint compatible).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block after every conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Spatial side after len(channels) halvings of a 224x224 input.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)  # 224 -> 112 -> 56
        # Main classifier head. The output width now honours num_classes;
        # it was hard-coded to 6 despite the constructor parameter.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are available (concat at hidden[0]).
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
                edge branch the model falls back to the image-only head.
        """
        # Run each conv stage (4 modules) followed by its CBAM block. The
        # loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants
        # and guarantees attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned for inspection
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a malformed edge input (e.g. wrong spatial
                # size) falls back to the image-only head instead of raising.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint layout.

    Channel attention is a shared 1x1-conv bottleneck MLP (no bias) applied to
    global average- and max-pooled descriptors; spatial attention is a single
    7x7 conv (no bias) over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width is floored at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # 1x1 convs (not Linear) and bias=False to match the checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on global avg/max descriptors ---
        avg_desc = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_desc = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_desc + max_desc)
        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: conv stages + CBAM attention + optional edge branch.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input (28x28 after three
    2x pools, 14x14 after four); the edge branch assumes a 224x224
    single-channel edge map.

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    # (stage channel progression, hidden fc widths) per variant.
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Fail fast on unknown variants; the original fell through all elifs
        # and crashed later with a NameError on fc_input.
        if model_type not in self._VARIANTS:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._VARIANTS[model_type]
        # Conv stages: Conv -> BN -> ReLU -> MaxPool(2) per stage. Built in a
        # loop so all variants share one code path; module indices match the
        # original hand-written Sequential (checkpoint compatible).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block after every conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Spatial side after len(channels) halvings of a 224x224 input.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)  # 224 -> 112 -> 56
        # Main classifier head. The output width now honours num_classes;
        # it was hard-coded to 6 despite the constructor parameter.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are available (concat at hidden[0]).
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
                edge branch the model falls back to the image-only head.
        """
        # Run each conv stage (4 modules) followed by its CBAM block. The
        # loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants
        # and guarantees attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned for inspection
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a malformed edge input (e.g. wrong spatial
                # size) falls back to the image-only head instead of raising.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint layout.

    Channel attention is a shared 1x1-conv bottleneck MLP (no bias) applied to
    global average- and max-pooled descriptors; spatial attention is a single
    7x7 conv (no bias) over the channel-wise mean and max maps.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width is floored at 1 so tiny channel counts still work.
        reduced_channels = max(channels // reduction, 1)
        # 1x1 convs (not Linear) and bias=False to match the checkpoint keys.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel attention: shared MLP on global avg/max descriptors ---
        avg_desc = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_desc = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_desc + max_desc)
        # --- spatial attention: 7x7 conv over channel-wise mean/max maps ---
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        gate = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * gate


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier: conv stages + CBAM attention + optional edge branch.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes assume 224x224 RGB input (28x28 after three
    2x pools, 14x14 after four); the edge branch assumes a 224x224
    single-channel edge map.

    Raises:
        ValueError: if model_type is not one of 'f', 'c', 'q'.
    """

    # (stage channel progression, hidden fc widths) per variant.
    _VARIANTS = {
        'f': ([16, 32, 64], [256, 128]),
        'c': ([32, 64, 128], [512, 256]),
        'q': ([64, 128, 256, 512], [1024, 512]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        # Fail fast on unknown variants; the original fell through all elifs
        # and crashed later with a NameError on fc_input.
        if model_type not in self._VARIANTS:
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,)
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, hidden = self._VARIANTS[model_type]
        # Conv stages: Conv -> BN -> ReLU -> MaxPool(2) per stage. Built in a
        # loop so all variants share one code path; module indices match the
        # original hand-written Sequential (checkpoint compatible).
        layers = []
        in_ch = 3
        for out_ch in channels:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        # One CBAM block after every conv stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels]
        )
        # Spatial side after len(channels) halvings of a 224x224 input.
        spatial = 224 // (2 ** len(channels))
        fc_input = channels[-1] * spatial * spatial
        # Edge-detection branch: two conv+pool steps on a 1-channel map.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)  # 224 -> 112 -> 56
        # Main classifier head. The output width now honours num_classes;
        # it was hard-coded to 6 despite the constructor parameter.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are available (concat at hidden[0]).
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; on any failure in the
                edge branch the model falls back to the image-only head.
        """
        # Run each conv stage (4 modules) followed by its CBAM block. The
        # loop covers both the 3-stage ('f'/'c') and 4-stage ('q') variants
        # and guarantees attention_map is always bound.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, returned for inspection
        x = x.view(x.size(0), -1)
        # classifier[0:3] = Linear + ReLU + Dropout -> intermediate features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort: a malformed edge input (e.g. wrong spatial
                # size) falls back to the image-only head instead of raising.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout mirrors the released checkpoint.

    ``model_type`` selects backbone capacity:
      'f' -- 3 stages (16/32/64 ch),   head 256 -> 128 -> num_classes
      'c' -- 3 stages (32/64/128 ch),  head 512 -> 256 -> num_classes
      'q' -- 4 stages (64..512 ch),    head 1024 -> 512 -> num_classes
    Fully-connected input sizes assume 224x224 RGB input (each stage halves
    H and W); the edge branch likewise assumes a 224x224 single-channel map.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages; a CBAM
        # module follows every stage (wired up in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge detection branch (grayscale input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 max-pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); fuses edge_x features when given."""
        if self.model_type == 'f' or self.model_type == 'c':
            # Each stage is 4 sequential layers; CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear -> ReLU -> Dropout) produces the feature
        # vector that the edge branch is concatenated with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head when the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout mirrors the released checkpoint.

    ``model_type`` selects backbone capacity:
      'f' -- 3 stages (16/32/64 ch),   head 256 -> 128 -> num_classes
      'c' -- 3 stages (32/64/128 ch),  head 512 -> 256 -> num_classes
      'q' -- 4 stages (64..512 ch),    head 1024 -> 512 -> num_classes
    Fully-connected input sizes assume 224x224 RGB input (each stage halves
    H and W); the edge branch likewise assumes a 224x224 single-channel map.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages; a CBAM
        # module follows every stage (wired up in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge detection branch (grayscale input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 max-pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); fuses edge_x features when given."""
        if self.model_type == 'f' or self.model_type == 'c':
            # Each stage is 4 sequential layers; CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear -> ReLU -> Dropout) produces the feature
        # vector that the edge branch is concatenated with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head when the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout mirrors the released checkpoint.

    ``model_type`` selects backbone capacity:
      'f' -- 3 stages (16/32/64 ch),   head 256 -> 128 -> num_classes
      'c' -- 3 stages (32/64/128 ch),  head 512 -> 256 -> num_classes
      'q' -- 4 stages (64..512 ch),    head 1024 -> 512 -> num_classes
    Fully-connected input sizes assume 224x224 RGB input (each stage halves
    H and W); the edge branch likewise assumes a 224x224 single-channel map.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages; a CBAM
        # module follows every stage (wired up in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge detection branch (grayscale input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 max-pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); fuses edge_x features when given."""
        if self.model_type == 'f' or self.model_type == 'c':
            # Each stage is 4 sequential layers; CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear -> ReLU -> Dropout) produces the feature
        # vector that the edge branch is concatenated with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head when the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout mirrors the released checkpoint.

    ``model_type`` selects backbone capacity:
      'f' -- 3 stages (16/32/64 ch),   head 256 -> 128 -> num_classes
      'c' -- 3 stages (32/64/128 ch),  head 512 -> 256 -> num_classes
      'q' -- 4 stages (64..512 ch),    head 1024 -> 512 -> num_classes
    Fully-connected input sizes assume 224x224 RGB input (each stage halves
    H and W); the edge branch likewise assumes a 224x224 single-channel map.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages; a CBAM
        # module follows every stage (wired up in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge detection branch (grayscale input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 max-pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); fuses edge_x features when given."""
        if self.model_type == 'f' or self.model_type == 'c':
            # Each stage is 4 sequential layers; CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear -> ReLU -> Dropout) produces the feature
        # vector that the edge branch is concatenated with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head when the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout mirrors the released checkpoint.

    ``model_type`` selects backbone capacity:
      'f' -- 3 stages (16/32/64 ch),   head 256 -> 128 -> num_classes
      'c' -- 3 stages (32/64/128 ch),  head 512 -> 256 -> num_classes
      'q' -- 4 stages (64..512 ch),    head 1024 -> 512 -> num_classes
    Fully-connected input sizes assume 224x224 RGB input (each stage halves
    H and W); the edge branch likewise assumes a 224x224 single-channel map.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages; a CBAM
        # module follows every stage (wired up in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge detection branch (grayscale input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 max-pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); fuses edge_x features when given."""
        if self.model_type == 'f' or self.model_type == 'c':
            # Each stage is 4 sequential layers; CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear -> ReLU -> Dropout) produces the feature
        # vector that the edge branch is concatenated with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head when the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier whose layer layout mirrors the released checkpoint.

    ``model_type`` selects backbone capacity:
      'f' -- 3 stages (16/32/64 ch),   head 256 -> 128 -> num_classes
      'c' -- 3 stages (32/64/128 ch),  head 512 -> 256 -> num_classes
      'q' -- 4 stages (64..512 ch),    head 1024 -> 512 -> num_classes
    Fully-connected input sizes assume 224x224 RGB input (each stage halves
    H and W); the edge branch likewise assumes a 224x224 single-channel map.
    """

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        self.model_type = model_type
        self.num_classes = num_classes
        # Backbone: repeated Conv -> BN -> ReLU -> MaxPool stages; a CBAM
        # module follows every stage (wired up in forward()).
        if model_type == 'f':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 16, 3, padding=1),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(16, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(16, reduction=8),
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
            ])
            fc_input = 64 * 28 * 28  # 224 / 2**3 = 28
            fc_sizes = [256, 128, num_classes]
        elif model_type == 'c':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 32, 3, padding=1),
                nn.BatchNorm2d(32),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(32, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(32, reduction=8),
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
            ])
            fc_input = 128 * 28 * 28
            fc_sizes = [512, 256, num_classes]
        elif model_type == 'q':
            self.conv_layers = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(256, 512, 3, padding=1),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            self.attention_modules = nn.ModuleList([
                CBAMAttentionCheckpoint(64, reduction=8),
                CBAMAttentionCheckpoint(128, reduction=8),
                CBAMAttentionCheckpoint(256, reduction=8),
                CBAMAttentionCheckpoint(512, reduction=8),
            ])
            fc_input = 512 * 14 * 14  # 224 / 2**4 = 14
            fc_sizes = [1024, 512, num_classes]
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError(
                "model_type must be 'f', 'c' or 'q', got %r" % (model_type,))
        # Edge detection branch (grayscale input, two conv+pool steps).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        # 224 -> 112 -> 56 after two 2x2 max-pools.
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map); fuses edge_x features when given."""
        if self.model_type == 'f' or self.model_type == 'c':
            # Each stage is 4 sequential layers; CBAM after every stage.
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            attention_map = x
        elif self.model_type == 'q':
            x = self.conv_layers[0:4](x)
            x = self.attention_modules[0](x)
            x = self.conv_layers[4:8](x)
            x = self.attention_modules[1](x)
            x = self.conv_layers[8:12](x)
            x = self.attention_modules[2](x)
            x = self.conv_layers[12:16](x)
            x = self.attention_modules[3](x)
            attention_map = x
        x = x.view(x.size(0), -1)
        # First head stage (Linear -> ReLU -> Dropout) produces the feature
        # vector that the edge branch is concatenated with.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Best-effort fusion: fall back to the plain head when the
                # edge branch fails (e.g. unexpected edge input size).
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint.

    Both sub-networks use bias-free convolutions so parameter names and
    shapes line up with the serialized checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: 1x1 convs play the role of CBAM's shared MLP.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP on global avg- and max-pooled stats.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 2-channel map of per-pixel mean/max over channels.
        spatial_in = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    ``model_type`` selects the backbone variant:
      'f' -- 3 stages, 16/32/64 channels
      'c' -- 3 stages, 32/64/128 channels
      'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes (28x28 / 14x14 after the pooling stages)
    imply a 224x224 RGB input.

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the last CBAM-attended feature map. An optional single-channel edge
    map may be fused through a small auxiliary branch.
    """

    # Per-variant config: (conv channel progression, flattened feature size,
    # (hidden1, hidden2) widths of the classifier head).
    _CONFIGS = {
        'f': ((3, 16, 32, 64), 64 * 28 * 28, (256, 128)),
        'c': ((3, 32, 64, 128), 128 * 28 * 28, (512, 256)),
        'q': ((3, 64, 128, 256, 512), 512 * 14 * 14, (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q' (the
                original silently produced a broken, half-built instance).
        """
        super().__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, (hidden1, hidden2) = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+MaxPool stage per channel step; a flat
        # nn.Sequential keeps the 0..N layer indices used by the checkpoint.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch; edge_fc expects a 224x224 edge map pooled
        # twice (-> 64 x 56 x 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head. The final width now honors num_classes
        # (the original hard-coded 6 and ignored the argument).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden2, num_classes),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden1 + 128, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run backbone and classifier.

        Args:
            x: image batch, expected (B, 3, 224, 224).
            edge_x: optional edge-map batch, expected (B, 1, 224, 224).

        Returns:
            (logits, attention_map) tuple.
        """
        # Interleave backbone stages (4 layers each) with CBAM attention;
        # the loop covers both the 3-stage (f/c) and 4-stage (q) variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization

        x = x.view(x.size(0), -1)
        features = self.classifier[:3](x)  # Linear -> ReLU -> Dropout

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fusion: fall back to the main head when the
                # edge map has an unexpected spatial size for edge_fc.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint.

    Both sub-networks use bias-free convolutions so parameter names and
    shapes line up with the serialized checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: 1x1 convs play the role of CBAM's shared MLP.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP on global avg- and max-pooled stats.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 2-channel map of per-pixel mean/max over channels.
        spatial_in = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    ``model_type`` selects the backbone variant:
      'f' -- 3 stages, 16/32/64 channels
      'c' -- 3 stages, 32/64/128 channels
      'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes (28x28 / 14x14 after the pooling stages)
    imply a 224x224 RGB input.

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the last CBAM-attended feature map. An optional single-channel edge
    map may be fused through a small auxiliary branch.
    """

    # Per-variant config: (conv channel progression, flattened feature size,
    # (hidden1, hidden2) widths of the classifier head).
    _CONFIGS = {
        'f': ((3, 16, 32, 64), 64 * 28 * 28, (256, 128)),
        'c': ((3, 32, 64, 128), 128 * 28 * 28, (512, 256)),
        'q': ((3, 64, 128, 256, 512), 512 * 14 * 14, (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q' (the
                original silently produced a broken, half-built instance).
        """
        super().__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, (hidden1, hidden2) = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+MaxPool stage per channel step; a flat
        # nn.Sequential keeps the 0..N layer indices used by the checkpoint.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch; edge_fc expects a 224x224 edge map pooled
        # twice (-> 64 x 56 x 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head. The final width now honors num_classes
        # (the original hard-coded 6 and ignored the argument).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden2, num_classes),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden1 + 128, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run backbone and classifier.

        Args:
            x: image batch, expected (B, 3, 224, 224).
            edge_x: optional edge-map batch, expected (B, 1, 224, 224).

        Returns:
            (logits, attention_map) tuple.
        """
        # Interleave backbone stages (4 layers each) with CBAM attention;
        # the loop covers both the 3-stage (f/c) and 4-stage (q) variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization

        x = x.view(x.size(0), -1)
        features = self.classifier[:3](x)  # Linear -> ReLU -> Dropout

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fusion: fall back to the main head when the
                # edge map has an unexpected spatial size for edge_fc.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint.

    Both sub-networks use bias-free convolutions so parameter names and
    shapes line up with the serialized checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: 1x1 convs play the role of CBAM's shared MLP.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP on global avg- and max-pooled stats.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 2-channel map of per-pixel mean/max over channels.
        spatial_in = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    ``model_type`` selects the backbone variant:
      'f' -- 3 stages, 16/32/64 channels
      'c' -- 3 stages, 32/64/128 channels
      'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes (28x28 / 14x14 after the pooling stages)
    imply a 224x224 RGB input.

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the last CBAM-attended feature map. An optional single-channel edge
    map may be fused through a small auxiliary branch.
    """

    # Per-variant config: (conv channel progression, flattened feature size,
    # (hidden1, hidden2) widths of the classifier head).
    _CONFIGS = {
        'f': ((3, 16, 32, 64), 64 * 28 * 28, (256, 128)),
        'c': ((3, 32, 64, 128), 128 * 28 * 28, (512, 256)),
        'q': ((3, 64, 128, 256, 512), 512 * 14 * 14, (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q' (the
                original silently produced a broken, half-built instance).
        """
        super().__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, (hidden1, hidden2) = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+MaxPool stage per channel step; a flat
        # nn.Sequential keeps the 0..N layer indices used by the checkpoint.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch; edge_fc expects a 224x224 edge map pooled
        # twice (-> 64 x 56 x 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head. The final width now honors num_classes
        # (the original hard-coded 6 and ignored the argument).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden2, num_classes),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden1 + 128, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run backbone and classifier.

        Args:
            x: image batch, expected (B, 3, 224, 224).
            edge_x: optional edge-map batch, expected (B, 1, 224, 224).

        Returns:
            (logits, attention_map) tuple.
        """
        # Interleave backbone stages (4 layers each) with CBAM attention;
        # the loop covers both the 3-stage (f/c) and 4-stage (q) variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization

        x = x.view(x.size(0), -1)
        features = self.classifier[:3](x)  # Linear -> ReLU -> Dropout

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fusion: fall back to the main head when the
                # edge map has an unexpected spatial size for edge_fc.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint.

    Both sub-networks use bias-free convolutions so parameter names and
    shapes line up with the serialized checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: 1x1 convs play the role of CBAM's shared MLP.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP on global avg- and max-pooled stats.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 2-channel map of per-pixel mean/max over channels.
        spatial_in = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    ``model_type`` selects the backbone variant:
      'f' -- 3 stages, 16/32/64 channels
      'c' -- 3 stages, 32/64/128 channels
      'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes (28x28 / 14x14 after the pooling stages)
    imply a 224x224 RGB input.

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the last CBAM-attended feature map. An optional single-channel edge
    map may be fused through a small auxiliary branch.
    """

    # Per-variant config: (conv channel progression, flattened feature size,
    # (hidden1, hidden2) widths of the classifier head).
    _CONFIGS = {
        'f': ((3, 16, 32, 64), 64 * 28 * 28, (256, 128)),
        'c': ((3, 32, 64, 128), 128 * 28 * 28, (512, 256)),
        'q': ((3, 64, 128, 256, 512), 512 * 14 * 14, (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q' (the
                original silently produced a broken, half-built instance).
        """
        super().__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, (hidden1, hidden2) = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+MaxPool stage per channel step; a flat
        # nn.Sequential keeps the 0..N layer indices used by the checkpoint.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch; edge_fc expects a 224x224 edge map pooled
        # twice (-> 64 x 56 x 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head. The final width now honors num_classes
        # (the original hard-coded 6 and ignored the argument).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden2, num_classes),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden1 + 128, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run backbone and classifier.

        Args:
            x: image batch, expected (B, 3, 224, 224).
            edge_x: optional edge-map batch, expected (B, 1, 224, 224).

        Returns:
            (logits, attention_map) tuple.
        """
        # Interleave backbone stages (4 layers each) with CBAM attention;
        # the loop covers both the 3-stage (f/c) and 4-stage (q) variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization

        x = x.view(x.size(0), -1)
        features = self.classifier[:3](x)  # Linear -> ReLU -> Dropout

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fusion: fall back to the main head when the
                # edge map has an unexpected spatial size for edge_fc.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint.

    Both sub-networks use bias-free convolutions so parameter names and
    shapes line up with the serialized checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: 1x1 convs play the role of CBAM's shared MLP.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP on global avg- and max-pooled stats.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 2-channel map of per-pixel mean/max over channels.
        spatial_in = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    ``model_type`` selects the backbone variant:
      'f' -- 3 stages, 16/32/64 channels
      'c' -- 3 stages, 32/64/128 channels
      'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes (28x28 / 14x14 after the pooling stages)
    imply a 224x224 RGB input.

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the last CBAM-attended feature map. An optional single-channel edge
    map may be fused through a small auxiliary branch.
    """

    # Per-variant config: (conv channel progression, flattened feature size,
    # (hidden1, hidden2) widths of the classifier head).
    _CONFIGS = {
        'f': ((3, 16, 32, 64), 64 * 28 * 28, (256, 128)),
        'c': ((3, 32, 64, 128), 128 * 28 * 28, (512, 256)),
        'q': ((3, 64, 128, 256, 512), 512 * 14 * 14, (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q' (the
                original silently produced a broken, half-built instance).
        """
        super().__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, (hidden1, hidden2) = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+MaxPool stage per channel step; a flat
        # nn.Sequential keeps the 0..N layer indices used by the checkpoint.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch; edge_fc expects a 224x224 edge map pooled
        # twice (-> 64 x 56 x 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head. The final width now honors num_classes
        # (the original hard-coded 6 and ignored the argument).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden2, num_classes),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden1 + 128, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run backbone and classifier.

        Args:
            x: image batch, expected (B, 3, 224, 224).
            edge_x: optional edge-map batch, expected (B, 1, 224, 224).

        Returns:
            (logits, attention_map) tuple.
        """
        # Interleave backbone stages (4 layers each) with CBAM attention;
        # the loop covers both the 3-stage (f/c) and 4-stage (q) variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization

        x = x.view(x.size(0), -1)
        features = self.classifier[:3](x)  # Linear -> ReLU -> Dropout

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fusion: fall back to the main head when the
                # edge map has an unexpected spatial size for edge_fc.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F


class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention (channel then spatial) matching the checkpoint.

    Both sub-networks use bias-free convolutions so parameter names and
    shapes line up with the serialized checkpoint weights.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        # Channel attention: 1x1 convs play the role of CBAM's shared MLP.
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over the [avg, max] channel maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: shared MLP on global avg- and max-pooled stats.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial attention: 2-channel map of per-pixel mean/max over channels.
        spatial_in = torch.cat(
            [x.mean(dim=1, keepdim=True), x.max(dim=1, keepdim=True).values],
            dim=1,
        )
        return x * torch.sigmoid(self.spatial_attention(spatial_in))


class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA-2.4 checkpoint layout.

    ``model_type`` selects the backbone variant:
      'f' -- 3 stages, 16/32/64 channels
      'c' -- 3 stages, 32/64/128 channels
      'q' -- 4 stages, 64/128/256/512 channels
    The flattened feature sizes (28x28 / 14x14 after the pooling stages)
    imply a 224x224 RGB input.

    ``forward`` returns ``(logits, attention_map)`` where ``attention_map``
    is the last CBAM-attended feature map. An optional single-channel edge
    map may be fused through a small auxiliary branch.
    """

    # Per-variant config: (conv channel progression, flattened feature size,
    # (hidden1, hidden2) widths of the classifier head).
    _CONFIGS = {
        'f': ((3, 16, 32, 64), 64 * 28 * 28, (256, 128)),
        'c': ((3, 32, 64, 128), 128 * 28 * 28, (512, 256)),
        'q': ((3, 64, 128, 256, 512), 512 * 14 * 14, (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        """Build the requested variant.

        Raises:
            ValueError: if ``model_type`` is not 'f', 'c' or 'q' (the
                original silently produced a broken, half-built instance).
        """
        super().__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                f"unknown model_type {model_type!r}; expected 'f', 'c' or 'q'"
            )
        self.model_type = model_type
        self.num_classes = num_classes
        channels, fc_input, (hidden1, hidden2) = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+MaxPool stage per channel step; a flat
        # nn.Sequential keeps the 0..N layer indices used by the checkpoint.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)
        # One CBAM block after each backbone stage.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]
        )

        # Edge-detection branch; edge_fc expects a 224x224 edge map pooled
        # twice (-> 64 x 56 x 56).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head. The final width now honors num_classes
        # (the original hard-coded 6 and ignored the argument).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden2, num_classes),
        )
        # Combined head used when edge features are available.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden1 + 128, hidden1),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden1, num_classes),
        )

    def forward(self, x, edge_x=None):
        """Run backbone and classifier.

        Args:
            x: image batch, expected (B, 3, 224, 224).
            edge_x: optional edge-map batch, expected (B, 1, 224, 224).

        Returns:
            (logits, attention_map) tuple.
        """
        # Interleave backbone stages (4 layers each) with CBAM attention;
        # the loop covers both the 3-stage (f/c) and 4-stage (q) variants.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization

        x = x.view(x.size(0), -1)
        features = self.classifier[:3](x)  # Linear -> ReLU -> Dropout

        if edge_x is not None:
            try:
                edge_x = F.max_pool2d(F.relu(self.edge_conv1(edge_x)), 2, 2)
                edge_x = F.max_pool2d(F.relu(self.edge_conv2(edge_x)), 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Best-effort fusion: fall back to the main head when the
                # edge map has an unexpected spatial size for edge_fc.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
def __init__(self, channels, reduction=8):
super(CBAMAttentionCheckpoint, self).__init__()
# Channel attention (using Conv2d 1x1 as in checkpoint, NO BIAS)
reduced_channels = max(channels // reduction, 1)
self.channel_attention = nn.Sequential(
nn.Conv2d(channels, reduced_channels, 1, bias=False),
nn.ReLU(),
nn.Conv2d(reduced_channels, channels, 1, bias=False),
)
# Spatial attention (7x7 conv as in checkpoint, NO BIAS)
self.spatial_attention = nn.Sequential(
nn.Conv2d(2, 1, 7, padding=3, bias=False),
)
def forward(self, x):
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.channel_attention(avg_pool)
max_out = self.channel_attention(max_pool)
channel_att = torch.sigmoid(avg_out + max_out)
x = x * channel_att
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
spatial_in = torch.cat([avg_out, max_out], dim=1)
spatial_att = torch.sigmoid(self.spatial_attention(spatial_in))
x = x * spatial_att
return x
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Backbone + classifier forward pass.

    Args:
        x: image batch, expected (N, 3, 224, 224) given the FC sizes
           built in __init__ — TODO confirm against callers.
        edge_x: optional 1-channel edge-map batch; fused when given.

    Returns:
        (logits, attention_map) tuple.

    Raises:
        ValueError: if self.model_type is not 'f', 'c', or 'q'.
    """
    if self.model_type in ('f', 'c'):
        stages = 3
    elif self.model_type == 'q':
        stages = 4
    else:
        # Previously this fell through with attention_map unbound,
        # raising NameError at the return; fail loudly instead.
        raise ValueError(f"unknown model_type: {self.model_type!r}")

    # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) followed
    # by its CBAM attention module.
    for i in range(stages):
        x = self.conv_layers[4 * i:4 * (i + 1)](x)
        x = self.attention_modules[i](x)
    attention_map = x

    x = x.view(x.size(0), -1)
    # First FC stage (Linear+ReLU+Dropout) produces the fusion features.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)

    if edge_x is not None:
        try:
            edge = F.relu(self.edge_conv1(edge_x))
            edge = F.max_pool2d(edge, 2, 2)
            edge = F.relu(self.edge_conv2(edge))
            edge = F.max_pool2d(edge, 2, 2)
            edge_features = self.edge_fc(edge.view(edge.size(0), -1))
            combined = torch.cat([features, edge_features], dim=1)
            return self.combined_classifier(combined), attention_map
        except RuntimeError:
            # Edge branch failed (e.g. unexpected edge-map size breaking
            # the edge_fc shape); keep the original best-effort fallback
            # to the image-only head instead of swallowing everything.
            pass
    return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate, then spatial gate), checkpoint-compatible.

    Both branches use bias-free convolutions so parameter names and shapes
    match the released checkpoint exactly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convs (checkpoint layout, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: 7x7 conv over channel-wise mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    The variants share the Conv+BN+ReLU+Pool stage pattern, each stage
    followed by CBAM attention, and differ only in width/depth. An optional
    1-channel edge-map branch can be fused before classification.

    Args:
        model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch),
            or 'q' (4 stages, 64-512 ch).
        num_classes: output class count (the checkpoint uses 6).

    Raises:
        ValueError: for an unrecognized model_type (previously this fell
            through and crashed later with NameError on fc_input).
    """

    # Per-variant configuration: (stage widths, hidden FC sizes).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128)),
        'c': ((32, 64, 128), (512, 256)),
        'q': ((64, 128, 256, 512), (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        self.num_classes = num_classes

        widths, hidden = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+Pool group per stage width, built so
        # module indices/names match the original Sequential exactly.
        layers = []
        in_ch = 3
        for out_ch in widths:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )

        # Assumes 224x224 input: each MaxPool halves the spatial size
        # (matches the original hard-coded 28/14 feature sizes).
        spatial = 224 // (2 ** len(widths))
        fc_input = widths[-1] * spatial * spatial

        # Edge branch: two conv+pool stages on a 1-channel edge map
        # (224 -> 112 -> 56 across the two pools applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head; num_classes generalizes the previously hard-coded 6
        # (backward-compatible: default stays 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are fused with the first FC output.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for an image batch.

        Args:
            x: image batch, expected (N, 3, 224, 224).
            edge_x: optional edge-map batch (N, 1, 224, 224); on a shape
                mismatch the edge branch is skipped (best-effort fusion).
        """
        # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) + CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First FC stage (Linear+ReLU+Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Edge branch failed (e.g. unexpected edge-map size breaking
                # edge_fc); fall back to the image-only head, as the original
                # best-effort behavior intended, without hiding other errors.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate, then spatial gate), checkpoint-compatible.

    Both branches use bias-free convolutions so parameter names and shapes
    match the released checkpoint exactly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convs (checkpoint layout, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: 7x7 conv over channel-wise mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    The variants share the Conv+BN+ReLU+Pool stage pattern, each stage
    followed by CBAM attention, and differ only in width/depth. An optional
    1-channel edge-map branch can be fused before classification.

    Args:
        model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch),
            or 'q' (4 stages, 64-512 ch).
        num_classes: output class count (the checkpoint uses 6).

    Raises:
        ValueError: for an unrecognized model_type (previously this fell
            through and crashed later with NameError on fc_input).
    """

    # Per-variant configuration: (stage widths, hidden FC sizes).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128)),
        'c': ((32, 64, 128), (512, 256)),
        'q': ((64, 128, 256, 512), (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        self.num_classes = num_classes

        widths, hidden = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+Pool group per stage width, built so
        # module indices/names match the original Sequential exactly.
        layers = []
        in_ch = 3
        for out_ch in widths:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )

        # Assumes 224x224 input: each MaxPool halves the spatial size
        # (matches the original hard-coded 28/14 feature sizes).
        spatial = 224 // (2 ** len(widths))
        fc_input = widths[-1] * spatial * spatial

        # Edge branch: two conv+pool stages on a 1-channel edge map
        # (224 -> 112 -> 56 across the two pools applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head; num_classes generalizes the previously hard-coded 6
        # (backward-compatible: default stays 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are fused with the first FC output.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for an image batch.

        Args:
            x: image batch, expected (N, 3, 224, 224).
            edge_x: optional edge-map batch (N, 1, 224, 224); on a shape
                mismatch the edge branch is skipped (best-effort fusion).
        """
        # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) + CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First FC stage (Linear+ReLU+Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Edge branch failed (e.g. unexpected edge-map size breaking
                # edge_fc); fall back to the image-only head, as the original
                # best-effort behavior intended, without hiding other errors.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate, then spatial gate), checkpoint-compatible.

    Both branches use bias-free convolutions so parameter names and shapes
    match the released checkpoint exactly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convs (checkpoint layout, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: 7x7 conv over channel-wise mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    The variants share the Conv+BN+ReLU+Pool stage pattern, each stage
    followed by CBAM attention, and differ only in width/depth. An optional
    1-channel edge-map branch can be fused before classification.

    Args:
        model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch),
            or 'q' (4 stages, 64-512 ch).
        num_classes: output class count (the checkpoint uses 6).

    Raises:
        ValueError: for an unrecognized model_type (previously this fell
            through and crashed later with NameError on fc_input).
    """

    # Per-variant configuration: (stage widths, hidden FC sizes).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128)),
        'c': ((32, 64, 128), (512, 256)),
        'q': ((64, 128, 256, 512), (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        self.num_classes = num_classes

        widths, hidden = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+Pool group per stage width, built so
        # module indices/names match the original Sequential exactly.
        layers = []
        in_ch = 3
        for out_ch in widths:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )

        # Assumes 224x224 input: each MaxPool halves the spatial size
        # (matches the original hard-coded 28/14 feature sizes).
        spatial = 224 // (2 ** len(widths))
        fc_input = widths[-1] * spatial * spatial

        # Edge branch: two conv+pool stages on a 1-channel edge map
        # (224 -> 112 -> 56 across the two pools applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head; num_classes generalizes the previously hard-coded 6
        # (backward-compatible: default stays 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are fused with the first FC output.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for an image batch.

        Args:
            x: image batch, expected (N, 3, 224, 224).
            edge_x: optional edge-map batch (N, 1, 224, 224); on a shape
                mismatch the edge branch is skipped (best-effort fusion).
        """
        # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) + CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First FC stage (Linear+ReLU+Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Edge branch failed (e.g. unexpected edge-map size breaking
                # edge_fc); fall back to the image-only head, as the original
                # best-effort behavior intended, without hiding other errors.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate, then spatial gate), checkpoint-compatible.

    Both branches use bias-free convolutions so parameter names and shapes
    match the released checkpoint exactly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convs (checkpoint layout, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: 7x7 conv over channel-wise mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    The variants share the Conv+BN+ReLU+Pool stage pattern, each stage
    followed by CBAM attention, and differ only in width/depth. An optional
    1-channel edge-map branch can be fused before classification.

    Args:
        model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch),
            or 'q' (4 stages, 64-512 ch).
        num_classes: output class count (the checkpoint uses 6).

    Raises:
        ValueError: for an unrecognized model_type (previously this fell
            through and crashed later with NameError on fc_input).
    """

    # Per-variant configuration: (stage widths, hidden FC sizes).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128)),
        'c': ((32, 64, 128), (512, 256)),
        'q': ((64, 128, 256, 512), (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        self.num_classes = num_classes

        widths, hidden = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+Pool group per stage width, built so
        # module indices/names match the original Sequential exactly.
        layers = []
        in_ch = 3
        for out_ch in widths:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )

        # Assumes 224x224 input: each MaxPool halves the spatial size
        # (matches the original hard-coded 28/14 feature sizes).
        spatial = 224 // (2 ** len(widths))
        fc_input = widths[-1] * spatial * spatial

        # Edge branch: two conv+pool stages on a 1-channel edge map
        # (224 -> 112 -> 56 across the two pools applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head; num_classes generalizes the previously hard-coded 6
        # (backward-compatible: default stays 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are fused with the first FC output.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for an image batch.

        Args:
            x: image batch, expected (N, 3, 224, 224).
            edge_x: optional edge-map batch (N, 1, 224, 224); on a shape
                mismatch the edge branch is skipped (best-effort fusion).
        """
        # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) + CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First FC stage (Linear+ReLU+Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Edge branch failed (e.g. unexpected edge-map size breaking
                # edge_fc); fall back to the image-only head, as the original
                # best-effort behavior intended, without hiding other errors.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate, then spatial gate), checkpoint-compatible.

    Both branches use bias-free convolutions so parameter names and shapes
    match the released checkpoint exactly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convs (checkpoint layout, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: 7x7 conv over channel-wise mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    The variants share the Conv+BN+ReLU+Pool stage pattern, each stage
    followed by CBAM attention, and differ only in width/depth. An optional
    1-channel edge-map branch can be fused before classification.

    Args:
        model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch),
            or 'q' (4 stages, 64-512 ch).
        num_classes: output class count (the checkpoint uses 6).

    Raises:
        ValueError: for an unrecognized model_type (previously this fell
            through and crashed later with NameError on fc_input).
    """

    # Per-variant configuration: (stage widths, hidden FC sizes).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128)),
        'c': ((32, 64, 128), (512, 256)),
        'q': ((64, 128, 256, 512), (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        self.num_classes = num_classes

        widths, hidden = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+Pool group per stage width, built so
        # module indices/names match the original Sequential exactly.
        layers = []
        in_ch = 3
        for out_ch in widths:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )

        # Assumes 224x224 input: each MaxPool halves the spatial size
        # (matches the original hard-coded 28/14 feature sizes).
        spatial = 224 // (2 ** len(widths))
        fc_input = widths[-1] * spatial * spatial

        # Edge branch: two conv+pool stages on a 1-channel edge map
        # (224 -> 112 -> 56 across the two pools applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head; num_classes generalizes the previously hard-coded 6
        # (backward-compatible: default stays 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are fused with the first FC output.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for an image batch.

        Args:
            x: image batch, expected (N, 3, 224, 224).
            edge_x: optional edge-map batch (N, 1, 224, 224); on a shape
                mismatch the edge branch is skipped (best-effort fusion).
        """
        # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) + CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First FC stage (Linear+ReLU+Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Edge branch failed (e.g. unexpected edge-map size breaking
                # edge_fc); fall back to the image-only head, as the original
                # best-effort behavior intended, without hiding other errors.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM attention (channel gate, then spatial gate), checkpoint-compatible.

    Both branches use bias-free convolutions so parameter names and shapes
    match the released checkpoint exactly.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as 1x1 convs (checkpoint layout, NO BIAS).
        reduced_channels = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention: one 7x7 conv over [avg, max] maps (NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel gate: shared MLP over avg- and max-pooled descriptors.
        avg_out = self.channel_attention(F.adaptive_avg_pool2d(x, 1))
        max_out = self.channel_attention(F.adaptive_max_pool2d(x, 1))
        x = x * torch.sigmoid(avg_out + max_out)
        # Spatial gate: 7x7 conv over channel-wise mean/max maps.
        mean_map = torch.mean(x, dim=1, keepdim=True)
        max_map, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([mean_map, max_map], dim=1))
        )
        return x * spatial_att


class CheckpointVbaiDPA24(nn.Module):
    """Vbai-DPA 2.4 classifier, laid out to match the released checkpoint.

    The variants share the Conv+BN+ReLU+Pool stage pattern, each stage
    followed by CBAM attention, and differ only in width/depth. An optional
    1-channel edge-map branch can be fused before classification.

    Args:
        model_type: 'f' (3 stages, 16-64 ch), 'c' (3 stages, 32-128 ch),
            or 'q' (4 stages, 64-512 ch).
        num_classes: output class count (the checkpoint uses 6).

    Raises:
        ValueError: for an unrecognized model_type (previously this fell
            through and crashed later with NameError on fc_input).
    """

    # Per-variant configuration: (stage widths, hidden FC sizes).
    _CONFIGS = {
        'f': ((16, 32, 64), (256, 128)),
        'c': ((32, 64, 128), (512, 256)),
        'q': ((64, 128, 256, 512), (1024, 512)),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        self.num_classes = num_classes

        widths, hidden = self._CONFIGS[model_type]

        # Backbone: one Conv+BN+ReLU+Pool group per stage width, built so
        # module indices/names match the original Sequential exactly.
        layers = []
        in_ch = 3
        for out_ch in widths:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_ch = out_ch
        self.conv_layers = nn.Sequential(*layers)
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(w, reduction=8) for w in widths]
        )

        # Assumes 224x224 input: each MaxPool halves the spatial size
        # (matches the original hard-coded 28/14 feature sizes).
        spatial = 224 // (2 ** len(widths))
        fc_input = widths[-1] * spatial * spatial

        # Edge branch: two conv+pool stages on a 1-channel edge map
        # (224 -> 112 -> 56 across the two pools applied in forward()).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main head; num_classes generalizes the previously hard-coded 6
        # (backward-compatible: default stays 6).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], hidden[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[1], num_classes),
        )
        # Head used when edge features are fused with the first FC output.
        self.combined_classifier = nn.Sequential(
            nn.Linear(hidden[0] + 128, hidden[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden[0], num_classes),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map) for an image batch.

        Args:
            x: image batch, expected (N, 3, 224, 224).
            edge_x: optional edge-map batch (N, 1, 224, 224); on a shape
                mismatch the edge branch is skipped (best-effort fusion).
        """
        # Each stage is 4 sequential entries (Conv+BN+ReLU+Pool) + CBAM.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First FC stage (Linear+ReLU+Dropout) yields the fusion features.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            try:
                edge = F.relu(self.edge_conv1(edge_x))
                edge = F.max_pool2d(edge, 2, 2)
                edge = F.relu(self.edge_conv2(edge))
                edge = F.max_pool2d(edge, 2, 2)
                edge_features = self.edge_fc(edge.view(edge.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                return self.combined_classifier(combined), attention_map
            except RuntimeError:
                # Edge branch failed (e.g. unexpected edge-map size breaking
                # edge_fc); fall back to the image-only head, as the original
                # best-effort behavior intended, without hiding other errors.
                pass
        return self.classifier[3:](features), attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention: a channel gate followed by a spatial gate.

    Submodule names and shapes mirror the pretrained checkpoint exactly;
    both attention branches use bias-free convolutions.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Channel attention as a bottleneck of 1x1 convs (checkpoint layout).
        squeezed = max(channels // reduction, 1)
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, squeezed, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(squeezed, channels, 1, bias=False),
        )
        # Spatial attention: a single 7x7 conv over [avg, max] maps.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled vectors ---
        gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * gate
        # --- spatial gate: conv over channel-wise mean/max maps ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All three size variants share one structure: repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages, each followed by a CBAM
    attention module, then a three-Linear classifier head.  An optional
    edge branch (two convs + one Linear) can be fused with the main
    features through a separate combined classifier.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    """

    # Per-variant architecture: stage widths, flattened conv-output size,
    # classifier layer sizes.  The flattened sizes assume 224x224 input
    # (three pools -> 28x28, four pools -> 14x14) -- TODO confirm against
    # the preprocessing pipeline.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128, 6]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256, 6]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512, 6]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message instead of the NameError the
            # original if/elif fall-through produced.
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        # NOTE(review): the head size is fixed at 6 by _CONFIGS to match
        # the checkpoint; num_classes is stored but not used -- confirm.
        self.num_classes = num_classes
        channels, fc_input, fc_sizes = self._CONFIGS[model_type]
        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage,
        # built as a flat Sequential so parameter names match the checkpoint.
        blocks = []
        in_channels = 3
        for out_channels in channels:
            blocks += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM module per stage, applied after its pooling layer.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        # Edge detection branch; 64 * 56 * 56 assumes a 224x224
        # single-channel edge map halved by two pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: feature extractor ([:3]) + head ([3:]).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x       -- image batch for the conv backbone.
        edge_x  -- optional single-channel edge-map batch; when given,
                   edge features are fused via combined_classifier, with
                   a best-effort fallback to the main head on any error.
        """
        # Each stage is conv_layers[4*i : 4*(i+1)] followed by its CBAM
        # module; this single loop replaces the duplicated per-variant
        # branches (3 stages for 'f'/'c', 4 for 'q').
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization
        x = x.view(x.size(0), -1)
        # classifier[:3] (Linear -> ReLU -> Dropout) yields the feature
        # vector shared by both heads.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: any failure (e.g. an
                # unexpected edge-map size) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention followed by spatial
    attention), laid out to match the checkpoint's parameter names:
    a 1x1-conv channel MLP and a single 7x7 spatial conv, all bias-free.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; floored at 1 so
        # small channel counts still produce a valid layer.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: the MLP is shared between the global-average
        # and global-max descriptors; their sum is squashed into a gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # Spatial attention: per-location gate built from the channel-wise
        # mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All three size variants share one structure: repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages, each followed by a CBAM
    attention module, then a three-Linear classifier head.  An optional
    edge branch (two convs + one Linear) can be fused with the main
    features through a separate combined classifier.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    """

    # Per-variant architecture: stage widths, flattened conv-output size,
    # classifier layer sizes.  The flattened sizes assume 224x224 input
    # (three pools -> 28x28, four pools -> 14x14) -- TODO confirm against
    # the preprocessing pipeline.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128, 6]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256, 6]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512, 6]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message instead of the NameError the
            # original if/elif fall-through produced.
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        # NOTE(review): the head size is fixed at 6 by _CONFIGS to match
        # the checkpoint; num_classes is stored but not used -- confirm.
        self.num_classes = num_classes
        channels, fc_input, fc_sizes = self._CONFIGS[model_type]
        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage,
        # built as a flat Sequential so parameter names match the checkpoint.
        blocks = []
        in_channels = 3
        for out_channels in channels:
            blocks += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM module per stage, applied after its pooling layer.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        # Edge detection branch; 64 * 56 * 56 assumes a 224x224
        # single-channel edge map halved by two pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: feature extractor ([:3]) + head ([3:]).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x       -- image batch for the conv backbone.
        edge_x  -- optional single-channel edge-map batch; when given,
                   edge features are fused via combined_classifier, with
                   a best-effort fallback to the main head on any error.
        """
        # Each stage is conv_layers[4*i : 4*(i+1)] followed by its CBAM
        # module; this single loop replaces the duplicated per-variant
        # branches (3 stages for 'f'/'c', 4 for 'q').
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization
        x = x.view(x.size(0), -1)
        # classifier[:3] (Linear -> ReLU -> Dropout) yields the feature
        # vector shared by both heads.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: any failure (e.g. an
                # unexpected edge-map size) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention followed by spatial
    attention), laid out to match the checkpoint's parameter names:
    a 1x1-conv channel MLP and a single 7x7 spatial conv, all bias-free.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; floored at 1 so
        # small channel counts still produce a valid layer.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: the MLP is shared between the global-average
        # and global-max descriptors; their sum is squashed into a gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # Spatial attention: per-location gate built from the channel-wise
        # mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All three size variants share one structure: repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages, each followed by a CBAM
    attention module, then a three-Linear classifier head.  An optional
    edge branch (two convs + one Linear) can be fused with the main
    features through a separate combined classifier.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    """

    # Per-variant architecture: stage widths, flattened conv-output size,
    # classifier layer sizes.  The flattened sizes assume 224x224 input
    # (three pools -> 28x28, four pools -> 14x14) -- TODO confirm against
    # the preprocessing pipeline.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128, 6]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256, 6]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512, 6]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message instead of the NameError the
            # original if/elif fall-through produced.
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        # NOTE(review): the head size is fixed at 6 by _CONFIGS to match
        # the checkpoint; num_classes is stored but not used -- confirm.
        self.num_classes = num_classes
        channels, fc_input, fc_sizes = self._CONFIGS[model_type]
        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage,
        # built as a flat Sequential so parameter names match the checkpoint.
        blocks = []
        in_channels = 3
        for out_channels in channels:
            blocks += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM module per stage, applied after its pooling layer.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        # Edge detection branch; 64 * 56 * 56 assumes a 224x224
        # single-channel edge map halved by two pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: feature extractor ([:3]) + head ([3:]).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x       -- image batch for the conv backbone.
        edge_x  -- optional single-channel edge-map batch; when given,
                   edge features are fused via combined_classifier, with
                   a best-effort fallback to the main head on any error.
        """
        # Each stage is conv_layers[4*i : 4*(i+1)] followed by its CBAM
        # module; this single loop replaces the duplicated per-variant
        # branches (3 stages for 'f'/'c', 4 for 'q').
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization
        x = x.view(x.size(0), -1)
        # classifier[:3] (Linear -> ReLU -> Dropout) yields the feature
        # vector shared by both heads.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: any failure (e.g. an
                # unexpected edge-map size) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention followed by spatial
    attention), laid out to match the checkpoint's parameter names:
    a 1x1-conv channel MLP and a single 7x7 spatial conv, all bias-free.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; floored at 1 so
        # small channel counts still produce a valid layer.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: the MLP is shared between the global-average
        # and global-max descriptors; their sum is squashed into a gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # Spatial attention: per-location gate built from the channel-wise
        # mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All three size variants share one structure: repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages, each followed by a CBAM
    attention module, then a three-Linear classifier head.  An optional
    edge branch (two convs + one Linear) can be fused with the main
    features through a separate combined classifier.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    """

    # Per-variant architecture: stage widths, flattened conv-output size,
    # classifier layer sizes.  The flattened sizes assume 224x224 input
    # (three pools -> 28x28, four pools -> 14x14) -- TODO confirm against
    # the preprocessing pipeline.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128, 6]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256, 6]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512, 6]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message instead of the NameError the
            # original if/elif fall-through produced.
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        # NOTE(review): the head size is fixed at 6 by _CONFIGS to match
        # the checkpoint; num_classes is stored but not used -- confirm.
        self.num_classes = num_classes
        channels, fc_input, fc_sizes = self._CONFIGS[model_type]
        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage,
        # built as a flat Sequential so parameter names match the checkpoint.
        blocks = []
        in_channels = 3
        for out_channels in channels:
            blocks += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM module per stage, applied after its pooling layer.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        # Edge detection branch; 64 * 56 * 56 assumes a 224x224
        # single-channel edge map halved by two pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: feature extractor ([:3]) + head ([3:]).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x       -- image batch for the conv backbone.
        edge_x  -- optional single-channel edge-map batch; when given,
                   edge features are fused via combined_classifier, with
                   a best-effort fallback to the main head on any error.
        """
        # Each stage is conv_layers[4*i : 4*(i+1)] followed by its CBAM
        # module; this single loop replaces the duplicated per-variant
        # branches (3 stages for 'f'/'c', 4 for 'q').
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization
        x = x.view(x.size(0), -1)
        # classifier[:3] (Linear -> ReLU -> Dropout) yields the feature
        # vector shared by both heads.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: any failure (e.g. an
                # unexpected edge-map size) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention followed by spatial
    attention), laid out to match the checkpoint's parameter names:
    a 1x1-conv channel MLP and a single 7x7 spatial conv, all bias-free.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; floored at 1 so
        # small channel counts still produce a valid layer.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: the MLP is shared between the global-average
        # and global-max descriptors; their sum is squashed into a gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # Spatial attention: per-location gate built from the channel-wise
        # mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All three size variants share one structure: repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages, each followed by a CBAM
    attention module, then a three-Linear classifier head.  An optional
    edge branch (two convs + one Linear) can be fused with the main
    features through a separate combined classifier.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    """

    # Per-variant architecture: stage widths, flattened conv-output size,
    # classifier layer sizes.  The flattened sizes assume 224x224 input
    # (three pools -> 28x28, four pools -> 14x14) -- TODO confirm against
    # the preprocessing pipeline.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128, 6]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256, 6]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512, 6]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message instead of the NameError the
            # original if/elif fall-through produced.
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        # NOTE(review): the head size is fixed at 6 by _CONFIGS to match
        # the checkpoint; num_classes is stored but not used -- confirm.
        self.num_classes = num_classes
        channels, fc_input, fc_sizes = self._CONFIGS[model_type]
        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage,
        # built as a flat Sequential so parameter names match the checkpoint.
        blocks = []
        in_channels = 3
        for out_channels in channels:
            blocks += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM module per stage, applied after its pooling layer.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        # Edge detection branch; 64 * 56 * 56 assumes a 224x224
        # single-channel edge map halved by two pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: feature extractor ([:3]) + head ([3:]).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x       -- image batch for the conv backbone.
        edge_x  -- optional single-channel edge-map batch; when given,
                   edge features are fused via combined_classifier, with
                   a best-effort fallback to the main head on any error.
        """
        # Each stage is conv_layers[4*i : 4*(i+1)] followed by its CBAM
        # module; this single loop replaces the duplicated per-variant
        # branches (3 stages for 'f'/'c', 4 for 'q').
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization
        x = x.view(x.size(0), -1)
        # classifier[:3] (Linear -> ReLU -> Dropout) yields the feature
        # vector shared by both heads.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: any failure (e.g. an
                # unexpected edge-map size) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention followed by spatial
    attention), laid out to match the checkpoint's parameter names:
    a 1x1-conv channel MLP and a single 7x7 spatial conv, all bias-free.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; floored at 1 so
        # small channel counts still produce a valid layer.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: the MLP is shared between the global-average
        # and global-max descriptors; their sum is squashed into a gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # Spatial attention: per-location gate built from the channel-wise
        # mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    All three size variants share one structure: repeated
    Conv -> BatchNorm -> ReLU -> MaxPool stages, each followed by a CBAM
    attention module, then a three-Linear classifier head.  An optional
    edge branch (two convs + one Linear) can be fused with the main
    features through a separate combined classifier.

    model_type:
        'f' -- 3 stages, 16/32/64 channels
        'c' -- 3 stages, 32/64/128 channels
        'q' -- 4 stages, 64/128/256/512 channels
    """

    # Per-variant architecture: stage widths, flattened conv-output size,
    # classifier layer sizes.  The flattened sizes assume 224x224 input
    # (three pools -> 28x28, four pools -> 14x14) -- TODO confirm against
    # the preprocessing pipeline.
    _CONFIGS = {
        'f': ([16, 32, 64], 64 * 28 * 28, [256, 128, 6]),
        'c': ([32, 64, 128], 128 * 28 * 28, [512, 256, 6]),
        'q': ([64, 128, 256, 512], 512 * 14 * 14, [1024, 512, 6]),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message instead of the NameError the
            # original if/elif fall-through produced.
            raise ValueError(
                "model_type must be one of %s, got %r"
                % (sorted(self._CONFIGS), model_type)
            )
        self.model_type = model_type
        # NOTE(review): the head size is fixed at 6 by _CONFIGS to match
        # the checkpoint; num_classes is stored but not used -- confirm.
        self.num_classes = num_classes
        channels, fc_input, fc_sizes = self._CONFIGS[model_type]
        # Conv backbone: one Conv+BN+ReLU+Pool group (4 modules) per stage,
        # built as a flat Sequential so parameter names match the checkpoint.
        blocks = []
        in_channels = 3
        for out_channels in channels:
            blocks += [
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
            in_channels = out_channels
        self.conv_layers = nn.Sequential(*blocks)
        # One CBAM module per stage, applied after its pooling layer.
        self.attention_modules = nn.ModuleList(
            CBAMAttentionCheckpoint(ch, reduction=8) for ch in channels
        )
        # Edge detection branch; 64 * 56 * 56 assumes a 224x224
        # single-channel edge map halved by two pools -- TODO confirm.
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)
        # Main classifier: feature extractor ([:3]) + head ([3:]).
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Combined classifier used when edge features are fused in.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Return (logits, attention_map).

        x       -- image batch for the conv backbone.
        edge_x  -- optional single-channel edge-map batch; when given,
                   edge features are fused via combined_classifier, with
                   a best-effort fallback to the main head on any error.
        """
        # Each stage is conv_layers[4*i : 4*(i+1)] followed by its CBAM
        # module; this single loop replaces the duplicated per-variant
        # branches (3 stages for 'f'/'c', 4 for 'q').
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x  # last attended feature map, for visualization
        x = x.view(x.size(0), -1)
        # classifier[:3] (Linear -> ReLU -> Dropout) yields the feature
        # vector shared by both heads.
        features = self.classifier[:3](x)
        if edge_x is not None:
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = self.edge_fc(edge_x.view(edge_x.size(0), -1))
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                # Deliberate best-effort fusion: any failure (e.g. an
                # unexpected edge-map size) falls back to the main head.
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block (channel attention followed by spatial
    attention), laid out to match the checkpoint's parameter names:
    a 1x1-conv channel MLP and a single 7x7 spatial conv, all bias-free.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        # Bottleneck width of the channel-attention MLP; floored at 1 so
        # small channel counts still produce a valid layer.
        reduced_channels = max(channels // reduction, 1)
        # Channel attention (Conv2d 1x1 as in checkpoint, NO BIAS).
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, reduced_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(reduced_channels, channels, 1, bias=False),
        )
        # Spatial attention (7x7 conv as in checkpoint, NO BIAS).
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # Channel attention: the MLP is shared between the global-average
        # and global-max descriptors; their sum is squashed into a gate.
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)
        channel_att = torch.sigmoid(
            self.channel_attention(avg_pool) + self.channel_attention(max_pool)
        )
        x = x * channel_att
        # Spatial attention: per-location gate built from the channel-wise
        # mean and max maps.
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        spatial_att = torch.sigmoid(
            self.spatial_attention(torch.cat([avg_out, max_out], dim=1))
        )
        return x * spatial_att
class CheckpointVbaiDPA24(nn.Module):
def __init__(self, model_type='f', num_classes=6):
super(CheckpointVbaiDPA24, self).__init__()
self.model_type = model_type
self.num_classes = num_classes
# Conv layers: Sequential with Conv+BN+ReLU+Pool pattern
if model_type == 'f':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 16, 3, padding=1),
nn.BatchNorm2d(16),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(16, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(16, reduction=8),
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
])
fc_input = 64 * 28 * 28
fc_sizes = [256, 128, 6]
elif model_type == 'c':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 32, 3, padding=1),
nn.BatchNorm2d(32),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(32, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(32, reduction=8),
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
])
fc_input = 128 * 28 * 28
fc_sizes = [512, 256, 6]
elif model_type == 'q':
self.conv_layers = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(2, 2),
nn.Conv2d(256, 512, 3, padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(2, 2),
)
self.attention_modules = nn.ModuleList([
CBAMAttentionCheckpoint(64, reduction=8),
CBAMAttentionCheckpoint(128, reduction=8),
CBAMAttentionCheckpoint(256, reduction=8),
CBAMAttentionCheckpoint(512, reduction=8),
])
fc_input = 512 * 14 * 14
fc_sizes = [1024, 512, 6]
# Edge detection branch
self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
# Edge fc input size depends on pooling
self.edge_fc = nn.Linear(64 * 56 * 56, 128)
# Main classifier
self.classifier = nn.Sequential(
nn.Linear(fc_input, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[1]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[1], fc_sizes[2]),
)
# Combined classifier (with edge features)
self.combined_classifier = nn.Sequential(
nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(fc_sizes[0], fc_sizes[2]),
)
def forward(self, x, edge_x=None):
    """Run the network.

    Args:
        x: (B, 3, 224, 224) image batch (spatial size assumed from the
           fc layer sizes built in ``__init__`` — TODO confirm with caller).
        edge_x: optional (B, 1, H, W) edge map; when given and well-formed
           the combined (edge-aware) head is used instead of the plain one.

    Returns:
        Tuple ``(logits, attention_map)`` where ``attention_map`` is the
        feature map right after the last attention stage.
    """
    # Each stage is 4 consecutive conv_layers entries (Conv+BN+ReLU+Pool)
    # followed by its matching CBAM module.  'f'/'c' have 3 stages, 'q'
    # has 4 — iterating attention_modules covers both without the
    # previous copy-pasted per-variant branches.
    for i, attention in enumerate(self.attention_modules):
        x = self.conv_layers[4 * i:4 * (i + 1)](x)
        x = attention(x)
    attention_map = x

    x = x.view(x.size(0), -1)
    # First Linear+ReLU+Dropout of the main head produce the shared
    # feature vector consumed by either classifier head.
    x = self.classifier[0](x)
    x = self.classifier[1](x)
    features = self.classifier[2](x)

    if edge_x is not None:
        # Best-effort: if the edge branch fails (e.g. unexpected edge_x
        # size for edge_fc), fall back to the plain head rather than
        # crashing.  Deliberate silent fallback kept from the original.
        try:
            edge_x = F.relu(self.edge_conv1(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_x = F.relu(self.edge_conv2(edge_x))
            edge_x = F.max_pool2d(edge_x, 2, 2)
            edge_features = edge_x.view(edge_x.size(0), -1)
            edge_features = self.edge_fc(edge_features)
            combined = torch.cat([features, edge_features], dim=1)
            output = self.combined_classifier(combined)
        except Exception:
            output = self.classifier[3:](features)
    else:
        output = self.classifier[3:](features)
    return output, attention_map
import torch
import torch.nn as nn
import torch.nn.functional as F
class CBAMAttentionCheckpoint(nn.Module):
    """CBAM-style attention block matching the checkpoint's parameter layout.

    Channel gate: a shared, bias-free 1x1-conv bottleneck applied to both the
    global average- and max-pooled descriptors, summed and squashed with a
    sigmoid.  Spatial gate: a single bias-free 7x7 conv over the channel-wise
    mean/max maps.  Output has the same shape as the input.
    """

    def __init__(self, channels, reduction=8):
        super(CBAMAttentionCheckpoint, self).__init__()
        hidden = max(channels // reduction, 1)
        # 1x1 convolutions (no bias) rather than Linear layers — this is
        # the parameter layout stored in the checkpoint.
        self.channel_attention = nn.Sequential(
            nn.Conv2d(channels, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1, bias=False),
        )
        # Bias-free 7x7 spatial gate, padding keeps the spatial size.
        self.spatial_attention = nn.Sequential(
            nn.Conv2d(2, 1, 7, padding=3, bias=False),
        )

    def forward(self, x):
        # --- channel gate: shared MLP over avg- and max-pooled stats ---
        channel_gate = torch.sigmoid(
            self.channel_attention(F.adaptive_avg_pool2d(x, 1))
            + self.channel_attention(F.adaptive_max_pool2d(x, 1))
        )
        x = x * channel_gate

        # --- spatial gate: 7x7 conv over per-pixel mean/max across channels ---
        mean_map = x.mean(dim=1, keepdim=True)
        max_map = x.max(dim=1, keepdim=True).values
        spatial_gate = torch.sigmoid(
            self.spatial_attention(torch.cat((mean_map, max_map), dim=1))
        )
        return x * spatial_gate
class CheckpointVbaiDPA24(nn.Module):
    """CNN classifier matching the Vbai-DPA 2.4 checkpoint layout.

    Three size variants share one structure: stacked Conv-BN-ReLU-MaxPool
    stages with a CBAM attention module after each stage, an optional
    single-channel edge-input branch, and fully connected classifier heads
    (a plain head, and a combined head used when edge features are given).

    Args:
        model_type: 'f', 'c' or 'q' variant (raises ValueError otherwise).
        num_classes: number of output classes.  Previously this parameter
            was silently ignored (the output layer was hard-coded to 6);
            it is now honoured, with the default preserving old behavior.
    """

    # Per-variant layout: (stage channel progression, hidden FC widths,
    # spatial size of the final feature map — implied by the original
    # hard-coded fc_input values, i.e. a 224x224 input).
    _CONFIGS = {
        'f': ([3, 16, 32, 64], [256, 128], 28),
        'c': ([3, 32, 64, 128], [512, 256], 28),
        'q': ([3, 64, 128, 256, 512], [1024, 512], 14),
    }

    def __init__(self, model_type='f', num_classes=6):
        super(CheckpointVbaiDPA24, self).__init__()
        if model_type not in self._CONFIGS:
            # Fail fast with a clear message; the unguarded original hit a
            # NameError on fc_input later in __init__ instead.
            raise ValueError(f"unknown model_type: {model_type!r}")
        self.model_type = model_type
        self.num_classes = num_classes

        channels, hidden, feat_size = self._CONFIGS[model_type]

        # One Conv+BN+ReLU+MaxPool stage per channel transition; the flat
        # Sequential layout (4 entries per stage) must match the
        # checkpoint's parameter names exactly.
        stages = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            stages += [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.conv_layers = nn.Sequential(*stages)

        # One CBAM block after every stage.
        self.attention_modules = nn.ModuleList(
            [CBAMAttentionCheckpoint(c, reduction=8) for c in channels[1:]]
        )

        fc_input = channels[-1] * feat_size * feat_size
        # Hidden widths plus the (now configurable) output layer.
        fc_sizes = hidden + [num_classes]

        # Edge-detection branch: expects a single-channel map whose size,
        # after two 2x2 pools, flattens to 64 * 56 * 56 (i.e. 224x224 in).
        self.edge_conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.edge_conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.edge_fc = nn.Linear(64 * 56 * 56, 128)

        # Main classifier head.
        self.classifier = nn.Sequential(
            nn.Linear(fc_input, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[1]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[1], fc_sizes[2]),
        )
        # Alternative head used when edge features are available; consumes
        # the shared feature vector (width fc_sizes[0]) plus 128 edge dims.
        self.combined_classifier = nn.Sequential(
            nn.Linear(fc_sizes[0] + 128, fc_sizes[0]),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(fc_sizes[0], fc_sizes[2]),
        )

    def forward(self, x, edge_x=None):
        """Run the network.

        Args:
            x: (B, 3, 224, 224) image batch.
            edge_x: optional (B, 1, 224, 224) edge map; when given and
                well-formed the combined (edge-aware) head is used.

        Returns:
            Tuple ``(logits, attention_map)`` where ``attention_map`` is
            the feature map right after the last attention stage.
        """
        # Each stage is 4 consecutive conv_layers entries followed by its
        # matching CBAM module ('f'/'c': 3 stages, 'q': 4 stages) — the
        # loop replaces the original's duplicated per-variant branches.
        for i, attention in enumerate(self.attention_modules):
            x = self.conv_layers[4 * i:4 * (i + 1)](x)
            x = attention(x)
        attention_map = x

        x = x.view(x.size(0), -1)
        # First Linear+ReLU+Dropout of the main head produce the shared
        # feature vector consumed by either classifier head.
        x = self.classifier[0](x)
        x = self.classifier[1](x)
        features = self.classifier[2](x)

        if edge_x is not None:
            # Best-effort: if the edge branch fails (e.g. unexpected
            # edge_x size for edge_fc), fall back to the plain head
            # rather than crashing.  Deliberate fallback kept as-is.
            try:
                edge_x = F.relu(self.edge_conv1(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_x = F.relu(self.edge_conv2(edge_x))
                edge_x = F.max_pool2d(edge_x, 2, 2)
                edge_features = edge_x.view(edge_x.size(0), -1)
                edge_features = self.edge_fc(edge_features)
                combined = torch.cat([features, edge_features], dim=1)
                output = self.combined_classifier(combined)
            except Exception:
                output = self.classifier[3:](features)
        else:
            output = self.classifier[3:](features)
        return output, attention_map
Production-ready deployment in minutes
Together.ai
Instant API access to this model
Production-ready inference API. Start free, scale to millions.
Try Free API
Replicate
One-click model deployment
Run models in the cloud with simple API. No DevOps required.
Deploy Now
Disclosure: We may earn a commission from these partners. This helps keep LLMYourWay free.