import torch
import torch.nn as nn
import torch.nn.functional as F
class PCFN(nn.Module):
    """Partial-convolution feed-forward block.

    A 1x1 convolution followed by GELU expands the input to a hidden width
    and mixes information across channels. The hidden feature map is then
    split along the channel axis into two chunks: the first ``p_dim``
    channels go through a 3x3 convolution + GELU to encode local context,
    while the remaining channels are passed through untouched. The two
    chunks are merged again and projected back to ``dim`` channels by a
    final 1x1 convolution.

    Args:
        dim: Number of input (and output) channels.
        growth_rate: Hidden width multiplier;
            ``hidden_dim = int(dim * growth_rate)``.
        p_rate: Fraction of the hidden channels that receive the 3x3
            convolution; ``p_dim = int(hidden_dim * p_rate)``.
    """

    def __init__(self, dim, growth_rate=2.0, p_rate=0.25):
        super().__init__()
        hidden_dim = int(dim * growth_rate)
        p_dim = int(hidden_dim * p_rate)

        # Positional Conv2d args are (in, out, kernel_size, stride, padding).
        self.conv_0 = nn.Conv2d(dim, hidden_dim, 1, 1, 0)
        self.conv_1 = nn.Conv2d(p_dim, p_dim, 3, 1, 1)
        self.act = nn.GELU()
        self.conv_2 = nn.Conv2d(hidden_dim, dim, 1, 1, 0)

        self.p_dim = p_dim
        self.hidden_dim = hidden_dim

    def forward(self, x):
        """Apply the block to a feature map.

        Args:
            x: Tensor of shape ``(N, dim, H, W)``.

        Returns:
            Tensor of shape ``(N, dim, H, W)``.
        """
        hidden = self.act(self.conv_0(x))

        if self.training or torch.is_grad_enabled():
            # Autograd-safe path. The in-place shortcut below would overwrite
            # the very slice that autograd saved as conv_1's input, so any
            # later backward() (e.g. an eval-mode module inside a graph that
            # still needs gradients) would fail. torch.cat builds a fresh
            # tensor instead, at the cost of extra memory.
            local, passthrough = torch.split(
                hidden, [self.p_dim, self.hidden_dim - self.p_dim], dim=1
            )
            local = self.act(self.conv_1(local))
            return self.conv_2(torch.cat([local, passthrough], dim=1))

        # Pure inference (eval mode and no gradient tracking): overwrite the
        # first p_dim channels of the hidden tensor in place to save memory.
        hidden[:, :self.p_dim, :, :] = self.act(
            self.conv_1(hidden[:, :self.p_dim, :, :])
        )
        return self.conv_2(hidden)