轉自:https://www.cnblogs.com/miraclepbc/p/14378379.html
資料集描述
總共200類影像,每一類影像都存放在一個以類別名稱命名的資料夾下,每張圖片的命名格式如下圖:
資料預處理
首先分析一下我們在資料預處理階段的目標和工作流程
- 獲取每張影像以及對應的標籤
- 劃分測試集和訓練集
- 透過寫資料集類的方式,獲取資料集並進一步獲得DataLoader
- 列印圖片,驗證效果
獲取影像及標籤
# Collect the path of every image; each class has its own sub-directory.
all_imgs_path = glob.glob(r'E:\birds\birds\*\*.jpg')

# The label name is taken from the image's parent directory: the text after
# the first dot of the directory name (presumably "<number>.<label>" -- verify
# against the dataset layout). The parent directory is addressed as [-2],
# which is the same element as index 3 for the path above and keeps working
# if the dataset root is moved to a different depth.
all_labels_name = [path.split('\\')[-2].split('.')[1] for path in all_imgs_path]

# Sorted distinct label names; sorting makes the name -> index mapping
# reproducible from run to run.
unique_labels = sorted(set(all_labels_name))
label_to_index = {name: idx for idx, name in enumerate(unique_labels)}

# Numeric label of every image, aligned one-to-one with all_imgs_path.
# An exact dictionary lookup is used instead of testing whether a label name
# occurs somewhere in the path: one label name can be contained in another,
# and a substring test would then append several labels for a single image.
all_labels = [label_to_index[name] for name in all_labels_name]
劃分測試集和訓練集
以下程式碼可以作為模板來用,不做額外解釋
# Shuffle images and labels with one shared permutation so each image keeps
# its label; the fixed seed makes the split reproducible.
np.random.seed(2021)
index = np.random.permutation(len(all_imgs_path))
all_imgs_path, all_labels = np.array(all_imgs_path)[index], np.array(all_labels)[index]

# The first 80% of the shuffled samples form the training set, the rest the
# test set.
s = int(0.8 * len(all_imgs_path))
train_path, test_path = all_imgs_path[:s], all_imgs_path[s:]
train_labels, test_labels = all_labels[:s], all_labels[s:]
透過寫資料集類的方式,獲取資料集並進一步獲得DataLoader
以下程式碼可以作為模板來用,不做額外解釋
# Preprocessing applied to every image: resize to 224x224, then convert the
# PIL image to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
    ]
)
class BirdsDataset(data.Dataset):
    """Dataset that loads images from disk and pairs them with their labels.

    Args:
        img_paths: Indexable collection of image file paths.
        labels: Indexable collection of labels, aligned with ``img_paths``.
        transform: Callable applied to each RGB PIL image; its return value
            is the sample yielded by ``__getitem__``.
    """

    def __init__(self, img_paths, labels, transform):
        self.imgs = img_paths
        self.labels = labels
        self.transforms = transform

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the sample at ``index``."""
        img_path = self.imgs[index]
        label = self.labels[index]
        # The context manager closes the underlying file once the pixels have
        # been read; convert() loads the image and returns a new image object
        # that stays valid after the file is closed.
        with Image.open(img_path) as pil_img:
            # Forcing RGB gives every sample three channels, so no separate
            # grayscale handling is needed. The original author added this
            # conversion to work around a RuntimeError (presumably raised
            # when images with differing channel counts were batched).
            rgb_img = pil_img.convert('RGB')
        return self.transforms(rgb_img), label

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.imgs)
# Wrap both splits in datasets that share the same preprocessing.
train_ds = BirdsDataset(img_paths=train_path, labels=train_labels, transform=transform)
test_ds = BirdsDataset(img_paths=test_path, labels=test_labels, transform=transform)

# No shuffling: these loaders only feed the convolutional base for feature
# extraction, they are not used for training.
train_dl = data.DataLoader(dataset=train_ds, batch_size=32)
test_dl = data.DataLoader(dataset=test_ds, batch_size=32)
結果檢視
取出一個批次的資料,繪圖
# Fetch a single batch from the training loader and show its first three
# images, each titled with its label name.
img_batch, label_batch = next(iter(train_dl))
plt.figure(figsize=(12, 8))  # canvas size
index_to_label = dict(enumerate(unique_labels))
for position, (img, label) in enumerate(zip(img_batch[:3], label_batch[:3]), start=1):
    # Move the channel axis to the end, which is the layout imshow expects.
    channels_last = img.permute(1, 2, 0).numpy()
    plt.subplot(1, 3, position)
    plt.title(index_to_label.get(label.item()))
    plt.imshow(channels_last)
結果如下:
提取卷積基
這一階段的工作流程如下:
- 獲取DenseNet預訓練模型,使用features部分
- 使用卷積基提取影像特徵,並存放在列表中
預訓練模型獲取
# Keep only the convolutional base (the ``features`` sub-module) of the
# pretrained DenseNet-121.
my_densenet = models.densenet121(pretrained = True).features
if torch.cuda.is_available():
    my_densenet = my_densenet.cuda()

# The base is used as a fixed feature extractor, so its weights are frozen.
for p in my_densenet.parameters():
    p.requires_grad = False

# Switch to evaluation mode: a freshly built module starts in training mode,
# in which batch-norm layers normalize with per-batch statistics and update
# their running statistics, so the extracted features would depend on how the
# images happen to be batched.
my_densenet.eval()
提取影像特徵
def _extract_features(model, loader):
    """Run ``model`` over every batch of ``loader`` and collect the outputs.

    Args:
        model: Module used as a feature extractor.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        A ``(features, labels)`` pair of lists in loader order: one flattened
        CPU feature tensor per sample and the matching per-sample label.
    """
    # Send the inputs to wherever the model's weights live, so the same code
    # runs on CPU-only machines as well as on CUDA.
    model_device = next(model.parameters()).device
    features, labels = [], []
    # No gradients are needed for feature extraction.
    with torch.no_grad():
        for im, la in loader:
            out = model(im.to(model_device))
            # Flatten each sample: a linear model is trained on these later.
            out = out.view(out.size(0), -1)
            # extend() adds one entry per sample of the batch.
            features.extend(out.cpu())
            labels.extend(la)
    return features, labels


train_features, train_features_labels = _extract_features(my_densenet, train_dl)
test_features, test_features_labels = _extract_features(my_densenet, test_dl)
重新定義資料集
因為後面要透過線性模型來預測,因此之前的影像資料集就不好用了
因此需要用剛剛提取到的特徵,重新製作資料集
class FeatureDataset(data.Dataset):
    """Dataset over already-extracted features and their labels.

    Args:
        feature_list: Indexable collection of per-sample features.
        label_list: Indexable collection of labels, aligned with
            ``feature_list``.
    """

    def __init__(self, feature_list, label_list):
        self.feature_list, self.label_list = feature_list, label_list

    def __getitem__(self, index):
        """Return the ``(feature, label)`` pair stored at ``index``."""
        feature = self.feature_list[index]
        label = self.label_list[index]
        return feature, label

    def __len__(self):
        """Return the number of stored features."""
        return len(self.feature_list)
# Datasets over the extracted features of each split.
train_feature_ds = FeatureDataset(feature_list=train_features, label_list=train_features_labels)
test_feature_ds = FeatureDataset(feature_list=test_features, label_list=test_features_labels)

# Only the training loader shuffles its samples.
train_feature_dl = data.DataLoader(dataset=train_feature_ds, batch_size=32, shuffle=True)
test_feature_dl = data.DataLoader(dataset=test_feature_ds, batch_size=32)
模型定義與預測
這裡定義一個線性模型即可
模型定義
class FCModel(nn.Module):
    """Single fully connected layer mapping ``in_size`` inputs to ``out_size`` outputs."""

    def __init__(self, in_size, out_size):
        super().__init__()
        self.linear = nn.Linear(in_features=in_size, out_features=out_size)

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        output = self.linear(input)
        return output
# Input width is the length of one flattened feature vector; the model has
# 200 outputs.
in_feature_size = train_features[0].shape[0]

# Train on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Module.to() moves the parameters in place and returns the module itself.
net = FCModel(in_feature_size, 200).to(device)

loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)
epochs = 30
模型訓練
def _run_batches(model, loader, train):
    """Run ``model`` over every batch of ``loader``; return ``(mean_loss, accuracy)``.

    Uses the module-level ``device``, ``loss_func`` and ``optimizer``. When
    ``train`` is true, the optimizer is stepped after every batch.
    """
    correct = 0
    total = 0
    loss_sum = 0.0
    for x, y in loader:
        # as_tensor accepts both tensors and plain sequences and does not
        # copy a tensor that already has the requested dtype.
        y = torch.as_tensor(y, dtype=torch.long)
        x, y = x.to(device), y.to(device)
        y_pred = model(x)
        loss = loss_func(y_pred, y)
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        batch_size = y.size(0)
        correct += (torch.argmax(y_pred, dim=1) == y).sum().item()
        total += batch_size
        # Assumes loss_func returns the mean over the batch (the default
        # reduction of CrossEntropyLoss): weighting by the batch size turns
        # the sum into a per-sample total, so the division below yields a
        # true per-sample mean.
        loss_sum += loss.item() * batch_size
    return loss_sum / total, correct / total


def fit(epoch, model, trainloader, testloader):
    """Train ``model`` for one epoch, evaluate it, and print the metrics.

    Args:
        epoch: Epoch number; only used in the printed summary.
        model: Module to train; updated in place through the module-level
            ``optimizer``.
        trainloader: Iterable of ``(x, y)`` training batches.
        testloader: Iterable of ``(x, y)`` evaluation batches.

    Returns:
        ``(epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc)``, where
        the losses are per-sample means and the accuracies are fractions
        between 0 and 1.

    Raises:
        ZeroDivisionError: If either loader yields no samples.
    """
    model.train()
    epoch_loss, epoch_acc = _run_batches(model, trainloader, train=True)

    model.eval()
    with torch.no_grad():
        epoch_test_loss, epoch_test_acc = _run_batches(model, testloader, train=False)

    print('epoch: ', epoch,
          'loss: ', round(epoch_loss, 3),
          'accuracy: ', round(epoch_acc, 3),
          'test_loss: ', round(epoch_test_loss, 3),
          'test_accuracy: ', round(epoch_test_acc, 3))
    return epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc
# Per-epoch metric histories.
train_loss, train_acc, test_loss, test_acc = [], [], [], []

for epoch in range(epochs):
    epoch_metrics = fit(epoch, net, train_feature_dl, test_feature_dl)
    epoch_loss, epoch_acc, epoch_test_loss, epoch_test_acc = epoch_metrics
    train_loss += [epoch_loss]
    train_acc += [epoch_acc]
    test_loss += [epoch_test_loss]
    test_acc += [epoch_test_acc]
訓練結果