import collections
import os
import shutil
import tqdm

import numpy as np
import PIL.Image
import torch
import torchvision
1. Basic Configuration
(1) Check PyTorch version
torch.__version__               # PyTorch version
torch.version.cuda              # Corresponding CUDA version
torch.backends.cudnn.version()  # Corresponding cuDNN version
torch.cuda.get_device_name(0)   # GPU type
torch.nonzero(tensor)               # Indices of non-zero elements
torch.nonzero(tensor == 0)          # Indices of zero elements
torch.nonzero(tensor).size(0)       # Number of non-zero elements
torch.nonzero(tensor == 0).size(0)  # Number of zero elements
(11) Tensor expansion
# Expand a tensor of shape 64*512 to shape 64*512*7*7.
torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
# Total number of parameters
num_params = sum(p.numel() for p in model.parameters())
# Number of trainable parameters
num_trainable_params = sum(p.numel() for p in model.parameters()
                           if p.requires_grad)
# Common practice for initialization.
for m in model.modules():
    if isinstance(m, torch.nn.Conv2d):
        torch.nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                      nonlinearity='relu')
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, val=0.0)
class FeatureExtractor(torch.nn.Module):
    """Helper class to extract several convolution features from the given
    pre-trained model.

    Attributes:
        _model, torch.nn.Module.
        _layers_to_extract, list<str> or set<str>

    Example:
        >>> model = torchvision.models.resnet152(pretrained=True)
        >>> model = torch.nn.Sequential(collections.OrderedDict(
                list(model.named_children())[:-1]))
        >>> conv_representation = FeatureExtractor(
                pretrained_model=model,
                layers_to_extract={'layer1', 'layer2', 'layer3', 'layer4'})(image)
    """

    def __init__(self, pretrained_model, layers_to_extract):
        torch.nn.Module.__init__(self)
        self._model = pretrained_model
        self._model.eval()
        self._layers_to_extract = set(layers_to_extract)

    def forward(self, x):
        with torch.no_grad():
            conv_representation = []
            for name, layer in self._model.named_children():
                x = layer(x)
                if name in self._layers_to_extract:
                    conv_representation.append(x)
            return conv_representation
model = torchvision.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = torch.nn.Linear(512, 100)  # Replace the last fc layer
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9,
                            weight_decay=1e-4)
Fine-tune the fully-connected layer with a larger learning rate and the convolutional layers with a smaller one.
model = torchvision.models.resnet18(pretrained=True)
finetuned_parameters = list(map(id, model.fc.parameters()))
conv_parameters = (p for p in model.parameters()
                   if id(p) not in finetuned_parameters)
# Note: optimizer parameter groups use the key 'params'.
parameters = [{'params': conv_parameters, 'lr': 1e-3},
              {'params': model.fc.parameters()}]
optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9,
                            weight_decay=1e-4)
for t in range(80):
    for images, labels in tqdm.tqdm(train_loader, desc='Epoch %3d' % (t + 1)):
        images, labels = images.cuda(), labels.cuda()
        scores = model(images)
        loss = loss_function(scores, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
(3) Label smoothing
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    N = labels.size(0)
    # C is the number of classes.
    smoothed_labels = torch.full(size=(N, C),
                                 fill_value=0.1 / (C - 1)).cuda()
    smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1),
                             value=0.9)

    score = model(images)
    log_prob = torch.nn.functional.log_softmax(score, dim=1)
    loss = -torch.sum(log_prob * smoothed_labels) / N
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
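For reference, PyTorch 1.10 and later build label smoothing into the criterion itself. Note that the built-in scheme distributes mass slightly differently (the target class gets 1 - ε + ε/C rather than exactly 0.9), so this is a close but not identical alternative:

# Built-in alternative (PyTorch >= 1.10); close to, but not identical to,
# the manual scheme above.
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
loss = criterion(model(images), labels)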
(4) Mixup
beta_distribution = torch.distributions.beta.Beta(alpha, alpha)
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
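    # The source block stops short here; the lines below sketch the rest of a
    # standard Mixup step, assuming alpha, model, loss_function, and optimizer
    # are defined as in the earlier snippets.

    # Mix pairs of images with a convex weight drawn from Beta(alpha, alpha).
    lambda_ = beta_distribution.sample([]).item()
    index = torch.randperm(images.size(0)).cuda()
    mixed_images = lambda_ * images + (1 - lambda_) * images[index, :]

    # Mix the losses of the two label sets with the same weight.
    scores = model(mixed_images)
    loss = (lambda_ * loss_function(scores, labels)
            + (1 - lambda_) * loss_function(scores, labels[index]))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()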
(5) Bilinear pooling

X = torch.reshape(X, (N, D, H * W))  # Assume X has shape N*D*H*W
X = torch.bmm(X, torch.transpose(X, 1, 2)) / (H * W)  # Bilinear pooling
assert X.size() == (N, D, D)
X = torch.reshape(X, (N, D * D))
X = torch.sign(X) * torch.sqrt(torch.abs(X) + 1e-5)  # Signed-sqrt normalization
X = torch.nn.functional.normalize(X)  # L2 normalization
(6) L1 regularization
l1_regularization = torch.nn.L1Loss(reduction='sum')
loss = ...  # Standard cross-entropy loss
for param in model.parameters():
    # sum(|param - 0|) == sum(|param|), i.e. the L1 penalty.
    loss += l1_regularization(param, torch.zeros_like(param))
loss.backward()
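As written, the penalty enters the loss at full strength. In practice the penalty is usually scaled by a small coefficient; a minimal variant, with a hypothetical hyperparameter l1_weight controlling the regularization strength:

l1_weight = 1e-5  # hypothetical coefficient, tune per task
loss = ...  # Standard cross-entropy loss
l1_penalty = sum(torch.sum(torch.abs(p)) for p in model.parameters())
loss = loss + l1_weight * l1_penalty
loss.backward()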
def forward(self, x):
    ...
    x = torch.nn.functional.dropout(x, p=0.5, training=self.training)
Switch the network state with model.train() and model.eval() before calling model(x), and wrap code blocks that need no gradient computation in with torch.no_grad(). The difference between model.eval() and torch.no_grad() is that model.eval() switches the network to evaluation mode, where layers such as batch normalization (BN) and dropout compute differently than during training, while torch.no_grad() turns off PyTorch's autograd mechanism to reduce memory use and speed up computation; results produced under it cannot be used for loss.backward(). The input to torch.nn.CrossEntropyLoss does not need to pass through Softmax.
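A minimal sketch tying these points together, assuming model, optimizer, train_loader, and val_loader are defined elsewhere:

criterion = torch.nn.CrossEntropyLoss()  # expects raw logits, no Softmax

model.train()  # training mode: BN uses batch statistics, dropout is active
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    loss = criterion(model(images), labels)  # gradients tracked here
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

model.eval()           # eval mode: BN uses running statistics, dropout is off
with torch.no_grad():  # no autograd graph: less memory, faster
    for images, labels in val_loader:
        images, labels = images.cuda(), labels.cuda()
        scores = model(images)  # loss.backward() is impossible on these
        predictions = scores.argmax(dim=1)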