神经网络量化基础
前言
后量化训练(Post Training Quantization):这种方法是最常用的。其中 weight 跟上述一样也是被提前量化好的;然后 activation 也会基于之前校准过程中记录下的固定的 scale 和 zero_point 进行量化,整个过程不存在量化参数 scale 和 zero_point 的再计算;
本文是在阅读博客时对代码的整理,旨在对量化的基础过程有更加清晰的认识。主要包括:基础训练模型的构建,对训练模型的测试和一些基础的量化函数求解,如计算尺度因子、零点,进行量化操作和反量化操作。
本文采用 pytorch 手工构建了一个基础网络模型,数据集采用 mnist 数据集,并对网络进行量化。不再赘述,直接看训练代码。
网络模型构建
1. 网络结构
首先说明定义网络的结构,如下所示:
class Net(nn.Module):
    """Small CNN for MNIST used as the quantization baseline.

    Two conv layers (the second grouped, which tends to amplify
    quantization error) followed by a single linear classifier.
    Expects 28x28 single-channel input by default.
    """

    def __init__(self, num_channels=1):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
        # Grouped convolution: deliberately chosen to make the
        # quantization-induced error more visible.
        self.conv2 = nn.Conv2d(40, 40, 3, 1, groups=20)
        # 28 -> conv(3) -> 26 -> pool(2) -> 13 -> conv(3) -> 11 -> pool(2) -> 5
        self.fc = nn.Linear(5 * 5 * 40, 10)

    def forward(self, x):
        # Bug fix: this line was accidentally commented out, which left
        # conv1 unused and fed 1-channel input straight into conv2
        # (which expects 40 channels).
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 5 * 5 * 40)
        x = self.fc(x)
        return x
实现样式如下所示,由两个卷积层和一个线性层构成。
Net( (conv1): Conv2d(1, 40, kernel_size=(3, 3), stride=(1, 1)) (conv2): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), groups=20) (fc): Linear(in_features=1000, out_features=10, bias=True) )
2. 网络训练
from model import *
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp


def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over *train_loader*, logging loss every 50 batches."""
    model.train()
    lossLayer = torch.nn.CrossEntropyLoss()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = lossLayer(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
            ))


def test(model, device, test_loader):
    """Evaluate *model* on *test_loader*; print summed average loss and accuracy."""
    model.eval()
    test_loss = 0
    correct = 0
    lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
    # no_grad: evaluation only, avoid building the autograd graph.
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += lossLayer(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}%\n'.format(
        test_loss, 100. * correct / len(test_loader.dataset)
    ))


if __name__ == "__main__":
    batch_size = 64
    test_batch_size = 64
    seed = 1
    epochs = 15
    lr = 0.01
    momentum = 0.5
    save_model = True
    using_bn = False

    torch.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
    )

    if using_bn:
        model = NetBN().to(device)  # variant with BatchNorm layers
    else:
        model = Net().to(device)    # plain variant without BatchNorm

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

    if save_model:
        if not osp.exists('ckpt'):
            os.makedirs('ckpt')
        # Bug fix: both branches previously saved to 'ckpt/mnist_cnn.pt',
        # so a BN model would overwrite the plain checkpoint and the
        # inference script's load of 'ckpt/mnist_cnnbn.pt' would fail.
        if using_bn:
            torch.save(model.state_dict(), 'ckpt/mnist_cnnbn.pt')
        else:
            torch.save(model.state_dict(), 'ckpt/mnist_cnn.pt')
在对网络模型训练之后会得到一个权重文件,即 ckpt/mnist_cnn.pt(若采用带 BN 的模型则为 ckpt/mnist_cnnbn.pt)。
3. 对训练模型进行测试
首先我们对权重进行加载,然后计算出在全精度情况下,模型的准确度。
def full_inference(model, test_loader):
    """Run full-precision inference over *test_loader*.

    Prints and returns the accuracy (percentage, 0-100). Returning the
    value is new but backward-compatible: previous callers ignored the
    (None) return value.
    """
    correct = 0
    # no_grad: pure evaluation, no autograd graph needed.
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    accuracy = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Full Model Accuracy: {:.0f}%\n'.format(accuracy))
    return accuracy


if __name__ == "__main__":
    # Note: the duplicate module-level batch_size/using_bn assignments
    # were removed; they were immediately re-assigned here.
    batch_size = 64
    using_bn = False

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True
    )

    if using_bn:
        model = NetBN()
        model.load_state_dict(torch.load('ckpt/mnist_cnnbn.pt', map_location='cpu'))
    else:
        model = Net()
        model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
    model.eval()

    full_inference(model, test_loader)