Pytorch实战（一）

MNIST手写数字识别项目因为数据量小、识别任务简单而成为图像识别入门的第一课，该项目有如下特点：

识别难度低，即使把图片展开为一维数据，且只使用全连接层也能获得超过98%的识别准确度；

计算量小，不需要GPU加速也可以快速训练完成；

数据易得，教程易得。

本文中的数据来源于Kaggle网站，并非Pytorch自带的数据集（个人认为使用Pytorch封装好的MNIST数据集不利于入门）。接下来我们展示不同思路下的MNIST识别结果。

1. 将MNIST视为回归问题和分类问题的差别

一般来说，我们认为MNIST是一个图像分类问题，而非回归问题，因为图像的特征并不会随着数字从0到9增大而呈现出相应的变化规律，即图像中的像素值与数字大小之间不存在相关关系。

全连接回归模式（使用MSELoss）准确率：

全连接回归网络

排行榜上清一色的0.9以上，这个成绩真的拿不出手啊。

全连接分类模式（使用CrossEntropy）准确率：

全连接分类模式——两次迭代

全连接分类模式——两百次迭代

使用分类模式，仅需两次迭代就能达到0.93的准确率，并且200次迭代之后，准确率达到了0.97。

2. 卷积神经网络

将图片一维展开尚且能达到0.97的准确率，如果让图片保持原状，使用卷积神经网络来识别，又能够得到什么结果呢？

卷积神经网络准确率：

卷积神经网络准确率

项目包含两个py文件，mnist_models.py和main.py，下面是mnist_models.py中的源代码：

import torch as t


def to_image(data):
    """Reshape flattened samples into single-channel 28x28 images.

    Args:
        data: Tensor whose elements can be regrouped into rows of 784
            values (e.g. shape ``(N, 784)``).

    Returns:
        A view of ``data`` with shape ``(N, 1, 28, 28)``.
    """
    return data.view(-1, 1, 28, 28)


class fc_net(t.nn.Module):
    """Fully connected network: 784 -> 200 -> 100 -> 20 -> 10.

    Expects flattened input of shape ``(N, 784)`` and returns ``(N, 10)``
    raw scores (no softmax is applied).
    """

    def __init__(self):
        super(fc_net, self).__init__()
        self.fc1 = t.nn.Sequential(t.nn.Linear(784, 200), t.nn.ReLU())
        self.fc2 = t.nn.Sequential(t.nn.Linear(200, 100), t.nn.ReLU())
        self.fc3 = t.nn.Sequential(t.nn.Linear(100, 20), t.nn.ReLU())
        self.fc4 = t.nn.Linear(20, 10)

    def forward(self, x):
        """Run the four linear stages in order and return the scores."""
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return self.fc4(x)


class conv_net(t.nn.Module):
    """Small convolutional network.

    The input must first be reshaped into 2-D image form (see
    ``to_image``).  For a 28x28 input the two conv/pool stages produce
    20 channels of 5x5, i.e. the 500 features consumed by ``fc1``.
    """

    def __init__(self):
        super(conv_net, self).__init__()
        self.conv1 = t.nn.Sequential(
            t.nn.Conv2d(1, 10, kernel_size=5, stride=1, padding=1),
            t.nn.MaxPool2d(2),
            t.nn.ReLU(),
            t.nn.BatchNorm2d(10),
        )
        self.conv2 = t.nn.Sequential(
            t.nn.Conv2d(10, 20, kernel_size=5, stride=1, padding=1),
            t.nn.MaxPool2d(2),
            t.nn.ReLU(),
            t.nn.BatchNorm2d(20),  # num_features is the channel count
        )
        self.fc1 = t.nn.Sequential(
            t.nn.Linear(500, 60),
            t.nn.Dropout(0.5),
            t.nn.ReLU(),
        )
        self.fc2 = t.nn.Sequential(
            t.nn.Linear(60, 20),
            t.nn.Dropout(0.5),
            t.nn.ReLU(),
        )
        self.fc3 = t.nn.Linear(20, 10)

    def forward(self, x):
        """Apply both conv stages, flatten to 500 features, then classify."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(-1, 500)
        x = self.fc1(x)
        x = self.fc2(x)
        return self.fc3(x)


class AlexNet(t.nn.Module):
    """AlexNet-like network for single-channel images.

    Kernel sizes and strides were reduced compared with the original
    AlexNet because of hardware limits and the small MNIST image size.
    The classifier expects the feature stack to emit 256 maps of 6x6,
    which is what a 28x28 input produces.

    Args:
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = t.nn.Sequential(
            t.nn.Conv2d(1, 64, kernel_size=5, stride=1, padding=2),
            t.nn.ReLU(inplace=True),
            t.nn.MaxPool2d(kernel_size=3, stride=1),
            t.nn.Conv2d(64, 192, kernel_size=3, padding=2),
            t.nn.ReLU(inplace=True),
            t.nn.MaxPool2d(kernel_size=3, stride=2),
            t.nn.Conv2d(192, 384, kernel_size=3, padding=1),
            t.nn.ReLU(inplace=True),
            t.nn.Conv2d(384, 256, kernel_size=3, padding=1),
            t.nn.ReLU(inplace=True),
            t.nn.Conv2d(256, 256, kernel_size=3, padding=1),
            t.nn.ReLU(inplace=True),
            t.nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = t.nn.Sequential(
            t.nn.Dropout(),
            t.nn.Linear(256 * 6 * 6, 4096),
            t.nn.ReLU(inplace=True),
            t.nn.Dropout(),
            t.nn.Linear(4096, 4096),
            t.nn.ReLU(inplace=True),
            t.nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Extract features, flatten per sample, and classify.

        The leftover debug ``print`` calls were removed: they fired on
        every batch and flooded the training output.
        """
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        return self.classifier(x)

下面是main.py中的源代码

import os
import signal

import matplotlib.pyplot as plt
import pandas as pd
import torch as t
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import models
from tqdm import tqdm

from mnist_models import conv_net, to_image, fc_net, AlexNet

# ---- Run configuration ----
TYPE = 'cla'      # 'cla' -> CrossEntropyLoss, 'reg' -> MSELoss
METHOD = 'conv'   # one of 'conv', 'fc', 'res', 'alex'
EPOCHS = 400
BATCH_SIZE = 500
LR = 0.001
# Single definition of the checkpoint path used for loading, the
# interrupt handler and the final save.
MODEL_PATH = 'H:/learning_notes/MNIST/%s.pth' % METHOD

# ---- Load data ----
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')
data = t.from_numpy(train.drop('label', axis=1).values).float()
y = t.from_numpy(train['label'].values).long()
test_data = t.from_numpy(test.values).float()

# ---- Build the model ----
if METHOD == 'conv':
    data = to_image(data)  # reshape flat rows into 2-D images
    test_data = to_image(test_data)
    net = conv_net()
elif METHOD == 'fc':
    net = fc_net()
elif METHOD == 'res':
    # Transfer learning with resnet18.  Freezing the pretrained weights
    # and using resnet as a feature extractor trains faster.  The author
    # notes resnet is too large for comfortable CPU training (about three
    # hours per epoch on a Xeon E5620).
    data = to_image(data)
    test_data = to_image(test_data)
    net = models.resnet18(pretrained=True)
    # Freeze the pretrained parameters.
    for p in net.parameters():
        p.requires_grad = False
    # MNIST images are single-channel and small, so the stem, pooling and
    # head of resnet are replaced.  The replacement layers are created
    # after the freeze and therefore keep requires_grad=True.
    net.conv1 = t.nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=3, bias=False)
    net.maxpool = t.nn.MaxPool2d(kernel_size=2, stride=1, padding=1)
    net.avgpool = t.nn.AvgPool2d(5, stride=1)
    num_ftrs = net.fc.in_features
    net.fc = t.nn.Linear(num_ftrs, 10)
elif METHOD == 'alex':
    data = to_image(data)
    test_data = to_image(test_data)
    net = AlexNet()
else:
    raise ValueError("Wrong Method!")

# ---- Resume from an existing checkpoint when possible (best effort) ----
if os.path.exists(MODEL_PATH):
    try:
        net.load_state_dict(t.load(MODEL_PATH))
    except Exception as e:
        print(e)
        print("Parameters Error")

# ---- Loss function ----
if TYPE == 'reg':
    # NOTE(review): the models in mnist_models emit 10 scores per sample;
    # MSELoss against the integer label vector presumably needs a
    # single-output model with float targets -- confirm before using 'reg'.
    criterion = t.nn.MSELoss()
elif TYPE == 'cla':
    criterion = t.nn.CrossEntropyLoss()
else:
    raise ValueError("Wrong Type!")

# ---- Optimizer ----
# Optimise every parameter that still requires gradients.  For 'res' that
# is the new fc head AND the freshly initialised conv1 (the original only
# optimised net.fc, leaving the random conv1 untrained); for the other
# methods it is all parameters, as before.
trainable_params = [p for p in net.parameters() if p.requires_grad]
optim = t.optim.Adam(trainable_params, lr=LR, weight_decay=0.0)


def save_and_interrupt(signum, frame):
    """Signal handler: save the current weights, then abort training.

    Installed for SIGINT and SIGTERM so that training can be stopped by
    hand and resumed later from the saved checkpoint.
    """
    print("Model Saved")
    t.save(net.state_dict(), MODEL_PATH)
    raise KeyboardInterrupt


signal.signal(signal.SIGINT, save_and_interrupt)
signal.signal(signal.SIGTERM, save_and_interrupt)

# ---- Training ----
net.train()
# Only full batches are used for training (a trailing partial batch is
# skipped, as in the original loop).
num_batches = len(data) // BATCH_SIZE
for epoch in tqdm(range(EPOCHS)):
    # Decay the learning rate by 10% every 100 epochs.  Epoch 0 is skipped
    # so the configured LR is actually used, and the decay is computed once
    # rather than once per parameter group.
    if epoch > 0 and epoch % 100 == 0:
        LR = LR * 0.9
        for param_group in optim.param_groups:
            param_group['lr'] = LR
    for i in tqdm(range(num_batches), total=num_batches):
        start = i * BATCH_SIZE
        batch_x = data[start:start + BATCH_SIZE]
        batch_y = y[start:start + BATCH_SIZE]
        prediction = net(batch_x)
        loss = criterion(prediction, batch_y)
        optim.zero_grad()
        loss.backward()
        optim.step()
    print(loss)  # loss of the last batch in this epoch
t.save(net.state_dict(), MODEL_PATH)

# ---- Prediction ----
submission = pd.read_csv("./data/sample_submission.csv")
print('=======Predicting========')
# Evaluation mode: dropout is disabled.
net.eval()
outputs = []
# Predict in chunks to limit memory use.  no_grad() keeps autograd from
# retaining a graph for every chunk, and stepping by BATCH_SIZE over the
# full length also covers a trailing partial chunk.
with t.no_grad():
    for start in tqdm(range(0, test_data.shape[0], BATCH_SIZE)):
        outputs.append(net(test_data[start:start + BATCH_SIZE]))
result = t.cat(outputs, 0) if outputs else t.Tensor()

# ---- Post-process and write the submission ----
if TYPE == 'cla':
    # t.max returns (max values, positions of the max); the position is
    # the predicted digit.
    _, labels = t.max(result, 1)
    submission['Label'] = labels.numpy()
elif TYPE == 'reg':
    # The original never copied the predictions into the submission and
    # post-processed the sample file's own labels instead.
    # NOTE(review): assumes one output value per sample -- confirm.
    submission['Label'] = result.view(-1).numpy()
    submission['Label'] = submission['Label'].astype('int')
    submission['Label'] = submission['Label'].apply(lambda x: 9 if x >= 10 else x)
submission.to_csv("submission.csv", index=False)

本站仅提供存储服务，所有内容均由用户发布，如发现有害或侵权内容，请点击举报。