网络编程
位置:首页>> 网络编程>> Python编程>> pytorch cnn 识别手写的字实现自建图片数据

pytorch cnn 识别手写的字实现自建图片数据

作者:瓦力冫  发布时间:2023-04-18 02:39:22 

标签:pytorch,cnn,识别手写

本文主要介绍了pytorch cnn 识别手写的字实现自建图片数据,分享给大家,具体如下:


# library
# standard library
import os
# third-party library
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import torchvision
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
# torch.manual_seed(1)  # reproducible
# Hyper Parameters
EPOCH = 1        # train the training data n times, to save time, we just train 1 epoch
BATCH_SIZE = 50
LR = 0.001       # learning rate

root = "./mnist/raw/"

def default_loader(path):
 # return Image.open(path).convert('RGB')
 return Image.open(path)

class MyDataset(Dataset):
 def __init__(self, txt, transform=None, target_transform=None, loader=default_loader):
   fh = open(txt, 'r')
   imgs = []
   for line in fh:
     line = line.strip('\n')
     line = line.rstrip()
     words = line.split()
     imgs.append((words[0], int(words[1])))
   self.imgs = imgs
   self.transform = transform
   self.target_transform = target_transform
   self.loader = loader
   fh.close()
 def __getitem__(self, index):
   fn, label = self.imgs[index]
   img = self.loader(fn)
   img = Image.fromarray(np.array(img), mode='L')
   if self.transform is not None:
     img = self.transform(img)
   return img,label
 def __len__(self):
   return len(self.imgs)

train_data = MyDataset(txt= root + 'train.txt', transform = torchvision.transforms.ToTensor())
train_loader = DataLoader(dataset = train_data, batch_size=BATCH_SIZE, shuffle=True)

test_data = MyDataset(txt= root + 'test.txt', transform = torchvision.transforms.ToTensor())
test_loader = DataLoader(dataset = test_data, batch_size=BATCH_SIZE)

class CNN(nn.Module):
 def __init__(self):
   super(CNN, self).__init__()
   self.conv1 = nn.Sequential(     # input shape (1, 28, 28)
     nn.Conv2d(
       in_channels=1,       # input height
       out_channels=16,      # n_filters
       kernel_size=5,       # filter size
       stride=1,          # filter movement/step
       padding=2,         # if want same width and length of this image after con2d, padding=(kernel_size-1)/2 if stride=1
     ),               # output shape (16, 28, 28)
     nn.ReLU(),           # activation
     nn.MaxPool2d(kernel_size=2),  # choose max value in 2x2 area, output shape (16, 14, 14)
   )
   self.conv2 = nn.Sequential(     # input shape (16, 14, 14)
     nn.Conv2d(16, 32, 5, 1, 2),   # output shape (32, 14, 14)
     nn.ReLU(),           # activation
     nn.MaxPool2d(2),        # output shape (32, 7, 7)
   )
   self.out = nn.Linear(32 * 7 * 7, 10)  # fully connected layer, output 10 classes

def forward(self, x):
   x = self.conv1(x)
   x = self.conv2(x)
   x = x.view(x.size(0), -1)      # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
   output = self.out(x)
   return output, x  # return x for visualization
cnn = CNN()
print(cnn) # net architecture

optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)  # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss()            # the target label is not one-hotted

# training and testing
for epoch in range(EPOCH):
 for step, (x, y) in enumerate(train_loader):  # gives batch data, normalize x when iterate train_loader
   b_x = Variable(x)  # batch x
   b_y = Variable(y)  # batch y

output = cnn(b_x)[0]        # cnn output
   loss = loss_func(output, b_y)  # cross entropy loss
   optimizer.zero_grad()      # clear gradients for this training step
   loss.backward()         # backpropagation, compute gradients
   optimizer.step()        # apply gradients

if step % 50 == 0:
     cnn.eval()
     eval_loss = 0.
     eval_acc = 0.
     for i, (tx, ty) in enumerate(test_loader):
       t_x = Variable(tx)
       t_y = Variable(ty)
       output = cnn(t_x)[0]
       loss = loss_func(output, t_y)
       eval_loss += loss.data[0]
       pred = torch.max(output, 1)[1]
       num_correct = (pred == t_y).sum()
       eval_acc += float(num_correct.data[0])
     acc_rate = eval_acc / float(len(test_data))
     print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(test_data)), acc_rate))

图片和label 见上一篇文章《pytorch 把MNIST数据集转换成图片和txt》

结果如下:

pytorch cnn 识别手写的字实现自建图片数据

来源:http://www.waitingfy.com/archives/3549

0
投稿

猜你喜欢

手机版 网络编程 asp之家 www.aspxhome.com