My code was run on Kaggle.
Linear AE
import os
import datetime
import numpy as np
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
if not os.path.exists('./dc_img'):
    os.mkdir('./dc_img')

def to_img(x):
    # map the Tanh output from [-1, 1] back to [0, 1] and reshape to image form
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 1, 28, 28)
    return x
num_epochs = 20
batch_size = 128
learning_rate = 1e-3
img_transform = transforms.Compose([
    transforms.ToTensor(),
    # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    transforms.Normalize([0.5], [0.5])
])
dataset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
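As a quick sanity check (not in the reference code), you can pull one batch from the dataloader and confirm its shape and value range; a minimal sketch, assuming the transform above:

# A minimal sketch: inspect one batch to confirm shapes and the normalized value range
imgs, labels = next(iter(dataloader))
print(imgs.shape, labels.shape)              # expected: torch.Size([128, 1, 28, 28]) torch.Size([128])
print(imgs.min().item(), imgs.max().item())  # roughly -1.0 and 1.0 after Normalize([0.5], [0.5])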
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28*28, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3)
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, 28*28),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = autoencoder()
print(model)
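Besides print(model), a quick way (not in the reference code) to see how small this network is; a minimal sketch:

# A minimal sketch: count the trainable parameters of the linear autoencoder
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('trainable parameters:', num_params)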
# Inspect the shape flow through the network layer by layer
net = nn.Sequential(
    nn.Linear(28*28, 128), nn.ReLU(True),
    nn.Linear(128, 64), nn.ReLU(True),
    nn.Linear(64, 12), nn.ReLU(True),
    nn.Linear(12, 3),
    nn.Linear(3, 12), nn.ReLU(True),
    nn.Linear(12, 64), nn.ReLU(True),
    nn.Linear(64, 128), nn.ReLU(True),
    nn.Linear(128, 28*28), nn.Tanh())
X = torch.rand(size=(1, 28*28), dtype=torch.float32)
for layer in net:
    X = layer(X)  # apply the layer first so the printed shape really is its output
    print(layer.__class__.__name__, 'output shape: \t', X.shape)
if torch.cuda.is_available():
    model.cuda()
    print('cuda is OK!')
    # model = model.to('cuda')
else:
    print('cuda is NO!')
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,weight_decay=1e-5)
starttime = datetime.datetime.now()
for epoch in range(num_epochs):
    for data in dataloader:
        img, label = data
        img = img.view(img.size(0), -1)  # flatten each 28x28 image to a 784-d vector
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    endtime = datetime.datetime.now()
    print('epoch [{}/{}], loss:{:.4f}, time:{:.2f}s'.format(
        epoch + 1, num_epochs, loss.item(), (endtime - starttime).seconds))
    # if epoch % 10 == 0:
    pic = to_img(output.cpu().data)
    save_image(pic, './dc_img/image_{}.png'.format(epoch))
torch.save(model.state_dict(), './linear_autoencoder.pth')  # save the linear AE's weights
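If you want to reuse the trained weights in a later session, a minimal sketch of reloading them with load_state_dict (assuming the file path saved above):

# A minimal sketch: rebuild the model and load the saved weights (path assumed from above)
model2 = autoencoder()
state = torch.load('./linear_autoencoder.pth', map_location='cpu')
model2.load_state_dict(state)
model2.eval()  # switch to evaluation mode before running inference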
import matplotlib.pyplot as plt
from scipy.stats import norm

n = 15  # a 15*15 grid of 225 digit images
digit_size = 28
figure = np.zeros((digit_size*n, digit_size*n))  # the final stitched image
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))  # assume the latent space follows a Gaussian
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))  # ppf maps the uniform grid to Gaussian quantiles
for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        # tile z_sample many times to form a full batch
        z_sample = np.array([[[[xi, yi]]]], dtype=np.float32)
        z_sample = np.tile(z_sample, batch_size*392).reshape(batch_size, 784)
        z_sample = torch.from_numpy(z_sample)  # convert to a tensor
        z_sample = z_sample.cuda()  # move to the GPU
        output = model(z_sample)  # note: this feeds the full autoencoder, not just the decoder
        digit = output[0].reshape(digit_size, digit_size)  # 128*784 -> 28*28
        digit = digit.cpu().detach().numpy()  # back to numpy
        figure[i*digit_size:(i+1)*digit_size, j*digit_size:(j+1)*digit_size] = digit
plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
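Since the bottleneck of the linear AE above is 3-dimensional, an alternative worth trying (not in the reference code) is to feed latent vectors straight into model.decoder instead of the full autoencoder; a minimal sketch, assuming the model and to_img defined above:

# A minimal sketch: decode hand-picked 3-D latent codes directly (assumes the trained model above)
with torch.no_grad():
    z = torch.tensor([[0.0, 0.0, 0.0],
                      [1.0, -1.0, 0.5]], dtype=torch.float32)
    if torch.cuda.is_available():
        z = z.cuda()
    decoded = model.decoder(z)        # (2, 784), in [-1, 1] because of the Tanh output
    imgs = to_img(decoded.cpu())      # back to (2, 1, 28, 28) in [0, 1]
    save_image(imgs, './dc_img/decoded_latent.png')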
Convolutional AE
import os
import datetime
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
if not os.path.exists('./dc_img'):
    os.mkdir('./dc_img')

def to_img(x):
    # map the Tanh output from [-1, 1] back to [0, 1] and reshape to image form
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 1, 28, 28)
    return x
num_epochs = 100
batch_size = 128
learning_rate = 1e-3
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])
dataset = MNIST('./data', transform=img_transform, download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=3, padding=1),   # b, 16, 10, 10
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),                  # b, 16, 5, 5
            nn.Conv2d(16, 8, 3, stride=2, padding=1),   # b, 8, 3, 3
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=1)                   # b, 8, 2, 2
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(8, 16, 3, stride=2),             # b, 16, 5, 5
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 5, stride=3, padding=1),  # b, 8, 15, 15
            nn.ReLU(True),
            nn.ConvTranspose2d(8, 1, 2, stride=2, padding=1),   # b, 1, 28, 28
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = autoencoder()
print(model)
# Inspect the shape flow through the network layer by layer
net = nn.Sequential(
    nn.Conv2d(1, 16, 3, stride=3, padding=1), nn.ReLU(True), nn.MaxPool2d(2, stride=2),
    nn.Conv2d(16, 8, 3, stride=2, padding=1), nn.ReLU(True), nn.MaxPool2d(2, stride=1),
    nn.ConvTranspose2d(8, 16, 3, stride=2), nn.ReLU(True),
    nn.ConvTranspose2d(16, 8, 5, stride=3, padding=1), nn.ReLU(True),
    nn.ConvTranspose2d(8, 1, 2, stride=2, padding=1), nn.Tanh())
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
for layer in net:
    X = layer(X)  # apply the layer first so the printed shape really is its output
    print(layer.__class__.__name__, 'output shape: \t', X.shape)
if torch.cuda.is_available():
    model.cuda()
    print('cuda is OK!')
    # model = model.to('cuda')
else:
    print('cuda is NO!')
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,weight_decay=1e-5)
starttime = datetime.datetime.now()
for epoch in range(num_epochs):
    for data in dataloader:
        img, label = data
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    endtime = datetime.datetime.now()
    print('epoch [{}/{}], loss:{:.4f}, time:{:.2f}s'.format(
        epoch + 1, num_epochs, loss.item(), (endtime - starttime).seconds))
    # if epoch % 10 == 0:
    pic = to_img(output.cpu().data)
    save_image(pic, './dc_img/image_{}.png'.format(epoch))
# torch.save(model.state_dict(), './conv_autoencoder.pth')
import matplotlib.pyplot as plt
from scipy.stats import norm
import numpy as np

n = 15  # a 15*15 grid of 225 digit images
digit_size = 28
figure = np.zeros((digit_size*n, digit_size*n))  # the final stitched image
grid_x = norm.ppf(np.linspace(0.05, 0.95, n))  # assume the latent space follows a Gaussian
grid_y = norm.ppf(np.linspace(0.05, 0.95, n))  # ppf maps the uniform grid to Gaussian quantiles
for i, yi in enumerate(grid_x):
    for j, xi in enumerate(grid_y):
        # tile z_sample many times to form a full batch
        z_sample = np.array([[[[xi, yi]]]], dtype=np.float32)
        z_sample = np.tile(z_sample, batch_size*392).reshape(batch_size, 1, 28, 28)
        z_sample = torch.from_numpy(z_sample)  # convert to a tensor
        z_sample = z_sample.cuda()  # move to the GPU
        output = model(z_sample)  # note: this feeds the full autoencoder, not just the decoder
        digit = output[0].reshape(digit_size, digit_size)  # 128*1*28*28 -> 28*28
        digit = digit.cpu().detach().numpy()  # back to numpy
        figure[i*digit_size:(i+1)*digit_size, j*digit_size:(j+1)*digit_size] = digit
plt.figure(figsize=(10, 10))
plt.imshow(figure, cmap='Greys_r')
plt.show()
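To see how faithfully the convolutional AE reconstructs real digits (rather than the tiled grid samples above), a minimal sketch that saves originals and reconstructions side by side, assuming the model, transform, and to_img defined above:

# A minimal sketch: reconstruct a batch of MNIST test digits and save them next to the originals
test_set = MNIST('./data', train=False, transform=img_transform, download=True)
test_loader = DataLoader(test_set, batch_size=8, shuffle=True)
imgs, _ = next(iter(test_loader))
if torch.cuda.is_available():
    imgs = imgs.cuda()
with torch.no_grad():
    recons = model(imgs)
comparison = torch.cat([to_img(imgs.cpu()), to_img(recons.cpu())])  # originals in the first row
save_image(comparison, './dc_img/reconstruction_vs_original.png', nrow=8)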
Reference:
pytorch-beginner/08-AutoEncoder at master · L1aoXingyu/pytorch-beginner (github.com)