
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
train_dataset = datasets.MNIST(root='./dataset/mnist/',
                               train=True,
                               download=True,
                               transform=transform)
train_loader = DataLoader(dataset=train_dataset,
                          shuffle=True,
                          batch_size=batch_size)
test_dataset = datasets.MNIST(root='./dataset/mnist/',
                              train=False,
                              download=True,
                              transform=transform)
test_loader = DataLoader(dataset=test_dataset,
                         shuffle=False,
                         batch_size=batch_size)


class Net(torch.nn.Module):  # inherit from torch.nn.Module
    def __init__(self):
        super(Net, self).__init__()  # initialize the parent class
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)  # convolution layer 1: 2D convolution, 1 channel -> 10 channels, 5x5 kernel
        self.pooling = torch.nn.MaxPool2d(2)  # 2D max pooling: halves the feature-map height and width (rounding down)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)  # convolution layer 2: 10 channels -> 20 channels, 5x5 kernel
        # (batch_size,10,12,12)->(batch_size,20,8,8)
        self.l1 = torch.nn.Linear(320, 256)  # fully connected layers, 320 = 20*4*4
        self.l2 = torch.nn.Linear(256, 128)
        self.l3 = torch.nn.Linear(128, 64)
        self.l4 = torch.nn.Linear(64, 10)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.gelu(self.pooling(self.conv1(x)))  # (batch_size,1,28,28)->(batch_size,10,24,24)->(batch_size,10,12,12); no padding is used, so the spatial size shrinks
        x = F.gelu(self.pooling(self.conv2(x)))  # (batch_size,10,12,12)->(batch_size,20,8,8)->(batch_size,20,4,4)
        x = x.view(batch_size, -1)  # flatten to (batch_size, 4*4*20=320) before the fully connected layers
        x = F.gelu(self.l1(x))
        x = F.gelu(self.l2(x))
        x = F.gelu(self.l3(x))
        x = self.l4(x)

        return x  # return the raw logits; CrossEntropyLoss applies log-softmax internally


model = Net()  # instantiate the network
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # if a GPU is available, move the model to it for parallel computation on CUDA cores
model.to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)


def train(epoch):
    running_loss = 0
    for batch_idx, data in enumerate(train_loader):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0


def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, prediction = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (prediction == labels).sum().item()
    print('Accuracy on test set %.2f %%' % (100 * correct / total))


if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
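
The shape comments in the CNN above can be checked directly by pushing a dummy batch through each stage. A minimal sketch, assuming the Net class defined above (the variable names here are illustrative only):

import torch

net = Net()
dummy = torch.zeros(1, 1, 28, 28)     # one fake MNIST image, channels-first layout
h = net.pooling(net.conv1(dummy))     # torch.Size([1, 10, 12, 12])
print(h.shape)
h = net.pooling(net.conv2(h))         # torch.Size([1, 20, 4, 4])
print(h.shape)
print(h.view(1, -1).shape)            # torch.Size([1, 320]), matches Linear(320, 256)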

import torch  # import PyTorch
from torchvision import transforms  # import the image transforms from torchvision
from torchvision import datasets  # import the built-in datasets
from torch.utils.data import DataLoader  # import the data loader
import torch.nn.functional as F  # import the functional API (activation functions etc.)
import torch.optim as optim  # import the optimizers

# The MNIST digit-classification dataset is used: a single grayscale channel with pixel values 0-255
batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),  # map pixel values from [0, 255] to [0, 1]
    transforms.Normalize((0.1307,), (0.3081,))  # normalize with the mean and standard deviation of the MNIST training set
])  # preprocessing pipeline for the MNIST dataset
train_dataset = datasets.MNIST(root='./dataset/mnist/',  # root directory where the dataset is stored
                               train=True,  # select the training split
                               download=True,  # download the data if it is not already present
                               transform=transform)  # apply the transform pipeline defined above
train_loader = DataLoader(dataset=train_dataset,  # wrap the training set in a DataLoader
                          shuffle=True,  # shuffle the training data
                          batch_size=batch_size)  # number of samples per batch
test_dataset = datasets.MNIST(root='./dataset/mnist/',
                              train=False,  # train=False selects the test split
                              download=True,
                              transform=transform)
test_loader = DataLoader(dataset=test_dataset,
                         shuffle=False,  # no need to shuffle the test data
                         batch_size=batch_size)  # same batch size as for training


class Net(torch.nn.Module):  # define the network class
    def __init__(self):  # class initializer
        super(Net, self).__init__()  # initialize the parent class
        self.l1 = torch.nn.Linear(784, 512)  # fully connected (linear) layers
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # flatten each (1, 28, 28) input image into a (batch_size, 28*28) tensor
        x = F.gelu(self.l1(x))  # linear layer followed by the GELU activation, then on to the next layer
        x = F.gelu(self.l2(x))
        x = F.gelu(self.l3(x))
        x = F.gelu(self.l4(x))
        return self.l5(x)


model = Net()  # instantiate the network
criterion = torch.nn.CrossEntropyLoss()  # multi-class classification loss
# Use the SGD (stochastic gradient descent) optimizer to update the model parameters,
# with a learning rate and momentum to speed up convergence
optimizer = torch.optim.SGD(model.parameters(), lr=0.01,
                            momentum=.5)


def train(epoch):  # training loop for one epoch
    running_loss = 0  # running loss accumulator
    for batch_idx, (x, y) in enumerate(train_loader):  # iterate over train_loader, getting the batch index together with the inputs and labels
        inputs, target = x, y  # assign x, y to the inputs and the targets
        optimizer.zero_grad()  # reset the gradients to zero
        outputs = model(inputs)  # forward pass through the model to get the outputs
        loss = criterion(outputs, target)  # compute the loss with criterion
        loss.backward()  # backpropagate the loss to compute the gradients
        optimizer.step()  # update the parameters from the gradients
        running_loss += loss.item()  # accumulate the loss over every 300 batches
        if batch_idx % 300 == 299:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))  # print the average loss of the last 300 batches
            running_loss = 0  # reset the running loss


def test():  # evaluate on the test set
    correct = 0  # number of correct predictions
    total = 0  # total number of samples
    with torch.no_grad():  # no gradient tracking is needed during evaluation
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, prediction = torch.max(outputs, dim=1)  # torch.max along dim returns the maximum values and their indices
            total += labels.size(0)  # labels.size(0) is the number of samples in this batch
            correct += (prediction == labels).sum().item()  # count the correct predictions; accuracy = correct / total
    print('Accuracy on test set %d %%' % (100 * correct / total))


if __name__ == '__main__':
    for epoch in range(10):  # train for ten epochs
        train(epoch)
        test()
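
Both test() functions above rely on torch.max(outputs, dim=1) returning a pair of (maximum values, their indices), where the indices along dim=1 are the predicted class labels. A minimal standalone sketch (the tensor values below are made up purely for illustration):

import torch

logits = torch.tensor([[0.1, 2.0, -1.0],
                       [1.5, 0.3, 0.2]])       # fake outputs for 2 samples and 3 classes
values, prediction = torch.max(logits, dim=1)  # maximum value and its index for each row
print(values)      # tensor([2.0000, 1.5000])
print(prediction)  # tensor([1, 0])  <- predicted class labels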

import torch
import torch.nn.functional as F  # import the functional API (activation functions) from torch

x_data = torch.tensor([[1.0], [2.0], [3.0]]).cuda()  # put the data on the GPU
y_data = torch.tensor([[0.0], [0.0], [1.0]]).cuda()


class LogisticRegressionModel(torch.nn.Module):  # inherit from torch.nn.Module
    def __init__(self):  # class initializer
        super(LogisticRegressionModel, self).__init__()  # initialize the parent class
        self.linear = torch.nn.Linear(1, 1)  # one linear layer: 1 input feature -> 1 output

    def forward(self, x):  # define the forward pass
        y_pred = torch.sigmoid(self.linear(x))  # pass the linear output through a sigmoid (torch.sigmoid replaces the deprecated F.sigmoid)
        return y_pred


model = LogisticRegressionModel().cuda()  # instantiate the model and move its parameters to the GPU
criterion = torch.nn.BCELoss(reduction='sum').cuda()  # BCELoss is the binary-classification loss; reduction='sum' sums rather than averages the loss (replaces the deprecated size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)  # SGD optimizer over model.parameters() with learning rate 1e-2
for epoch in range(100):  # train for 100 epochs
    y_pred = model(x_data)  # forward pass through the model to get y_pred
    loss = criterion(y_pred, y_data)  # compute the loss between the predictions and the true values
    print(epoch + 1, loss.item())  # print the epoch number and the loss
    optimizer.zero_grad()  # reset the gradients to zero before backpropagation
    loss.backward()  # backpropagate the loss to compute the gradients
    optimizer.step()  # update the parameter weights from the computed gradients
print(model(torch.tensor([[4.0]]).cuda()))  # use the model to predict on a new input

Here the model, the loss, and the data are all placed on the GPU, which makes it straightforward later on to put the computation graph of a large neural network on the GPU to speed up training.
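
Calling .cuda() on everything works, but it fails on a machine without a GPU. A more portable pattern, shown here as a minimal sketch (it reuses the LogisticRegressionModel class above and is not part of the original program), picks the device once and moves the model and data with .to(device):

import torch

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # fall back to the CPU
x_data = torch.tensor([[1.0], [2.0], [3.0]]).to(device)
y_data = torch.tensor([[0.0], [0.0], [1.0]]).to(device)
model = LogisticRegressionModel().to(device)   # model parameters follow the chosen device
criterion = torch.nn.BCELoss(reduction='sum')  # the loss has no parameters, so moving it is optional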

import torch  # import the PyTorch module

x = torch.tensor([[1.0], [2.0], [3.0]]).cuda()  # create tensors x, y and store them on the GPU with cuda() for parallel computation on CUDA cores
y = torch.tensor([[2.0], [4.0], [6.0]]).cuda()  # for data and networks this small, the CPU would actually be faster


class LinearModel(torch.nn.Module):  # define a linear-model class inheriting from torch.nn.Module
    def __init__(self):  # class initializer
        super(LinearModel, self).__init__()  # call the parent-class initializer
        self.linear = torch.nn.Linear(1, 1)  # a single torch.nn.Linear layer as the attribute linear

    def forward(self, x):  # define the forward pass
        y_pred = self.linear(x)  # y_pred is the output of the linear layer
        return y_pred  # return the prediction


model = LinearModel().cuda()  # move the model's parameters to the GPU
criterion = torch.nn.MSELoss(reduction='sum').cuda()  # MSE loss between y_pred and y_true, summed instead of averaged (replaces the deprecated size_average=False), also placed on the GPU
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer over the model's parameters with learning rate lr=0.01 to update the linear layer's weights
for epoch in range(1000):  # train for 1000 epochs
    y_pred = model(x)  # y_pred (the prediction) is produced by the model's forward pass
    loss = criterion(y_pred, y)  # compute the error between the true values and the predictions: $loss=\sum_i (y_i-\hat{y}_i)^2$
    print(epoch + 1, loss.item())  # print the epoch number and the loss value
    optimizer.zero_grad()  # 将梯度初始化为0
    loss.backward()  # 通过反向传播来求的梯度
    optimizer.step()  # 优化器通过反向传播获得的梯度来更新参数
print('w=', model.linear.weight.item())
print('b=', model.linear.bias.item())
x_test = torch.tensor([[4.0]]).cuda()  # the model and the loss live on the GPU, so the test input has to be on the GPU as well
y_test = model(x_test)
print('y_pred=', y_test.data)

In general, if a tensor's device is 'cuda:0', the data is stored in the GPU's memory.
Without calling cuda(), data stays in main memory by default and training runs on the CPU.
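
A quick way to confirm where a tensor lives and to move it back and forth, as a minimal sketch (the variable names are illustrative only):

import torch

t = torch.tensor([[4.0]])
print(t.device)              # cpu: tensors live in main memory by default
if torch.cuda.is_available():
    t_gpu = t.cuda()         # or t.to('cuda:0'); the data is copied to GPU memory
    print(t_gpu.device)      # cuda:0
    t_back = t_gpu.cpu()     # copy back to main memory, e.g. before converting to numpy
    print(t_back.device)     # cpu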