PyTorch Tutorial
- Torch is a tensor-manipulation library originally written in the Lua language.
- PyTorch provides two high-level features:
  - Tensor computation with strong GPU acceleration (like NumPy)
  - Deep neural networks built on an automatic differentiation (autograd) system
Basics
Core Components
- Tensors
- Autograd on tensors
- The nn.Module class, the base class for building any neural network
- Optimizers (see the sketch after this list)
  - Stochastic gradient descent (SGD)
  - Adam, Adadelta, Adagrad, SparseAdam
  - L-BFGS
  - RMSprop
- Loss functions
  - Binary and multi-class cross entropy
  - Mean squared and mean absolute errors
  - Smooth L1 loss
  - Negative log-likelihood loss
  - Kullback-Leibler divergence
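A minimal sketch of wiring an optimizer and a loss function together; the single linear layer, learning rate, and loss choice are illustrative stand-ins, not prescriptions:

import torch.nn as nn
import torch.optim as optim

# Any nn.Module works here; a single linear layer keeps the sketch short.
net = nn.Linear(10, 2)

# Pick an optimizer from the list above and hand it the model's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# Pick a loss function, e.g. cross entropy for multi-class classification.
criterion = nn.CrossEntropyLoss()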
Basic Workflow
- Build the neural network as a custom class (inheriting from nn.Module) that holds the layers as attributes, plus a forward method that propagates the input tensor through the layers and activation functions
- Call this forward method to propagate a feature tensor through the network and obtain an output tensor
- Compute the loss by comparing the output against the ground truth, using one of the built-in loss functions
- Propagate the gradients of the loss with the backward method, using the automatic differentiation capability (Autograd)
- Use the gradients of the loss to update the network weights; this is done by taking one step of the optimizer, optimizer.step() (see the sketch below)
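The steps above map directly onto a short training step. A minimal, self-contained sketch, where the single-layer net, the batch tensors, and all hyperparameters are hypothetical placeholders:

import torch
import torch.nn as nn
import torch.optim as optim

net = nn.Linear(10, 2)                   # stand-in model
optimizer = optim.SGD(net.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

inputs = torch.randn(4, 10)              # hypothetical feature batch
labels = torch.randint(0, 2, (4,))       # hypothetical ground-truth labels

optimizer.zero_grad()                    # clear gradients from the last step
outputs = net(inputs)                    # forward: propagate features through the net
loss = criterion(outputs, labels)        # compare output against the ground truth
loss.backward()                          # Autograd propagates the gradients
optimizer.step()                         # one optimizer step updates the weights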
dataset
- The data needs to be split into a training set (train), a validation set (valid), and a test set (test), typically in an 8:1:1 ratio
- Prepare a txt file that stores the image paths and their labels
- The Dataset class is the base class PyTorch uses to read data
- Build an index of the image data (paths relative to the training .py file)
- Build a Dataset subclass (see the sketch after this list)
- Data augmentation and data normalization
  - Data centering: subtract the mean only
  - Data standardization: subtract the mean, then divide by the standard deviation
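A minimal sketch of such a Dataset subclass, assuming a hypothetical index file train.txt whose lines look like relative/path/img.jpg 3 (path, space, integer label):

from PIL import Image
from torch.utils.data import Dataset

class TxtImageDataset(Dataset):
    """Reads 'path label' lines from a txt index file."""
    def __init__(self, txt_path, transform=None):
        with open(txt_path) as f:
            # Each line: relative image path, a space, an integer label.
            self.samples = [line.strip().rsplit(' ', 1) for line in f if line.strip()]
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, label = self.samples[index]
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)   # augmentation / normalization hook
        return img, int(label)

A DataLoader can then batch and shuffle this dataset during training.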
- The twenty-two transforms methods (see the composition sketch after this list)
  - Cropping (Crop)
    - Center crop: transforms.CenterCrop
    - Random crop: transforms.RandomCrop
    - Random resized crop: transforms.RandomResizedCrop
    - Crop the four corners and the center: transforms.FiveCrop
    - Crop the four corners and the center, then flip: transforms.TenCrop
  - Flipping and rotation (Flip and Rotation)
    - Horizontal flip with probability p: transforms.RandomHorizontalFlip(p=0.5)
    - Vertical flip with probability p: transforms.RandomVerticalFlip(p=0.5)
    - Random rotation: transforms.RandomRotation
  - Image transformations
    - Resize: transforms.Resize
    - Standardize: transforms.Normalize
    - Convert to a tensor, normalized to [0, 1]: transforms.ToTensor
    - Padding: transforms.Pad
    - Adjust brightness, contrast, and saturation: transforms.ColorJitter
    - Convert to grayscale: transforms.Grayscale
    - Linear transformation: transforms.LinearTransformation()
    - Affine transformation: transforms.RandomAffine
    - Convert to grayscale with probability p: transforms.RandomGrayscale
    - Convert data to a PILImage: transforms.ToPILImage
    - transforms.Lambda: apply a user-defined lambda as a transform
  - Operations on transforms themselves, for more flexible augmentation
    - transforms.RandomChoice(transforms): pick one transform from the given list and apply it
    - transforms.RandomApply(transforms, p=0.5): attach a probability to a transform and apply it with that probability
    - transforms.RandomOrder: apply the given transforms in random order
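A minimal sketch composing several of the transforms above into one training-time pipeline; the crop size and the mean/std values (the common ImageNet statistics) are illustrative:

from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),          # random aspect-ratio crop
    transforms.RandomHorizontalFlip(p=0.5),     # flip with probability 0.5
    transforms.ColorJitter(brightness=0.2),     # mild brightness jitter
    transforms.ToTensor(),                      # PIL image -> [0, 1] tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

The composed object can be passed as the transform argument of a Dataset subclass like the one sketched in the dataset section.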
Model
Building the model
- First, the class must inherit from nn.Module so that PyTorch knows it is a Module.
- Next, set up the needed "components" in __init__(self) (such as conv, pooling, Linear, BatchNorm, and so on).
- Finally, assemble the defined "components" in forward(self, x) like building blocks to lay out the network structure; with that, the model is defined (see the sketch below).
- The first line is the initialization; after it come a series of components, such as conv1 built from Conv2d and pool1 built from MaxPool2d. These operations are all provided by torch.nn, and the torch.nn documentation covers them.
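A minimal sketch of these three steps, assuming 1-channel 28x28 inputs; the layer sizes are illustrative:

import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):                    # step 1: inherit nn.Module
    def __init__(self):
        super(SimpleNet, self).__init__()
        # step 2: set up the components
        self.conv1 = nn.Conv2d(1, 8, 3)        # conv1 built from Conv2d
        self.pool1 = nn.MaxPool2d(2)           # pool1 built from MaxPool2d
        self.fc1 = nn.Linear(8 * 13 * 13, 10)  # 13x13 from the 28x28 input

    def forward(self, x):
        # step 3: assemble the components like building blocks
        x = self.pool1(F.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)
        return self.fc1(x)

print(SimpleNet()(torch.randn(1, 1, 28, 28)).size())  # torch.Size([1, 10])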
Model Finetuning
Loss functions and optimizers
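A minimal finetuning sketch under stated assumptions: torchvision's resnet18 stands in for whatever pretrained backbone is being adapted, the checkpoint path pretrained.pth is hypothetical, and the 2-class head, learning rates, and momentum values are illustrative. The optimizer is given two parameter groups so the pretrained layers learn more slowly than the freshly replaced classifier, and the loss is ordinary cross entropy:

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

net = models.resnet18()                     # backbone architecture
# state = torch.load('pretrained.pth')      # hypothetical checkpoint
# net.load_state_dict(state)
net.fc = nn.Linear(net.fc.in_features, 2)   # replace the classifier head

# Two parameter groups: small lr for pretrained layers, larger for the head.
head_params = list(map(id, net.fc.parameters()))
base_params = [p for p in net.parameters() if id(p) not in head_params]
optimizer = optim.SGD([
    {'params': base_params, 'lr': 0.001},
    {'params': net.fc.parameters(), 'lr': 0.01},
], momentum=0.9)

criterion = nn.CrossEntropyLoss()           # loss for the finetuning task

Freezing the backbone entirely (requires_grad_(False)) is the other common option when training data is scarce.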
Simple torch operations
import torch

t = torch.tensor([[1., -1.], [1., -1.]])
print(t)
print(t.dtype)
print(t.shape)
print(t.device)
Tensors
import torch
import numpy as np

# Check whether an object is a tensor
print(torch.is_tensor([[1, 2], [2, 3]]))

# Count the number of elements in a tensor
a = torch.tensor(2)
print(torch.numel(a))
tensor_a = torch.randn(1, 2, 3)
print(torch.numel(tensor_a))

tensor_b = torch.zeros(4, 5)
print(tensor_b, type(tensor_b), type(type(tensor_b)))

# Create a 2-D tensor with ones on the diagonal and zeros elsewhere
eyes = torch.eye(4)
print(eyes)

# Convert a numpy array to a torch tensor
numpy_arr = np.array([[1, 2], [2, 3]])
print('numpy_arr', numpy_arr, type(numpy_arr), type(type(numpy_arr)))
arr_tensor = torch.from_numpy(numpy_arr)
print("arr_tensor", arr_tensor, type(arr_tensor), type(type(numpy_arr)))
arr_tensor[1, -1] = 5  # shares memory with numpy_arr
print(arr_tensor)

# steps evenly spaced points over the interval [start, end]
linspace_tensor = torch.linspace(0, 10, steps=100)
print(linspace_tensor, type(linspace_tensor), type(type(linspace_tensor)))
print('torch.Size()', linspace_tensor.size())

# steps points spaced evenly on a log scale over [10^start, 10^end];
# the output 1-D tensor has length steps
logs_tensor = torch.logspace(-10, 10, steps=10)
print("logs_tensor", logs_tensor, type(logs_tensor))

# All-ones tensor
ones_tensor = torch.ones((3, 3))
print("ones_tensor", ones_tensor, type(ones_tensor), ones_tensor.size())

# All-zeros tensor
zeros_tensor = torch.zeros(2, 3)
print('zeros_tensor', zeros_tensor, zeros_tensor.size())

# Tensor of samples from the standard normal distribution
rand_tensor = torch.randn(4)
print('rand_tensor', rand_tensor, type(rand_tensor))
rand_tensor = torch.randn([4, 4])
print('rand_tensor', rand_tensor, type(rand_tensor))

# Values at a fixed step over the half-open interval [start, end)
arange_rand = torch.arange(1, 4, 0.2)
print('arange_rand', arange_rand, type(arange_rand), arange_rand.size())

# Concatenate tensors
x = torch.randn([2, 3])
y = torch.cat((x, x, x, x), 0)  # concatenate along rows
print("y", y.size(), y)
y = torch.cat((x, x, x, x), 1)  # concatenate along columns
print("y", y.size(), y)

# Split
z = torch.split(y, (1, 1), dim=0)
print("z", z, type(z))

# Transpose
y1 = torch.t(y)
print("y1", y1, type(y1), y1.size())
# Create a tensor drawn from the normal distribution
# std_tensor = torch.normal(mean=torch.arange(1., 6.), std=torch.ones(5))
# print('std_tensor', std_tensor, std_tensor.size())

# Save an object
torch.save(z, f='./z.pkl')
# Load an object
zz = torch.load(f='./z.pkl')
print('zz', zz, type(zz))

# Element-wise absolute value
y2 = torch.abs(y1)
print('y2', y2, type(y2), y2.size())
x1 = torch.abs(torch.FloatTensor([-1.1, 2.1, -6, -8]))
print(x1, type(x1), x1.size())

# Element-wise arccosine
a = torch.rand(4)
print("a", a)
a = torch.acos(a)
print("arccosine of a", a)

# Element-wise cosine
a = torch.rand(4)
print("a", a)
a = torch.cos(a)
print("cosine of a", a)

# Element-wise hyperbolic cosine
a = torch.rand(4)
print("a", a)
a = torch.cosh(a)
print("hyperbolic cosine of a", a)

# Element-wise arcsine
a = torch.rand(4)
print("a", a)
a = torch.asin(a)
print("arcsine of a", a)

# Element-wise arctangent
a = torch.rand(4)
print("a", a)
a = torch.atan(a)
print("arctangent of a", a)

# Add a fixed scalar to a tensor
b = torch.add(a, 30)
print('b', b, b.size())

# Division: divide every element of a tensor by a scalar
zz = torch.div(b, 3)
print("division", zz, zz.size())
# Division: divide every element of a tensor by the corresponding element of another tensor
zz = torch.div(b, b)
print("element-wise division of two same-shaped tensors", zz, zz.size())

# Compound op: tensor + value * (tensor1 / tensor2)
aa = torch.addcdiv(a, a, b, value=2)
print('aa', aa, aa.size())
# Compound op: tensor + value * (tensor1 * tensor2)
bb = torch.addcmul(a, a, b, value=2)
print('bb', bb, bb.size())

# Ceiling: round every element up to the nearest integer
a = torch.rand(4)
print("a", a)
a = torch.ceil(a)
print("ceiling", a)

# Power: raise every element of the tensor to the given exponent
a = torch.rand(4)
bb = torch.pow(a, 4)
print('power', bb, bb.size())

# Exponential: e raised to each element of the tensor
exp_one = torch.exp(ones_tensor)
print("exponential", exp_one, exp_one.size())

# Round to the nearest integer
round_tensor = torch.round(exp_one)
print('round_tensor', round_tensor)

# Natural logarithm (use positive inputs; log of a negative value is NaN)
a = torch.rand(5)
b = torch.log(a)
print('natural logarithm', b, b.size())

# Remainder of division; the divisor and dividend may both contain
# integers and floats, and the remainder takes the sign of the dividend
aa = torch.fmod(torch.Tensor([-3, -2, -1, 1, 2, 3]), 2)
print('remainder', aa)

# Mean
mean_aa = torch.mean(aa, 0)
print('mean', mean_aa)
# Median
median_aa = torch.median(aa, 0)
print("median", median_aa)
# Mode
mode_aa = torch.mode(a, 0)
print("mode", mode_aa)

# Standard deviation
std_tensor = torch.std(a, 0)
print('standard deviation', std_tensor)
# Sum
sum_tensor = torch.sum(a)
print("sum", sum_tensor)
# Variance
var_a = torch.var(a, 0)
print("variance", var_a)

# Element-wise comparison of two tensors: 1 where equal, 0 where not
aa_tensor = torch.eq(a, a)
print("element-wise equality", aa_tensor)
# True if two tensors have the same shape and the same values
aa_equal = torch.equal(a, a)
print("equal", aa_equal)

# Maximum
max_a = torch.max(a)
print('maximum', max_a)
# Minimum
min_a = torch.min(a)
print('minimum', min_a)

# torch.Tensor is shorthand for the default tensor type (torch.FloatTensor)
aa = torch.FloatTensor([[-1, 2, 3], [3, -4, 5]])
print(aa, type(aa), aa.size())
# In-place absolute value, overwriting the original values (the func_ convention)
aa.abs_()
print(aa, type(aa), aa.size())
# Expand
x = torch.Tensor([[1], [2], [3]])
print(x.size())
x = x.expand(3, 4)
print(x.size(), x)
Building a network
- class torch.nn.Module: the base class for all networks
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

model = Model()
print('model.conv1', model.conv1)
print("model.conv2", model.conv2)

# Iterator over the model's direct children
for sub_module in model.children():
    print(sub_module, type(type(sub_module)))

# Iterator over all modules in the model, including itself
for module in model.modules():
    print(module, type(type(module)))

# Iterator over the model's direct children, yielding the module's
# name and the module itself
for name, module in model.named_children():
    if name in ['conv1', 'conv2']:
        print(module)

# Iterator over all parameters of the model
print("Iterator over all parameters of the model:")
for param in model.parameters():
    print(type(param.data), param.size())

print('model.state_dict().keys()', model.state_dict().keys())

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)  # 6*6 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
net = Net()
print(net)
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

# Simulate a network input
input_data = torch.randn(1, 1, 32, 32)
out = net(input_data)
print("out", out)

# Loss function
input_data = torch.randn(1,1,32,32)
output = net(input_data)
target = torch.randn(10)
target = target.view(1,-1)
print('target',target,type(target),target.size())
criterion = nn.MSELoss()
loss = criterion(output, target)
print('loss:', loss)

# Backward pass
net.zero_grad()
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

# Update the weights
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

params = list(net.parameters())
print(len(params))
print(params[0].size())

# Optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()
output = net(input_data)
loss = criterion(output,target)
loss.backward()
optimizer.step()
Learning torchaudio
import torch
import torchaudio
import matplotlib.pyplot as plt

file_path = './data/000-test.wav'
waveform,sample_rate = torchaudio.load(file_path)
print('waveform',waveform,type(waveform),'\nsample_rate:',sample_rate,type(sample_rate))
print("shape of waveform: {}".format(waveform.size()))
print("sample rate of waveform:{}".format(sample_rate))
print("waveform.t(){}".format(waveform.t()))
print('type(waveform.t())', type(waveform.t()))
print("Min of waveform: {}\nMax of waveform: {}\nMean of waveform: {}".format(waveform.min(), waveform.max(), waveform.mean()))
# Normalize the audio data
def normalize(waveform):
    tensor_minusmean = waveform - waveform.mean()
    return tensor_minusmean / tensor_minusmean.abs().max()

plt.figure()
plt.plot(waveform.t().numpy())
# plt.show()

# Spectrogram
specgram = torchaudio.transforms.Spectrogram()(waveform)
print('specgram', specgram, type(specgram), specgram.size())
log_specgram = specgram.log2()
print('log_specgram', log_specgram, type(log_specgram), log_specgram.size())
log_specgram = log_specgram[0, :, :]
print('log_specgram2', log_specgram, type(log_specgram), log_specgram.size())
numpy_specgram = log_specgram.numpy()
print('numpy_specgram', numpy_specgram, type(numpy_specgram))
# plt.figure()
# plt.imshow(numpy_specgram, cmap="gray")
# plt.show()

# Mel spectrogram
melspecgram = torchaudio.transforms.MelSpectrogram()(waveform)
print('melspecgram', melspecgram, type(melspecgram), melspecgram.size())
log_specgram = melspecgram.log2()
print('log_mel_specgram', log_specgram, type(log_specgram), log_specgram.size())
log_specgram = log_specgram[0, :, :].detach()
print('log_specgram2', log_specgram, type(log_specgram), log_specgram.size())
numpy_specgram = log_specgram.numpy()
print('numpy_specgram', numpy_specgram, type(numpy_specgram))
# plt.figure()
# plt.imshow(numpy_specgram, cmap="gray")
# plt.show()

# Resample
new_sample_rate = sample_rate / 2
channels = 0
transformed = torchaudio.transforms.Resample(sample_rate, new_sample_rate)(waveform[channels, :].view(1, -1))
print('transformed',transformed, type(transformed),transformed.size())
# plt.figure()
# plt.plot(transformed[0, :].numpy())
# plt.show()

# Mu-law encode the audio data
transformed11 = torchaudio.transforms.MuLawEncoding()(waveform)
print('transformed33', transformed11, type(transformed11), transformed11.size())
transformed = transformed11[0, :]
print('transformed44', transformed, type(transformed), transformed.size())
# plt.figure()
# plt.plot(transformed.numpy())
# plt.show()

# Decode
restructed = torchaudio.transforms.MuLawDecoding()(transformed11)
print('restructed', restructed, type(restructed), restructed.size())
print('waveform', waveform, type(waveform), waveform.size())
plt.figure()
plt.plot(restructed[0, :].numpy())
# plt.show()

# Compare the original against the encode/decode round trip
err = ((waveform - restructed).abs() / waveform.abs()).median()
print("difference: {:.2%}".format(err), type(err), err.size())

# Kaldi-compatible features
n_fft = 400.0
frame_length = n_fft / sample_rate * 1000.0
frame_shift = frame_length / 2.0

params = {
    "channel": 0,
    "dither": 0.0,
    "window_type": "hanning",
    "frame_length": frame_length,
    "frame_shift": frame_shift,
    "remove_dc_offset": False,
    "round_to_power_of_two": False,
    "sample_frequency": sample_rate,
}

specgram = torchaudio.compliance.kaldi.spectrogram(waveform, **params)
print("Shape of spectrogram: {}".format(specgram.size()))
plt.figure()
plt.imshow(specgram.t().numpy(), cmap='gray')
plt.show()

# torchaudio also supports computing filterbank features from waveforms,
# matching Kaldi's implementation
fbank = torchaudio.compliance.kaldi.fbank(waveform, **params)
print("Shape of fbank: {}".format(fbank.size()))
plt.figure()
plt.imshow(fbank.t().numpy(), cmap='gray')
plt.show()
Building a Neural Network
Steps
- 1. Define a neural network that contains trainable parameters
- 2. Iterate over the whole input
- 3. Process the input through the neural network
- 4. Compute the loss
- 5. Backpropagate the gradients to the network's parameters
- 6. Update the network's parameters, typically with a simple update rule: weight = weight - learning_rate * gradient
Examples
numpy NN example
import numpy as np
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(5000):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop: compute gradients of loss with respect to w1 and w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
PyTorch tensors
import torch

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # uncomment to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = torch.randn(N, D_in, device=device, dtype=dtype)
y = torch.randn(N, D_out, device=device, dtype=dtype)

# Randomly initialize weights
w1 = torch.randn(D_in, H, device=device, dtype=dtype)
w2 = torch.randn(H, D_out, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum().item()
    print(t, loss)

    # Backprop: compute gradients of loss with respect to w1 and w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update weights using gradient descent
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
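The same two-layer network can lean on Autograd instead of hand-written gradients. A minimal sketch reusing the shapes above; the requires_grad flag is what replaces the manual backprop block:

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# requires_grad=True asks Autograd to track operations on these weights
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for t in range(500):
    # Forward pass: same math as before, but no saved intermediates needed
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # Autograd computes grad_w1 and grad_w2 for us
    loss.backward()

    # Update weights in-place without tracking, then reset the gradients
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        w1.grad.zero_()
        w2.grad.zero_()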