-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
286 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# Data manipulation
# A short tour of basic tensor operations with torch.

# The library is called PyTorch, but the module to import is `torch`,
# not `pytorch`.
import torch

# A tensor is an array of numeric values that may have several dimensions.
x = torch.arange(12)
var = x.shape  # shape of the tensor
print(x.numel())  # total number of elements in the tensor

# reshape changes the shape without changing the element count or values.
X = x.reshape(3, 4)
var2 = X.shape
print(X.numel())

# Tensors filled with zeros, ones, or numbers sampled at random.
x1 = torch.zeros((2, 3, 4))  # 2 layers, 3 rows, 4 columns
x2 = torch.ones((2, 3, 4))
x3 = torch.randn(3, 4)

# A (nested) Python list assigns an explicit value to every element.
x4 = torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])

# The standard arithmetic operators (+, -, *, / and **) work element-wise.
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
x5 = x + y
x6 = x - y
x7 = x * y
x8 = x / y
x9 = x ** y

# More element-wise computations are available as functions.
x10 = torch.exp(x)

# Several tensors can be concatenated together.
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
x11 = torch.cat((X, Y), dim=0)  # append along dim 0 (rows)
x12 = torch.cat((X, Y), dim=1)  # append along dim 1 (columns)

# A logical operator builds a boolean tensor.
# Printed so the result is visible when this runs as a script rather than
# in a notebook cell.
print(X == Y)

# Summing all elements yields a tensor holding a single element.
x13 = X.sum()

# Even when shapes differ, broadcasting allows element-wise operations.
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
x14 = a + b

# [-1] selects the last element; [1:3] selects the second and third.
print(X[-1], X[1:3])

# Besides reading, an index can also be used to write an element.
X[1, 2] = 9

# To give several elements the same value, index them all and assign.
X[0:2, :] = 12
print(X)

# Some operations allocate new memory for their result: Y is rebound to a
# new tensor here, so the identity check prints False.
before = id(Y)
Y = Y + X
print(id(Y) == before)

# In-place operation: slice assignment writes into the existing Z.
Z = torch.zeros_like(Y)
print('id(Z):', id(Z))
Z[:] = X + Y
print('id(Z):', id(Z))

# If X is not reused later, X[:] = X + Y or X += Y also reduces the memory
# overhead of the operation.
before = id(X)
X += Y
print(id(X) == before)

# Convert to a NumPy array and back to a tensor.
A = X.numpy()
B = torch.tensor(A)
print(type(A), type(B))

# Convert a size-1 tensor to a Python scalar.
a = torch.tensor([3.5])
print(a, a.item(), float(a), int(a))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Data preprocessing
# Builds a tiny CSV dataset, loads it with pandas, fills in the missing
# values and converts the result to torch tensors.
import os

import pandas as pd
import torch

# Create an artificial dataset and store it in a CSV (comma-separated
# values) file.
os.makedirs(os.path.join('..', 'data'), exist_ok=True)
data_file = os.path.join('..', 'data', 'house_tiny.csv')
# Explicit encoding keeps the written bytes independent of the platform
# default.
with open(data_file, 'w', encoding='utf-8') as f:
    f.writelines([
        'NumRooms,Alley,Price\n',
        'NA,Pave,127500\n',
        '2,NA,106000\n',
        '4,NA,178100\n',
        'NA,NA,140000\n',
    ])

# Load the raw dataset from the CSV file that was just created.
data = pd.read_csv(data_file)
print(data)

# Typical ways to handle missing data are imputation and deletion; here
# imputation is used.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises TypeError on the string column "Alley".
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)

# For categorical or discrete values in inputs, "NaN" is treated as a
# category of its own.
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

# All entries in inputs and outputs are now numeric, so they can be
# converted to tensors. The indicator columns may be bool (pandas >= 2.0),
# which would make a plain `.values` an object array that torch.tensor
# rejects, so a float array is requested explicitly.
X, y = torch.tensor(inputs.to_numpy(dtype=float)), torch.tensor(outputs.to_numpy())
print(X, y)