数组拼接是将多个数组合并成一个数组的操作。
1. np.concatenate() - 通用拼接函数
import numpy as np
# Joining 1-D arrays: the inputs are placed end to end in one flat array.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
c = np.concatenate([a, b]) # [1 2 3 4 5 6]
# Joining 2-D arrays along a chosen axis.
arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])
# Row-wise join (axis=0, the default): arr2's rows go below arr1's rows.
row_concat = np.concatenate([arr1, arr2], axis=0)
# [[1 2]
# [3 4]
# [5 6]
# [7 8]]
# Column-wise join (axis=1): arr2's columns go to the right of arr1's columns.
col_concat = np.concatenate([arr1, arr2], axis=1)
# [[1 2 5 6]
# [3 4 7 8]]
2. np.vstack() - 垂直堆叠(按行)
# Two 1-D arrays of length 3 become the two rows of a 2-D result.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
vstack_result = np.vstack([a, b])
# [[1 2 3]
# [4 5 6]]
# For 2-D inputs this matches concatenate(axis=0). For the 1-D inputs used
# here it does not: concatenate gives the flat [1 2 3 4 5 6], whereas vstack
# yields the 2x3 array shown above.
3. np.hstack() - 水平堆叠(按列)
# Two 3x1 column arrays are placed side by side to form a 3x2 array.
a = np.array([[1], [2], [3]])
b = np.array([[4], [5], [6]])
hstack_result = np.hstack([a, b])
# [[1 4]
# [2 5]
# [3 6]]
# For these 2-D inputs this is the same as concatenate(axis=1).
4. np.dstack() - 深度堆叠(按第三维)
# Two 2x2 arrays are stacked along a third axis, so position [i, j] of the
# result holds the pair (a[i, j], b[i, j]).
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
dstack_result = np.dstack([a, b])
# [[[1 5]
# [2 6]]
# [[3 7]
# [4 8]]]
5. np.column_stack() - 列堆叠
# Each 1-D array is turned into a column, then the columns are joined side
# by side.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
col_stack = np.column_stack([a, b])
# [[1 4]
# [2 5]
# [3 6]]
6. np.row_stack() - 行堆叠(np.vstack 的别名;自 NumPy 2.0 起已弃用,新代码建议直接使用 np.vstack)
# Each 1-D array becomes one row of the 2-D result.
# NOTE(review): np.row_stack appears to be an alias of np.vstack that is
# deprecated as of NumPy 2.0 -- confirm against the NumPy version in use.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
row_stack = np.row_stack([a, b])
# [[1 2 3]
# [4 5 6]]
数组分割是将一个数组拆分成多个子数组的操作。
1. np.split() - 通用分割函数
arr = np.array([1, 2, 3, 4, 5, 6])
# Equal split: the length must be evenly divisible by the number of pieces.
result = np.split(arr, 3)
# [array([1, 2]), array([3, 4]), array([5, 6])]
# Split at explicit indices: the pieces are arr[:2], arr[2:4] and arr[4:].
result = np.split(arr, [2, 4]) # cut after the 2nd and after the 4th element
# [array([1, 2]), array([3, 4]), array([5, 6])]
# Splitting a 2-D array (3 rows, 4 columns).
arr_2d = np.array([[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12]])
# Split by rows (axis=0) into 3 pieces of one row each.
rows_split = np.split(arr_2d, 3, axis=0)
# Split by columns (axis=1) into 2 pieces of two columns each.
cols_split = np.split(arr_2d, 2, axis=1)
2. np.vsplit() - 垂直分割(按行)
# A 3x4 array split vertically into 3 pieces, one row per piece.
arr = np.array([[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12]])
vsplit_result = np.vsplit(arr, 3)
# [array([[1, 2, 3, 4]]),
# array([[5, 6, 7, 8]]),
# array([[9, 10, 11, 12]])]
# Split at an explicit row index: the first row, then the remaining rows.
vsplit_result2 = np.vsplit(arr, [1]) # cut after the first row
# [array([[1, 2, 3, 4]]),
# array([[5, 6, 7, 8],
# [9, 10, 11, 12]])]
3. np.hsplit() - 水平分割(按列)
# A 2x4 array split horizontally into 2 pieces of two columns each.
arr = np.array([[1, 2, 3, 4],
[5, 6, 7, 8]])
hsplit_result = np.hsplit(arr, 2)
# [array([[1, 2],
# [5, 6]]),
# array([[3, 4],
# [7, 8]])]
# Split at explicit column indices: the pieces are columns [0:1], [1:3] and
# [3:4].
hsplit_result2 = np.hsplit(arr, [1, 3]) # cut after column 1 and after column 3
4. np.array_split() - 不等量分割
# Seven elements cannot be divided evenly into four pieces.
arr = np.array([1, 2, 3, 4, 5, 6, 7])
# array_split accepts a piece count that does not divide the length evenly.
result = np.array_split(arr, 4)
# [array([1, 2]), array([3, 4]), array([5, 6]), array([7])]
# Uneven split of a 2-D array: 4 rows divided into 3 pieces.
arr_2d = np.array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
[10, 11, 12]])
result = np.array_split(arr_2d, 3, axis=0) # uneven split along rows
5. np.dsplit() - 深度分割(按第三维)
# A 2x2x4 array whose third axis has four entries.
arr = np.array([[[1, 2, 3, 4],
[5, 6, 7, 8]],
[[9, 10, 11, 12],
[13, 14, 15, 16]]])
dsplit_result = np.dsplit(arr, 4)
# Split along the third axis into 4 arrays, one per depth layer.
# Simulate stitching image tiles: four 100x100 tiles, tile i filled with the
# value i.
image_blocks = [np.ones((100, 100)) * i for i in range(4)]
# Arrange them in a 2x2 grid: tiles 0 and 1 on top, tiles 2 and 3 below.
top_row = np.hstack([image_blocks[0], image_blocks[1]])
bottom_row = np.hstack([image_blocks[2], image_blocks[3]])
full_image = np.vstack([top_row, bottom_row])
示例2:数据批处理分割
# Split a dataset into batches for processing.
data = np.random.randn(1000, 10) # 1000 samples, 10 features
# Cut at rows 700 and 850 to get training, validation and test sets of
# 700, 150 and 150 rows respectively.
train, val, test = np.split(data, [700, 850])
print(f"训练集: {train.shape}, 验证集: {val.shape}, 测试集: {test.shape}")
示例3:滑动窗口创建
def create_sliding_windows(arr, window_size, stride=1):
    """Return the sliding windows of ``arr`` stacked along a new first axis.

    Window ``i`` is ``arr[i * stride : i * stride + window_size]``. Only
    complete windows are produced, so trailing elements that do not fill a
    whole window are dropped.

    Args:
        arr: Sequence or array; windows are taken along its first axis.
        window_size: Number of elements per window; must be >= 1.
        stride: Step between the starts of consecutive windows; must be >= 1.

    Returns:
        A new array of shape ``(n_windows, window_size, ...)``. When ``arr``
        is shorter than ``window_size`` the result contains zero windows but
        still has shape ``(0, window_size, ...)``.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is less than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")
    data = np.asarray(arr)
    # Clamp at zero: an input shorter than one window yields no windows.
    n_windows = max(0, (len(data) - window_size) // stride + 1)
    # Row i of ``index`` lists the positions covered by window i; indexing
    # with it gathers every window in one vectorised step and returns a copy.
    starts = np.arange(n_windows) * stride
    index = starts[:, None] + np.arange(window_size)
    return data[index]
# Windows of length 3 taken every 2 elements over 0..9, giving 4 windows:
# [[0 1 2] [2 3 4] [4 5 6] [6 7 8]]
arr = np.arange(10)
windows = create_sliding_windows(arr, window_size=3, stride=2)
# Avoid many small concatenations; join everything in a single call.
# Not recommended: every iteration calls np.concatenate again, and each call
# builds a new array out of everything accumulated so far.
result = arr1
for arr in arrays_list:
    result = np.concatenate([result, arr])
# Recommended: one call over all inputs. Unpacking (rather than list `+`)
# also works when arrays_list is a tuple or another iterable.
result = np.concatenate([arr1, *arrays_list])
# Preallocate the output once (useful for large arrays), then copy each
# chunk into its slot.
total_size = sum(arr.shape[0] for arr in arrays_list)
# Use the inputs' common dtype instead of np.empty's float64 default, so
# integer or complex inputs are not silently converted; keep whatever trailing
# dimensions the chunks have instead of assuming exactly two axes.
result = np.empty(
    (total_size, *arrays_list[0].shape[1:]),
    dtype=np.result_type(*{chunk.dtype for chunk in arrays_list}),
)
pos = 0
for arr in arrays_list:
    # Each chunk occupies the next arr.shape[0] rows of the output.
    result[pos:pos+arr.shape[0]] = arr
    pos += arr.shape[0]
这些拼接和分割函数是NumPy数据处理的核心工具,熟练掌握能极大提高数据处理的效率。