8.7.数组操作

import numpy as np
import copy

Linux 5.4.0-74-generic
Python 3.9.5 @ GCC 7.3.0
Latest build date 2021.06.16
numpy version:  1.20.3

创建示例数据

# Shared 10x5 sample array holding 0..49; deepcopy makes it an independent copy.
data = copy.deepcopy(np.arange(50).reshape(10, 5))

改变数组形状(`shape`)

array.shape

# Create sample data (a private copy, so `data` itself is not touched)
array = copy.deepcopy(data)
print(f"Original shape = {array.shape}")

# Assigning to .shape modifies the original array in place
array.shape = (5, 10)
print(f"New shape = {array.shape}")
# One dimension may be set to -1; its length is then computed automatically
array.shape = (25, -1)
print(f"New shape = {array.shape}")

Original shape = (10, 5)
New shape = (5, 10)
New shape = (25, 2)

np.reshape(a, newshape, order='C')

array.reshape不会直接修改原始数组的形状，而是返回一个新的数组对象：在可能的情况下返回与原数组共享数据的视图，否则才返回副本：

# Method form and function form request the same 5x10 result
array.reshape(5, 10)
np.reshape(array, (5, 10))
# -1 lets NumPy infer the second dimension (50 elements / 5 rows = 10)
array.reshape(5, -1)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

np.resize(a, newshape)

array.resize会就地修改原始数组，np.resize会返回副本：

# Method form: changes `array` itself in place
array.resize(25, 2)
# Function form: returns a new array, `array` is left unchanged
new_array = np.resize(array, (5, 10))
print(new_array.shape)

(5, 10)

.resize/.reshape有可能会改变数组的ndim属性：

print(f"Original ndim = {array.ndim}")
# Resizing to a three-dimensional shape changes ndim as well
array.resize(5, 5, 2)
print(f"New ndim = {array.ndim}")

Original ndim = 2
New ndim = 3

array.flatten() / array.ravel()

array.flatten方法将多维数组转为一维数组，总是返回副本：

# Collapse to one dimension; flatten always returns a copy
array.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

array.ravel方法将多维数组转为一维数组，非必要情况下不会返回副本，一般会返回视图：

# Collapse to one dimension; ravel returns a view unless a copy is required
array.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

# OWNDATA is True when the result owns its buffer (a copy), False for a view
print(array.flatten().flags["OWNDATA"])
print(array.ravel().flags["OWNDATA"])
# Ravel in column-major (Fortran) order yields a copy here
print(array.ravel(order="F").flags["OWNDATA"])

True
False
True

array.T

转置。

# 3x4 matrix; .T is its 4x3 transpose
array = np.arange(12).reshape(3, 4)
print(array.T)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]

更改维度数(`ndim`)

np.squeeze(a, axis=None) / array.squeeze()

将大小等于1的轴删除，返回输入数组本身或其视图（不是副本）：

# Leading length-1 axis: squeeze without `axis` drops every length-1 axis
array = data.reshape(1, 25, 2)
print(np.squeeze(array).shape)

# Trailing length-1 axis: drop only axis 2
array = data.reshape(25, 2, 1)
print(array.squeeze(axis=2).shape)

(25, 2)
(25, 2)

np.atleast_1d(*arys)

将输入转换为至少一维的数组。

# A scalar becomes a one-element 1-D array
print(np.atleast_1d(1))
# Several inputs yield one array per input
print(np.atleast_1d(1, 2))

[1]
[array([1]), array([2])]

np.atleast_2d(*arys)

将输入转换为至少二维的数组。

# A 1-D list of 4 values becomes a 1x4 2-D array
print(np.atleast_2d([1, 2, 3, 4]))

[[1 2 3 4]]

np.atleast_3d(*arys)

将输入转换为至少三维的数组。

# A 2x2 input gains a trailing length-1 axis, giving shape (2, 2, 1)
print(np.atleast_3d([[1, 2], [3, 4]]))

[[[1]
  [2]]

 [[3]
  [4]]]

np.expand_dims()

在指定位置插入大小为1的新轴。

print(data.shape)
# Insert a new length-1 axis at position 1
print(np.expand_dims(data, 1).shape)

(10, 5)
(10, 1, 5)

轴操作(`axis`)

np.swapaxes(a, axis1, axis2) / array.swapaxes()

交换指定的两个轴（axis1和axis2），返回视图：

# Swapping axes 0 and 1 of the (10, 5) sample array gives shape (5, 10).
print(data.shape)
# Method form
print(data.swapaxes(0, 1).shape)
# Function form produces the same result
print(np.swapaxes(data, 0, 1).shape)
# OWNDATA is False: the result does not own its buffer, i.e. it is a view
print(data.swapaxes(0, 1).flags["OWNDATA"])

(10, 5)
(5, 10)
(5, 10)
False

np.transpose(a, axes=None) / array.transpose()

改变轴的顺序，返回视图。如果axes=None，则默认将轴序改为逆序，例如原数组的shape=(1,2,3)，转置后的shape为(3,2,1)。

array = data.reshape((5, 5, 2))
# No arguments: the axis order is reversed
print(array.transpose().shape)
# Explicit order: keep axis 0, swap axes 1 and 2
print(array.transpose(0, 2, 1).shape)

(2, 5, 5)
(5, 2, 5)

np.moveaxis(a, source, destination)

将a的source轴移动到位置destination。source、destination可以是int或int序列，后者可一次性移动多个轴。

x = np.zeros((3, 4, 5))
# Move the first axis to the end
print(np.moveaxis(x, 0, -1).shape)
# Move the last axis to the front
print(np.moveaxis(x, -1, 0).shape)

(4, 5, 3)
(5, 3, 4)

以下代码的效果一样：

x = np.zeros((3, 4, 5))
# Every call below reverses the axis order: (3, 4, 5) -> (5, 4, 3)
print(np.transpose(x).shape)
print(np.swapaxes(x, 0, -1).shape)
# Axis 0 goes last and axis 1 second-to-last; the remaining axis 2 ends up first
print(np.moveaxis(x, [0, 1], [-1, -2]).shape)
print(np.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape)

(5, 4, 3)
(5, 4, 3)
(5, 4, 3)
(5, 4, 3)

np.rollaxis(a, axis, start=0)

向后滚动指定的轴，直到它位于给定位置。使用np.moveaxis代替np.rollaxis可能更好。

a = np.ones((3, 4, 5, 6))
# Default start=0: axis 2 is rolled to the front
print("np.rollaxis(a, 2).shape", "-->", np.rollaxis(a, 2).shape)
# Axis 3 is rolled until it sits at position 1
print("np.rollaxis(a, 3, 1).shape", "-->", np.rollaxis(a, 3, 1).shape)
# Axis 1 is rolled to lie before position 4, so it ends up last
print("np.rollaxis(a, 1, 4).shape", "-->", np.rollaxis(a, 1, 4).shape)

np.rollaxis(a, 2).shape --> (5, 3, 4, 6)
np.rollaxis(a, 3, 1).shape --> (3, 6, 4, 5)
np.rollaxis(a, 1, 4).shape --> (3, 5, 6, 4)

拼接数组

np.concatenate(arrays, axis=0, out=None, *, dtype=None, casting='same_kind')

沿着指定的轴拼接一系列数组。

arrays：包含数组的序列。待连接的数组除了axis轴之外，其他轴的大小必须相同。

a = np.arange(8).reshape(4, 2)
b = np.arange(6).reshape(3, 2)

# Concatenate vertically, along the row axis
array = np.concatenate((a, b), axis=0)
print(array)

# The sizes along the other axis differ (4 rows vs 3 rows), so this raises
try:
    np.concatenate((a, b), axis=1)
except ValueError as e:
    print("ValueError", e)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [0 1]
 [2 3]
 [4 5]]
ValueError all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 4 and the array at index 1 has size 3

np.stack(arrays, axis=0, out=None)

沿着“新轴”拼接数组。注意，np.hstack和np.vstack是沿着已存在的轴拼接数组。

out：储存结果的变量。

np.vstack(tup)

沿 0 轴拼接（垂直拼接），增加行数。

a = np.arange(8).reshape(4, 2)
b = np.arange(6).reshape(3, 2)

# Stack vertically, along the row axis: 4 rows + 3 rows
array = np.vstack([a, b])
print(array)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [0 1]
 [2 3]
 [4 5]]

np.hstack(tup)

沿 1 轴拼接（水平拼接），增加列数。

a = np.arange(3).reshape(3, 1)
b = np.arange(6).reshape(3, 2)

# Stack horizontally, along the column axis: 1 column + 2 columns
array = np.hstack([a, b])
print(array)

[[0 0 1]
 [1 2 3]
 [2 4 5]]

np.dstack(tup)

沿第3个轴（axis=2，深度方向）堆叠数组。

np.column_stack(tup)

类似于hstack，但是如果被拼接的数组是一维的，则将其形状修改为二维的 $(N, 1)$。

np.c_[]

类似于hstack，但是如果被拼接的数组是一维的，则将其形状修改为二维的$(N, 1)$，这和np.column_stack的效果一样。

a = np.arange(3).reshape(3, 1)
b = np.arange(6).reshape(3, 2)
# Element-wise comparison: np.c_ and np.hstack agree for these 2-D inputs
print(np.c_[a, b] == np.hstack([a, b]))
print(np.c_[a, b])

[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]
[[0 0 1]
 [1 2 3]
 [2 4 5]]

column_stack、np.c_和hstack的区别

a = np.arange(3)

# hstack joins 1-D inputs end to end into a single 1-D array
print(np.hstack((a, a)), "\n")
# column_stack and np.c_ turn each 1-D input into a column of a 2-D array
print(np.column_stack((a, a)), "\n")
print(np.c_[a, a])

[0 1 2 0 1 2]

[[0 0]
 [1 1]
 [2 2]]

[[0 0]
 [1 1]
 [2 2]]

拆分数组

np.split(ary, indices_or_sections, axis=0)

沿着指定的轴拆分数组ary。

indices_or_sections：指定了拆分点：
1. 如果为整数 N，则表示平均拆分成 N 份。如果不能平均拆分，则报错。
2. 如果为序列，则该序列指定了拆分点（起点和终点无需指定），如[1,3]指定了区间：[0,1)、[1,3)、[3:]。

array = np.arange(50).reshape(10, 5)

# NOTE(review): the 10 rows divide evenly into 2 sections, so this call succeeds
# and the except branch never runs; splitting the 5 columns (axis=1) into 2
# sections would be needed to trigger the ValueError.
try:
    np.split(array, 2, axis=0)
except ValueError as e:
    print("ValueError", e)

# Split points 1 and 3 give the column ranges [0,1) [1,3) [3:]
arrays = np.split(array, [1, 3], axis=1)
print(type(arrays))
print(len(arrays))

<class 'list'>
3

np.array_split(ary, indices_or_sections, axis=0)

作用与.split()类似。唯一的区别在于：当indices_or_sections为整数，且无法平均拆分时，并不报错，而是尽可能的维持平均拆分。

array = np.arange(50).reshape(10, 5)
# 5 columns cannot be divided evenly by 2; array_split still returns 2 pieces
arrays = np.array_split(array, 2, axis=1)
print(type(arrays))
print(len(arrays))

<class 'list'>
2

np.vsplit(ary, indices_or_sections)

沿着行方向（垂直方向）将数组拆分为多个子数组。

array = np.arange(4).reshape(4, 1)
# Split the 4 rows into 2 equal sub-arrays
np.vsplit(array, 2)

[array([[0],
        [1]]),
 array([[2],
        [3]])]

np.hsplit(ary, indices_or_sections)

沿着列方向（水平方向）将数组拆分为多个子数组。

array = np.arange(4).reshape(1, 4)
# Split the 4 columns into 2 equal sub-arrays
np.hsplit(array, 2)

[array([[0, 1]]), array([[2, 3]])]

np.dsplit(ary, indices_or_sections)

沿第3个轴（axis=2，深度方向）将数组拆分为多个子数组。

添加和删除数组元素

np.delete(arr, obj, axis=None)

沿指定轴删除指定的元素，返回删除后的新数组（原数组不变）。

a = np.arange(6).reshape((3, 2))
# Remove the row at index 1
np.delete(a, 1, axis=0)

array([[0, 1],
       [4, 5]])

np.insert(arr, obj, values, axis=None)

沿指定轴在指定索引之前插入值。

a = np.arange(4).reshape((2, 2))
# obj is the index before which the values are inserted
np.insert(a, obj=1, values=[10, 11], axis=0)

array([[ 0,  1],
       [10, 11],
       [ 2,  3]])

np.append(arr, values, axis=None)

将值附加到数组的末尾。

a = np.arange(4).reshape((2, 2))
# Append two more rows after the existing ones
np.append(a, [[4, 5], [6, 7]], axis=0)

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

np.trim_zeros(filt, trim='fb')

删除一维数组或序列中开头或结尾的零。

trim：trim='f'，从前面裁剪；trim='b'，从后面裁剪。默认trim='fb'。

a = np.array([0, 0, 1, 2, 3, 0, 0])
# 'fb' trims zeros from both the front and the back
np.trim_zeros(a, trim='fb')

array([1, 2, 3])

np.unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None)

返回没有重复元素的数组。

默认返回一维数组：

a = np.array([[1, 2], [1, 2], [2, 3]])
# Without `axis` the unique values are returned as a 1-D array
print(np.unique(a))

[1 2 3]

设置axis参数：

a = np.array([[1, 2], [1, 2], [2, 3]])
# axis=0 compares whole rows, so duplicate rows are removed
print(np.unique(a, axis=0))

[[1 2]
 [2 3]]

同时返回unique数组元素在原数组中的索引：

a = np.array([[1, 2], [1, 2], [2, 3]])
# Also return, for each unique row, an index of that row in `a`
print(np.unique(a, return_index=True, axis=0))

(array([[1, 2],
       [2, 3]]), array([0, 2]))

同时返回原数组元素在unique数组中的索引，可用于重建原数组：

a = np.array([[1, 2], [1, 2], [2, 3]])
# Also return, for each row of `a`, its index in the unique array
np.unique(a, return_inverse=True, axis=0)

(array([[1, 2],
        [2, 3]]),
 array([0, 0, 1]))

同时返回计数

a = np.array([[1, 2], [1, 2], [2, 3]])
# Also return how many times each unique row occurs
print(np.unique(a, return_counts=True, axis=0))

(array([[1, 2],
       [2, 3]]), array([2, 1]))

重排列元素

np.flip(m, axis=None)

沿给定轴反转数组中元素的顺序，数组的 shape 不变。

axis：指定需要反转元素顺序的轴，可以是整数或整数序列。

# 2x2x2 sample array holding 0..7, used by the np.flip examples
A = np.arange(8).reshape((2, 2, 2))
# Bare expression so the notebook displays the array repr
A

array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]])

# Reverse along axis 0: the two outer 2x2 blocks swap places
np.flip(A, 0)

array([[[4, 5], [6, 7]], [[0, 1], [2, 3]]])

# Reverse along axis 1: the rows inside each block swap places
np.flip(A, 1)

array([[[2, 3], [0, 1]], [[6, 7], [4, 5]]])

# Reverse along axis 2: the elements inside each row swap places
np.flip(A, 2)

array([[[1, 0], [3, 2]], [[5, 4], [7, 6]]])

默认反转所有轴的元素顺序：

# No axis given: every axis is reversed
np.flip(A)

array([[[7, 6], [5, 4]], [[3, 2], [1, 0]]])

同时指定反转多个轴的元素顺序：

# Tuple of axes: reverse axes 0 and 2, leave axis 1 untouched
np.flip(A, (0, 2))

array([[[5, 4], [7, 6]], [[1, 0], [3, 2]]])

np.fliplr(m)

左右方向翻转数组，输入数组至少是2维的。

A = np.diag([1., 2., 3.])
print(A, "\n------")
print(np.fliplr(A))
# Check that fliplr matches reversing axis 1 by slicing
np.all(np.fliplr(A) == A[:, ::-1, ...])

[[1. 0. 0.]
 [0. 2. 0.]
 [0. 0. 3.]]
------
[[0. 0. 1.]
 [0. 2. 0.]
 [3. 0. 0.]]

True

np.flipud(m)

上下方向翻转数组。

A = np.diag([1., 2., 3.])
print(A, "\n------")
print(np.flipud(A))
# Check that flipud matches reversing axis 0 by slicing
np.all(np.flipud(A) == A[::-1, ...])

[[1. 0. 0.]
 [0. 2. 0.]
 [0. 0. 3.]]
------
[[0. 0. 3.]
 [0. 2. 0.]
 [1. 0. 0.]]

True

np.rot90(m, k=1, axes=(0, 1))

在axes指定的平面中将数组旋转 90 度，k 为旋转次数。旋转方向是从第一个轴朝向第二个轴（默认axes=(0, 1)，即从 0 轴到 1 轴）。

# 2x2 integer sample matrix for the np.rot90 examples
m = np.array([[1, 2], [3, 4]], int)
m

array([[1, 2],
       [3, 4]])

# k defaults to 1: a single 90-degree rotation
print(np.rot90(m), "\n-----")
# k=2: two rotations, i.e. 180 degrees
print(np.rot90(m, 2))

[[2 4]
 [1 3]]
-----
[[4 3]
 [2 1]]

# Swapping the order of the axes reverses the rotation direction ...
print(np.rot90(m, k=1, axes=(1, 0)), "\n-----")
# ... which gives the same result as a negative k with the default axes
print(np.rot90(m, k=-1, axes=(0, 1)))

[[3 1]
 [4 2]]
-----
[[3 1]
 [4 2]]

m = np.arange(8).reshape((2, 2, 2))
print(m)
# Rotate once in the plane of axes 1 and 2, i.e. each 2x2 block on its own
np.rot90(m, 1, (1, 2))

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]

array([[[1, 3],
        [0, 2]],

       [[5, 7],
        [4, 6]]])

np.roll(a, shift, axis=None)

沿给定轴滚动数组元素。

x = np.arange(10)
# Positive shift moves elements towards the end; those pushed off wrap to the front
print(np.roll(x, 2))
# Negative shift moves elements towards the start
print(np.roll(x, -2))

[8 9 0 1 2 3 4 5 6 7]
[2 3 4 5 6 7 8 9 0 1]

x2 = np.reshape(x, (2, 5))
print(x2)
print()
# Without `axis` the shift runs over the flattened data, so elements wrap across rows
print(np.roll(x2, 1))
print()
print(np.roll(x2, -1))

[[0 1 2 3 4]
 [5 6 7 8 9]]

[[9 0 1 2 3]
 [4 5 6 7 8]]

[[1 2 3 4 5]
 [6 7 8 9 0]]

# Shift whole rows along axis 0; with only two rows, +1 and -1 give the same result
print(np.roll(x2, 1, axis=0))
print()
print(np.roll(x2, -1, axis=0))

[[5 6 7 8 9]
 [0 1 2 3 4]]

[[5 6 7 8 9]
 [0 1 2 3 4]]

# Shift inside each row along axis 1; elements wrap within their own row
print(np.roll(x2, 1, axis=1))
print()
print(np.roll(x2, -1, axis=1))

[[4 0 1 2 3]
 [9 5 6 7 8]]

[[1 2 3 4 0]
 [6 7 8 9 5]]