Chapter 3 Numpy

numpy 提供了一组高效处理数组的工具.

这个图有点误导性. 当数组是二维的时候, axis0 表示行, axis1 表示列. 当数组是三维的时候, axis0 表示的则是不同的二维数组, axis1 表示行, axis2 表示列.

3.1 创建array 数组

import numpy as np

a = np.array([1,2,3]) #  1 D array
a
## array([1, 2, 3])
b = np.array([(1,2,3,4),(2,3,4,5)], dtype = float)  # 2 D array
b
## array([[1., 2., 3., 4.],
##        [2., 3., 4., 5.]])
c = np.array([[(1,2,3),(2,3,4)],[(1,1,1),(2,2,2)]], dtype = float) # 3 D array
c
## array([[[1., 2., 3.],
##         [2., 3., 4.]],
## 
##        [[1., 1., 1.],
##         [2., 2., 2.]]])

3.2 创建特殊数组

下面这些函数可以快速地创建数组.

np.zeros((2,2,3))   # Create an array of zeros
## array([[[0., 0., 0.],
##         [0., 0., 0.]],
## 
##        [[0., 0., 0.],
##         [0., 0., 0.]]])
np.ones((2,2,3)) # Create an array of 1
## array([[[1., 1., 1.],
##         [1., 1., 1.]],
## 
##        [[1., 1., 1.],
##         [1., 1., 1.]]])
np.arange(10,25,5) # 1 D  array 
## array([10, 15, 20])
np.linspace(0,2,9) # start 0 , stop 2 , number is 9 
## array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])
np.full((2,2,2),7) # create an array of number 7
## array([[[7, 7],
##         [7, 7]],
## 
##        [[7, 7],
##         [7, 7]]])
np.eye(3) # 2 D 
## array([[1., 0., 0.],
##        [0., 1., 0.],
##        [0., 0., 1.]])
np.random.random((2,2,2)) # random create array
## array([[[0.01275371, 0.21964652],
##         [0.77847549, 0.20543379]],
## 
##        [[0.54901096, 0.43877933],
##         [0.61943631, 0.76193646]]])
np.random.rand(10)
## array([0.5397098 , 0.66997373, 0.72657924, 0.79385136, 0.20519991,
##        0.69418452, 0.17390961, 0.25323318, 0.04559041, 0.89642563])
np.empty((2,2)) # create an uninitialized array (contents are arbitrary)
## array([[2.68156159e+154, 2.68156159e+154],
##        [9.88131292e-324, 2.83587762e-309]])

3.3 Numpy I/O

3.3.1 保存和加载数组

np.save("my_array",a)
np.savez("array.npz",a,b)
np.load("my_array.npy")

3.3.2 保存和加载文本文件

np.loadtxt("file.txt")

np.genfromtxt("file.csv", delimiter = ",")

np.savetxt("file.txt",a,delimiter= " ")

3.4 Data types

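  1. np.int64
  2. np.float32
  3. np.complex128 (旧别名 np.complex 已在 NumPy 1.24 中移除)
  4. np.bool_ (别名 np.bool 在 NumPy 1.24 中移除, 2.0 起重新提供)
  5. np.object_ (旧别名 np.object 已在 NumPy 1.24 中移除)
  6. np.bytes_ (旧名 np.string_ 已在 NumPy 2.0 中移除)
  7. np.str_ (旧名 np.unicode_ 已在 NumPy 2.0 中移除)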

3.5 查看数组

 
a
## array([1, 2, 3])
a.shape # show array dimensions
## (3,)
len(a) # array length
## 3
a.ndim # number of array dimensions
## 1
a.size # number of array elements
## 3
a.dtype.name
## 'int64'
a.astype(int)
## array([1, 2, 3])

3.6 查询帮助

  1. np.info(np.random)
  2. help(np.random)
  3. ?np.random (in Rstudio)

3.7 数组函数

3.7.1 数组算数运算


a = np.random.random((2,2,2))
b = np.random.random((2,2,2))

a-b # subtraction 减法
## array([[[-0.34690224, -0.3208587 ],
##         [ 0.46689173,  0.31333901]],
## 
##        [[-0.03213382,  0.10831127],
##         [-0.21276662, -0.10638115]]])
np.subtract(a,b)
## array([[[-0.34690224, -0.3208587 ],
##         [ 0.46689173,  0.31333901]],
## 
##        [[-0.03213382,  0.10831127],
##         [-0.21276662, -0.10638115]]])
a+b
## array([[[0.45015366, 1.16093693],
##         [1.0427565 , 1.31077147]],
## 
##        [[0.4613834 , 1.41173947],
##         [0.45627454, 0.31618678]]])
np.add(a,b) # addition
## array([[[0.45015366, 1.16093693],
##         [1.0427565 , 1.31077147]],
## 
##        [[0.4613834 , 1.41173947],
##         [0.45627454, 0.31618678]]])
a/b # division
## array([[[0.129541  , 0.56693259],
##         [2.6215325 , 1.62829117]],
## 
##        [[0.86977631, 1.16619445],
##         [0.36396552, 0.49650153]]])
np.divide(a,b)
## array([[[0.129541  , 0.56693259],
##         [2.6215325 , 1.62829117]],
## 
##        [[0.86977631, 1.16619445],
##         [0.36396552, 0.49650153]]])
a*b # multiplication 乘法
## array([[[0.02057429, 0.31120606],
##         [0.21733831, 0.40498513]],
## 
##        [[0.05296051, 0.49531925],
##         [0.0407292 , 0.02216428]]])
np.multiply(a,b)
## array([[[0.02057429, 0.31120606],
##         [0.21733831, 0.40498513]],
## 
##        [[0.05296051, 0.49531925],
##         [0.0407292 , 0.02216428]]])

# other useful functions
np.exp(a)
## array([[[1.05298155, 1.52202109],
##         [2.12723734, 2.25253273]],
## 
##        [[1.23939678, 2.13833047],
##         [1.12947617, 1.11060267]]])
np.sqrt(a)
## array([[[0.22721292, 0.64810425],
##         [0.86880614, 0.90114108]],
## 
##        [[0.46327615, 0.87179434],
##         [0.3489326 , 0.32388704]]])
np.sin(a)
## array([[[0.05160278, 0.40779617],
##         [0.68516056, 0.72570273]],
## 
##        [[0.21298084, 0.68893983],
##         [0.12145337, 0.10471052]]])
np.log10(a)
## array([[[-1.28713397, -0.37671026],
##         [-0.12215423, -0.09041443]],
## 
##        [[-0.66832012, -0.11917191],
##         [-0.91451691, -0.97921287]]])

3.7.2 比较数组

a == b
## array([[[False, False],
##         [False, False]],
## 
##        [[False, False],
##         [False, False]]])
a < 0.5
## array([[[ True,  True],
##         [False, False]],
## 
##        [[ True, False],
##         [ True,  True]]])
np.array_equal(a,b)
## False

3.7.3 聚合函数

a.sum()
## 3.239851111719709
a.min()
## 0.05162570936912836
a.max(axis=1) # notice the meaning of axis
## array([[0.75482411, 0.81205524],
##        [0.21462479, 0.76002537]])
a.cumsum(axis =0)
## array([[[0.05162571, 0.42003912],
##         [0.75482411, 0.81205524]],
## 
##        [[0.2662505 , 1.18006449],
##         [0.87657807, 0.91695805]]])
a.mean()
## 0.4049813889649636
np.median(a)
## 0.31733195338493403
a.std()
## 0.3053159872880776

3.7.4 复制数组

h = a.view() # create a view of the array with the same data
np.copy(a) # create a copy of the array 
## array([[[0.05162571, 0.42003912],
##         [0.75482411, 0.81205524]],
## 
##        [[0.21462479, 0.76002537],
##         [0.12175396, 0.10490281]]])
h = a.copy() # create a deep copy of the array 

3.7.5 数组排序

a.sort()

3.8 取子集,切片,索引

3.8.1 取子集

a = np.array((1,2,3))
b = np.array([(1,2,3),(4,5,6)])


a[0]  # select the element at index 0 (the first element)
## 1
b[0,0] # select the element at row 0, column 0
## 1

3.8.2 切片

a[0:2] # select items at index 0 to 1
## array([1, 2])
b[0:2,1] # select items in rows 0 and 1 of column 1 (the second column)
## array([2, 5])
b[:1] # select all items in row 0 (equivalent to b[0:1,:])
## array([[1, 2, 3]])
a[a<2] # bool indexing
## array([1])

3.9 操纵数组

3.9.1 数组置换

i = np.transpose(b) # permute(置换) array dimensions 
i.T
## array([[1, 2, 3],
##        [4, 5, 6]])

3.9.2 改变数组的形状

b.ravel() # flatten the array
## array([1, 2, 3, 4, 5, 6])
b.reshape(3,2) # reshape without changing the data
## array([[1, 2],
##        [3, 4],
##        [5, 6]])

3.9.3 添加/删除 元素

np.append(a,1) 
## array([1, 2, 3, 1])
np.insert(a,1,5)
## array([1, 5, 2, 3])
np.delete(a,0)
## array([2, 3])

3.9.4 合并数组

concatenate arrays 连接数组

np.concatenate((a,a),axis=0)
## array([1, 2, 3, 1, 2, 3])
np.concatenate((b,b),axis=0)
## array([[1, 2, 3],
##        [4, 5, 6],
##        [1, 2, 3],
##        [4, 5, 6]])

stack arrays vertically (row wise 按行排列)


a = np.array((1,2,3))
b = np.array([(1,2,3),(4,5,6)])

np.vstack((b.T,b.T))
## array([[1, 4],
##        [2, 5],
##        [3, 6],
##        [1, 4],
##        [2, 5],
##        [3, 6]])
np.row_stack((b.T,b.T))
## array([[1, 4],
##        [2, 5],
##        [3, 6],
##        [1, 4],
##        [2, 5],
##        [3, 6]])

stack arrays horizontally (column wise)

np.hstack((b,b))
## array([[1, 2, 3, 1, 2, 3],
##        [4, 5, 6, 4, 5, 6]])
np.column_stack((b,b))
## array([[1, 2, 3, 1, 2, 3],
##        [4, 5, 6, 4, 5, 6]])

3.9.5 分开数组

np.hsplit(a,3)
## [array([1]), array([2]), array([3])]
np.hsplit(b,3)
## [array([[1],
##        [4]]), array([[2],
##        [5]]), array([[3],
##        [6]])]
np.vsplit(b,2)
## [array([[1, 2, 3]]), array([[4, 5, 6]])]