numpy 常用總結

一個栗子

>>> import numpy as np
>>> a = np.arange(15).reshape(3, 5)
>>> a
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
>>> a.shape
(3, 5)
>>> a.ndim  # 陣列軸的個數，在python的世界中，軸的個數被稱作秩
2
>>> a.dtype.name
'int64'
>>> a.itemsize  # 陣列中每個元素的位元組大小。
8
>>> a.size
15
>>> type(a)
<class 'numpy.ndarray'>

隨機數

np.random.normal

生成標準正態分佈,在-1.96～+1.96範圍內曲線下的面積等於0.95

samples = np.random.normal(size=(4,4))

array([[ 1.84543397,  0.91407872,  0.83926424,  0.05128908],
       [ 0.89142489,  0.38926367,  0.22890831, -1.63944174],
       [ 0.50097211,  1.0324319 ,  1.12945725, -0.66500686],
       [-0.13809704,  1.72104946,  0.4812083 , -2.35993971]])

np.random.normal(0,1,[3,2])
array([[-0.89197774, -0.23395082],
       [ 1.31110604,  1.25929054],
       [ 0.22351615, -0.38488545]])

np.random.uniform

可以生成[low,high)中的隨機數，可以是單個值，也可以是一維陣列，也可以是多維陣列

np.random.uniform(1,5,(4,3)) #生成4x3的陣列

array([[ 2.33083328, 1.592934 , 2.38072  ],
    [ 1.07485686, 4.93224857, 1.42584919],
    [ 3.2667912 , 4.57868281, 1.53218578],
    [ 4.17965117, 3.63912616, 2.83516143]])

np.random.randint

生成[low,high)整型隨機數，可以是單個隨機數，也可以是多維的隨機數構成的陣列

np.random.randint(4,10,size=(2,2),dtype='int32')

array([[7, 4],
    [6, 9]])

#生成服從標準正態分佈（均值0、標準差1）的浮點數，取值並不限於0-1之間
np.random.randn(2,3)

array([[ 0.59892845,  0.35535291,  0.53566027],
       [-0.09791823,  1.50595013, -0.5044485 ]])

np.random.rand

返回[0,1)內的浮點數，沒有輸入時，則返回[0,1)內的一個隨機值

np.random.rand(3,3)

array([[ 0.47507608, 0.64225621, 0.9926529 ],
    [ 0.95028412, 0.18413813, 0.91879723],
    [ 0.89995217, 0.42356103, 0.81312942]])

建立矩陣

arange

import numpy as np
a = np.arange(10) # 預設從0開始到10（不包括10），步長為1
print(a) # 返回 [0 1 2 3 4 5 6 7 8 9]
a1 = np.arange(5,10) # 從5開始到10（不包括10），步長為1
print(a1) # 返回 [5 6 7 8 9]
a2 = np.arange(5,20,2) # 從5開始到20（不包括20），步長為2
print(a2) # 返回 [ 5  7  9 11 13 15 17 19]

linspace

import numpy as np
# 類似於matlab
a = np.linspace(0,10,7) # 生成首位是0，末位是10，含7個數的等差數列
# 結果 
[  0.   1.66666667   3.33333333   5.  6.66666667  8.33333333  10.]

ones、zeros、eye、empty

ones建立全1矩陣，zeros建立全0矩陣，eye建立單位矩陣，empty建立空矩陣（實際有值）

ones 全1矩陣

a_ones = np.ones((3,4)) # 建立3*4的全1矩陣
# 結果
[[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]

np.ones((2,3,4), dtype=np.int16 )   # dtype can also be specified
# 結果
array([[[ 1, 1, 1, 1],
        [ 1, 1, 1, 1],
        [ 1, 1, 1, 1]],
       [[ 1, 1, 1, 1],
        [ 1, 1, 1, 1],
        [ 1, 1, 1, 1]]], dtype=int16)

zeros 全0矩陣

a_zeros = np.zeros((3,4)) # 建立3*4的全0矩陣
print(a_zeros)
# 結果
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]

eye 單位矩陣

a_eye = np.eye(3) # 建立3階單位矩陣
print(a_eye)
# 結果
[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]

empty 空矩陣

a_empty = np.empty((3,4)) # 建立3*4的空矩陣 
print(a_empty)
# 結果
[[  1.78006111e-306  -3.13259416e-294   4.71524461e-309   1.94927842e+289]
 [  2.10230387e-309   5.42870216e+294   6.73606381e-310   3.82265219e-297]
 [  6.24242356e-309   1.07034394e-296   2.12687797e+183   6.88703165e-315]]

矩陣運算子

+矩陣對應元素相加

-矩陣對應元素相減

*矩陣對應元素相乘

/矩陣對應元素相除

import numpy as np
a1 = np.array([[4,5,6],[1,2,3]])
a2 = np.array([[6,5,4],[3,2,1]])

print(a1+a2) # 相加
# 結果
[[10 10 10]
 [ 4  4  4]]

print(a1/a2) # 對應元素相除（Python 3 中為真除法，結果為浮點數；若要取整數商請用 //）
# 結果
[[0.66666667 1.         1.5       ]
 [0.33333333 1.         3.        ]]

print(a1%a2) # 相除取餘數
# 結果
[[4 0 2]
 [1 0 0]]

矩陣乘法

import numpy as np

a1 = np.array([[1,2,3],[4,5,6]]) # a1為2*3矩陣
a2 = np.array([[1,2],[3,4],[5,6]]) # a2為3*2矩陣
print(a1.dot(a2)) 
# 結果
[[22 28]
 [49 64]]

矩陣的轉置 a.T

import numpy as np
a = np.array([[1,2,3],[4,5,6]])
print(a.transpose())
# 或者 print(a.T)
# 結果
[[1 4]
 [2 5]
 [3 6]]

矩陣的逆

設A是數域上的一個n階方陣，若在相同數域上存在另一個n階矩陣B，使得：AB=BA=E（E為單位矩陣）。則我們稱B是A的逆矩陣，而A則被稱為可逆矩陣。

單位矩陣的逆為他本身

import numpy as np
import numpy.linalg as lg

a = np.array([[1,2,3],[4,5,6],[7,8,9]])

print(lg.inv(a))
# 結果（注意：此矩陣的行列式為0，屬於奇異矩陣，實際上不可逆；下面的巨大數值只是浮點誤差的產物，沒有意義，在某些環境下會直接丟擲 LinAlgError）
[[ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]
 [-6.30503948e+15  1.26100790e+16 -6.30503948e+15]
 [ 3.15251974e+15 -6.30503948e+15  3.15251974e+15]]

最值

矩陣中元素最大最小值的函式分別是max和min。

import numpy as np

a = np.array([[1,2,3],[4,5,6]])
print(a.max()) #獲取整個矩陣的最大值 結果： 6
print(a.min()) #結果：1

# axis=0 行方向最大（小）值，即獲得每列的最大（小）值
print(a.max(axis=0))
# 結果為 [4 5 6]

# axis=1 列方向最大（小）值，即獲得每行的最大（小）值
print(a.max(axis=1))
# 結果為 [3 6]

# 要想獲得最大最小值元素所在的位置，可以通過argmax函式來獲得
print(a.argmax(axis=1))
# 結果為 [2 2]

平均值

import numpy as np

a = np.array([[1,2,3],[4,5,6]])
print(a.mean()) #結果為： 3.5

# 同樣地，可以通過關鍵字axis引數指定沿哪個方向獲取平均值
print(a.mean(axis=0)) # 結果 [ 2.5  3.5  4.5]
print(a.mean(axis=1)) # 結果 [ 2.  5.]

方差相關

方差的函式為`var()`相當於函式`mean(abs(x - x.mean())**2)`

import numpy as np

a = np.array([[1,2,3],[4,5,6]])
print(a.var()) # 結果 2.91666666667

print(a.var(axis=0)) # 結果 [ 2.25  2.25  2.25]
print(a.var(axis=1)) # 結果 [ 0.66666667  0.66666667]

標準差的函式為`std()`相當於`sqrt(mean(abs(x - x.mean())**2))`

import numpy as np

a = np.array([[1,2,3],[4,5,6]])
print(a.std()) # 結果 1.70782512766

print(a.std(axis=0)) # 結果 [ 1.5  1.5  1.5]
print(a.std(axis=1)) # 結果 [ 0.81649658  0.81649658]

中值

中值指的是將序列按大小順序排列中間的那個值，如果有偶數個數，則是排在中間兩個數的平均值。中值的函式是median()

import numpy as np
x = np.array([[1,2,3],[4,5,6]])

print(np.median(x))  # 對所有數取中值
# 結果
3.5

print(np.median(x,axis=0))  # 沿行方向取中值
# 結果
[ 2.5  3.5  4.5]

print(np.median(x,axis=1))  # 沿列方向取中值
# 結果
[ 2.  5.]

求和

import numpy as np

a = np.array([[1,2,3],[4,5,6]])

print(a.sum())           # 對整個矩陣求和
# 結果 21

print(a.sum(axis=0)) # 對行方向求和
# 結果 [5 7 9]

print(a.sum(axis=1)) # 對列方向求和
# 結果 [ 6 15]

累積和

某位置累積和指的是該位置之前(包括該位置)所有元素的和。例如序列[1,2,3,4,5]，其累計和為[1,3,6,10,15]

import numpy as np

a = np.array([[1,2,3],[4,5,6]])

print(a.cumsum())     # 對整個矩陣求累積和
# 結果 [ 1  3  6 10 15 21]

print(a.cumsum(axis=0))  # 對行方向求累積和（axis=0，即每一列由上往下累加）
# 結果
[[1 2 3]
 [5 7 9]]

print(  a.cumsum(axis=1))  # 對列方向求累積和（axis=1，即每一行由左往右累加）
# 結果
[[ 1  3  6]
 [ 4  9 15]]

極差

import numpy as np
a = np.arange(100)
np.ptp(a)  # 極差 = 最大值 - 最小值；NumPy 2.0 起已移除 a.ptp() 方法，請改用 np.ptp(a)
# 結果是
99

百分位數

百分位數是統計中使用的度量：第q百分位數是這樣一個值，小於或等於它的觀察值約佔總數的q%。

a = np.array([[10, 7, 4], [3, 2, 1]])
np.percentile(a, 50)    #50%的分位數，就是a裡排序之後的中位數
# 結果是
3.5

np.percentile(a, 50, axis=0) 
array([6.5, 4.5, 2.5])

加權平均值

import numpy as np
a = np.arange(15).reshape(3,5)
np.average(a, axis=0,weights=(10, 5, 1))
# 其中 a 為
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
# 結果是（以第一列為例：(0*10 + 5*5 + 10*1) / (10+5+1) = 35/16 = 2.1875）
array([2.1875, 3.1875, 4.1875, 5.1875, 6.1875])

形狀操縱

numpy中的ravel()、flatten()、squeeze()都有將多維陣列轉換為一維陣列的功能，區別：

ravel()：如果沒有必要，不會產生源資料的副本
flatten()：返回源資料的副本
squeeze()：只能移除長度為1的維度（例如把形狀 (1, 3, 1) 變為 (3,)）

np.resize()和np.reshape()的區別
resize:np.resize(a,(2,3))會返回一個新陣列，不會修改原陣列（元素不足時重複原資料填充）；a.resize((1,9))才是直接就地修改原始陣列（元素不足時以0填充）。
reshape:在不改變原陣列資料的情況下，將它reshape成一個新的維度。如果給定的陣列資料和需要reshape的形狀不符合時，將會報錯。np.reshape(a, (3,-1))

a = np.floor(10*np.random.random((3,4)))
a
array([[ 2.,  8.,  0.,  6.],
       [ 4.,  5.,  1.,  1.],
       [ 8.,  9.,  3.,  6.]])

a.shape
(3, 4)

a.ravel()  # 轉換為一維陣列
array([ 2.,  8.,  0.,  6.,  4.,  5.,  1.,  1.,  8.,  9.,  3.,  6.])

a.reshape(6,2)  # 返回改變後陣列形狀
array([[ 2.,  8.],
       [ 0.,  6.],
       [ 4.,  5.],
       [ 1.,  1.],
       [ 8.,  9.],
       [ 3.,  6.]])

a.T  # 轉置陣列
array([[ 2.,  4.,  8.],
       [ 8.,  5.,  9.],
       [ 0.,  1.,  3.],
       [ 6.,  1.,  6.]])

a.T.shape
(4, 3)

Fancy Indexing

切片索引：通過[start: end: steps]

import numpy as np

#Fancy Indexing
x = np.arange(16)
np.random.shuffle(x)
print(x) #列印所有的元素
#[14  1  7 10 11 15  9  4  2 13 12  0  3  8  5  6]


print(x[2])         
print(x[1:3])        #切片，1到2
print(x[3:9:2])        #指定間距切片
#結果
7
[1 7]
[10 15  4]

布林索引：通過布林型別的陣列進行索引

通過一個布林陣列來索引目標陣列，以此找出與布林陣列中值為True的對應的目標陣列中的資料。需要注意的是，布林陣列的長度必須與目標陣列對應的軸的長度一致。

import numpy as np

names = np.array(['Bob', 'Joe', 'Will'])
scores = np.random.randint(0, 100, (3, 4)) # 3名學生的4科成績
#結果
array([[58, 52, 97, 76],
       [91, 86, 80, 78],
       [26, 35, 75, 78]])

print(names == 'Bob')
[ True False False]
print(scores[names == 'Bob']) # 獲取Bob的四科成績
[[58 52 97 76]]

花式索引：通過整型陣列進行索引

花式索引根據索引整型陣列的值作為目標陣列的某個軸的下標來取值。

import numpy as np

arr3d = np.arange(12).reshape(2, 2, 3) #2個2行三列陣列
#結果
array([
    [[ 0,  1,  2],
    [ 3,  4,  5]],

    [[ 6,  7,  8],
    [ 9, 10, 11]]
])
# 使用兩個整數陣列來對axis= 0,1兩個軸進行花式索引
print(arr3d[[0, 1], [1, 1]])
#結果
[[ 3  4  5]
 [ 9 10 11]]
print(arr3d[[0, 1], [0, 1, 2]]) # error：報 IndexError，兩個索引陣列的形狀 (2,) 與 (3,) 無法廣播到一起



x = np.arange(16)  # 注意：下面的輸出沿用了上文經 np.random.shuffle 打亂後的 x；若未打亂，X 會是 0~15 依序排列
X = x.reshape(4,-1)
print(X)
#結果
[[14  1  7 10]
 [11 15  9  4]
 [ 2 13 12  0]
 [ 3  8  5  6]]
ind1 = np.array([1,3]) #行的索引
ind2 = np.array([2,0]) #列的索引
print(X[ind1,ind2])
#結果 （1,2）和（3,0）
[9 3]

本作品採用《CC 協議》，轉載必須註明作者和本文連結

如果可以，我要變成光