numpy introduction 01 — pydata: Huiming's learning notes

numpy basics:

slicing and indexing an array / subset
How to get a row or a column of a numpy array: the difference between arr[:, 1] and arr[:, :1]
how to sort, the difference between argsort and lexsort
matrix calculation
matrix eigenvalues / triangular parts / decompositions

import numpy as np
# Build the 6 x 10 integer matrix used by the indexing examples: the values
# 0..59 laid out row by row. reshape() already returns an ndarray, so the
# extra np.array(...) wrapper (and the copy it makes) is unnecessary.
arr = np.arange(60).reshape(6, 10)
arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

subset, slicing and indexing

# a single slice applies to the first axis: rows 1 and 2 (stop index 3 is
# excluded), with every column kept
arr[1:3]

array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

# one slice per axis: rows 1-2 and columns 1-4 (both stop indices excluded)
arr[1: 3, 1: 5]

array([[11, 12, 13, 14],
       [21, 22, 23, 24]])

# indexing with slices: rows 1-2, and columns from 5 through the last one
# (an omitted stop index means "to the end of the axis")
arr[1:3, 5:]

array([[15, 16, 17, 18, 19],
       [25, 26, 27, 28, 29]])

# An integer index removes the indexed axis: arr[:, 0] holds the values of
# column 0 as a 1-D array of shape (6,). It prints horizontally, which is why
# the message below calls it a "row", but it is not a row of the matrix.
print 'arr[:, 0] gives a row'
print arr[:, 0]  
print '-'*20
print arr[:, 0].shape

arr[:, 0] gives a row
[ 0 10 20 30 40 50]
--------------------
(6,)

# A slice keeps the sliced axis even when it has length 1: arr[:, :1] holds
# the same column-0 values, but as a 2-D array of shape (6, 1).
print 'arr[:, :1] gives a column'
print arr[:, :1]
print '-'*20
print arr[:, :1].shape

arr[:, :1] gives a column
[[ 0]
 [10]
 [20]
 [30]
 [40]
 [50]]
--------------------
(6, 1)

# this will issue an error, since arr only has two dimensions
# (three indices are supplied here, so numpy raises IndexError)
arr[1, 3, 5]

---------------------------------------------------------------------------

IndexError                                Traceback (most recent call last)

<ipython-input-33-1db6a8bffb4b> in <module>()
      1 # this will issue an error, since arr only has two dimensions
----> 2 arr[1, 3, 5]


IndexError: too many indices for array

# boolean indexing: select * from arr where arr[:, 0] > 5 and arr[:, 5] < 40
# Each comparison produces one boolean per row; & combines the two masks
# elementwise and the rows where the result is True are kept. The parentheses
# are required because & binds more tightly than the comparison operators.
arr[(arr[:, 0] > 5) & (arr[:, 5] < 40)]

array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]])

# fancy indexing: indexing using integer arrays
# the listed rows are returned in exactly the order given (2, 0, 5, 3)
arr[[2, 0, 5, 3]]

array([[20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]])

# negative indices count back from the last row of the 6-row array:
# -3 -> row 3, -1 -> row 5, -5 -> row 1
arr[[-3, -1, -5]]

array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

# Two index lists are paired up position by position, so this picks the three
# single elements at (1, 1), (3, 5) and (5, 8) and returns them as a 1-D
# array -- it does not select the 3 x 3 block of rows x columns.
print "this does not return a 3 x 3 array"
arr[[1, 3, 5], [1, 5, 8]]

this does not return a 3 x 3 array

array([11, 35, 58])

# this will return a rectangular region: first pick rows 1, 3, 5 (all
# columns), then pick columns 1, 3, 5 of that intermediate result, which
# gives the 3 x 3 block at the intersections of those rows and columns
arr[[1, 3, 5], :][:, [1, 3, 5]]

array([[11, 13, 15],
       [31, 33, 35],
       [51, 53, 55]])

# matrix product of the (6, 10) array with its (10, 6) transpose, giving a
# symmetric (6, 6) result
np.dot(arr, arr.T)

array([[  285,   735,  1185,  1635,  2085,  2535],
       [  735,  2185,  3635,  5085,  6535,  7985],
       [ 1185,  3635,  6085,  8535, 10985, 13435],
       [ 1635,  5085,  8535, 11985, 15435, 18885],
       [ 2085,  6535, 10985, 15435, 19885, 24335],
       [ 2535,  7985, 13435, 18885, 24335, 29785]])

# sort the rows by column 0 in descending order: argsort of the negated
# column gives the row order, which is then applied as a fancy index
arr[np.argsort(-arr[:, 0])]

array([[50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9]])

# the row order used above: indices of the column-0 values from largest to
# smallest
np.argsort(-arr[:, 0])

array([5, 4, 3, 2, 1, 0])

# lexsort is for sorting by multiple keys and expects a sequence of keys.
# NOTE(review): no output was recorded for this call. A bare 1-D array is
# presumably interpreted as six scalar keys rather than one key of length 6,
# so this would not sort the column -- confirm against the numpy docs. To
# sort by a single key, use np.argsort or pass a one-element tuple such as
# (-arr[:, 0],).
np.lexsort(-arr[:, 0])

# lexsort uses the LAST key in the sequence as the primary sort key; earlier
# keys only break ties among equal values of the later ones
a = [1,5,1,4,3,4,4] # First column (primary key, passed last)
b = [9,4,0,4,0,2,1] # Second column (tie-breaker, passed first)
ind = np.lexsort((b,a)) # Sort by a, then by b
print ind

[2 0 4 6 5 3 1]

Array Calculation

1: elementwise calculation

2: matrix calculation: matrix multiply (np.dot), inverse matrix (np.linalg.inv), transpose (np.transpose)

3: eigenvalues and eigen vectors: np.linalg.eig

4: upper/lower triangle (np.triu, np.tril, np.triu_indices, np.tril_indices)

# x1 is a 3 x 3 float matrix holding 0..8; x2 is a length-3 float vector
# holding 0..2. In every operation below x2 is broadcast across the rows of
# x1, i.e. column j of x1 is combined with x2[j].
x1 = np.arange(9.0).reshape((3, 3))
x2 = np.arange(3.0)

print 'x1 = %s ' %(x1)
print '-'*20
print 'x2 = %s ' %(x2)
print '-'*20

# elementwise multiply
print np.multiply(x1, x2)

print '-'*20

# elementwise subtract
print np.subtract(x1, x2)

print '-'*20

# elementwise addition
print np.add(x1, x2)

print '-'*20

# elementwise division: x2[0] is 0, so the first column divides by zero --
# 0/0 gives nan and nonzero/0 gives inf, each with a RuntimeWarning rather
# than an exception
print np.divide(x1, x2)

x1 = [[ 0.  1.  2.]
 [ 3.  4.  5.]
 [ 6.  7.  8.]]
--------------------
x2 = [ 0.  1.  2.]
--------------------
[[  0.   1.   4.]
 [  0.   4.  10.]
 [  0.   7.  16.]]
--------------------
[[ 0.  0.  0.]
 [ 3.  3.  3.]
 [ 6.  6.  6.]]
--------------------
[[  0.   2.   4.]
 [  3.   5.   7.]
 [  6.   8.  10.]]
--------------------
[[ nan  1.   1. ]
 [ inf  4.   2.5]
 [ inf  7.   4. ]]

/home/shm/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py:25: RuntimeWarning: divide by zero encountered in divide
/home/shm/anaconda/lib/python2.7/site-packages/IPython/kernel/__main__.py:25: RuntimeWarning: invalid value encountered in divide

# eigen-decomposition of the diagonal matrix diag(1, 2, 3): np.linalg.eig
# returns the eigenvalues and the eigenvectors as a pair. For this diagonal
# input the eigenvalues are the diagonal entries and the eigenvector matrix
# is the identity.
eig_value, eig_vec = np.linalg.eig(np.diag((1, 2, 3)))

print eig_value
print '-'*20
print eig_vec

[ 1.  2.  3.]
--------------------
[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]

# 3 x 3 matrix holding 1..9, row by row
arr1 = np.arange(1, 10).reshape((3, 3))

# upper triangle: np.triu returns the matrix with everything below the
# diagonal zeroed; indexing with np.triu_indices(3) instead extracts the
# upper-triangle entries (diagonal included) as a flat 1-D array
print np.triu(arr1)
print arr1[np.triu_indices(3)]
print '-'*20

# lower triangle: same idea with everything above the diagonal zeroed /
# the lower-triangle entries extracted
print np.tril(arr1)
print arr1[np.tril_indices(3)]
print '-'*20

# mirror images: fliplr reverses the column order (left <-> right),
# flipud reverses the row order (up <-> down)
print np.fliplr(arr1)
print np.flipud(arr1)

[[1 2 3]
 [0 5 6]
 [0 0 9]]
[1 2 3 5 6 9]
--------------------
[[1 0 0]
 [4 5 0]
 [7 8 9]]
[1 4 5 7 8 9]
--------------------
[[3 2 1]
 [6 5 4]
 [9 8 7]]
[[7 8 9]
 [4 5 6]
 [1 2 3]]

# matrix QR decomposition

# fixed seed so the random matrix is reproducible.
# NOTE: this rebinds arr, replacing the 6 x 10 integer matrix used earlier.
np.random.seed(0)
arr = np.random.random(9).reshape((3, 3))
q, r = np.linalg.qr(arr)

# the product q.r reproduces the original matrix up to floating-point
# tolerance (np.allclose is a tolerance check, not bit-for-bit equality)
print np.allclose(arr, np.dot(q, r))

True

# matrix svd decomposition

# fixed seed so the random 5 x 4 matrix is reproducible (rebinds arr again)
np.random.seed(0)
arr = np.random.random(20).reshape((5, 4))
u, s, v = np.linalg.svd(arr)

# u is (5, 5), s is a length-4 vector of singular values, v is (4, 4)
print u.shape, s.shape, v.shape

# s is only a vector, so place it on the diagonal of a (5, 4) zero matrix
# to get a middle factor whose shape fits between u and v
s2 = np.zeros((5, 4))
s2[:4, :4] = np.diag(s)

# u.s2.v reproduces the original matrix up to floating-point tolerance
# (np.allclose is a tolerance check, not bit-for-bit equality); v is used
# as returned, without transposing it
print np.allclose(arr, np.dot(u, np.dot(s2, v)))

(5, 5) (4,) (4, 4)
True

numpy print option

# display floats with 4 digits of precision and allow up to 100 characters
# per line before array output is wrapped
np.set_printoptions(precision = 4, linewidth = 100)