import numpy as np

import numba

from numba import jit

def sum_matrix(M):
    N1, N2 = M.shape
    result = 0
    for i in range(N1):
        for j in range(N2):
            result += M[i, j]
    return result

sum_matrix_jit = jit()(sum_matrix)

np.random.seed(1234)
M = np.random.random([int(4e3)]*2)
# Time measurement over the situations
print('{:<14} |'.format('sum_matrix'), end=' ')
%timeit -n 1 -r 3 sum_matrix(M)
print('{:<14} |'.format('np.sum'), end=' ')
%timeit -n 5 -r 10 np.sum(M)
print('{:<14} |'.format('sum_matrix_jit'), end=' ')
%timeit -n 5 -r 10 sum_matrix_jit(M)

sum_matrix     | 3.74 s ± 208 ms per loop (mean ± std. dev. of 3 runs, 1 loop each)
np.sum         | 22.6 ms ± 5.49 ms per loop (mean ± std. dev. of 10 runs, 5 loops each)
sum_matrix_jit | 17.7 ms ± 5.1 ms per loop (mean ± std. dev. of 10 runs, 5 loops each)

@jit
def sum_matrix_jit(M):
    N1, N2 = M.shape
    result = 0
    for i in range(N1):
        for j in range(N2):
            result += M[i, j]
    return result

from numba import njit

@njit # or @jit(nopython=True)
def sum_matrix_njit(M):
    N1, N2 = M.shape
    result = 0
    for i in range(N1):
        for j in range(N2):
            result += M[i, j]
    return result

# Time measurement
print('{:<15} |'.format('sum_matrix_jit'), end=' ')
%timeit -n 5 -r 10 sum_matrix_jit(M)
print('{:<15} |'.format('sum_matrix_njit'), end=' ')
%timeit -n 5 -r 10 sum_matrix_njit(M)

sum_matrix_jit  | 17.8 ms ± 3.44 ms per loop (mean ± std. dev. of 10 runs, 5 loops each)
sum_matrix_njit | 17.2 ms ± 1.53 ms per loop (mean ± std. dev. of 10 runs, 5 loops each)

from numba import prange

@jit(nopython=True, parallel=True)
def sum_matrix_pjit(M):
    N1, N2 = M.shape
    result = 0
    for i in prange(N1):
        for j in prange(N2):
            result += M[i, j]
    return result

# Time measurement over the situations
print('{:<15} |'.format('sum_matrix_njit'), end=' ')
%timeit -n 5 -r 10 sum_matrix_njit(M)
print('{:<15} |'.format('sum_matrix_pjit'), end=' ')
%timeit -n 5 -r 10 sum_matrix_pjit(M)

sum_matrix_njit | 16.1 ms ± 383 µs per loop (mean ± std. dev. of 10 runs, 5 loops each)
sum_matrix_pjit | 3.95 ms ± 42.6 µs per loop (mean ± std. dev. of 10 runs, 5 loops each)

from numba import void, int64, float64

from numba import vectorize

@vectorize(
    [int64(int64, int64)],
    target='parallel'
)
def GCF(x1, x2):
    result = 1
    x = min(x1, x2)
    for i in prange(2, x + 1):
        div1 = (x1 % i) == 0
        div2 = (x2 % i) == 0
        if div1 and div2:
            result = i
    return result

N = 1024
np.random.seed(1234)
M1 = np.random.randint(1, 99, (N, N))
M2 = np.random.randint(1, 99, (N, N))
# Time measurement
%timeit -n 5 -r 10 GCF(M1, M2)

48.6 ms ± 320 µs per loop (mean ± std. dev. of 10 runs, 5 loops each)

# Example using 4x30 matrices
np.random.seed(1234)
M1 = np.random.randint(1, 99, (4, 30))
M2 = np.random.randint(1, 99, (4, 30))
# Visualize tables
np.set_printoptions(linewidth=125)
print(M1, end=' M1\n\n')
print(M2, end=' M2\n\n')
print(GCF(M1, M2), end=' GCF')

[[48 84 39 54 77 25 16 50 24 27 31 44 31 27 59 93 70 81 74 48 51 77 38 35 39 68 12  1 76 81]
 [ 4  3 20 13 66 76 82 15 72 61 47 29 82 88 14 97 13 70 96 32 90 85 46 17 42 73 57 71 57 87]
 [45 91 84 48 50 19 86 47 38 39  8 68  6 48 48 16 35 11 29  5 83 90 56 79 24 51 63 56 85  1]
 [91 34 22 72 69 82 53 65 86 42  2 15  4 31 13 74 20 27 97 69 65 23 57 85  9 45 25 95 16 73]] M1

[[ 3 17  3 80 68 47 58 56 37 89 34 43  3 88 85 36 19 77 70 82 81  9 76 16 21 17 65 62 97 84]
 [58 50 63 58 84 28 88 36 37 85 62 29  2 12 63 18 31 39 26 57 25 13 77 97  9 48 88 26 22 23]
 [46 20 42 48 37 40 18 40 60 83 38 88 39  9 55 18 42 48 20 44 45 36 43 21 27 90 24  3 66 90]
 [28 79 69 92 61  4 82 48 62 64 33 14 57 49 59 95 36 58 61 84 10 61 51 52 50 72 82  5  4 89]] M2

[[ 3  1  3  2  1  1  2  2  1  1  1  1  1  1  1  3  1  1  2  2  3  1 38  1  3 17  1  1  1  3]
 [ 2  1  1  1  6  4  2  3  1  1  1 29  2  4  7  1  1  1  2  1  5  1  1  1  3  1  1  1  1  1]
 [ 1  1 42 48  1  1  2  1  2  1  2  4  3  3  1  2  7  1  1  1  1 18  1  1  3  3  3  1  1  1]
 [ 7  1  1  4  1  2  1  1  2  2  1  1  1  1  1  1  4  1  1  3  5  1  3  1  1  9  1  5  4  1]] GCF

from numba import guvectorize

@guvectorize(
    [void(
        float64[:,:],
        int64,
        float64,
        float64[:]
    )],
    '(n,m),(),()->(n)'
)
def density(P, N, r, R):
    for i in prange(N):
        x, y, z = P[i]
        for j in prange(i + 1, N):
            if i == j:
                continue
            u, v, w = P[j]
            dx = abs(x - u)
            if dx > r:
                continue
            dy = abs(y - v)
            if dy > r:
                continue
            dz = abs(z - w)
            if dz > r:
                continue
            d = (dx**2 + dy**2 + dz**2)**0.5
            R[i] += 1 - d/(3*r*r)**0.5
            R[j] += 1 - d/(3*r*r)**0.5

N = 2500
np.random.seed(1234)
# Gaussian mixture
P = np.vstack((
    np.random.normal((-3,0,0), 1.8, (N, 3))*(1, 1, 1/10),
    np.random.normal((0,4,0), 1.4, (N, 3))*(1, 1, 1/10),
    np.random.normal((3,0,0), 1.0, (N, 3))*(1, 1, 1/10)
))
# Output array
R = np.zeros(3*N)
# Time measurement
%time D = density(P, 3*N, 0.75, R)

Wall time: 103 ms

%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(20, 10))
ax = Axes3D(fig)
ax.view_init(elev=60, azim=30)
# Visualize points
sct = ax.scatter(*P.T[:3], s=20, c=D)
ax.set_title('number of points: ' + str(3*N), size=20)
plt.colorbar(sct)
plt.show()

from numba import stencil

from imageio import imread
M = imread('../_data/tomography.png')

# Box Filter 7x7
@stencil(
    neighborhood=(
        (-3, 3),
        (-3, 3)
    ))
def box_filter(M):
    output = 0
    for i in range(-3, 4):
        for j in range(-3, 4):
            output += M[i, j]
    return output/49

%time Mb = box_filter(M)

Wall time: 237 ms

fig, (axA, axB) = plt.subplots(1, 2, figsize=(20, 10))
# Plot images
axA.imshow(M)
axA.set_title('Input image')
axB.imshow(Mb)
axB.set_title('Box Filter 7x7')

plt.show()

def pbox_filter(M, r):
    @stencil(neighborhood=(
        (-r, r),
        (-r, r)
    ))
    def box_filter(M):
        output = 0
        for i in range(-r, r + 1):
            for j in range(-r, r + 1):
                output += M[i, j]
        return output/(2*r + 1)**2
    return box_filter(M)

# Init parameters
rA = 2
rB = 9
# Run box filters
%time MpA = pbox_filter(M, rA)
%time MpB = pbox_filter(M, rB)

Wall time: 387 ms
Wall time: 451 ms

fig, (axA, axB) = plt.subplots(1, 2, figsize=(20, 10))
# Plot images
axA.imshow(MpA)
axA.set_title('Box Filter {0}x{0}'.format(rA*2 + 1))
axB.imshow(MpB)
axB.set_title('Box Filter {0}x{0}'.format(rB*2 + 1))

plt.show()

Basics [Numba]¶

Installation¶

Jit¶

Using decorator @jit¶

nopython and @njit¶

parallel flag and prange¶

Vectorization¶

@vectorize¶

@guvectorize¶

Stencil¶

Box Filter¶

Parameterized box filter¶