Tích chập 2 chiều

Posted by Hao Do on August 26, 2022

Cách tính tích chập 1 chiều

Tích chập 1 chiều

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def conv1d(a, w, b=0, stride=1, pad=0):
    '''
    Slide the 1-D kernel w along the 1-D signal a (the kernel is not flipped).

    Each output element is sum(window * w) + b, where window is the slice
    of the zero-padded signal currently covered by the kernel.

    a: input signal, a.shape = (N,)
    w: kernel, w.shape = (f,)
    b: scalar bias added to every output element
    stride: step between two consecutive kernel positions
    pad: number of zeros added at BOTH ends of a

    Returns a float array of length N1 = (N + 2*pad - f) // stride + 1.
    The result is empty when the kernel does not fit in the padded signal.
    '''
    N = a.shape[0]
    f = w.shape[0]
    # Zero-pad both ends of the signal.
    a_pad = np.pad(a, pad_width=pad, mode='constant', constant_values=0)

    # Floor division (clamped at 0) instead of int(x / stride): truncating
    # toward zero would report one output position for a kernel longer than
    # the padded signal, and that position would be computed from a truncated
    # window that NumPy may silently broadcast against w.
    N1 = max((N + 2 * pad - f) // stride + 1, 0)

    y = np.zeros(N1)
    for i in range(N1):
        start = i * stride
        y[i] = np.sum(a_pad[start:start + f] * w) + b
    return y
    
# Demo: a length-3 kernel moved over a length-6 signal with a stride of 2
# and one zero of padding on each end.
a = np.array([1, 2, 5, 4, 7, 6])
w = np.array([1, 2, 3])

print(conv1d(a, w, b=0, stride=2, pad=1))

Cách plot dữ liệu

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import numpy as np
import matplotlib.pyplot as plt

# Build a clean sine wave, corrupt it with Gaussian noise, then smooth the
# noisy version with a 3-tap moving average.
N = 200
x_clean = np.sin(np.arange(N) / 20.0)
x_noisy = x_clean + 0.05 * np.random.randn(N)
# The averaging weights must sum to 1; otherwise the filtered signal is
# scaled down as well as smoothed.
w = np.ones(3) / 3
x_filtered = conv1d(x_noisy, w)

# One row per signal: clean, noisy, filtered.
fig, (ax1, ax2, ax3) = plt.subplots(3)
fig.suptitle('vertically stacked subplots')

ax1.plot(x_clean, color = 'blue')
ax2.plot(x_noisy, color = 'red')
ax3.plot(x_filtered, color = 'green')

img

Load image và gray image

1
2
3
4
5
6
7
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Read the image file from the working directory and display it with the
# axes hidden.
img = mpimg.imread("building2.jpg")
plt.imshow(img)
plt.axis('off')
plt.show()


1
2
3
4
5
# A weighted sum of the three colour planes (last axis of img) gives a
# single-channel image; pass cmap='gray' to imshow so it is rendered in
# shades of gray.
img_gray = 0.2125 * img[:, :, 0] + 0.7154 * img[:, :, 1] + 0.0721 * img[:, :, 2]
# The snippets that follow refer to this array as `gray_image`, so expose it
# under that name as well.
gray_image = img_gray
plt.axis('off')
plt.imshow(img_gray, cmap='gray')
plt.show()

Sử dụng open cv

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Alternative using OpenCV (install with: pip3 install opencv-python).
# The snippet is wrapped in a string literal, so it is not executed here.
# It contains the IPython magic `%matplotlib inline`, so it would have to be
# run from a notebook cell rather than as plain Python.
# It reads the file with cv2, converts BGR to RGB, and builds a grayscale
# image as a weighted sum of the R, G and B planes.
'''
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

img_path = 'building2.jpg'

img = cv2.imread(img_path)
fix_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

R, G, B = fix_img[:,:,0], fix_img[:,:,1],fix_img[:,:,2]

img_gray = 0.299 * R + 0.587 * G + 0.114 * B
plt.imshow(img_gray, cmap='gray')
'''
print('opencv2')

Convolution 2D

Dùng cho đơn kênh (1 channel)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# A grayscale image is a two-dimensional, single-channel signal;
# a colour image is a two-dimensional signal with three channels.

def conv2d_11(A, W, b=0, stride=1, pad=0):
    '''
    Slide one 2-D filter over one single-channel 2-D input (no kernel flip).

    A: input, A.shape = (n_H_old, n_W_old)
    W: filter, W.shape = (f_h, f_w); square filters (f, f) are the usual case
    b: scalar bias added to every output element
    stride: step between two filter positions, same in both directions
    pad: number of zeros added on every side of A

    Returns y with y.shape = (n_H_new, n_W_new), where
        n_H_new = (n_H_old + 2*pad - f_h) // stride + 1
        n_W_new = (n_W_old + 2*pad - f_w) // stride + 1
    A dimension is 0 when the filter does not fit along it.
    '''
    n_H_old, n_W_old = A.shape
    # Keep height and width separately so non-square filters work too.
    f_h, f_w = W.shape
    A_pad = np.pad(A, pad_width=pad, mode='constant', constant_values=0)

    # Floor division clamped at 0: never report a position whose window
    # would be cut off by the edge of the padded input.
    n_H_new = max((n_H_old + 2 * pad - f_h) // stride + 1, 0)
    n_W_new = max((n_W_old + 2 * pad - f_w) // stride + 1, 0)

    y = np.zeros((n_H_new, n_W_new))
    for h in range(n_H_new):
        h_start = h * stride
        for v in range(n_W_new):
            v_start = v * stride
            window = A_pad[h_start:h_start + f_h, v_start:v_start + f_w]
            y[h, v] = np.sum(window * W) + b
    return y

Thêm nhiễu và khử nhiễu

1
2
3
4
5
# Add Gaussian noise (standard normal samples scaled by 20) to every pixel
# of the grayscale image, then display the result without axes.
img_noisy = gray_image + np.random.randn(*gray_image.shape) * 20

plt.imshow(img_noisy, cmap = 'gray')
plt.axis('off')
plt.show()


1
2
3
4
5
6
7
8
# Denoise with an f x f box (mean) filter.
f = 3
# Number of taps in the filter. Dividing by it makes the weights sum to 1,
# so the filtered image keeps the brightness of the input.
k = f * f
w = np.ones((f, f)) / k
img_denoised = conv2d_11(img_noisy, w)

plt.imshow(img_denoised, cmap = 'gray')
plt.axis('off')
plt.show()

Bộ lọc dò cạnh thẳng đứng trong ảnh

1
2
3
4
5
6
7
8
9
# Vertical edge detector: each output is the right column of the 3x3 window
# minus its left column.
w = np.array([[-1, 0, 1], 
              [-1, 0, 1], 
              [-1, 0, 1]])
img_ver_edge = conv2d_11(gray_image, w)

plt.imshow(img_ver_edge, cmap = 'gray')
plt.axis('off')
plt.show()

Bộ lọc dò cạnh nằm ngang

1
2
3
4
5
6
7
8
# Horizontal edge detector: each output is the bottom row of the 3x3 window
# minus its top row.
w = np.array([[-1, -1, -1], 
              [0, 0, 0], 
              [1, 1, 1]])
img_hor_edge = conv2d_11(gray_image, w)
plt.imshow(img_hor_edge, cmap = 'gray')
plt.axis('off')
plt.show()

Tích chập với nhiều bộ lọc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def conv2d_1n(A, W, b, stride=1, pad=0):
    '''
    Apply n_C 2-D filters to one single-channel 2-D input (no kernel flip).

    A.shape = (n_H_old, n_W_old)
    W.shape = (f_h, f_w, n_C); square filters (f, f, n_C) are the usual case
    b.shape = (n_C, ), one bias per filter
    stride: step between two filter positions, same in both directions
    pad: number of zeros added on every side of A

    output.shape = (n_H_new, n_W_new, n_C), where
        n_H_new = (n_H_old + 2*pad - f_h) // stride + 1
        n_W_new = (n_W_old + 2*pad - f_w) // stride + 1
    A spatial dimension is 0 when the filters do not fit along it.
    '''
    n_H_old, n_W_old = A.shape
    # Keep height and width separately so non-square filters work too.
    f_h, f_w, n_C = W.shape
    A_pad = np.pad(A, pad_width=pad, mode='constant', constant_values=0)

    # Floor division clamped at 0: never report a position whose window
    # would be cut off by the edge of the padded input.
    n_H_new = max((n_H_old + 2 * pad - f_h) // stride + 1, 0)
    n_W_new = max((n_W_old + 2 * pad - f_w) // stride + 1, 0)

    y = np.zeros((n_H_new, n_W_new, n_C))
    for h in range(n_H_new):
        h_start = h * stride
        for v in range(n_W_new):
            v_start = v * stride
            # The window is the same for every filter, so slice it once.
            window = A_pad[h_start:h_start + f_h, v_start:v_start + f_w]
            for c in range(n_C):
                y[h, v, c] = np.sum(window * W[..., c]) + b[c]

    return y

Plot data với tích chập nhiều bộ lọc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Stack two 3x3 edge detectors along the last axis and apply both in one call.
W = np.zeros((3, 3, 2))
# Filter 0: vertical edges (right column minus left column).
W[:, :, 0] = np.array([
    [-1, 0, 1],
    [-1, 0, 1],
    [-1, 0, 1]
])

# Filter 1: horizontal edges (bottom row minus top row). The middle row must
# be all zeros so the filter responds only to vertical intensity changes.
W[:, :, 1] = np.array([
    [-1, -1, -1],
    [0, 0, 0],
    [1, 1, 1]
])
b = np.zeros(2)
img_2 = conv2d_1n(gray_image, W, b)

# Show the magnitude of the first filter's response.
plt.imshow(np.abs(img_2[:, :, 0]), 'gray')
plt.show()

img

1
2
# Show the magnitude of the second filter's response (channel 1 of img_2).
plt.imshow(np.abs(img_2[:, :, 1]), 'gray')
plt.show()

img

Conv2D tổng quát

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# The input is a batch of m images.
def conv2d(A, W, b, stride=1, pad=0):
    '''
    General 2-D convolution (no kernel flip) over a batch of multi-channel
    images.

    A: input, A.shape = (m, in_height, in_width, in_channel)
    W: filters, W.shape = (f_h, f_w, in_channel, out_channel); square
       filters (f, f, ...) are the usual case
    b: biases, b.shape = (out_channel)
    stride: step between two filter positions, same in both directions
    pad: number of zeros added on every side of the two spatial axes

    Returns y with y.shape = (m, out_height, out_width, out_channel), where
        out_height = (in_height + 2*pad - f_h) // stride + 1
        out_width  = (in_width  + 2*pad - f_w) // stride + 1
    '''

    assert A.shape[3] == W.shape[2]

    m, in_height, in_width, _ = A.shape
    f_h, f_w, in_channel, out_channel = W.shape
    # Pad only height and width. A scalar pad_width would also pad the batch
    # and channel axes, which changes the channel count and makes the
    # window * filter product fail for any pad > 0.
    A_pad = np.pad(
        A,
        pad_width=((0, 0), (pad, pad), (pad, pad), (0, 0)),
        mode='constant',
        constant_values=0,
    )

    # Floor division clamped at 0: never report a position whose window
    # would be cut off by the edge of the padded input.
    out_height = max((in_height + 2 * pad - f_h) // stride + 1, 0)
    out_width = max((in_width + 2 * pad - f_w) // stride + 1, 0)

    y = np.zeros((m, out_height, out_width, out_channel))
    for i in range(m):
        for h in range(out_height):
            h_start = h * stride
            for col in range(out_width):
                w_start = col * stride
                # Same window for every output channel, so slice it once.
                a_slide = A_pad[i, h_start:h_start + f_h, w_start:w_start + f_w, :]
                for c in range(out_channel):
                    y[i, h, col, c] = np.sum(a_slide * W[:, :, :, c]) + b[c]
    return y

Full ipynb

Tài liệu tham khảo

Machine learning cơ bản

Hết.