Cách tính tích chập 1 chiều
Tích chập 1 chiều
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def conv1d(a, w, b=0, stride=1, pad=0):
    """Apply a 1-D sliding-window filter (cross-correlation) to a signal.

    The filter ``w`` is slid along the zero-padded signal and, at every
    window position, the element-wise products are summed and ``b`` is added::

        y[i] = sum(a_pad[i*stride : i*stride + f] * w) + b

    Args:
        a: 1-D input signal (array-like) of length N.
        w: 1-D filter (array-like) of length f.
        b: Scalar bias added to every output element.
        stride: Step S between consecutive window positions.
        pad: Number P of zeros added to each end of ``a``.

    Returns:
        1-D float array of length ``N1 = (N + 2P - f) // S + 1``. The array is
        empty when the filter is longer than the padded signal.
    """
    a = np.asarray(a)
    w = np.asarray(w)
    N = a.shape[0]
    f = w.shape[0]

    # Zero-pad both ends of the signal.
    a_pad = np.pad(a, pad_width=pad, mode='constant', constant_values=0)

    # Floor division (not int(x / stride), which truncates toward zero) so a
    # filter that does not fit yields zero windows instead of a bogus one.
    N1 = max((N + 2 * pad - f) // stride + 1, 0)

    y = np.zeros(N1)
    for i in range(N1):
        start = i * stride
        y[i] = np.sum(a_pad[start:start + f] * w) + b
    return y
# Demo: filter a 6-sample signal with a length-3 kernel, stepping by 2 and
# padding one zero on each side of the signal.
a = np.array([1, 2, 5, 4, 7, 6])
w = np.array([1, 2, 3])
print(conv1d(a, w, b=0, stride=2, pad=1))
Cách plot dữ liệu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import numpy as np
import matplotlib.pyplot as plt
N = 200
# Clean reference signal: a slow sine wave sampled at N points.
x_clean = np.sin(np.arange(N) / 20.0)
# Corrupt it with zero-mean Gaussian noise scaled by 0.05.
x_noisy = x_clean + 0.05 * np.random.randn(N)
# 3-tap moving-average filter. The weights must sum to 1, otherwise the
# filtered signal is attenuated as well as smoothed.
w = np.ones(3) / 3.0
x_filtered = conv1d(x_noisy, w)

# One subplot per signal: clean, noisy, filtered.
fig, (ax1, ax2, ax3) = plt.subplots(3)
fig.suptitle('veritcally stacked subplots')
ax1.plot(x_clean, color='blue')
ax2.plot(x_noisy, color='red')
ax3.plot(x_filtered, color='green')
plt.show()
Load image và gray image
1
2
3
4
5
6
7
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# Read the image file into an array and display it with the axes hidden.
img = mpimg.imread("building2.jpg")
plt.imshow(img)
plt.axis('off')
plt.show()
1
2
3
4
5
# Convert to grayscale with a weighted sum of the first three channels.
img_gray = 0.2125 * img[:, :, 0] + 0.7154 * img[:, :, 1] + 0.0721 * img[:, :, 2]
# The later snippets in this file refer to the grayscale image as
# `gray_image`; bind that name too so they do not fail with a NameError.
gray_image = img_gray
plt.axis('off')
# cmap='gray' renders the single-channel array in shades of gray.
plt.imshow(img_gray, cmap='gray')
plt.show()
Sử dụng OpenCV
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# using cv2: pip3 install opencv-python
# The OpenCV variant below is kept inside a string literal, so it is NOT
# executed. It reads the image with cv2, converts BGR to RGB, and builds a
# grayscale image with the weights 0.299 / 0.587 / 0.114 (different from the
# weights used in the matplotlib version). The `%matplotlib inline` line is
# notebook magic, not plain Python.
'''
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
img_path = 'building2.jpg'
img = cv2.imread(img_path)
fix_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
R, G, B = fix_img[:,:,0], fix_img[:,:,1],fix_img[:,:,2]
img_gray = 0.299 * R + 0.587 * G + 0.114 * B
plt.imshow(img_gray, cmap='gray')
'''
print('opencv2')
Convolution 2D
Dùng cho đơn kênh (1 channel)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# A grayscale image is a two-dimensional, single-channel signal.
# A color image is a two-dimensional signal with 3 channels.
def conv2d_11(A, W, b=0, stride=1, pad=0):
    """Apply one 2-D filter to a single-channel 2-D input (cross-correlation).

    Args:
        A: Input (array-like), shape ``(n_H_old, n_W_old)``.
        W: Filter (array-like), shape ``(f_h, f_w)``; usually square ``(f, f)``.
        b: Scalar bias added to every output element.
        stride: Step between consecutive window positions, on both axes.
        pad: Number of zeros added on every side of ``A``.

    Returns:
        Float array of shape ``(n_H_new, n_W_new)`` where
        ``n_new = (n_old + 2*pad - f) // stride + 1`` per axis; an axis has
        length 0 when the filter does not fit in the padded input.
    """
    A = np.asarray(A)
    W = np.asarray(W)
    n_H_old, n_W_old = A.shape
    # Track filter height and width separately so a non-square filter is
    # handled correctly instead of being treated as (f_w, f_w).
    f_h, f_w = W.shape

    A_pad = np.pad(A, pad_width=pad, mode='constant', constant_values=0)

    # Floor division; clamp at 0 so an oversized filter gives an empty output.
    n_H_new = max((n_H_old + 2 * pad - f_h) // stride + 1, 0)
    n_W_new = max((n_W_old + 2 * pad - f_w) // stride + 1, 0)

    y = np.zeros((n_H_new, n_W_new))
    for h in range(n_H_new):
        h_start = h * stride
        for v in range(n_W_new):
            v_start = v * stride
            window = A_pad[h_start:h_start + f_h, v_start:v_start + f_w]
            y[h, v] = np.sum(window * W) + b
    return y
Thêm nhiễu và khử nhiễu
1
2
3
4
5
# Add Gaussian noise (standard normal samples scaled by 20) to every pixel of
# the grayscale image, then display the noisy result.
img_noisy = gray_image + np.random.randn(*gray_image.shape) * 20
plt.imshow(img_noisy, cmap = 'gray')
plt.axis('off')
plt.show()
1
2
3
4
5
6
7
8
# Denoise with an f x f mean filter.
f = 3
# k is the number of filter elements; dividing by k (not k**2) makes the
# weights sum to 1 so the filter averages without scaling pixel values down.
k = f * f
w = np.ones((f, f)) / k
img_denoised = conv2d_11(img_noisy, w)
plt.imshow(img_denoised, cmap='gray')
plt.axis('off')
plt.show()
Bộ lọc dò cạnh thẳng đứng trong ảnh
1
2
3
4
5
6
7
8
9
# Filter that detects vertical edges in the image: each row takes the
# right neighbor minus the left neighbor.
w = np.array([[-1, 0, 1],
[-1, 0, 1],
[-1, 0, 1]])
img_ver_edge = conv2d_11(gray_image, w)
plt.imshow(img_ver_edge, cmap = 'gray')
plt.axis('off')
plt.show()
Bộ lọc dò cạnh nằm ngang
1
2
3
4
5
6
7
8
# Filter that detects horizontal edges: the bottom row minus the top row.
w = np.array([[-1, -1, -1],
[0, 0, 0],
[1, 1, 1]])
img_hor_edge = conv2d_11(gray_image, w)
plt.imshow(img_hor_edge, cmap = 'gray')
plt.axis('off')
plt.show()
Tích chập với nhiều bộ lọc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def conv2d_1n(A, W, b, stride=1, pad=0):
    """Apply several 2-D filters to a single-channel 2-D input.

    Args:
        A: Input (array-like), shape ``(n_H_old, n_W_old)``.
        W: Filters (array-like), shape ``(f_h, f_w, n_C)``; usually
            ``(f, f, n_C)``. ``W[..., c]`` is the c-th filter.
        b: Biases, indexable with ``b[c]`` for ``c`` in ``range(n_C)``.
        stride: Step between consecutive window positions, on both axes.
        pad: Number of zeros added on every side of ``A``.

    Returns:
        Float array of shape ``(n_H_new, n_W_new, n_C)`` where
        ``n_new = (n_old + 2*pad - f) // stride + 1`` per spatial axis.
    """
    A = np.asarray(A)
    W = np.asarray(W)
    n_H_old, n_W_old = A.shape
    # Separate filter height/width so non-square filters are handled correctly.
    f_h, f_w, n_C = W.shape

    A_pad = np.pad(A, pad_width=pad, mode='constant', constant_values=0)

    # Floor division; clamp at 0 so an oversized filter gives an empty output.
    n_H_new = max((n_H_old + 2 * pad - f_h) // stride + 1, 0)
    n_W_new = max((n_W_old + 2 * pad - f_w) // stride + 1, 0)

    y = np.zeros((n_H_new, n_W_new, n_C))
    for h in range(n_H_new):
        h_start = h * stride
        for v in range(n_W_new):
            v_start = v * stride
            # The window is the same for every filter; slice it once.
            window = A_pad[h_start:h_start + f_h, v_start:v_start + f_w]
            for c in range(n_C):
                y[h, v, c] = np.sum(window * W[..., c]) + b[c]
    return y
Plot data với tích chập nhiều bộ lọc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Stack two 3x3 edge filters along the last axis: W[:, :, c] is filter c.
W = np.zeros((3, 3, 2))
# Filter 0: vertical edges.
W[:, :, 0] = np.array([
    [-1, 0, 1],
    [-1, 0, 1],
    [-1, 0, 1]
])
# Filter 1: horizontal edges. The middle row must be all zeros so the filter
# responds only to the difference between the rows above and below.
W[:, :, 1] = np.array([
    [-1, -1, -1],
    [0, 0, 0],
    [1, 1, 1]
])
b = np.zeros(2)
img_2 = conv2d_1n(gray_image, W, b)
# Show the magnitude of the first filter's response.
plt.imshow(np.abs(img_2[:, :, 0]), 'gray')
plt.show()
1
2
# Show the magnitude of the second output channel of img_2.
plt.imshow(np.abs(img_2[:, :, 1]), 'gray')
plt.show()
Conv2D tổng quát
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# The input is a batch of m images.
def conv2d(A, W, b, stride=1, pad=0):
    """Apply a bank of filters to a batch of multi-channel 2-D inputs.

    Args:
        A: Input (array-like), shape ``(m, in_height, in_width, in_channel)``.
        W: Filters (array-like), shape ``(f_h, f_w, in_channel, out_channel)``;
            usually ``(f, f, in_channel, out_channel)``.
        b: Biases, indexable with ``b[c]`` for ``c`` in ``range(out_channel)``.
        stride: Step between consecutive window positions, on both spatial axes.
        pad: Number of zeros added on every side of the two spatial axes.

    Returns:
        Float array of shape ``(m, out_height, out_width, out_channel)`` where
        ``out = (in + 2*pad - f) // stride + 1`` per spatial axis.

    Raises:
        AssertionError: If the channel count of ``A`` differs from the input
            channel count of ``W``.
    """
    A = np.asarray(A)
    W = np.asarray(W)
    assert A.shape[3] == W.shape[2], "A and W disagree on the number of input channels"
    m, in_height, in_width, _ = A.shape
    f_h, f_w, _, out_channel = W.shape

    # Pad only the height and width axes. A scalar pad_width would also pad
    # the batch and channel axes, shifting the batch index and breaking the
    # channel match with W.
    A_pad = np.pad(
        A,
        pad_width=((0, 0), (pad, pad), (pad, pad), (0, 0)),
        mode='constant',
        constant_values=0,
    )

    # Floor division; clamp at 0 so an oversized filter gives an empty output.
    out_height = max((in_height + 2 * pad - f_h) // stride + 1, 0)
    out_width = max((in_width + 2 * pad - f_w) // stride + 1, 0)

    y = np.zeros((m, out_height, out_width, out_channel))
    for i in range(m):
        for h in range(out_height):
            h_start = h * stride
            for w in range(out_width):
                w_start = w * stride
                # The window spans all input channels and is shared by every
                # output channel; slice it once.
                a_slide = A_pad[i, h_start:h_start + f_h, w_start:w_start + f_w, :]
                for c in range(out_channel):
                    y[i, h, w, c] = np.sum(a_slide * W[:, :, :, c]) + b[c]
    return y
Link tham khảo
Tài liệu tham khảo
Machine learning cơ bản
Hết.