import numpy as np
import matplotlib.pyplot as plt
2D Convolution
# Load the image from disk and report its dimensions.
img = plt.imread("Image.jpeg")
# Unpacking into three names requires a 3-D array: rows, columns, channels.
Nx, Ny, Nz = np.shape(img)
print(f"Height: {Nx}, Width: {Ny}, RGB: {Nz}")
plt.imshow(img)
plt.show()
Height: 3264, Width: 4928, RGB: 3
# Dump the raw pixel values; NumPy abbreviates large arrays with "...".
print(img)
[[[182 202 227]
[172 192 217]
[174 194 219]
...
[216 229 246]
[206 219 236]
[196 209 226]]
[[189 209 234]
[179 199 224]
[179 199 224]
...
[213 226 243]
[211 224 241]
[210 223 240]]
[[191 211 236]
[182 202 227]
[180 200 225]
...
[208 221 238]
[214 227 244]
[219 232 249]]
...
[[ 60 107 137]
[ 59 106 136]
[ 63 110 140]
...
[ 57 89 110]
[ 59 91 116]
[ 57 89 114]]
[[ 60 107 137]
[ 58 105 135]
[ 60 105 136]
...
[ 59 91 112]
[ 59 89 115]
[ 55 85 111]]
[[ 61 108 138]
[ 57 104 134]
[ 57 102 133]
...
[ 45 77 98]
[ 54 81 108]
[ 57 84 111]]]
The RGB Channels
# Work on independent copies so the original image is left untouched.
imgR, imgG, imgB = img.copy(), img.copy(), img.copy()

# Isolate one colour per copy by zeroing the other two channels
# (channel order along the last axis is 0 = R, 1 = G, 2 = B).
imgR[:, :, (1, 2)] = 0
imgG[:, :, (0, 2)] = 0
imgB[:, :, (0, 1)] = 0

# Show the three single-channel images side by side.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 15))
ax[0].imshow(imgR)
ax[1].imshow(imgG)
ax[2].imshow(imgB)
plt.show()
The Grayscale Image
# Per-channel (R, G, B) weights for the grayscale conversion; green carries
# the largest weight and blue the smallest.
rgb_weights = [0.2989, 0.5870, 0.1140]

# np.dot contracts the last (channel) axis of img with the weights, turning
# the (height, width, 3) colour image into a (height, width) intensity map.
grayscale_image = np.dot(img, rgb_weights)

plt.imshow(grayscale_image, cmap="gray")
plt.show()

print(np.shape(grayscale_image))
print(grayscale_image)
(3264, 4928)
[[198.8518 188.8528 190.8526 ... 227.0294 217.0304 207.0314]
[205.8511 195.8521 195.8521 ... 224.0297 222.0299 221.03 ]
[207.8509 198.8518 196.852 ... 219.0302 225.0296 230.0291]
...
[ 96.361 95.3611 99.3607 ... 81.8203 84.2761 82.2763]
[ 96.361 94.3612 95.073 ... 83.8201 82.9881 78.9885]
[ 97.3609 93.3613 92.0733 ... 69.8215 75.9996 78.9993]]
Sobel Operators - The Edge-detecting Kernels
\(G_x = \begin{bmatrix}1 & 0 & -1 \\ 2 & 0 & -2 \\ 1 & 0 & -1 \end{bmatrix}\) and \(G_y = \begin{bmatrix}1 & 2 & 1 \\ 0 & 0 & 0 \\ -1 & -2 & -1 \end{bmatrix}\)
# Sobel kernels: Gx estimates the gradient along x, Gy the gradient along y.
Gx = np.array([[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]])
Gy = np.array([[1.0, 2.0, 1.0], [0.0, 0.0, 0.0], [-1.0, -2.0, -1.0]])
Implementing the 2d Convolution
\(A = \begin{bmatrix}1 & 2 & 3 \\ 4 & 5 & 6 \\ 7 & 8 & 9 \end{bmatrix}\) when zero padded by 1 pixel gives: \(A' = \begin{bmatrix}0 & 0 & 0 & 0 & 0 \\ 0 & 1 & 2 & 3 & 0 \\ 0 & 4 & 5 & 6 & 0 \\ 0 & 7 & 8 & 9 & 0 \\ 0 & 0 & 0 & 0 & 0\end{bmatrix}\)
This is achieved using NumPy's `np.pad()` function.
# np.pad names its second parameter `pad_width`; mode="constant" pads with zeros.
A_padded = np.pad(A, pad_width=1, mode="constant")
Also before proceeding with the convolution, the kernel must be flipped Left-Right and then Upside-Down
\(ker = \begin{bmatrix}a & b & c \\ d & e & f \\ g & h & i \end{bmatrix} ⟶ \begin{bmatrix}c & b & a \\ f & e & d \\ i & h & g \end{bmatrix} ⟶ \begin{bmatrix}i & h & g \\ f & e & d \\ c & b & a \end{bmatrix} = ker'\)
This is achieved as:
# Flip left-right first, then up-down (equivalent to a 180-degree rotation).
ker_flipped = np.flipud(np.fliplr(ker))
Here `fliplr` denotes a left-right flip and `flipud` denotes an up-down flip. Choose a stride of length 1 and compute each output element as the sum of the element-wise product (written \(\cdot\) below) of a kernel-sized chunk of the padded matrix \(A'\) with the flipped kernel \(ker'\):
\(\begin{bmatrix}0 & 0 & 0 \\ 0 & 1 & 2 \\ 0 & 4 & 5 \end{bmatrix} \cdot \begin{bmatrix}i & h & g \\ f & e & d \\ c & b & a \end{bmatrix} = elt_1\)
\(\begin{bmatrix}0 & 0 & 0 \\ 1 & 2 & 3 \\ 4 & 5 & 6 \end{bmatrix} \cdot \begin{bmatrix}i & h & g \\ f & e & d \\ c & b & a \end{bmatrix} = elt_2\)
\(\vdots\)
\(\begin{bmatrix}5 & 6 & 0 \\ 8 & 9 & 0 \\ 0 & 0 & 0 \end{bmatrix} \cdot \begin{bmatrix}i & h & g \\ f & e & d \\ c & b & a \end{bmatrix} = elt_N\)
Notice the dimensions of the final output matrix:
\[\begin{equation} R_{\text{height}} = \left\lfloor \frac{A_{\text{height}} + 2\cdot\text{padding} - ker_{\text{height}}}{\text{stride}} \right\rfloor + 1 \end{equation}\]
\[\begin{equation} R_{\text{width}} = \left\lfloor \frac{A_{\text{width}} + 2\cdot\text{padding} - ker_{\text{width}}}{\text{stride}} \right\rfloor + 1 \end{equation}\]
# Print the kernel followed by its left-right and up-down mirror images,
# with a blank line between the first two pairs.
print(Gx)
print()
print(np.fliplr(Gx))
print()
print(np.flipud(Gx))
[[ 1. 0. -1.]
[ 2. 0. -2.]
[ 1. 0. -1.]]
[[-1. 0. 1.]
[-2. 0. 2.]
[-1. 0. 1.]]
[[ 1. 0. -1.]
[ 2. 0. -2.]
[ 1. 0. -1.]]
def convolve2d(image, kernel, padding, stride):
    """Convolve a 2-D image with a 2-D kernel using zero padding.

    The kernel is flipped left-right and up-down before it is slid across
    the zero-padded image, so the result is a true convolution rather than
    a cross-correlation.

    Args:
        image: 2-D array to filter.
        kernel: 2-D array of filter weights.
        padding: Number of zero pixels added on every side of ``image``.
        stride: Step, in pixels, between consecutive kernel positions.

    Returns:
        A float array whose size along each axis is
        ``(image_size + 2 * padding - kernel_size) // stride + 1``.
    """
    image_height, image_width = image.shape
    kernel_height, kernel_width = kernel.shape

    output_height = (image_height + 2 * padding - kernel_height) // stride + 1
    output_width = (image_width + 2 * padding - kernel_width) // stride + 1
    output = np.zeros((output_height, output_width))

    padded_image = np.pad(image, padding, mode="constant")
    flipped_kernel = np.flipud(np.fliplr(kernel))

    # Walk over *output* coordinates and scale by the stride to locate the
    # window in the padded image. Stepping the output index itself by the
    # stride would leave most cells unfilled whenever stride > 1.
    for i in range(output_height):
        top = i * stride
        for j in range(output_width):
            left = j * stride
            window = padded_image[top:top + kernel_height,
                                  left:left + kernel_width]
            output[i, j] = np.sum(window * flipped_kernel)
    return output
# Gradient images along x and y (1 pixel of zero padding, stride 1, so the
# outputs keep the size of the grayscale image for the 3x3 kernels).
imgX = convolve2d(grayscale_image, Gx, 1, 1)
imgY = convolve2d(grayscale_image, Gy, 1, 1)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 15))
ax[0].imshow(imgX, cmap="gray")
ax[0].set_title("X-Gradient")
ax[1].imshow(imgY, cmap="gray")
ax[1].set_title("Y-Gradient")
plt.show()

# Combine both directions into a single gradient-magnitude image.
sobel_final = np.sqrt(imgX**2 + imgY**2)
plt.imshow(sobel_final, cmap="gray")
plt.show()

# Original, grayscale and edge-detected images side by side.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 15))
ax[0].imshow(img)
ax[1].imshow(grayscale_image, cmap="gray")
ax[2].imshow(sobel_final, cmap="gray")
plt.show()

plt.imsave("Sobel.jpeg", sobel_final, cmap="gray")
Wrapped Up Function
def edge_detect(image_org):
    """Run Sobel edge detection on a colour image, then save and show it.

    The image is converted to grayscale, zero-padded by one pixel, and
    convolved with both Sobel kernels. The per-pixel gradient magnitude is
    written to ``Edge.jpeg`` and displayed next to the original image.

    Args:
        image_org: Array of shape (height, width, 3); the last axis is
            weighted by ``rgb_weights`` to produce the grayscale image.

    Returns:
        None. The result is saved to disk and plotted.
    """
    padding, stride = 1, 1

    rgb_weights = [0.2989, 0.5870, 0.1140]
    image = np.dot(image_org, rgb_weights)

    Gx = np.array([[1.0, 0.0, -1.0], [2.0, 0.0, -2.0], [1.0, 0.0, -1.0]])
    Gy = np.array([[1.0, 2.0, 1.0], [0.0, 0.0, 0.0], [-1.0, -2.0, -1.0]])
    # Take the window size from the kernel instead of hard-coding 3.
    kernel_height, kernel_width = Gx.shape

    image_height, image_width = image.shape
    output_height = (image_height + 2 * padding - kernel_height) // stride + 1
    output_width = (image_width + 2 * padding - kernel_width) // stride + 1
    A_sobel = np.zeros((output_height, output_width))

    padded_image = np.pad(image, padding, mode="constant")
    # Convolution uses the kernels flipped left-right and up-down.
    Gx = np.flipud(np.fliplr(Gx))
    Gy = np.flipud(np.fliplr(Gy))

    # Index by output coordinate and scale by the stride to find the window,
    # so the loop stays correct if the stride above is ever changed.
    for i in range(output_height):
        top = i * stride
        for j in range(output_width):
            left = j * stride
            # Slice the window once and reuse it for both kernels.
            window = padded_image[top:top + kernel_height,
                                  left:left + kernel_width]
            A_sobel[i, j] = (np.sum(window * Gx)**2
                             + np.sum(window * Gy)**2)**0.5

    plt.imsave("Edge.jpeg", A_sobel, cmap="gray")

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 15))
    ax[0].imshow(image_org)
    ax[0].set_title("Original Image")
    ax[1].imshow(A_sobel, cmap="gray")
    ax[1].set_title("Edge-Detected")
    plt.show()
# Run the wrapped-up edge detector on the image loaded at the top of the file.
edge_detect(img)