Python implementation of Feature Similarity Index (FSIM)
Abstract
FSIM[1] is an image quality assessment metric proposed by Zhang et al. in 2011. Like the human visual system (HVS), it judges images by their low-level features. This article introduces the FSIM metric in two parts: first the mathematical model, then its implementation in Python.
Formula
FSIM combines phase congruency (PC) and gradient magnitude (GM); a higher FSIM value indicates that the two images are more similar. Its mathematical model is as follows:
$$S_{PC}(\mathbf{x}) = \frac{2\,PC_1(\mathbf{x})\,PC_2(\mathbf{x}) + T_1}{PC_1^2(\mathbf{x}) + PC_2^2(\mathbf{x}) + T_1}, \qquad S_G(\mathbf{x}) = \frac{2\,G_1(\mathbf{x})\,G_2(\mathbf{x}) + T_2}{G_1^2(\mathbf{x}) + G_2^2(\mathbf{x}) + T_2}$$

$$\mathrm{FSIM} = \frac{\sum_{\mathbf{x}\in\Omega} S_{PC}(\mathbf{x})\,S_G(\mathbf{x})\,PC_m(\mathbf{x})}{\sum_{\mathbf{x}\in\Omega} PC_m(\mathbf{x})}, \qquad PC_m(\mathbf{x}) = \max\bigl(PC_1(\mathbf{x}),\, PC_2(\mathbf{x})\bigr)$$

where the phase congruency maps of images $f_1$ and $f_2$ are denoted by $PC_1$ and $PC_2$, respectively, and their gradient magnitudes by $G_1$ and $G_2$. $T_1$ and $T_2$ are two positive constants that keep the ratios numerically stable; the code below uses $T_1 = 0.85$ and $T_2 = 160$, the values from the paper.
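Before the full implementation, a minimal NumPy sketch of this pooling step may help connect the formula to the code below. It assumes pc1, pc2, g1, and g2 are precomputed phase congruency and gradient magnitude maps (hypothetical names; the full code derives them from the input images):

import numpy as np

def fsim_pool(pc1, pc2, g1, g2, t1=0.85, t2=160):
    # Per-pixel similarity of each pair of feature maps.
    s_pc = (2 * pc1 * pc2 + t1) / (pc1 ** 2 + pc2 ** 2 + t1)
    s_g = (2 * g1 * g2 + t2) / (g1 ** 2 + g2 ** 2 + t2)
    # Pool over the image, weighting each pixel by max(PC1, PC2).
    pc_m = np.maximum(pc1, pc2)
    return np.sum(s_pc * s_g * pc_m) / np.sum(pc_m)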
Code
from PIL import Image
import numpy as np
from scipy import signal
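# Normalised frequency coordinates in [-0.5, 0.5) for building FFT-domain filters.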
def normalization(col, row):
if col % 2 == 1:
xrange = np.arange(-(col - 1) / 2, (col - 1) / 2 + 1) / (col - 1)
else:
xrange = np.arange(-col / 2, col / 2) / col
if row % 2 == 1:
yrange = np.arange(-(row - 1) / 2, (row - 1) / 2 + 1) / (row - 1)
else:
yrange = np.arange(-row / 2, row / 2) / row
return xrange, yrange
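# Phase congruency map of a grayscale image, modeled on Peter Kovesi's phasecong2
# (log-Gabor filters over 4 scales and 4 orientations, with noise compensation).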
def phase_cong2(im):
nscale = 4
norient = 4
min_wave_length = 6
mult = 2
sigma_onf = 0.55
d_theta_on_sigma = 1.2
k = 2.0
epsilon = 0.0001
theta_sigma = np.pi / norient / d_theta_on_sigma
row, col = im.shape
image_fft = np.fft.fft2(im)
zero = np.zeros((row, col))
eo = np.zeros((nscale, norient, row, col), dtype=complex)
est_mean_e2n = np.zeros(norient)
ifft_filter_array = np.zeros((nscale, row, col))
xrange, yrange = normalization(col, row)
x, y = np.meshgrid(xrange, yrange)
radius = np.sqrt(x ** 2 + y ** 2)
theta = np.arctan2(-y, x)
radius = np.fft.ifftshift(radius)
theta = np.fft.ifftshift(theta)
radius[0, 0] = 1
sintheta = np.sin(theta)
costheta = np.cos(theta)
lp = low_pass_filter((row, col), 0.45, 15)
log_gabor = np.zeros((nscale, row, col))
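    # Radial component: one log-Gabor band per scale, damped by the low-pass filter.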
for s in range(nscale):
wavelength = min_wave_length * mult ** s
fo = 1.0 / wavelength
log_gabor[s] = np.exp(-(np.log(radius / fo)) ** 2 / (2 * np.log(sigma_onf) ** 2))
log_gabor[s] = log_gabor[s] * lp
log_gabor[s, 0, 0] = 0
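    # Angular component: Gaussian spread of each orientation about its filter angle.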
spread = np.zeros((norient, row, col))
for o in range(norient):
angl = o * np.pi / norient
ds = sintheta * np.cos(angl) - costheta * np.sin(angl)
dc = costheta * np.cos(angl) + sintheta * np.sin(angl)
dtheta = np.abs(np.arctan2(ds, dc))
spread[o] = np.exp(-(dtheta ** 2) / (2 * theta_sigma ** 2))
energy_all = np.zeros((row, col))
an_all = np.zeros((row, col))
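    # Accumulate filter responses, energy, and amplitude for each orientation.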
for o in range(norient):
sum_e_this_orient = zero.copy()
sum_o_this_orient = zero.copy()
sum_an_this_orient = zero.copy()
energy = zero.copy()
max_an = 0
em_n = epsilon
for s in range(nscale):
filter_val = log_gabor[s] * spread[o]
ifft_filt = np.real(np.fft.ifft2(filter_val)) * np.sqrt(row * col)
ifft_filter_array[s] = ifft_filt
eo[s, o] = np.fft.ifft2(image_fft * filter_val)
an = np.abs(eo[s, o])
sum_an_this_orient = sum_an_this_orient + an
sum_e_this_orient = sum_e_this_orient + np.real(eo[s, o])
sum_o_this_orient = sum_o_this_orient + np.imag(eo[s, o])
if s == 0:
em_n = np.sum(filter_val ** 2)
max_an = an
else:
max_an = np.maximum(max_an, an)
x_energy = np.sqrt(sum_e_this_orient ** 2 + sum_o_this_orient ** 2) + epsilon
mean_e = sum_e_this_orient / x_energy
mean_o = sum_o_this_orient / x_energy
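        # Project each scale's response onto the mean phase vector (the energy term).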
for s in range(nscale):
e = np.real(eo[s, o])
o_val = np.imag(eo[s, o])
energy = energy + e * mean_e + o_val * mean_o - np.abs(e * mean_o - o_val * mean_e)
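        # Estimate the noise threshold from the amplitude distribution at the smallest scale.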
median_e2n = np.median(np.abs(eo[0, o]) ** 2)
mean_e2n = -median_e2n / np.log(0.5)
est_mean_e2n[o] = mean_e2n
noise_power = mean_e2n / em_n
est_sum_an2 = zero.copy()
for s in range(nscale):
est_sum_an2 = est_sum_an2 + ifft_filter_array[s] ** 2
est_sum_aiaj = zero.copy()
for si in range(nscale - 1):
for sj in range(si + 1, nscale):
est_sum_aiaj = est_sum_aiaj + ifft_filter_array[si] * ifft_filter_array[sj]
sum_est_sum_an2 = np.sum(est_sum_an2)
sum_est_sum_aiaj = np.sum(est_sum_aiaj)
est_noise_energy2 = 2 * noise_power * sum_est_sum_an2 + 4 * noise_power * sum_est_sum_aiaj
tau = np.sqrt(est_noise_energy2 / 2)
est_noise_energy = tau * np.sqrt(np.pi / 2)
est_noise_energy_sigma = np.sqrt((2 - np.pi / 2) * tau ** 2)
t = est_noise_energy + k * est_noise_energy_sigma
t = t / 1.7
energy = np.maximum(energy - t, zero)
energy_all = energy_all + energy
an_all = an_all + sum_an_this_orient
result_pc = energy_all / an_all
return result_pc
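# Butterworth-style low-pass filter in the frequency domain; cutoff is a fraction
# of the sampling frequency (0 to 0.5) and n is the filter order.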
def low_pass_filter(sze, cutoff, n):
if cutoff < 0 or cutoff > 0.5:
raise ValueError('cutoff frequency must be between 0 and 0.5')
if n % 1 != 0 or n < 1:
raise ValueError('n must be an integer >= 1')
    if np.isscalar(sze):
        row = col = sze
    else:
        row = sze[0]
        col = sze[1]
xrange, yrange = normalization(col, row)
x, y = np.meshgrid(xrange, yrange)
radius = np.sqrt(x ** 2 + y ** 2)
f = np.fft.ifftshift(1.0 / (1.0 + (radius / cutoff) ** (2 * n)))
return f
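# FSIM and FSIMc between two images of equal size (grayscale or RGB uint8 arrays).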
def fsim(img_1, img_2):
row, col = img_1.shape[:2]
i1 = np.ones((row, col))
i2 = np.ones((row, col))
q1 = np.ones((row, col))
q2 = np.ones((row, col))
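    # I/Q default to ones so the chrominance similarity is neutral for grayscale input;
    # for colour input, convert RGB to YIQ (Y is luminance, I and Q are chrominance).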
if len(img_1.shape) == 3:
y1 = (0.299 * img_1[:, :, 0].astype(float) + 0.587 * img_1[:, :, 1].astype(float) +
0.114 * img_1[:, :, 2].astype(float))
y2 = (0.299 * img_2[:, :, 0].astype(float) + 0.587 * img_2[:, :, 1].astype(float) +
0.114 * img_2[:, :, 2].astype(float))
i1 = (0.596 * img_1[:, :, 0].astype(float) - 0.274 * img_1[:, :, 1].astype(float) -
0.322 * img_1[:, :, 2].astype(float))
i2 = (0.596 * img_2[:, :, 0].astype(float) - 0.274 * img_2[:, :, 1].astype(float) -
0.322 * img_2[:, :, 2].astype(float))
q1 = (0.211 * img_1[:, :, 0].astype(float) - 0.523 * img_1[:, :, 1].astype(float) +
0.312 * img_1[:, :, 2].astype(float))
q2 = (0.211 * img_2[:, :, 0].astype(float) - 0.523 * img_2[:, :, 1].astype(float) +
0.312 * img_2[:, :, 2].astype(float))
    else:
        y1 = img_1.astype(float)
        y2 = img_2.astype(float)
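    # Average-pool and downsample so the shorter side works out to roughly 256 px,
    # as in the reference implementation.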
min_dimension = min(row, col)
f = max(1, round(min_dimension / 256))
ave_kernel = np.ones((f, f)) / (f * f)
ave_i1 = signal.convolve2d(i1, ave_kernel, mode='same')
ave_i2 = signal.convolve2d(i2, ave_kernel, mode='same')
i1 = ave_i1[0:row:f, 0:col:f]
i2 = ave_i2[0:row:f, 0:col:f]
ave_q1 = signal.convolve2d(q1, ave_kernel, mode='same')
ave_q2 = signal.convolve2d(q2, ave_kernel, mode='same')
q1 = ave_q1[0:row:f, 0:col:f]
q2 = ave_q2[0:row:f, 0:col:f]
ave_y1 = signal.convolve2d(y1, ave_kernel, mode='same')
ave_y2 = signal.convolve2d(y2, ave_kernel, mode='same')
y1 = ave_y1[0:row:f, 0:col:f]
y2 = ave_y2[0:row:f, 0:col:f]
pc1 = phase_cong2(y1)
pc2 = phase_cong2(y2)
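    # Gradient magnitude maps via the Scharr operator.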
dx = np.array([[3, 0, -3], [10, 0, -10], [3, 0, -3]]) / 16
dy = np.array([[3, 10, 3], [0, 0, 0], [-3, -10, -3]]) / 16
ix_y1 = signal.convolve2d(y1, dx, mode='same')
iy_y1 = signal.convolve2d(y1, dy, mode='same')
gradient_map1 = np.sqrt(ix_y1 ** 2 + iy_y1 ** 2)
ix_y2 = signal.convolve2d(y2, dx, mode='same')
iy_y2 = signal.convolve2d(y2, dy, mode='same')
gradient_map2 = np.sqrt(ix_y2 ** 2 + iy_y2 ** 2)
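    # Similarity of the PC and GM maps, pooled with max(PC1, PC2) as the weight.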
t1 = 0.85
t2 = 160
pc_sim_matrix = (2 * pc1 * pc2 + t1) / (pc1 ** 2 + pc2 ** 2 + t1)
gradient_sim_matrix = (2 * gradient_map1 * gradient_map2 + t2) / (gradient_map1 ** 2 + gradient_map2 ** 2 + t2)
pcm = np.maximum(pc1, pc2)
sim_matrix = gradient_sim_matrix * pc_sim_matrix * pcm
fsim_value = np.sum(sim_matrix) / np.sum(pcm)
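    # Chrominance (I/Q) similarity gives the colour extension FSIMc.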
t3 = 200
t4 = 200
i_sim_matrix = (2 * i1 * i2 + t3) / (i1 ** 2 + i2 ** 2 + t3)
q_sim_matrix = (2 * q1 * q2 + t4) / (q1 ** 2 + q2 ** 2 + t4)
lambda_val = 0.03
    # Cast to complex first: the I/Q similarities can be negative, and a fractional
    # power of a negative float would otherwise produce NaN.
    sim_matrix_c = (gradient_sim_matrix * pc_sim_matrix *
                    np.real((i_sim_matrix * q_sim_matrix).astype(complex) ** lambda_val) * pcm)
fsimc_value = np.sum(sim_matrix_c) / np.sum(pcm)
return fsim_value, fsimc_value
if __name__ == '__main__':
    img1 = np.array(Image.open('img1.jpg').convert('L'))
    img2 = np.array(Image.open('img2.jpg').convert('L'))
    fsim_value, fsimc_value = fsim(img1, img2)
    print(f'fsim: {fsim_value}, fsimc: {fsimc_value}')
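Because the inputs above are converted to grayscale, the chrominance terms stay at their neutral value and fsimc equals fsim. A minimal variation for colour inputs (same hypothetical file names) lets the I/Q similarity contribute:

# Keep the RGB channels so the FSIMc chrominance term is meaningful.
img1_rgb = np.array(Image.open('img1.jpg').convert('RGB'))
img2_rgb = np.array(Image.open('img2.jpg').convert('RGB'))
fsim_value, fsimc_value = fsim(img1_rgb, img2_rgb)
print(f'fsim: {fsim_value}, fsimc: {fsimc_value}')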