Python implementation of Feature Similarity Index (FSIM)
Abstract
FSIM[1] is an image quality assessment metric proposed by Zhang et al. in 2011. Like the human visual system (HVS), it judges images by their low-level features. This article introduces the FSIM metric in two parts: first the mathematical model, then its implementation in Python.
Formula
FSIM combines phase congruency (PC) and gradient magnitude (GM); a higher FSIM value indicates that the two images are more similar. Its mathematical model is as follows:
$$S_{PC}(\mathbf{x}) = \frac{2\,PC_1(\mathbf{x})\,PC_2(\mathbf{x}) + T_1}{PC_1^2(\mathbf{x}) + PC_2^2(\mathbf{x}) + T_1}, \qquad S_G(\mathbf{x}) = \frac{2\,G_1(\mathbf{x})\,G_2(\mathbf{x}) + T_2}{G_1^2(\mathbf{x}) + G_2^2(\mathbf{x}) + T_2}$$

$$\mathrm{FSIM} = \frac{\sum_{\mathbf{x}\in\Omega} S_{PC}(\mathbf{x})\,S_G(\mathbf{x})\,PC_m(\mathbf{x})}{\sum_{\mathbf{x}\in\Omega} PC_m(\mathbf{x})}, \qquad PC_m(\mathbf{x}) = \max\bigl(PC_1(\mathbf{x}),\, PC_2(\mathbf{x})\bigr)$$

where the phase congruency maps of images $f_1$ and $f_2$ are denoted by $PC_1$ and $PC_2$, respectively, and their gradient magnitudes by $G_1$ and $G_2$. $T_1$ and $T_2$ are two positive constants that keep the ratios numerically stable; the code below uses $T_1 = 0.85$ and $T_2 = 160$, the values from the paper.
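Before the full implementation, a minimal NumPy sketch of this pooling step may help connect the formula to the code below. It assumes pc1, pc2, g1, and g2 are precomputed phase congruency and gradient magnitude maps (hypothetical names; the full code derives them from the input images):

import numpy as np

def fsim_pool(pc1, pc2, g1, g2, t1=0.85, t2=160):
    # Per-pixel similarity of each pair of feature maps.
    s_pc = (2 * pc1 * pc2 + t1) / (pc1 ** 2 + pc2 ** 2 + t1)
    s_g = (2 * g1 * g2 + t2) / (g1 ** 2 + g2 ** 2 + t2)
    # Pool over the image, weighting each pixel by max(PC1, PC2).
    pc_m = np.maximum(pc1, pc2)
    return np.sum(s_pc * s_g * pc_m) / np.sum(pc_m)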
Code
from PIL import Image
import numpy as np
from scipy import signal
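# Normalised frequency coordinates in [-0.5, 0.5) for building FFT-domain filters.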
def normalization(col, row):
if col % 2 == 1:
xrange = np.arange(-(col - 1) / 2, (col - 1) / 2 + 1) / (col - 1)
else:
xrange = np.arange(-col / 2, col / 2) / col
if row % 2 == 1:
yrange = np.arange(-(row - 1) / 2, (row - 1) / 2 + 1) / (row - 1)
else:
yrange = np.arange(-row / 2, row / 2) / row
return xrange, yrange
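# Phase congruency map of a grayscale image, modeled on Peter Kovesi's phasecong2
# (log-Gabor filters over 4 scales and 4 orientations, with noise compensation).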
def phase_cong2(im):
nscale = 4
norient = 4
min_wave_length = 6
mult = 2
sigma_onf = 0.55
d_theta_on_sigma = 1.2
k = 2.0
epsilon = 0.0001
theta_sigma = np.pi / norient / d_theta_on_sigma
row, col = im.shape
image_fft = np.fft.fft2(im)
zero = np.zeros((row, col))
eo = np.zeros((nscale, norient, row, col), dtype=complex)
est_mean_e2n = np.zeros(norient)
ifft_filter_array = np.zeros((nscale, row, col))
xrange, yrange = normalization(col, row)
x, y = np.meshgrid(xrange, yrange)
radius = np.sqrt(x ** 2 + y ** 2)
theta = np.arctan2(-y, x)
radius = np.fft.ifftshift(radius)
theta = np.fft.ifftshift(theta)
radius[0, 0] = 1
sintheta = np.sin(theta)
costheta = np.cos(theta)
lp = low_pass_filter((row, col), 0.45, 15)
log_gabor = np.zeros((nscale, row, col))
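    # Radial component: one log-Gabor band per scale, damped by the low-pass filter.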
for s in range(nscale):
wavelength = min_wave_length * mult ** s
fo = 1.0 / wavelength
log_gabor[s] = np.exp(-(np.log(radius / fo)) ** 2 / (2 * np.log(sigma_onf) ** 2))
log_gabor[s] = log_gabor[s] * lp
log_gabor[s, 0, 0] = 0
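    # Angular component: Gaussian spread of each orientation about its filter angle.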
spread = np.zeros((norient, row, col))
for o in range(norient):
angl = o * np.pi / norient
ds = sintheta * np.cos(angl) - costheta * np.sin(angl)
dc = costheta * np.cos(angl) + sintheta * np.sin(angl)
dtheta = np.abs(np.arctan2(ds, dc))
spread[o] = np.exp(-(dtheta ** 2) / (2 * theta_sigma ** 2))
energy_all = np.zeros((row, col))
an_all = np.zeros((row, col))
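    # Accumulate filter responses, energy, and amplitude for each orientation.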
for o in range(norient):
sum_e_this_orient = zero.copy()
sum_o_this_orient = zero.copy()
sum_an_this_orient = zero.copy()
energy = zero.copy()
max_an = 0
em_n = epsilon
for s in range(nscale):
filter_val = log_gabor[s] * spread[o]
ifft_filt = np.real(np.fft.ifft2(filter_val)) * np.sqrt(row * col)
ifft_filter_array[s] = ifft_filt
eo[s, o] = np.fft.ifft2(image_fft * filter_val)
an = np.abs(eo[s, o])
sum_an_this_orient = sum_an_this_orient + an
sum_e_this_orient = sum_e_this_orient + np.real(eo[s, o])
sum_o_this_orient = sum_o_this_orient + np.imag(eo[s, o])
if s == 0:
em_n = np.sum(filter_val ** 2)
max_an = an
else:
max_an = np.maximum(max_an, an)
x_energy = np.sqrt(sum_e_this_orient ** 2 + sum_o_this_orient ** 2) + epsilon
mean_e = sum_e_this_orient / x_energy
mean_o = sum_o_this_orient / x_energy
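        # Project each scale's response onto the mean phase vector (the energy term).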
for s in range(nscale):
e = np.real(eo[s, o])
o_val = np.imag(eo[s, o])
energy = energy + e * mean_e + o_val * mean_o - np.abs(e * mean_o - o_val * mean_e)
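        # Estimate the noise threshold from the amplitude distribution at the smallest scale.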
median_e2n = np.median(np.abs(eo[0, o]) ** 2)
mean_e2n = -median_e2n / np.log(0.5)
est_mean_e2n[o] = mean_e2n
noise_power = mean_e2n / em_n
est_sum_an2 = zero.copy()
for s in range(nscale):
est_sum_an2 = est_sum_an2 + ifft_filter_array[s] ** 2
est_sum_aiaj = zero.copy()
for si in range(nscale - 1):
for sj in range(si + 1, nscale):
est_sum_aiaj = est_sum_aiaj + ifft_filter_array[si] * ifft_filter_array[sj]
sum_est_sum_an2 = np.sum(est_sum_an2)
sum_est_sum_aiaj = np.sum(est_sum_aiaj)
est_noise_energy2 = 2 * noise_power * sum_est_sum_an2 + 4 * noise_power * sum_est_sum_aiaj
tau = np.sqrt(est_noise_energy2 / 2)
est_noise_energy = tau * np.sqrt(np.pi / 2)
est_noise_energy_sigma = np.sqrt((2 - np.pi / 2) * tau ** 2)
t = est_noise_energy + k * est_noise_energy_sigma
t = t / 1.7
energy = np.maximum(energy - t, zero)
energy_all = energy_all + energy
an_all = an_all + sum_an_this_orient
result_pc = energy_all / an_all
return result_pc
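# Butterworth-style low-pass filter in the frequency domain; cutoff is a fraction
# of the sampling frequency (0 to 0.5) and n is the filter order.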
def low_pass_filter(sze, cutoff, n):
if cutoff < 0 or cutoff > 0.5:
raise ValueError('cutoff frequency must be between 0 and 0.5')
if n % 1 != 0 or n < 1:
raise ValueError('n must be an integer >= 1')
    if np.isscalar(sze):
        row = col = sze
    else:
        row = sze[0]
        col = sze[1]
xrange, yrange = normalization(col, row)
x, y = np.meshgrid(xrange, yrange)
radius = np.sqrt(x ** 2 + y ** 2)
f = np.fft.ifftshift(1.0 / (1.0 + (radius / cutoff) ** (2 * n)))
return f
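# FSIM and FSIMc between two images of equal size (grayscale or RGB uint8 arrays).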
def fsim(img_1, img_2):
row, col = img_1.shape[:2]
i1 = np.ones((row, col))
i2 = np.ones((row, col))
q1 = np.ones((row, col))
q2 = np.ones((row, col))
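    # I/Q default to ones so the chrominance similarity is neutral for grayscale input;
    # for colour input, convert RGB to YIQ (Y is luminance, I and Q are chrominance).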
if len(img_1.shape) == 3:
y1 = (0.299 * img_1[:, :, 0].astype(float) + 0.587 * img_1[:, :, 1].astype(float) +
0.114 * img_1[:, :, 2].astype(float))
y2 = (0.299 * img_2[:, :, 0].astype(float) + 0.587 * img_2[:, :, 1].astype(float) +
0.114 * img_2[:, :, 2].astype(float))
i1 = (0.596 * img_1[:, :, 0].astype(float) - 0.274 * img_1[:, :, 1].astype(float) -
0.322 * img_1[:, :, 2].astype(float))
i2 = (0.596 * img_2[:, :, 0].astype(float) - 0.274 * img_2[:, :, 1].astype(float) -
0.322 * img_2[:, :, 2].astype(float))
q1 = (0.211 * img_1[:, :, 0].astype(float) - 0.523 * img_1[:, :, 1].astype(float) +
0.312 * img_1[:, :, 2].astype(float))
q2 = (0.211 * img_2[:, :, 0].astype(float) - 0.523 * img_2[:, :, 1].astype(float) +
0.312 * img_2[:, :, 2].astype(float))
    else:
        y1 = img_1.astype(float)
        y2 = img_2.astype(float)
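    # Average-pool and downsample so the shorter side works out to roughly 256 px,
    # as in the reference implementation.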
min_dimension = min(row, col)
f = max(1, round(min_dimension / 256))
ave_kernel = np.ones((f, f)) / (f * f)
ave_i1 = signal.convolve2d(i1, ave_kernel, mode='same')
ave_i2 = signal.convolve2d(i2, ave_kernel, mode='same')
i1 = ave_i1[0:row:f, 0:col:f]
i2 = ave_i2[0:row:f, 0:col:f]
ave_q1 = signal.convolve2d(q1, ave_kernel, mode='same')
ave_q2 = signal.convolve2d(q2, ave_kernel, mode='same')
q1 = ave_q1[0:row:f, 0:col:f]
q2 = ave_q2[0:row:f, 0:col:f]
ave_y1 = signal.convolve2d(y1, ave_kernel, mode='same')
ave_y2 = signal.convolve2d(y2, ave_kernel, mode='same')
y1 = ave_y1[0:row:f, 0:col:f]
y2 = ave_y2[0:row:f, 0:col:f]
pc1 = phase_cong2(y1)
pc2 = phase_cong2(y2)
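    # Gradient magnitude maps via the Scharr operator.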
dx = np.array([[3, 0, -3], [10, 0, -10], [3, 0, -3]]) / 16
dy = np.array([[3, 10, 3], [0, 0, 0], [-3, -10, -3]]) / 16
ix_y1 = signal.convolve2d(y1, dx, mode='same')
iy_y1 = signal.convolve2d(y1, dy, mode='same')
gradient_map1 = np.sqrt(ix_y1 ** 2 + iy_y1 ** 2)
ix_y2 = signal.convolve2d(y2, dx, mode='same')
iy_y2 = signal.convolve2d(y2, dy, mode='same')
gradient_map2 = np.sqrt(ix_y2 ** 2 + iy_y2 ** 2)
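    # Similarity of the PC and GM maps, pooled with max(PC1, PC2) as the weight.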
t1 = 0.85
t2 = 160
pc_sim_matrix = (2 * pc1 * pc2 + t1) / (pc1 ** 2 + pc2 ** 2 + t1)
gradient_sim_matrix = (2 * gradient_map1 * gradient_map2 + t2) / (gradient_map1 ** 2 + gradient_map2 ** 2 + t2)
pcm = np.maximum(pc1, pc2)
sim_matrix = gradient_sim_matrix * pc_sim_matrix * pcm
fsim_value = np.sum(sim_matrix) / np.sum(pcm)
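    # Chrominance (I/Q) similarity gives the colour extension FSIMc.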
t3 = 200
t4 = 200
i_sim_matrix = (2 * i1 * i2 + t3) / (i1 ** 2 + i2 ** 2 + t3)
q_sim_matrix = (2 * q1 * q2 + t4) / (q1 ** 2 + q2 ** 2 + t4)
lambda_val = 0.03
    # Cast to complex first: the I/Q similarities can be negative, and a fractional
    # power of a negative float would otherwise produce NaN.
    sim_matrix_c = (gradient_sim_matrix * pc_sim_matrix *
                    np.real((i_sim_matrix * q_sim_matrix).astype(complex) ** lambda_val) * pcm)
fsimc_value = np.sum(sim_matrix_c) / np.sum(pcm)
return fsim_value, fsimc_value
if __name__ == '__main__':
    img1 = np.array(Image.open('img1.jpg').convert('L'))
    img2 = np.array(Image.open('img2.jpg').convert('L'))
    fsim_value, fsimc_value = fsim(img1, img2)
    print(f'fsim: {fsim_value}, fsimc: {fsimc_value}')
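Because the inputs above are converted to grayscale, the chrominance terms stay at their neutral value and fsimc equals fsim. A minimal variation for colour inputs (same hypothetical file names) lets the I/Q similarity contribute:

# Keep the RGB channels so the FSIMc chrominance term is meaningful.
img1_rgb = np.array(Image.open('img1.jpg').convert('RGB'))
img2_rgb = np.array(Image.open('img2.jpg').convert('RGB'))
fsim_value, fsimc_value = fsim(img1_rgb, img2_rgb)
print(f'fsim: {fsim_value}, fsimc: {fsimc_value}')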