这是我想做的:

我定期用网络摄像头拍照,类似延时摄影。然而,如果画面没有什么实质性的变化,也就是说,前后两张图片看起来几乎相同,我就不想存储最新的快照。

我想有某种方法可以量化这种差异,我必须根据经验确定一个阈值。

我追求的是简单而不是完美。 我用的是python。


当前回答

import os
from PIL import Image
from PIL import ImageFile
import imagehash
  
# Strict equality check: reports True only for identical pictures (any difference, e.g. in size, gives False)
def compare_image(img_file1, img_file2):
    """Return whether two image files hold exactly the same picture.

    Args:
        img_file1: Path of the first image file.
        img_file2: Path of the second image file.

    Returns:
        True when both paths are equal (no file is opened in that case);
        otherwise the result of comparing the two opened PIL images
        with ``==``.
    """
    if img_file1 == img_file2:
        return True

    # The context manager closes both files even when opening or decoding
    # an image raises, which the manual open()/close() pair did not do.
    with open(img_file1, 'rb') as fp1, open(img_file2, 'rb') as fp2:
        img1 = Image.open(fp1)
        img2 = Image.open(fp2)

        # Module-wide PIL switch; set before the comparison reads the data.
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        # Compare while the files are still open: PIL reads the pixel data
        # lazily, so the streams must be readable at this point.
        return img1 == img2





# Compare pictures through their image hashes
def get_hash_dict(dir):
    """Compute the average hash of every file found under a directory.

    Args:
        dir: Directory to scan. ``os.walk`` is used, so files located in
            sub-directories are visited as well.

    Returns:
        A ``(hash_dict, image_quantity)`` tuple. ``hash_dict`` maps each
        file path to its ``imagehash.average_hash`` value and
        ``image_quantity`` is the number of files that were hashed.
    """
    hash_dict = {}
    image_quantity = 0
    for root, _, files in os.walk(dir):
        for file_name in files:
            # Join with the directory currently being walked: plain
            # ``dir + file_name`` points at the wrong location for files in
            # sub-directories and when ``dir`` lacks a trailing separator.
            path = os.path.join(root, file_name)
            with open(path, 'rb') as fp:
                hash_dict[path] = imagehash.average_hash(Image.open(fp))
            image_quantity += 1

    return hash_dict, image_quantity

def compare_image_with_hash(image_file_name_1, image_file_name_2, max_dif=0):
    """Tell whether two image files are similar according to their average hash.

    max_dif: largest hash difference that still counts as a match. Smaller
    values are stricter; 0 is the minimum and only accepts a difference of 0.
    """
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # Hash both files in the order they were given.
    hashes = []
    for file_name in (image_file_name_1, image_file_name_2):
        with open(file_name, 'rb') as stream:
            hashes.append(imagehash.average_hash(Image.open(stream)))

    distance = abs(hashes[0] - hashes[1])
    return True if distance <= max_dif else False


def compare_image_dir_with_hash(dir_1, dir_2, max_dif=0):
    """Pair every image of the smaller directory with its closest match.

    Args:
        dir_1: First directory to hash (handed to ``get_hash_dict``).
        dir_2: Second directory to hash (handed to ``get_hash_dict``).
        max_dif: Maximum hash difference allowed for a match. Smaller
            values are stricter; the minimum is 0.

    Returns:
        A dict whose keys are the file paths of the directory that holds
        fewer files. Each value is the path from the other directory with
        the smallest hash difference (the first one encountered on ties),
        or None when no file is within ``max_dif``.
    """
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    hash_dict_1, image_quantity_1 = get_hash_dict(dir_1)
    hash_dict_2, image_quantity_2 = get_hash_dict(dir_2)

    # Always look up matches for the directory with fewer files.
    if image_quantity_1 > image_quantity_2:
        hash_dict_1, hash_dict_2 = hash_dict_2, hash_dict_1

    result_dict = {}
    for k_1, v_1 in hash_dict_1.items():
        best_key = None
        best_dif = None
        # One pass per image finds the nearest candidate directly instead of
        # re-scanning every pair once per threshold level 0..max_dif.
        for k_2, v_2 in hash_dict_2.items():
            dif = abs(v_1 - v_2)
            # Strict "<" keeps the earliest candidate when differences tie.
            if best_dif is None or dif < best_dif:
                best_key, best_dif = k_2, dif

        if best_dif is not None and best_dif <= max_dif:
            result_dict[k_1] = best_key
        else:
            result_dict[k_1] = None
    return result_dict


def main():
    """Demo: run the three comparison helpers on sample pictures and print the results."""
    first_picture = 'image1\\815.jpg'
    second_picture = 'image2\\5.jpg'

    print(compare_image(first_picture, second_picture))
    print(compare_image_with_hash(first_picture, second_picture, 7))

    matches = compare_image_dir_with_hash('image1\\', 'image2\\', 10)
    for source_path, matched_path in matches.items():
        print(source_path, matched_path)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

输出:

False
True
image2\5.jpg image1\815.jpg
image2\6.jpg image1\819.jpg
image2\7.jpg image1\900.jpg
image2\8.jpg image1\998.jpg
image2\9.jpg image1\1012.jpg

示例图片: 815.jpg 5.jpg

其他回答

两种流行且相对简单的方法是:(a)已经提出的欧几里得距离,或(b)归一化互相关。与简单的互相关相比,归一化互相关对光照变化的鲁棒性明显更强。维基百科给出了归一化互相关的公式。更复杂的方法也存在,但它们需要更多的工作。

使用numpy-like语法,

dist_euclidean = sqrt(sum((i1 - i2)**2)) / i1.size

dist_manhattan = sum(abs(i1 - i2)) / i1.size

dist_ncc = sum( (i1 - mean(i1)) * (i2 - mean(i2)) ) / (
  (i1.size - 1) * stdev(i1) * stdev(i2) )

假设i1和i2为二维灰度图像阵列。

看看Haar小波是如何由isk-daemon实现的。你可以使用它的imgdb c++代码来实时计算图像之间的差异:

isk-daemon是一个开源的数据库服务器,能够将基于内容的(可视的)图像搜索添加到任何与图像相关的网站或软件。 这项技术允许任何与图像相关的网站或软件的用户在小部件上绘制他们想要查找的图像,并让网站回复他们最相似的图像,或者直接在每个图像的详情页面上请求更多相似的照片。

有很多指标可以用来评估两张图片是否像/有多像。

这里我就不讲代码了,因为我认为这应该是一个科学问题,而不是技术问题。

一般来说,问题与人类对图像的感知有关,因此每种算法都有其对人类视觉系统特征的支持。

经典方法有:

可见差异预测器:一种评估图像保真度的算法(https://www.spiedigitallibrary.org/conference-proceedings-of-spie/1666/0000/Visible-differences-predictor--an-algorithm-for-the-assessment-of/10.1117/12.135952.short?SSO=1)

图像质量评估:从错误可见性到结构相似性(http://www.cns.nyu.edu/pub/lcv/wang03-reprint.pdf)

FSIM:一种用于图像质量评估的特征相似度指数(https://www4.comp.polyu.edu.hk/~cslzhang/IQA/TIP_IQA_FSIM.pdf)

其中,SSIM (Image Quality Assessment: From Error Visibility to Structural Similarity)是最容易计算的,其开销也较小,另一篇论文《基于梯度相似度的图像质量评估》(https://www.semanticscholar.org/paper/Image-Quality-Assessment-Based-on-Gradient-Liu-Lin/2b819bef80c02d5d4cb56f27b202535e119df988)也有报道。

还有很多其他的方法。如果你对艺术感兴趣或真正关心,可以在谷歌Scholar上搜索“视觉差异”、“图像质量评估”等。

下面是我写的一个函数,它以2个图像(文件路径)作为参数,并返回两个图像“像素”组件之间的平均差值。这对我确定视觉上“相等”的图像(当它们不==相等时)非常有效。

(我发现 8 是判断图像本质上是否相同的一个很好的阈值。)

(如果不添加预处理,图像必须具有相同的尺寸。)

from PIL import Image

def imagesDifference( imageA, imageB ):
    """Return the mean absolute difference between two images' RGB components.

    Args:
        imageA: Path of the first image file.
        imageB: Path of the second image file.

    Returns:
        The average of ``abs(a - b)`` over every red, green and blue
        component of corresponding pixels (0 means identical pixel data),
        or -1 when the two images do not have the same dimensions.
    """
    # Context managers release the underlying image files when done.
    with Image.open(imageA, r'r') as first, Image.open(imageB, r'r') as second:
        # Compare width and height, not just the pixel count: a 100x200 and
        # a 200x100 image hold the same number of pixels but are not aligned.
        if first.size != second.size:
            return -1

        pixels_a = first.convert(r'RGB').getdata()
        pixels_b = second.convert(r'RGB').getdata()

        # Keep running totals instead of building a list of every component.
        total = 0
        components = 0
        for pixel_a, pixel_b in zip(pixels_a, pixels_b):
            total += (abs(pixel_a[0] - pixel_b[0])
                      + abs(pixel_a[1] - pixel_b[1])
                      + abs(pixel_a[2] - pixel_b[2]))
            components += 3

    if components == 0:
        # Two images without pixels have nothing that differs.
        return 0.0
    return total / components
import os
from PIL import Image
from PIL import ImageFile
import imagehash
  
# Strict equality check: reports True only for identical pictures (any difference, e.g. in size, gives False)
def compare_image(img_file1, img_file2):
    """Return whether two image files hold exactly the same picture.

    Args:
        img_file1: Path of the first image file.
        img_file2: Path of the second image file.

    Returns:
        True when both paths are equal (no file is opened in that case);
        otherwise the result of comparing the two opened PIL images
        with ``==``.
    """
    if img_file1 == img_file2:
        return True

    # The context manager closes both files even when opening or decoding
    # an image raises, which the manual open()/close() pair did not do.
    with open(img_file1, 'rb') as fp1, open(img_file2, 'rb') as fp2:
        img1 = Image.open(fp1)
        img2 = Image.open(fp2)

        # Module-wide PIL switch; set before the comparison reads the data.
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        # Compare while the files are still open: PIL reads the pixel data
        # lazily, so the streams must be readable at this point.
        return img1 == img2





# Compare pictures through their image hashes
def get_hash_dict(dir):
    """Compute the average hash of every file found under a directory.

    Args:
        dir: Directory to scan. ``os.walk`` is used, so files located in
            sub-directories are visited as well.

    Returns:
        A ``(hash_dict, image_quantity)`` tuple. ``hash_dict`` maps each
        file path to its ``imagehash.average_hash`` value and
        ``image_quantity`` is the number of files that were hashed.
    """
    hash_dict = {}
    image_quantity = 0
    for root, _, files in os.walk(dir):
        for file_name in files:
            # Join with the directory currently being walked: plain
            # ``dir + file_name`` points at the wrong location for files in
            # sub-directories and when ``dir`` lacks a trailing separator.
            path = os.path.join(root, file_name)
            with open(path, 'rb') as fp:
                hash_dict[path] = imagehash.average_hash(Image.open(fp))
            image_quantity += 1

    return hash_dict, image_quantity

def compare_image_with_hash(image_file_name_1, image_file_name_2, max_dif=0):
    """Tell whether two image files are similar according to their average hash.

    max_dif: largest hash difference that still counts as a match. Smaller
    values are stricter; 0 is the minimum and only accepts a difference of 0.
    """
    ImageFile.LOAD_TRUNCATED_IMAGES = True

    # Hash both files in the order they were given.
    hashes = []
    for file_name in (image_file_name_1, image_file_name_2):
        with open(file_name, 'rb') as stream:
            hashes.append(imagehash.average_hash(Image.open(stream)))

    distance = abs(hashes[0] - hashes[1])
    return True if distance <= max_dif else False


def compare_image_dir_with_hash(dir_1, dir_2, max_dif=0):
    """Pair every image of the smaller directory with its closest match.

    Args:
        dir_1: First directory to hash (handed to ``get_hash_dict``).
        dir_2: Second directory to hash (handed to ``get_hash_dict``).
        max_dif: Maximum hash difference allowed for a match. Smaller
            values are stricter; the minimum is 0.

    Returns:
        A dict whose keys are the file paths of the directory that holds
        fewer files. Each value is the path from the other directory with
        the smallest hash difference (the first one encountered on ties),
        or None when no file is within ``max_dif``.
    """
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    hash_dict_1, image_quantity_1 = get_hash_dict(dir_1)
    hash_dict_2, image_quantity_2 = get_hash_dict(dir_2)

    # Always look up matches for the directory with fewer files.
    if image_quantity_1 > image_quantity_2:
        hash_dict_1, hash_dict_2 = hash_dict_2, hash_dict_1

    result_dict = {}
    for k_1, v_1 in hash_dict_1.items():
        best_key = None
        best_dif = None
        # One pass per image finds the nearest candidate directly instead of
        # re-scanning every pair once per threshold level 0..max_dif.
        for k_2, v_2 in hash_dict_2.items():
            dif = abs(v_1 - v_2)
            # Strict "<" keeps the earliest candidate when differences tie.
            if best_dif is None or dif < best_dif:
                best_key, best_dif = k_2, dif

        if best_dif is not None and best_dif <= max_dif:
            result_dict[k_1] = best_key
        else:
            result_dict[k_1] = None
    return result_dict


def main():
    """Demo: run the three comparison helpers on sample pictures and print the results."""
    first_picture = 'image1\\815.jpg'
    second_picture = 'image2\\5.jpg'

    print(compare_image(first_picture, second_picture))
    print(compare_image_with_hash(first_picture, second_picture, 7))

    matches = compare_image_dir_with_hash('image1\\', 'image2\\', 10)
    for source_path, matched_path in matches.items():
        print(source_path, matched_path)


# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

输出:

False
True
image2\5.jpg image1\815.jpg
image2\6.jpg image1\819.jpg
image2\7.jpg image1\900.jpg
image2\8.jpg image1\998.jpg
image2\9.jpg image1\1012.jpg

示例图片: 815.jpg 5.jpg