比赛背景

精确的图像检索是拍照购物场景下的核心技术,也是学术界和工业界的研究热点。在本次大赛中,主办方提供真实拍照购物场景下的数码设备图像数据集,参赛选手基于给定的数据构建图像检索模型。希望通过本次大赛发现图像检索领域的人才,推动该领域算法的发展。

赛题说明

本题目将为选手提供数码设备图片数据集,包含手机、智能穿戴、PC、平板、音箱、路由器等粗粒度数码设备图片和细粒度(如不同外观的手机等)数码设备图片,完成数码设备图像检索任务,即给定一张含有数码设备的查询图片,算法需要在数码设备图像库中查找并返回含有该商品的图片。

本次比赛提供的数据集包含两部分:训练集和测试集。其中训练集包含不同数码商品,每个数码设备商品会对应数量不等的图片;测试集包含查询图片和检索图像库,用于指标评估。

赛事数据

文件名称 文件格式及大小
train_data.zip zip(11.4 G)
test_data_A.zip zip(11.6 G)
test_data_B.zip zip(19.4 G)

数据说明 数据概括

比赛数据包含:训练数据集(含标注文件)、测试数据集。

训练数据集:

训练数据集将提供数码商品图片和商品ID标签,每个数码商品ID会对应数量不等的商品图片,所有的图片为jpg格式。标注文件为文本文件,每一行格式为:图像名,商品ID。

测试数据集:

测试集包括查询图片集(query)和图像检索库(gallery),不提供ID标签。测试集分两个阶段(A/B榜),A/B榜测试数据采用同一份整体数据进行划分,但B榜测试集规模约为A榜测试集规模的两倍,其中A榜测试集用于参赛选手模型实时评估,B榜测试集在比赛限定时间内开放,用于最终的成绩评定和排名。

数据和模型使用

为了保证公平性,本次大赛对数据集和模型限制如下:

  1. 不允许参赛选手对大赛测试数据集进行额外标注。

  2. 允许使用在ImageNet数据上训练的图像分类模型作为特征提取模型初始化,不允许使用除此之外的外部数据集或外部数据集训练的预训练模型。

  3. 测试数据集只允许用于测试,禁止在训练阶段以任何形式使用测试数据集。

违反上述规则的参赛者,取消参赛成绩或参赛资格。

对图片的预处理

#脚本文件 preprocessing_img.py 主要对目录下的所有图片进行大小的比例重整,以及重写图片
import cv2
import os
import sys

# Directory whose images will be resized in place (first CLI argument).
in_dir = sys.argv[1]
print('in_dir:', in_dir)

def resize_img_maxH_and_imgW(img, maxH=1024, maxW=1024):
    """Shrink *img* so that height <= maxH and width <= maxW, keeping aspect ratio.

    The image is only ever downscaled (never enlarged); if it already fits,
    the original array is returned unchanged.
    """
    height, width = img.shape[0], img.shape[1]
    if height > maxH:
        width = int(maxH * width / height)
        height = maxH
    if width > maxW:
        height = int(maxW * height / width)
        width = maxW
    # Resize only when the target size actually differs from the source.
    needs_resize = (img.shape[0] != height) or (img.shape[1] != width)
    if needs_resize:
        img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
    return img

# Rewrite every image in in_dir at a bounded size.
# FIX: use os.listdir instead of shelling out via os.popen('ls ...') — same
# file set (sorted to match ls ordering), no shell dependency, no quoting bugs.
for name in sorted(os.listdir(in_dir)):
    img_path = os.path.join(in_dir, name)
    print(img_path)
    img = cv2.imread(img_path)
    if img is None:
        # FIX: cv2.imread returns None for unreadable/non-image files; the
        # original then crashed inside resize. Skip such entries instead.
        print('skip (not an image):', img_path)
        continue
    img = resize_img_maxH_and_imgW(img)
    cv2.imwrite(img_path, img)

全局特征的生成

# The global-feature model is an ImageNet-pretrained ResNet50 backbone plus a
# 3097-way fully connected layer used for fine-tuning; afterwards the FC layer
# is dropped and the pooled CNN output serves as the image's global feature.
# Feature distances are cosine distances; this alone scores about 0.4 on
# leaderboard A. Model definition follows.
NUM_CLASS = 3097   # presumably the number of distinct product IDs in the training set — TODO confirm
batch_size = 128

# Backbone without the ImageNet classification head.
resnet = ResNet50(weights='imagenet',include_top=False)
my_resnet = Sequential([
    resnet,
    layers.GlobalAveragePooling2D()   # pools feature maps into a 2048-d vector
])
my_resnet.summary()
# Classification head used only during fine-tuning; discarded for retrieval.
fc_net = Sequential([
    layers.Dense(NUM_CLASS)
])
fc_net.build(input_shape=[batch_size,2048])

参考基于 VGG-16 的海量图像检索博客

返回检索的 top-10 张图片

# -*- coding: utf-8 -*-

import h5py
import os

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import argparse

import numpy as np
import tensorflow as tf
from numpy import linalg as LA
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from about_img import get_shuffle_path_and_label_list, get_img_data, resize_img_maxH_and_imgW, prepare_fc_train_data
from tensorflow.keras import layers, Sequential


# CLI: -index  path to the HDF5 file with precomputed gallery features
#      -query_dir  directory of query images
ap = argparse.ArgumentParser()
ap.add_argument("-index", required = True,
    help = "Path to index")
ap.add_argument("-query_dir", required = True,
    help = "Path for query_dir")
args = vars(ap.parse_args())


# read in indexed images' feature vectors and corresponding image names
h5f = h5py.File(args["index"],'r')
feats = h5f['dataset_1'][:]      # gallery feature matrix — presumably (num_gallery, 2048), L2-normalised; verify against the indexer script
imgNames = h5f['dataset_2'][:]   # gallery image names as bytes (decoded later)
h5f.close()

def make_model(weight_path='v2_save_model_resnet_fc_net/my_resnet_weight.ckpt'):
    """Build the global-feature extractor and load its fine-tuned weights.

    The extractor is a headless ImageNet ResNet50 followed by global average
    pooling; *weight_path* points at the checkpoint produced by fine-tuning.
    """
    backbone = ResNet50(weights='imagenet', include_top=False)
    extractor = Sequential([
        backbone,
        layers.GlobalAveragePooling2D(),
    ])
    extractor.load_weights(weight_path)
    return extractor

# Retrieve the top-10 gallery images for every query image and write the
# submission file in the format "<query>.jpg,{img1,...,img10}".
query_dir = args["query_dir"]

TOP_K = 10  # number of top retrieved images kept per query

# init model
model = make_model()
with open('submission.csv', 'w', encoding='utf-8') as f:
    for img_name in os.listdir(query_dir):
        img_path = os.path.join(query_dir, img_name)
        # extract the query image's global feature
        img = get_img_data(img_path)
        img = resize_img_maxH_and_imgW(img)
        img = preprocess_input(img)
        img = tf.expand_dims(img, axis=0)
        feat = model(img)
        # L2-normalise so the dot product below is cosine similarity
        queryVec = feat[0] / LA.norm(feat[0])

        scores = np.dot(queryVec, feats.T)
        rank_ID = np.argsort(scores)[::-1]  # gallery indices, best first

        # FIX: dropped the unused enumerate() index and the unused
        # rank_score array from the original; output is unchanged.
        top_names = [imgNames[idx].decode('utf-8') for idx in rank_ID[:TOP_K]]
        f.write(img_name + ',{' + ','.join(top_names) + '}\n')

使用局部特征进行检索

局部特征的训练和生成

可以参考论文 Large-Scale Image Retrieval with Attentive Deep Local Features ,以及 github delf 项目 训练并且生成局部特征。

增量 pca 降维

import os
import numpy as np
import joblib
import random
from sklearn.decomposition import IncrementalPCA
from multiprocessing import Process
from delf import feature_io

def get_batch_local_features(features_dir, batch_size=1000000):
    """Yield batches of DELF descriptors read from *features_dir*.

    Files are visited in shuffled order; descriptors are accumulated until a
    batch holds exactly *batch_size* rows, which is then yielded. The final
    partial batch (if any) is yielded as well — note it can be smaller than
    *batch_size*.

    Yields:
        np.ndarray of descriptors, shape (batch_size, dim) except possibly
        the last one.
    """
    if batch_size < 1000:
        # Refuse degenerate batch sizes; the generator then yields nothing.
        print('batch_size must be bigger than 1000')
        return
    names = os.listdir(features_dir)
    delf_path_list = [os.path.join(features_dir, name) for name in names]
    random.shuffle(delf_path_list)
    print(len(delf_path_list))

    features = []
    for i, delf_path in enumerate(delf_path_list):
        _, _, descriptors, _, _ = feature_io.ReadFromFile(delf_path)
        print(i, delf_path, len(features), descriptors.shape)
        if batch_size - len(features) > len(descriptors):
            features.extend(descriptors)
        else:
            # Fill the current batch exactly, emit it, carry the rest over.
            cut_point = batch_size - len(features)
            features.extend(descriptors[:cut_point])
            yield np.array(features)
            features = list(descriptors[cut_point:])
    # BUG FIX: the original ended with `return np.array(features)`, which in a
    # generator discards the value — the leftover descriptors were silently
    # dropped. Yield them instead.
    if features:
        yield np.array(features)

def ipca(features_dir='testB/gallery_features', save_pca_dir='testB/pca_model/',
         batch_size=1000000, reduce_dim=40):
    """Fit an IncrementalPCA on L2-normalised DELF descriptors, batch by batch.

    Generalised from the original hard-coded version: paths and sizes are now
    parameters whose defaults reproduce the old behaviour exactly. A model
    checkpoint is dumped every 5 batches and the final model is written to
    <save_pca_dir>/pca.m.
    """
    os.makedirs(save_pca_dir, exist_ok=True)  # robustness: output dir may not exist
    # FIX: the local no longer shadows the function name `ipca`.
    model = IncrementalPCA(n_components=reduce_dim, batch_size=batch_size)
    for i, features in enumerate(get_batch_local_features(features_dir, batch_size)):
        # Normalise descriptors the same way they are normalised at transform time.
        features = features / np.linalg.norm(features, axis=1, keepdims=True)
        # IncrementalPCA requires each batch to have >= n_components samples;
        # guard against a short trailing batch.
        if len(features) >= reduce_dim:
            model.partial_fit(features)
        if i % 5 == 0:
            joblib.dump(model, os.path.join(save_pca_dir, 'pca_' + str(i) + '.m'))
    joblib.dump(model, os.path.join(save_pca_dir, 'pca.m'))

def pca_reduce_delf(delf_path_list, to_features_dir, pca_file):
    """Project each DELF file's descriptors through a fitted PCA and re-save.

    For every file in *delf_path_list*: L2-normalise the descriptors, apply
    the IncrementalPCA loaded from *pca_file*, re-normalise, and write the
    result (with the original locations/scales/attention) into
    *to_features_dir* under the same basename.
    """
    pca_model = joblib.load(pca_file)
    for delf_path in delf_path_list:
        locations, scales, descriptors, attention, _orientations = feature_io.ReadFromFile(delf_path)
        print(descriptors.shape)
        normed = descriptors / np.linalg.norm(descriptors, axis=1, keepdims=True)
        reduced = pca_model.transform(normed)
        reduced = reduced / np.linalg.norm(reduced, axis=1, keepdims=True)
        out_path = os.path.join(to_features_dir, os.path.basename(delf_path))
        feature_io.WriteToFile(out_path, locations, scales, reduced, attention)

def _reduce_dir_in_parallel(features_dir, to_features_dir, pca_file, n_workers=10):
    """Split the files in *features_dir* into n_workers chunks and run
    pca_reduce_delf on each chunk in its own process."""
    names = os.listdir(features_dir)
    delf_path_list = [os.path.join(features_dir, name) for name in names]
    pros_list = []
    for i in range(n_workers):
        start = i * len(delf_path_list) // n_workers
        end = (i + 1) * len(delf_path_list) // n_workers
        p = Process(target=pca_reduce_delf,
                    kwargs={"delf_path_list": delf_path_list[start:end],
                            "to_features_dir": to_features_dir,
                            "pca_file": pca_file})
        pros_list.append(p)
        p.start()
    for p in pros_list:
        p.join()

def main():
    """Reduce gallery and query DELF descriptors to 40 dims with the fitted PCA.

    FIX: the original duplicated the whole fork/join loop for the gallery and
    the query directories; both now go through _reduce_dir_in_parallel.
    """
    pca_file = 'testB/pca_model/pca.m'
    _reduce_dir_in_parallel('testB/gallery_features', 'testB/gallery_features_pca_40', pca_file)
    _reduce_dir_in_parallel('testB/query_features', 'testB/query_features_pca_40', pca_file)

if __name__ == '__main__':
    ipca()
    main()

特征聚合,索引建立

import faiss
import os
import numpy as np
from delf import feature_io
from collections import Counter
import datetime

def make_index_table(descriptors_list):
    """Build bidirectional maps between image indices and descriptor indices.

    Descriptors are numbered globally in the order their images appear in
    *descriptors_list*. Returns (des_from_img, img_from_des), where
    des_from_img[i] is the list of global descriptor indices owned by image i
    and img_from_des maps each global descriptor index back to its image.
    """
    des_from_img = {}
    img_from_des = {}
    offset = 0
    for img_idx, des_list in enumerate(descriptors_list):
        indices = list(range(offset, offset + len(des_list)))
        des_from_img[img_idx] = indices
        img_from_des.update({des_idx: img_idx for des_idx in indices})
        offset += len(des_list)
    return des_from_img, img_from_des

def get_similar_img(query_des2imgList, query_des_from_img):
    """Rank gallery images for each query image by matched-descriptor votes.

    query_des2imgList: global query-descriptor index -> list of gallery image
        indices its nearest neighbours belong to.
    query_des_from_img: query image index -> its global descriptor indices.

    Returns {query_img: {'index': tuple of gallery indices sorted by vote
    count (desc), 'freq': matching tuple of vote counts}}.
    """
    query_img2imgFreq = {}
    # walk each query image's descriptor list
    for img_i in query_des_from_img:
        all_searched_des = []
        for des_i in query_des_from_img[img_i]:
            all_searched_des.extend(query_des2imgList[des_i])

        imgFreq = Counter(all_searched_des).most_common()
        if imgFreq:
            index, freq = zip(*imgFreq)
        else:
            # BUG FIX: zip(*[]) cannot be unpacked — a query image with no
            # descriptors/matches crashed the original with a ValueError.
            index, freq = (), ()
        query_img2imgFreq[img_i] = {'index': index, 'freq': freq}
    return query_img2imgFreq

def read_features(delf_dir):
    """Load every DELF feature file in *delf_dir*.

    Returns (delf_names, descriptors_all, locations_all, range_list):
    file names, per-file descriptor arrays, per-file location arrays, and
    [start, end) global descriptor index ranges per file, in listing order.
    """
    delf_names = os.listdir(delf_dir)
    descriptors_all = []
    locations_all = []
    range_list = []

    total = 0  # running global descriptor count
    for name in delf_names:
        locations, _, descriptors, _, _ = feature_io.ReadFromFile(
            os.path.join(delf_dir, name))
        range_list.append([total, total + len(descriptors)])
        total += len(descriptors)
        descriptors_all.append(descriptors)
        locations_all.append(locations)
    return delf_names, descriptors_all, locations_all, range_list

# --- ANN index hyper-parameters -------------------------------------------
dim = 40          # dimension of the PCA-reduced DELF descriptors
n_subq = 8        # number of sub-quantizers
n_centroids = 32  # NOTE(review): passed as `nlist` to IndexIVFPQ below, i.e.
                  # the number of inverted lists, not centroids per sub-vector
                  # as the name suggests — confirm intent
n_bits = 5        # number of bits for each sub-vector
n_probe = 20      # number of Voronoi cells to explore at search time
nlist = 10000     # only used by the commented-out IndexIVFFlat variant below
K = 60  # K nearest neighbors retrieved per query descriptor

i_delf_dir = 'testB/gallery_features_pca_40'
delf_names, descriptors_all, locations_all, range_list = read_features(i_delf_dir)
des_from_img, img_from_des = make_index_table(descriptors_all)
# persist the gallery file order so ranks can be mapped back to names later
with open('testB/gallery_delf_names.txt','w') as f:
    for delf in delf_names:
        f.write(delf+'\n')

# stack the per-image descriptor arrays into one (total_descriptors, dim) matrix
descriptors_all_np = np.concatenate(np.asarray(descriptors_all, dtype=object), axis=0).astype('float32')
print(descriptors_all_np.shape)

print('build index\t',datetime.datetime.now())
coarse_quantizer = faiss.IndexFlatL2(dim)
pq = faiss.IndexIVFPQ(coarse_quantizer, dim, n_centroids, n_subq, n_bits)
pq.nprobe = n_probe
pq.train(descriptors_all_np)
pq.add(descriptors_all_np)
# alternative non-PQ IVF index (inner-product metric), kept for reference:
# quantizer = faiss.IndexFlatL2(dim)
# pq = faiss.IndexIVFFlat(quantizer, dim, nlist, faiss.METRIC_INNER_PRODUCT)
# pq.nprobe = n_probe
# pq.train(descriptors_all_np)
# pq.add(descriptors_all_np)
print('end index\t',datetime.datetime.now())

q_delf_dir = 'testB/query_features_pca_40'
q_delf_names, q_descriptors_all, q_locations_all, q_range_list = read_features(q_delf_dir)
with open('testB/query_delf_names.txt','w') as f:
    for delf in q_delf_names:
        f.write(delf+'\n')

query_des_from_img, query_img_from_des = make_index_table(q_descriptors_all)
q_descriptors_all_np = np.concatenate(np.asarray(q_descriptors_all, dtype=object), axis=0).astype('float32')
print(q_descriptors_all_np.shape)

print('search start',datetime.datetime.now())
# for every query descriptor, retrieve its K nearest gallery descriptors
_, query_des2desList = pq.search(q_descriptors_all_np, K)
print('end search',datetime.datetime.now())
print(datetime.datetime.now())

query_des2imgList = {}
# map each query descriptor's retrieved neighbours to the gallery images owning them
for i, des_list in enumerate(query_des2desList):
    # map retrieved descriptors to their parent image index
    query_des2imgList[i] = [img_from_des[des_i] for des_i in des_list]

# aggregate per-descriptor votes into a per-query-image gallery ranking
query_img2imgFreq = get_similar_img(query_des2imgList, query_des_from_img)

with open('testB/query_img2imgFreq','wb') as f:
    import pickle
    pickle.dump(query_img2imgFreq, f)

# sanity print: the ranking computed for the first query image
print(q_delf_names[0])
for i in range(0,len(query_img2imgFreq[0]['index'])):
    print(delf_names[query_img2imgFreq[0]['index'][i]], query_img2imgFreq[0]['freq'][i])

全局特征结合局部特征进行重排

#最后 A 榜得分 0.65
import argparse
import sys, os

import numpy as np
from scipy import spatial
from skimage import feature
from skimage import measure
from skimage import transform
from multiprocessing import Process, Manager, Pool
import datetime

from tensorflow.python.platform import app
from delf import feature_io


# Cross-process cache of parsed DELF files, shared with the worker pool.
save_feature_dict = Manager().dict()
# Descriptor-space distance cutoff for a KD-tree match to count as a correspondence.
_DISTANCE_THRESHOLD = 0.8

def get_location_and_descriptors(features_path):
    """Return (locations, descriptors) for a DELF file, memoised in the
    process-shared `save_feature_dict` so each file is parsed only once."""
    # FIX: membership test on the mapping itself, not on .keys().
    if features_path in save_feature_dict:
        locations, descriptors = save_feature_dict[features_path]
    else:
        locations, _, descriptors, _, _ = feature_io.ReadFromFile(features_path)
        save_feature_dict[features_path] = [locations, descriptors]
    return locations, descriptors

def get_num_inliers(features_1_path, features_2_path, query, index, d):
    """Geometrically verify a (query, candidate) pair and record the result.

    Matches DELF descriptors with a KD tree, fits an affine model with RANSAC,
    and appends [num_inliers, index] to d[query]. *d* is a multiprocessing
    Manager dict, hence the read-modify-write through full proxy assignment.
    """
    # Read features (cached across calls in save_feature_dict).
    locations_1, descriptors_1 = get_location_and_descriptors(features_1_path)
    num_features_1 = locations_1.shape[0]
    locations_2, descriptors_2 = get_location_and_descriptors(features_2_path)
    num_features_2 = locations_2.shape[0]

    # Find nearest-neighbor matches using a KD tree; cKDTree.query reports
    # "no neighbour within the bound" as index == num_features_1.
    d1_tree = spatial.cKDTree(descriptors_1)
    _, indices = d1_tree.query(
        descriptors_2, distance_upper_bound=_DISTANCE_THRESHOLD)

    # Select feature locations for putative matches.
    locations_2_to_use = np.array([
        locations_2[i,]
        for i in range(num_features_2)
        if indices[i] != num_features_1
    ])
    locations_1_to_use = np.array([
        locations_1[indices[i],]
        for i in range(num_features_2)
        if indices[i] != num_features_1
    ])

    # Perform geometric verification using RANSAC.
    # BUG FIX: the original ran ransac unconditionally and did sum(inliers);
    # with fewer than min_samples putative matches ransac fails, and when no
    # consensus set is found it returns inliers=None — either way the worker
    # died. Treat both cases as zero inliers.
    num_inliers = 0
    if len(locations_1_to_use) >= 3:
        _, inliers = measure.ransac((locations_1_to_use, locations_2_to_use),
                                    transform.AffineTransform,
                                    min_samples=3,
                                    residual_threshold=20,
                                    max_trials=1000)
        if inliers is not None:
            num_inliers = int(sum(inliers))

    z = d[query]
    z.append([num_inliers, index])
    d[query] = z

def main(in_file, out_file):
    """Re-rank a coarse retrieval CSV with RANSAC inlier counts.

    in_file: coarse ranking, one line per query: "<query>.jpg,{img1,img2,...}"
    out_file: re-ranked top-10 CSV; "<out_file>_score" is also written with
        the inlier count prefixed to every candidate name.

    NOTE(review): only queries whose first previous score is <= 50 are
    re-verified, and only against candidates from position 300 on —
    presumably a time-budget heuristic; confirm before reuse.
    """
    with open(in_file) as f:
        lines = f.readlines()
    # strip the trailing '}' and keep at most 500 candidate fields per line
    lines = [line.strip()[:-1].split(',')[:500] for line in lines]
    # for i in range(0,len(lines)):
    #     lines[i][1] = lines[i][1][1:]

    q_delf_dir = 'data/features/query_pca_40'
    i_delf_dir = 'data/features/gallery_pca_40'

    pool = Pool(processes = 15)

    print(datetime.datetime.now())
    # shared dict: query delf name -> list of [num_inliers, gallery delf name]
    d = Manager().dict()

    # Seed d with a previous submission's scores; '<name>jpg' becomes
    # '<name>delf' via the `[:-3] + 'delf'` suffix swap used throughout.
    with open('submission_0.625213.csv_score') as f:
        for ll in f.readlines():
            parts = ll.strip().split(',')
            k = parts[0][:-3] + 'delf'
            d[k] = []
            parts[1] = parts[1][1:]      # drop the leading '{'
            parts[-1] = parts[-1][:-1]   # drop the trailing '}'
            for i in range(1,len(parts)):
                # each field is "<inliers>_<gallery name>"; reassign through
                # the proxy so the Manager dict sees the mutation
                z = d[k]
                z.append([int(parts[i].strip().split('_')[0]),parts[i].strip().split('_')[1][:-3]+'delf'])
                d[k] = z
    print(len(d.keys()))
    for parts in lines:
        q = parts[0][:-3] + 'delf'
        #d[q] = []
        # only re-verify "uncertain" queries (first recorded score <= 50)
        if d[q][0][0] <= 50:
            print(q)
            for part in parts[300:]:
                i = part[:-3] + 'delf'
                #num_inliers = get_num_inliers(os.path.join(q_delf_dir, q), os.path.join(i_delf_dir, i), q, i d)
                pool.apply_async(get_num_inliers, (os.path.join(q_delf_dir, q), os.path.join(i_delf_dir, i), q, i, d))
                #d[q].append([num_inliers, i])
    pool.close()
    pool.join()
    print(datetime.datetime.now())

    # keep the 10 highest-inlier candidates per query
    for k in d.keys():
        d[k] = sorted(d[k], reverse=True)[:10]


    # final submission: '.delf' names mapped back to '.jpg'
    with open(out_file,'w') as f:
        for k in d.keys():
            f.write(k[:-4] + 'jpg,{' + ','.join([x[1][:-4] + 'jpg' for x in d[k]]) + '}\n')

    # same ranking with the inlier count prefixed, usable as a future seed file
    with open(out_file +'_score','w') as f:
        for k in d.keys():
            f.write(k[:-4] + 'jpg,{' + ','.join([str(x[0]) + '_' + x[1][:-4] + 'jpg' for x in d[k]]) + '}\n')

if __name__ == '__main__':
    # usage: python <script> <coarse_result_csv> <output_csv>
    in_file = sys.argv[1]
    out_file = sys.argv[2]
    main(in_file, out_file)

来自亚军团队的分享

下面的链接是参加华为举办的 2020 DIGIX赛道B 数码设备图像检索的亚军团队的整理 https://zhuanlan.zhihu.com/p/303371522

原创文章,转载请注明出处:https://www.daiyufish.com/article/digital-image-retrieval/