基于 `face_recognition` + `milvus` 实现的人脸识别方案。

基于 face_recognition + milvus 实现的人脸识别方案。

face_recognition 基于 Dlib 实现,算法相对比较古老;目前比较先进的是 InSightFace、TransFace 等。

算法模型 厂商 时间
DeepFace Facebook 2014
FaceNet Google 2015
VGG-Face Oxford大学 2015
ArcFace 未知 2015
TFace 腾讯 2020
InSightFace deepinsight 2021

https://learnopencv.com/face-recognition-models/


import sys
from glob import glob
from pathlib import Path
from statistics import mean

import face_recognition
import os
from datetime import datetime
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# Address and port of the Milvus server; passed to connections.connect()
# in the script entry point.
HOST = '192.168.10.25'
PORT = '19530'
# Dimension of the 'embedding' FLOAT_VECTOR field of the collection.
# NOTE(review): presumably equals the length of the vectors returned by
# face_recognition.face_encodings() -- confirm before changing.
DIM = 128
# Name of the Milvus collection that stores one row per inserted face vector.
COLLECTION_NAME = 'face_search'
# Index type and distance metric used when the collection's index is created;
# METRIC_TYPE is also sent with every search request.
INDEX_TYPE = 'IVF_FLAT'
METRIC_TYPE = 'L2'

def create_milvus_collection(collection_name, dim):
    """Return the Milvus collection ``collection_name``, creating it if absent.

    An existing collection is returned as-is (its schema is not checked
    against ``dim``).  A new collection gets two fields -- ``path`` (VARCHAR
    primary key, supplied by the caller, max 500 characters) and
    ``embedding`` (FLOAT_VECTOR of ``dim`` dimensions) -- plus an index on
    ``embedding`` built from the module-level ``INDEX_TYPE`` / ``METRIC_TYPE``.

    Args:
        collection_name: Name of the collection to open or create.
        dim: Dimension of the ``embedding`` vector field for a new collection.

    Returns:
        The ``Collection`` object bound to ``collection_name``.
    """
    if utility.has_collection(collection_name):
        return Collection(name=collection_name)

    fields = [
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='path to image', max_length=500,
                    is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embedding vectors', dim=dim),
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    index_params = {
        'metric_type': METRIC_TYPE,
        'index_type': INDEX_TYPE,
        'params': {"nlist": 2048}
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    # Report the name that was actually created, not the module-level default.
    print(f'A new collection created: {collection_name}')
    return collection

def search(collection, search_vectors):
    """Run a top-10 similarity search for each query vector and print the hits.

    Args:
        collection: Collection object exposing ``search(data=..., anns_field=...,
            param=..., limit=...)``; the query runs against its ``embedding``
            field.
        search_vectors: Sequence of query vectors.  When it is empty or
            ``None`` nothing is sent to the server and a notice is printed
            instead.

    Returns:
        None.  Results are only printed.
    """
    # An image without any detected face yields no query vectors; sending an
    # empty batch to the server is pointless, so stop here.
    if search_vectors is None or len(search_vectors) == 0:
        print("No query vectors given, search skipped")
        return

    search_param = {
        "data": search_vectors,
        "anns_field": "embedding",
        # "nlist" repeats the value used when the index is built.
        # NOTE(review): it is probably not needed at search time -- confirm.
        "param": {"metric_type": METRIC_TYPE, "params": {"nlist": 2048, "nprobe": 16}},
        "limit": 10
    }
    results = collection.search(**search_param)
    # len(results) counts the per-query result sets, one for each query vector.
    print("Result: {} rows found".format(len(results)))
    for i, result in enumerate(results):
        print("\nSearch result for {}th vector: ".format(i))
        for j, res in enumerate(result):
            print("Top {}: {}".format(j, res))

def load(base):
    """Walk ``base`` recursively and pass every JPEG file to ``loadFile``.

    A file is treated as JPEG when its name ends with ``.jpg`` or ``.jpeg``,
    compared case-insensitively so that names such as ``IMG_0001.JPG`` are
    picked up as well.

    Args:
        base: Directory to scan.  If the path does not exist a message is
            printed and nothing else happens.

    Returns:
        None.
    """
    if not os.path.exists(base):
        print(f'{base} not exists')
        return

    for root, _dirs, files in os.walk(base):
        for filename in files:
            if filename.lower().endswith(('.jpeg', '.jpg')):
                loadFile(os.path.join(root, filename))

def loadFile(img):
    """Encode the faces found in image ``img`` and insert them into Milvus.

    The collection ``COLLECTION_NAME`` is opened (or created) on every call.
    Each detected face becomes one row whose ``path`` is ``img`` and whose
    ``embedding`` is the face encoding.  Insert failures are printed and
    otherwise ignored so that a batch run keeps going.

    Args:
        img: Path of the image file.  If it does not exist a message is
            printed and nothing else happens.

    Returns:
        None.
    """
    if not os.path.exists(img):
        print(f'{img} not exists')
        return

    collection = create_milvus_collection(COLLECTION_NAME, DIM)

    started = datetime.now()

    image = face_recognition.load_image_file(img)
    # NOTE(review): for face_encodings() the `model` argument appears to select
    # the landmark model ("small"/"large"), not the detector -- confirm that
    # "cnn" has the intended effect here.
    encodings = face_recognition.face_encodings(image, model="cnn")
    if len(encodings) > 0:
        try:
            # Column-based insert: both columns need one entry per row, so the
            # path is repeated once per detected face.
            # NOTE(review): rows of a multi-face image therefore share the same
            # primary-key value -- confirm this is acceptable for the collection.
            paths = [img] * len(encodings)
            result = collection.insert([paths, encodings])
            print(result)
        except Exception as e:
            print("{} insert error".format(img))
            print(e)

    # timedelta.microseconds is only the sub-second component (and in µs);
    # total_seconds() gives the full elapsed time.
    elapsed_ms = (datetime.now() - started).total_seconds() * 1000
    print("{} face found in {}, time use {:.1f} ms".format(len(encodings), img, elapsed_ms))

def similar(img):
    """Look up the faces of image ``img`` in the Milvus face collection.

    Opens (or creates) the collection ``COLLECTION_NAME``, flushes it and
    loads it, encodes the faces of ``img`` and hands the encodings to
    ``search``, which prints the matches.

    Args:
        img: Path of the query image.  If it does not exist a message is
            printed and nothing else happens.

    Returns:
        None.
    """
    image_missing = not os.path.exists(img)
    if image_missing:
        print(f'{img} not exists')
        return None

    # Flush and load the collection before querying it.
    face_collection = create_milvus_collection(COLLECTION_NAME, DIM)
    face_collection.flush()
    face_collection.load()

    query_vectors = face_recognition.face_encodings(
        face_recognition.load_image_file(img),
        model="cnn",
    )

    print("search: " + img)
    search(face_collection, query_vectors)

if __name__ == '__main__':
    # Command line:
    #   file <image>  - encode one image and insert its faces
    #   find <image>  - search the collection for faces similar to the image
    #   dir  <path>   - insert every JPEG found under a directory
    #   drop          - drop the whole collection
    # Without a recognised command only the list of collections is printed.
    connections.connect(host=HOST, port=PORT, alias="default")
    try:
        print(utility.list_collections())

        args = sys.argv[1:]
        command = args[0] if args else None
        # Commands that take exactly one path argument.
        handlers = {"file": loadFile, "find": similar, "dir": load}

        if command == "drop":
            utility.drop_collection(COLLECTION_NAME)
        elif len(args) == 2 and command in handlers:
            handlers[command](args[1])
    finally:
        # Release the connection even when a command raises.
        connections.disconnect(alias="default")

批量执行命令:使用 GNU parallel 以 20 个并发任务,对目录下的每个文件执行一次 `file` 命令

parallel -j 20 "python face.py file {}" ::: `find /data/people/cleaned/ -type f`