first
BIN
Database/new.npy
Normal file
BIN
Database/student.npy
Normal file
132
accuracy.py
Normal file
@@ -0,0 +1,132 @@
import os
import time

import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"


def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    pred_name = []
    order_name = []
    order_path = []
    unknown = []
    test_path = r"D:\Download\out\cfp_test"
    name_list = os.listdir(test_path)
    for name in name_list:
        img_list = os.listdir(os.path.join(test_path, name))
        for img in img_list:
            order_name.append(name)
            order_path.append(os.path.join(test_path, name, img))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    # for name in order_path:
    #     print(name)

    k_v = load_npy("cfp.npy")
    start_time = time.time()
    order_img = torch.from_numpy(order_img)

    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, k_v)
        else:
            name = findAll(order_img[now:number], model, k_v)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i]:
            right += 1
    failed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            failed += 1
    error = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i]:
            error += 1
            print(order_name[i] + " " + pred_name[i] + " " + order_path[i])
    print("total:" + str(number))
    print("right:" + str(right) + " rate:" + str(right / number))
    print("failed:" + str(failed) + " rate:" + str(failed / number))
    print("error:" + str(error - failed) + " rate:" + str((error - failed) / number))
134
accuracy_GPU.py
Normal file
@@ -0,0 +1,134 @@
import os
import time

import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"


def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        imglist = imglist.to(torch.device("cuda"))
        pred = model(imglist)
        pred = pred.cpu().numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth"))
    model.to(torch.device("cuda"))
    model.eval()
    pred_name = []
    order_name = []
    order_path = []
    unknown = []
    test_path = "./retinaface_test"
    name_list = os.listdir(test_path)
    for name in name_list:
        img_list = os.listdir(os.path.join(test_path, name))
        for img in img_list:
            order_name.append(name)
            order_path.append(os.path.join(test_path, name, img))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    # for name in order_path:
    #     print(name)

    k_v = load_npy("retinaface_lfw_myalign.npy")
    start_time = time.time()
    order_img = torch.from_numpy(order_img)

    batch = 256
    now = 0
    number = len(order_img)
    # number = 1400
    for i in range(number):
        unknown.append("unknown")

    while now < number:
        if now + batch < number:
            name = findAll(order_img[now:now + batch], model, k_v)
        else:
            name = findAll(order_img[now:number], model, k_v)
        now = now + batch
        for na in name:
            pred_name.append(na)
        print("batch" + str(now))
    end_time = time.time()
    print("findAll time: " + str(end_time - start_time))
    # print(len(pred_name))
    right = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == order_name[i]:
            right += 1
    failed = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] == unknown[i]:
            failed += 1
    error = 0
    for i, name in enumerate(pred_name):
        if pred_name[i] != order_name[i]:
            error += 1
            print(order_name[i] + " " + pred_name[i] + " " + order_path[i])
    print("total:" + str(number))
    print("right:" + str(right) + " rate:" + str(right / number))
    print("failed:" + str(failed) + " rate:" + str(failed / number))
    print("error:" + str(error - failed) + " rate:" + str((error - failed) / number))
150
anti.py
Normal file
@@ -0,0 +1,150 @@
import os
import cv2
import numpy as np
import argparse
import warnings
import time
import torch
import torch.nn.functional as F

from src.generate_patches import CropImage
from src.model_lib.MiniFASNet import MiniFASNetV1, MiniFASNetV2, MiniFASNetV1SE, MiniFASNetV2SE
from src.data_io import transform as trans
from src.utility import get_kernel, parse_model_name

warnings.filterwarnings('ignore')


MODEL_MAPPING = {
    'MiniFASNetV1': MiniFASNetV1,
    'MiniFASNetV2': MiniFASNetV2,
    'MiniFASNetV1SE': MiniFASNetV1SE,
    'MiniFASNetV2SE': MiniFASNetV2SE
}


class AntiSpoofPredict():
    def __init__(self, cpu_or_cuda):
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")

    def predict(self, img, model):
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)
        with torch.no_grad():
            result = model.forward(img)
            result = F.softmax(result, dim=1).cpu().numpy()
        return result


def load_anti_model(model_dir, cpu_or_cuda):
    model_list = []
    for model_path in os.listdir(model_dir):
        model_list.append(_load_model(os.path.join(model_dir, model_path), cpu_or_cuda))
    return model_list


def _load_model(model_path, cpu_or_cuda):
    # define model
    device = torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu")
    model_name = os.path.basename(model_path)
    h_input, w_input, model_type, _ = parse_model_name(model_name)
    kernel_size = get_kernel(h_input, w_input)
    model = MODEL_MAPPING[model_type](conv6_kernel=kernel_size).to(device)

    # load model weights; strip the "module." prefix left by DataParallel if present
    state_dict = torch.load(model_path, map_location=device)
    first_layer_name = next(iter(state_dict))
    if first_layer_name.find('module.') >= 0:
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for key, value in state_dict.items():
            name_key = key[7:]
            new_state_dict[name_key] = value
        model.load_state_dict(new_state_dict)
    else:
        model.load_state_dict(state_dict)
    model.eval()
    return model


# The video stream captured by the Android APK has a 3:4 aspect ratio; to stay
# consistent with it, input images are restricted to the same 3:4 ratio.
def check_image(image):
    height, width, channel = image.shape
    if width / height != 3 / 4:
        print("Image is not appropriate!!!\nHeight/Width should be 4/3.")
        return False
    else:
        return True


# Face liveness (anti-spoofing) detection
def anti_spoofing(image_name, model_dir, cpu_or_cuda, bbox, model_list):
    model_test = AntiSpoofPredict(cpu_or_cuda)
    image_cropper = CropImage()
    image = cv2.imdecode(np.fromfile(image_name, dtype=np.uint8), cv2.IMREAD_COLOR)
    h, w = image.shape[:2]
    factor = h / w
    if w > 1000:
        image = cv2.resize(image, (600, int(600 * factor)))
    # result = check_image(image)
    # if result is False:
    #     return
    # image_bbox = model_test.get_bbox(image)
    image_bbox = bbox
    prediction = np.zeros((1, 3))
    test_speed = 0
    # sum the predictions from each single model
    for index, model_name in enumerate(os.listdir(model_dir)):
        h_input, w_input, model_type, scale = parse_model_name(model_name)
        param = {
            "org_img": image,
            "bbox": image_bbox,
            "scale": scale,
            "out_w": w_input,
            "out_h": h_input,
            "crop": True,
        }
        if scale is None:
            param["crop"] = False
        img = image_cropper.crop(**param)

        start = time.time()
        prediction += model_test.predict(img, model_list[index])
        test_speed += time.time() - start

    label = np.argmax(prediction)
    # print(prediction)
    # cv2.rectangle(
    #     image,
    #     (image_bbox[0], image_bbox[1]),
    #     (image_bbox[0] + image_bbox[2], image_bbox[1] + image_bbox[3]),
    #     (225, 0, 0), 2)
    # cv2.imshow("out", image)
    # cv2.waitKey(0)
    value = prediction[0][1] / 2
    if value > 0.915:
        return "real face", '{:.10f}'.format(value)
    else:
        return "fake face", '{:.10f}'.format(value)


if __name__ == "__main__":
    desc = "test"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="which gpu id, [0/1/2/3]")
    parser.add_argument(
        "--model_dir",
        type=str,
        default="./resources/anti_spoof_models",
        help="model_lib used to test")
    parser.add_argument(
        "--image_name",
        type=str,
        default="000_0.bmp",
        help="image used to test")
    args = parser.parse_args()
    # anti_spoofing(args.image_name, args.model_dir, args.device_id)
449
app.py
Normal file
@@ -0,0 +1,449 @@
import time

import faiss
from flask import Flask, render_template, request, jsonify, send_from_directory
from markupsafe import escape, escape_silent
from werkzeug.utils import secure_filename

from anti import anti_spoofing, load_anti_model
from face_api import load_arcface_model, load_npy, findOne, load_image, face_verification, findAll, add_one_to_database, \
    get_claster_tmp_file_embedding, cluster, detect_video
from gender_age import set_gender_conf, gender_age, load_gender_model
from retinaface_detect import load_retinaface_model, detect_one, set_retinaface_conf
from werkzeug.exceptions import RequestEntityTooLarge
import zipfile
import os
import shutil
import re
import numpy as np
import torch

ALLOWED_IMG = set(['png', 'jpg', 'jpeg', 'bmp', 'PNG', 'JPG', 'JPEG'])
# Uploaded images are limited to 10 MB
ALLOWED_IMG_SIZE = 10 * 1024 * 1024
ALLOWED_FILE = set(['zip'])
ALLOWED_VIDEO = set(['mp4'])
app = Flask(__name__)

# Uploaded files are limited to 100 MB
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024
# Do not force ASCII in jsonify, so Chinese text is not garbled
app.config['JSON_AS_ASCII'] = False

# Select CPU or GPU (pass "cuda" for GPU)
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
# Load the face recognition model
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
# Load the face detection model
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
retinaface_model = load_retinaface_model(retinaface_args)
# Load the gender/age estimation model
gender_args = set_gender_conf()
gender_model = load_gender_model(gender_args, 'fc1')
anti_spoofing_model_path = "model/anti_spoof_models"
anti_model = load_anti_model(anti_spoofing_model_path, cpu_or_cuda)


# The face database itself is loaded in the __main__ block below


@app.route('/')
def index():
    return "model"


@app.route('/hello')
@app.route('/hello/<name>')
def hello(name=None):
    return render_template('hello.html', name=name)


@app.route('/user', methods=['GET'])
def show_user_name():
    return request.args.get('username', '')


# Build the JSON response
# Function arguments are checked with "is not None"; the local data/result dicts are checked for truthiness
def create_response(status, name=None, distance=None, verification=None, gender=None, age=None, num=None, anti=None,
                    score=None, box_and_point=None, addfile_names=None, fail_names=None, database_name=None, msg=None,
                    delete_names=None, not_exist_names=None):
    # res is the top-level JSON structure
    res = {}
    res['status'] = status

    data = {}
    try:
        data["box_and_point"] = box_and_point.tolist()
    except AttributeError:
        pass
    if anti is not None and score is not None:
        liveness = {}
        liveness["spoofing"] = anti
        liveness['score'] = score
        data['liveness'] = liveness
    if distance is not None:
        data['distance'] = float(distance)
    if verification is not None:
        data['verification'] = verification
    if num is not None:
        data['number'] = num
    if gender is not None:
        data['gender'] = gender
    if age is not None:
        data['age'] = age
    if name is not None:
        data['name'] = name
    if data:
        res['data'] = data

    # Fields returned by the database add/delete endpoints
    result = {}
    if msg is not None:
        res['msg'] = msg
    if database_name is not None:
        result['database_name'] = database_name
    # Adding faces
    if addfile_names is not None or fail_names is not None:
        result['success_names'] = addfile_names
        result['fail_names'] = fail_names
    # Deleting faces
    if delete_names is not None or not_exist_names is not None:
        result['delete_names'] = delete_names
        result['not_exist_names'] = not_exist_names
    if result:
        res['result'] = result

    return jsonify(res)


# Build the JSON response for the cluster endpoint
def create_cluster_response(status, all_cluster):
    res = {}
    data = {}
    for index, cluster in enumerate(all_cluster):
        data['cluster' + str(index)] = cluster
    res['data'] = data
    res['status'] = status
    return res


# Check the uploaded file extension
def check_file_format(file_name, format):
    if '.' in file_name:
        file_format = file_name.rsplit('.', 1)[-1]
        if file_format in format:
            return True
    return False


# Check the image size; anything above 10 MB raises an exception
def check_img_size(img_path):
    fsize = os.path.getsize(img_path)
    if fsize > ALLOWED_IMG_SIZE:
        raise RequestEntityTooLarge


# Extract a zip archive into the given directory
def unzip(zip_src, dst_dir):
    f = zipfile.is_zipfile(zip_src)
    if f:
        fz = zipfile.ZipFile(zip_src, 'r')
        for file in fz.namelist():
            fz.extract(file, dst_dir)
        return True
    else:
        return False


# Extract an archive
def un_zip(file_path, output_path):
    zip_file = zipfile.ZipFile(file_path)
    if os.path.isdir(output_path):
        pass
    else:
        os.mkdir(output_path)
    zip_file.extractall(output_path)
    # for names in zip_file.namelist():
    #     zip_file.extract(names,output_path)
    zip_file.close()


# Face recognition plus gender/age estimation
@app.route('/recognition', methods=['POST'])
def recognition():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_IMG):
            img_path = './img/recognition/' + secure_filename(f.filename)
            f.save(img_path)
            check_img_size(img_path)
            # img3 = load_image('./file/'+secure_filename(f.filename))
            # img3 = torch.from_numpy(img3)
            tic = time.time()
            img3, box_and_point = detect_one(img_path, retinaface_model, retinaface_args)
            print('detect time: {:.4f}'.format(time.time() - tic))
            if len(img3) == 0:
                return create_response('no face')
            elif len(img3) > 1:
                namelist = findAll(img3, arcface_model, index, database_name_list, cpu_or_cuda)
                gender_list, age_list = [], []
                # gender_list, age_list = gender_age(img3, gender_model)
                res = create_response('success', namelist, gender=gender_list, age=age_list,
                                      box_and_point=box_and_point)
            else:
                b = box_and_point[0]
                w = b[2] - b[0]
                h = b[3] - b[1]
                b[2] = w
                b[3] = h
                label, value = anti_spoofing(img_path, anti_spoofing_model_path, cpu_or_cuda, np.array(b[:4], int),
                                             anti_model)
                # print(index,database_name_list)
                name, distance = findOne(img3, arcface_model, index, database_name_list, cpu_or_cuda)
                gender_list, age_list = [], []
                # gender_list, age_list = gender_age(img3, gender_model)
                res = create_response('success', name, gender=gender_list, age=age_list, distance=distance,
                                      anti=label, score=value, box_and_point=box_and_point)
            return res
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# Compare two images
@app.route('/compare', methods=['POST'])
def compare_file():
    try:
        file1 = request.files['file1_name']
        file2 = request.files['file2_name']
        if file1 and check_file_format(file1.filename, ALLOWED_IMG) and file2 and check_file_format(file2.filename,
                                                                                                    ALLOWED_IMG):
            img1_path = './img/compare/' + secure_filename(file1.filename)
            img2_path = './img/compare/' + secure_filename(file2.filename)
            file1.save(img1_path)
            file2.save(img2_path)
            check_img_size(img1_path)
            check_img_size(img2_path)
            img1, box_and_point1 = detect_one(img1_path, retinaface_model, retinaface_args)
            img2, box_and_point2 = detect_one(img2_path, retinaface_model, retinaface_args)
            if len(img1) == 1 and len(img2) == 1:
                result, distance = face_verification(img1, img2, arcface_model, cpu_or_cuda)
                print(result, distance)
                return create_response('success', verification=result, distance=distance)
            else:
                return create_response('image contains no face or more than 1 face')
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# Add faces to a database: one or many faces, to an existing or a new database
# Covers create and update
@app.route('/databaseAdd', methods=['POST'])
def DB_add_face():
    try:
        # Upload one or more face images
        # Every part uses the key "file_list" with a different value, which allows batch uploads
        upload_files = request.files.getlist("file_list")
        # '', [], {} and 0 all evaluate as False
        if not upload_files:
            msg = "上传文件为空"
            return create_response(0, msg=msg)
        database_name = request.form.get("database_name")
        database_path = "./Database/" + database_name + ".npy"
        if not os.path.exists(database_path):
            msg = "数据库不存在"
            return create_response(0, msg=msg)
        # Names already present in the database
        names = load_npy(database_path).keys()
        # print(names)

        # Temporary server folder for uploaded images; it is recreated before each upload and
        # removed afterwards (this could later be changed to periodic cleanup)
        file_temp_path = './img/uploadNew/'
        if not os.path.exists(file_temp_path):
            os.makedirs(file_temp_path)

        # Regular expression that extracts the Chinese characters from the file name, used as the .npy keys
        r = re.compile('[\u4e00-\u9fa5]+')
        # Track which names were added successfully and which failed
        success_names = []
        fail_names = {}
        # Two failure cases: wrong format or already present
        format_wrong = []
        alreadyExist = []
        # Handle each image: first check the format, then check whether the name already exists
        for file in upload_files:
            filename = file.filename
            name = r.findall(filename)[0]
            if file and check_file_format(filename, ALLOWED_IMG):
                if name in names:
                    alreadyExist.append(name)
                    continue
                save_path = file_temp_path + filename
                file.save(save_path)
                check_img_size(save_path)
                img_file, box_and_point = detect_one(save_path, retinaface_model, retinaface_args)
                add_one_to_database(img=img_file, model=arcface_model, name=name, database_path=database_path,
                                    cpu_or_cuda=cpu_or_cuda)
                success_names.append(name)
            else:
                format_wrong.append(name)
                continue
        shutil.rmtree(file_temp_path)
        # Any failure means status 0
        if format_wrong or alreadyExist:
            status = 0
        else:
            status = 1
        fail_names['formatWrong'] = format_wrong
        fail_names['alreadyExist'] = alreadyExist

        return create_response(status=status, addfile_names=success_names, fail_names=fail_names,
                               database_name=database_name, msg="新增人脸操作执行完成")
    except RequestEntityTooLarge:
        return create_response(0, msg='image size should be less than 10M')


# Delete one or more faces from an existing database
@app.route('/databaseDelete', methods=['POST'])
def DB_delete_face():
    try:
        delete_names = request.form.getlist("delete_names")
        database_name = request.form.get("database_name")
        database_path = "./Database/" + database_name + ".npy"
        if not os.path.exists(database_path):
            msg = "数据库不存在"
            return create_response(0, msg=msg)
        if not delete_names:
            msg = "delete_names参数为空"
            return create_response(0, msg=msg)
        k_v = load_npy(database_path)
        print(k_v.keys())
        success_list = []
        fail_list = []
        for name in delete_names:
            if name in k_v.keys():
                del k_v[name]
                success_list.append(name)
            else:
                fail_list.append(name)
                continue
        np.save(database_path, k_v)
        status = 1
        if fail_list:
            status = 0
        return create_response(status=status, delete_names=success_list, not_exist_names=fail_list,
                               database_name=database_name, msg="删除人脸操作完成")
    except RequestEntityTooLarge:
        return create_response(0, msg='image size should be less than 10M')


# Image-search endpoints:
# Upload a zip archive to build the image library
@app.route('/uploadZip', methods=['POST'])
def upload_Zip():
    try:
        zip = request.files['zip_name']
        dst_dir = './img/search/'
        if unzip(zip, dst_dir):
            return create_response('upload zip success')
        else:
            return create_response('upload zip file please')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# Search the library with an image
@app.route('/imgSearchImg', methods=['POST'])
def img_search_img():
    searchfile = './img/search/face'
    try:
        file = request.files['img_name']
        if file and check_file_format(file.filename, ALLOWED_IMG):
            img_path = './img/search/' + secure_filename(file.filename)
            file.save(img_path)
            check_img_size(img_path)
            img, box_and_point = detect_one(img_path, retinaface_model, retinaface_args)
            if len(img) == 1:
                Onename = []
                num = 0
                for filenames in os.listdir(searchfile):
                    imgpath = os.path.join(searchfile, filenames)
                    imgdata, box_and_point = detect_one(imgpath, retinaface_model, retinaface_args)
                    result = face_verification(img, imgdata, arcface_model, cpu_or_cuda)
                    isOne, distance = result.split(' ', -1)[0], result.split(' ', -1)[1]
                    if isOne == 'same':
                        Onename.append(filenames)
                        num += 1
                return create_response('success', name=Onename, num=num)
            else:
                return create_response('image contains no face or more than 1 face')
        else:
            return create_response('png jpg jpeg bmp are allowed')
    except RequestEntityTooLarge:
        return create_response('image size should be less than 10M')


# Face clustering endpoint
@app.route('/cluster', methods=['POST'])
def zip_cluster():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_FILE):
            zip_name = secure_filename(f.filename)
            f.save('./img/cluster_tmp_file/' + zip_name)
            un_zip('./img/cluster_tmp_file/' + zip_name, './img/cluster_tmp_file/')
            emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/" + zip_name.rsplit('.')[0],
                                                                 retinaface_model,
                                                                 retinaface_args, arcface_model, cpu_or_cuda)
            return create_cluster_response("success", cluster(emb_list, name_list))
        else:
            return create_response('zip are allowed')
    except RequestEntityTooLarge:
        return create_response('file size should be less than 100M')


# Video recognition endpoint
@app.route('/videorecognition', methods=['POST'])
def video_recognition():
    try:
        f = request.files['file_name']
        if f and check_file_format(f.filename, ALLOWED_VIDEO):
            video_name = secure_filename(f.filename)
            f.save('./video/' + video_name)
            detect_video('./video/' + video_name, './videoout/' + video_name, retinaface_model, arcface_model, k_v,
                         retinaface_args)
            return create_response("success")
        else:
            return create_response('mp4 are allowed')
    except RequestEntityTooLarge:
        return create_response('file size should be less than 100M')


@app.route('/download/<string:filename>', methods=['GET'])
def download(filename):
    if os.path.isfile(os.path.join('./videoout/', filename)):
        return send_from_directory('./videoout/', filename, as_attachment=True)
    else:
        return create_response("Download failed")


if __name__ == '__main__':
    k_v = load_npy("./Database/student.npy")
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    # print(database_name_list)
    nlist = 50
    quantizer = faiss.IndexFlatL2(512)  # coarse quantizer for the IVF index
    index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
    index.train(vector_list)
    # index = faiss.IndexFlatL2(512)
    index.add(vector_list)
    index.nprobe = 50
    app.run(host="0.0.0.0", port=5000)
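The findOne/findAll used by the routes above come from face_api, which is not part of this diff; unlike the dictionary-based versions in accuracy.py they take the trained faiss IVF index and database_name_list. The following is only a hedged sketch of the lookup they are presumed to perform (search_index is an illustrative name, not a function in this repo); note that faiss returns squared L2 distances for METRIC_L2, so the 1.20 threshold from accuracy.py is compared after squaring.

# Hypothetical sketch of the faiss lookup assumed to happen inside face_api.findOne/findAll.
import numpy as np

def search_index(embeddings, index, database_name_list, threshold=1.20):
    # embeddings: (n, 512) float32, already L2-normalized like the database vectors
    distances, ids = index.search(np.ascontiguousarray(embeddings, dtype=np.float32), 1)
    names = []
    for dist, idx in zip(distances[:, 0], ids[:, 0]):
        # faiss returns squared L2 distances, hence threshold ** 2
        if idx != -1 and dist < threshold ** 2:
            names.append(database_name_list[idx])
        else:
            names.append("unknown")
    return names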
1
backbones/__init__.py
Normal file
@@ -0,0 +1 @@
from .iresnet import iresnet18, iresnet34, iresnet50, iresnet100, iresnet200
BIN
backbones/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
backbones/__pycache__/iresnet.cpython-38.pyc
Normal file
187
backbones/iresnet.py
Normal file
@@ -0,0 +1,187 @@
import torch
from torch import nn

__all__ = ['iresnet18', 'iresnet34', 'iresnet50', 'iresnet100', 'iresnet200']


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=dilation,
                     groups=groups,
                     bias=False,
                     dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)


class IBasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1):
        super(IBasicBlock, self).__init__()
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.bn1 = nn.BatchNorm2d(inplanes, eps=1e-05,)
        self.conv1 = conv3x3(inplanes, planes)
        self.bn2 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.prelu = nn.PReLU(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn3 = nn.BatchNorm2d(planes, eps=1e-05,)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        out = self.bn1(x)
        out = self.conv1(out)
        out = self.bn2(out)
        out = self.prelu(out)
        out = self.conv2(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return out


class IResNet(nn.Module):
    fc_scale = 7 * 7

    def __init__(self,
                 block, layers, dropout=0, num_features=512, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None, fp16=False):
        super(IResNet, self).__init__()
        self.fp16 = fp16
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
        self.prelu = nn.PReLU(self.inplanes)
        self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.bn2 = nn.BatchNorm2d(512 * block.expansion, eps=1e-05,)
        self.dropout = nn.Dropout(p=dropout, inplace=True)
        self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
        self.features = nn.BatchNorm1d(num_features, eps=1e-05)
        nn.init.constant_(self.features.weight, 1.0)
        self.features.weight.requires_grad = False

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0, 0.1)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, IBasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion, eps=1e-05, ),
            )
        layers = []
        layers.append(
            block(self.inplanes, planes, stride, downsample, self.groups,
                  self.base_width, previous_dilation))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(self.inplanes,
                      planes,
                      groups=self.groups,
                      base_width=self.base_width,
                      dilation=self.dilation))

        return nn.Sequential(*layers)

    def forward(self, x):
        with torch.cuda.amp.autocast(self.fp16):
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.prelu(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)
            x = self.bn2(x)
            x = torch.flatten(x, 1)
            x = self.dropout(x)
        x = self.fc(x.float() if self.fp16 else x)
        x = self.features(x)
        return x


def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
    model = IResNet(block, layers, **kwargs)
    if pretrained:
        raise ValueError()
    return model


def iresnet18(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet18', IBasicBlock, [2, 2, 2, 2], pretrained,
                    progress, **kwargs)


def iresnet34(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet34', IBasicBlock, [3, 4, 6, 3], pretrained,
                    progress, **kwargs)


def iresnet50(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet50', IBasicBlock, [3, 4, 14, 3], pretrained,
                    progress, **kwargs)


def iresnet100(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet100', IBasicBlock, [3, 13, 30, 3], pretrained,
                    progress, **kwargs)


def iresnet200(pretrained=False, progress=True, **kwargs):
    return _iresnet('iresnet200', IBasicBlock, [6, 26, 60, 6], pretrained,
                    progress, **kwargs)
135
centerface.py
Normal file
@@ -0,0 +1,135 @@
import time

import numpy as np
import cv2
import datetime


class CenterFace(object):
    def __init__(self, landmarks=True):
        self.landmarks = landmarks
        if self.landmarks:
            self.net = cv2.dnn.readNetFromONNX('./model/onnx/centerface.onnx')
        else:
            self.net = cv2.dnn.readNetFromONNX('./model/onnx/cface.1k.onnx')
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

    def __call__(self, img, height, width, threshold=0.5):
        self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
        return self.inference_opencv(img, threshold)

    def inference_opencv(self, img, threshold):
        blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False)
        self.net.setInput(blob)
        begin = datetime.datetime.now()
        start_time = time.time()

        if self.landmarks:
            heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", '540'])
        else:
            heatmap, scale, offset = self.net.forward(["535", "536", "537"])
        end = datetime.datetime.now()
        end_time = time.time()
        # print("cpuOne time: " + str(end_time - start_time))
        # print("cpu times = ", end - begin)
        return self.postprocess(heatmap, lms, offset, scale, threshold)

    def transform(self, h, w):
        img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
        scale_h, scale_w = img_h_new / h, img_w_new / w
        return img_h_new, img_w_new, scale_h, scale_w

    def postprocess(self, heatmap, lms, offset, scale, threshold):
        if self.landmarks:
            dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
        else:
            dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
        if len(dets) > 0:
            dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
            if self.landmarks:
                lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
        else:
            dets = np.empty(shape=[0, 5], dtype=np.float32)
            if self.landmarks:
                lms = np.empty(shape=[0, 10], dtype=np.float32)
        if self.landmarks:
            return dets, lms
        else:
            return dets

    def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
        heatmap = np.squeeze(heatmap)
        scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
        offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
        c0, c1 = np.where(heatmap > threshold)
        if self.landmarks:
            boxes, lms = [], []
        else:
            boxes = []
        if len(c0) > 0:
            for i in range(len(c0)):
                s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
                o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
                s = heatmap[c0[i], c1[i]]
                x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
                x1, y1 = min(x1, size[1]), min(y1, size[0])
                boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
                if self.landmarks:
                    lm = []
                    for j in range(5):
                        lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
                        lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
                    lms.append(lm)
            boxes = np.asarray(boxes, dtype=np.float32)
            keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
            boxes = boxes[keep, :]
            if self.landmarks:
                lms = np.asarray(lms, dtype=np.float32)
                lms = lms[keep, :]
        if self.landmarks:
            return boxes, lms
        else:
            return boxes

    def nms(self, boxes, scores, nms_thresh):
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = np.argsort(scores)[::-1]
        num_detections = boxes.shape[0]
        suppressed = np.zeros((num_detections,), dtype=bool)

        keep = []
        for _i in range(num_detections):
            i = order[_i]
            if suppressed[i]:
                continue
            keep.append(i)

            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]

            for _j in range(_i + 1, num_detections):
                j = order[_j]
                if suppressed[j]:
                    continue

                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0, xx2 - xx1 + 1)
                h = max(0, yy2 - yy1 + 1)

                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= nms_thresh:
                    suppressed[j] = True

        return keep
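A minimal, illustrative way to call the detector above; the image path is a placeholder, and it assumes the ONNX weights referenced in __init__ exist under ./model/onnx/.

# Illustrative only: run CenterFace on one image and print the detections.
import cv2

if __name__ == "__main__":
    img = cv2.imread("test.jpg")  # any BGR image; the path is a placeholder
    h, w = img.shape[:2]
    centerface = CenterFace(landmarks=True)
    dets, lms = centerface(img, h, w, threshold=0.5)
    for det in dets:
        x1, y1, x2, y2, score = det
        print("box:", x1, y1, x2, y2, "score:", score)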
67
config.py
Normal file
@@ -0,0 +1,67 @@
from easydict import EasyDict as edict

config = edict()
config.dataset = "ms1m-retinaface-t2"
config.embedding_size = 512
config.sample_rate = 1
config.fp16 = False
config.momentum = 0.9
config.weight_decay = 5e-4
config.batch_size = 64
config.lr = 0.1  # batch size is 512
config.output = "ms1mv3_arcface_r50"

if config.dataset == "emore":
    config.rec = "/train_tmp/faces_emore"
    config.num_classes = 85742
    config.num_image = 5822653
    config.num_epoch = 16
    config.warmup_epoch = -1
    config.val_targets = ["lfw", ]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
            [m for m in [8, 14] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "ms1m-retinaface-t2":
    config.rec = "/train_tmp/ms1m-retinaface-t2"
    config.num_classes = 91180
    config.num_epoch = 25
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < -1 else 0.1 ** len(
            [m for m in [11, 17, 22] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "glint360k":
    # make training faster
    # our RAM is 256G
    # mount -t tmpfs -o size=140G tmpfs /train_tmp
    config.rec = "/train_tmp/glint360k"
    config.num_classes = 360232
    config.num_image = 17091657
    config.num_epoch = 20
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
            [m for m in [8, 12, 15, 18] if m - 1 <= epoch])
    config.lr_func = lr_step_func

elif config.dataset == "webface":
    config.rec = "/train_tmp/faces_webface_112x112"
    config.num_classes = 10572
    config.num_image = "forget"
    config.num_epoch = 34
    config.warmup_epoch = -1
    config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

    def lr_step_func(epoch):
        return ((epoch + 1) / (4 + 1)) ** 2 if epoch < config.warmup_epoch else 0.1 ** len(
            [m for m in [20, 28, 32] if m - 1 <= epoch])
    config.lr_func = lr_step_func
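The lr_step_func above appears to be a LambdaLR-style multiplier applied to config.lr (the training script is not part of this diff, so that is an assumption); since the warmup condition compares against -1 it never fires, and the schedule is a plain step decay at each milestone. A quick sanity check of the ms1m-retinaface-t2 branch:

# Assumption: lr_func acts as a multiplier on config.lr inside a LambdaLR-style scheduler.
base_lr = 0.1
milestones = [11, 17, 22]

def lr_at(epoch):
    return base_lr * 0.1 ** len([m for m in milestones if m - 1 <= epoch])

for epoch in [0, 9, 10, 16, 21, 24]:
    print(epoch, lr_at(epoch))
# -> 0.1 for epochs 0-9, 0.01 for 10-15, 0.001 for 16-20, 0.0001 for 21 onwards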
168
create_database.py
Normal file
@@ -0,0 +1,168 @@
import os
import time
import re
import torch
import cv2
import numpy as np
from backbones import iresnet50, iresnet18, iresnet100


def load_image(img_path):
    # img = cv2.imread(img_path)
    img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    img = img.transpose((2, 0, 1))
    img = img[np.newaxis, :, :, :]
    img = np.array(img, dtype=np.float32)
    img -= 127.5
    img /= 127.5
    return img


def findEuclideanDistance(source_representation, test_representation):
    euclidean_distance = source_representation - test_representation
    euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
    euclidean_distance = np.sqrt(euclidean_distance)
    return euclidean_distance


def findCosineDistance(source_representation, test_representation):
    a = np.matmul(np.transpose(source_representation), test_representation)
    b = np.sum(np.multiply(source_representation, source_representation))
    c = np.sum(np.multiply(test_representation, test_representation))
    return 1 - (a / (np.sqrt(b) * np.sqrt(c)))


def l2_normalize(x):
    return x / np.sqrt(np.sum(np.multiply(x, x)))


def cosin_metric(x1, x2):
    return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))


def load_npy(path):
    data = np.load(path, allow_pickle=True)
    data = data.item()
    return data


def create_database(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    for name in name_list:
        img_path = os.listdir(os.path.join(path, name))
        for img_name in img_path[:1]:
            img = load_image(os.path.join(path, name, img_name))
            img = torch.from_numpy(img)
            with torch.no_grad():
                pred = model(img)
            pred = pred.numpy()
            k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


def create_database_batch(path, model, database_path):
    name_list = os.listdir(path)
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    batch = 256
    order_name = []
    order_path = []
    emb_list = []
    for name in name_list:
        img_path = os.listdir(os.path.join(path, name))
        for img_name in img_path[:1]:
            order_name.append(name)
            order_path.append(os.path.join(path, name, img_name))
    order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
    for index, img_path in enumerate(order_path):
        order_img[index] = load_image(img_path)
    print(order_img.shape)
    order_img = torch.from_numpy(order_img)
    now = 0
    number = len(order_img)
    with torch.no_grad():
        while now < number:
            if now + batch < number:
                emb = model(order_img[now:now + batch])
            else:
                emb = model(order_img[now:])
            now = now + batch
            for em in emb:
                emb_list.append(em)
            print("batch" + str(now))

    for i, emb in enumerate(emb_list):
        k_v[order_name[i]] = l2_normalize(emb.numpy())
    np.save(database_path, k_v)


def add_one(img, model, name, database_path):
    img = torch.from_numpy(img)
    with torch.no_grad():
        pred = model(img)
    pred = pred.numpy()
    k_v = {}
    if os.path.exists(database_path):
        k_v = np.load(database_path, allow_pickle=True)
        k_v = k_v.item()
    k_v[name] = l2_normalize(pred)
    np.save(database_path, k_v)


def findmindistance(pred, threshold, k_v):
    distance = 10
    most_like = ""
    for name in k_v.keys():
        tmp = findEuclideanDistance(k_v[name], pred)
        if distance > tmp:
            distance = tmp
            most_like = name
    if distance < threshold:
        return most_like
    else:
        return -1


def findOne(img, model, k_v):
    with torch.no_grad():
        start_time = time.time()
        pred = model(img)
        end_time = time.time()
        # print("predOne time: " + str(end_time - start_time))
        pred = pred.numpy()
        name = findmindistance(l2_normalize(pred), threshold=1.20, k_v=k_v)
        if name != -1:
            return name
        else:
            return "unknown"


def findAll(imglist, model, k_v):
    with torch.no_grad():
        name_list = []
        pred = model(imglist)
        pred = pred.numpy()
        for pr in pred:
            name = findmindistance(l2_normalize(pr), threshold=1.20, k_v=k_v)
            if name != -1:
                name_list.append(name)
            else:
                name_list.append("unknown")
        return name_list


if __name__ == '__main__':
    model = iresnet100()
    model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
    model.eval()
    # img = load_image(r"D:\Download\out\facedatabase\man.jpg")
    # img = load_image(r"D:\Download\out\facedatabase\man6.jpg")
    # img = load_image(r"D:\Download\out\alig_students\student.jpg")
    # print(img.shape)
    #
    # k_v = load_npy("./Database/student.npy")
    # start_time = time.time()
    # img = torch.from_numpy(img)
    # name = findOne(img,model,k_v)
    # mo = r'[\u4e00-\u9fa5]*'
    # name = re.match(mo,name)
    # print(name.group(0))
    # end_time = time.time()
    # print("findOne time: " + str(end_time - start_time))

    # create_database_batch(r"D:\Download\out\alig_students",model,"./Database/student.npy")
    create_database_batch(r"D:\Download\out\cfp_database", model, "cfp.npy")
    # add_one(img,model,"Arminio_Fraga","centerface_lfw.npy")
2845
data/FDDB/img_list.txt
Normal file
3
data/__init__.py
Normal file
@@ -0,0 +1,3 @@
from .wider_face import WiderFaceDetection, detection_collate
from .data_augment import *
from .config import *
BIN
data/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
data/__pycache__/config.cpython-38.pyc
Normal file
BIN
data/__pycache__/data_augment.cpython-38.pyc
Normal file
BIN
data/__pycache__/wider_face.cpython-38.pyc
Normal file
42
data/config.py
Normal file
@@ -0,0 +1,42 @@
# config.py

cfg_mnet = {
    'name': 'mobilenet0.25',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 32,
    'ngpu': 1,
    'epoch': 250,
    'decay1': 190,
    'decay2': 220,
    'image_size': 640,
    'pretrain': True,
    'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3},
    'in_channel': 32,
    'out_channel': 64
}

cfg_re50 = {
    'name': 'Resnet50',
    'min_sizes': [[16, 32], [64, 128], [256, 512]],
    'steps': [8, 16, 32],
    'variance': [0.1, 0.2],
    'clip': False,
    'loc_weight': 2.0,
    'gpu_train': True,
    'batch_size': 24,
    'ngpu': 4,
    'epoch': 100,
    'decay1': 70,
    'decay2': 90,
    'image_size': 840,
    'pretrain': True,
    'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
    'in_channel': 256,
    'out_channel': 256
}
237
data/data_augment.py
Normal file
@ -0,0 +1,237 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
import random
|
||||
from utils.box_utils import matrix_iof
|
||||
|
||||
|
||||
def _crop(image, boxes, labels, landm, img_dim):
|
||||
height, width, _ = image.shape
|
||||
pad_image_flag = True
|
||||
|
||||
for _ in range(250):
|
||||
"""
|
||||
if random.uniform(0, 1) <= 0.2:
|
||||
scale = 1.0
|
||||
else:
|
||||
scale = random.uniform(0.3, 1.0)
|
||||
"""
|
||||
PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
|
||||
scale = random.choice(PRE_SCALES)
|
||||
short_side = min(width, height)
|
||||
w = int(scale * short_side)
|
||||
h = w
|
||||
|
||||
if width == w:
|
||||
l = 0
|
||||
else:
|
||||
l = random.randrange(width - w)
|
||||
if height == h:
|
||||
t = 0
|
||||
else:
|
||||
t = random.randrange(height - h)
|
||||
roi = np.array((l, t, l + w, t + h))
|
||||
|
||||
value = matrix_iof(boxes, roi[np.newaxis])
|
||||
flag = (value >= 1)
|
||||
if not flag.any():
|
||||
continue
|
||||
|
||||
centers = (boxes[:, :2] + boxes[:, 2:]) / 2
|
||||
mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
|
||||
boxes_t = boxes[mask_a].copy()
|
||||
labels_t = labels[mask_a].copy()
|
||||
landms_t = landm[mask_a].copy()
|
||||
        landms_t = landms_t.reshape([-1, 5, 2])

        if boxes_t.shape[0] == 0:
            continue

        image_t = image[roi[1]:roi[3], roi[0]:roi[2]]

        boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
        boxes_t[:, :2] -= roi[:2]
        boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
        boxes_t[:, 2:] -= roi[:2]

        # landm
        landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
        landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
        landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
        landms_t = landms_t.reshape([-1, 10])

        # make sure that the cropped image contains at least one face > 16 pixel at training image scale
        b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
        b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
        mask_b = np.minimum(b_w_t, b_h_t) > 0.0
        boxes_t = boxes_t[mask_b]
        labels_t = labels_t[mask_b]
        landms_t = landms_t[mask_b]

        if boxes_t.shape[0] == 0:
            continue

        pad_image_flag = False

        return image_t, boxes_t, labels_t, landms_t, pad_image_flag
    return image, boxes, labels, landm, pad_image_flag


def _distort(image):

    def _convert(image, alpha=1, beta=0):
        tmp = image.astype(float) * alpha + beta
        tmp[tmp < 0] = 0
        tmp[tmp > 255] = 255
        image[:] = tmp

    image = image.copy()

    if random.randrange(2):

        # brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        # contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        # hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

    else:

        # brightness distortion
        if random.randrange(2):
            _convert(image, beta=random.uniform(-32, 32))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # saturation distortion
        if random.randrange(2):
            _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))

        # hue distortion
        if random.randrange(2):
            tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
            tmp %= 180
            image[:, :, 0] = tmp

        image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)

        # contrast distortion
        if random.randrange(2):
            _convert(image, alpha=random.uniform(0.5, 1.5))

    return image


def _expand(image, boxes, fill, p):
    if random.randrange(2):
        return image, boxes

    height, width, depth = image.shape

    scale = random.uniform(1, p)
    w = int(scale * width)
    h = int(scale * height)

    left = random.randint(0, w - width)
    top = random.randint(0, h - height)

    boxes_t = boxes.copy()
    boxes_t[:, :2] += (left, top)
    boxes_t[:, 2:] += (left, top)
    expand_image = np.empty(
        (h, w, depth),
        dtype=image.dtype)
    expand_image[:, :] = fill
    expand_image[top:top + height, left:left + width] = image
    image = expand_image

    return image, boxes_t


def _mirror(image, boxes, landms):
    _, width, _ = image.shape
    if random.randrange(2):
        image = image[:, ::-1]
        boxes = boxes.copy()
        boxes[:, 0::2] = width - boxes[:, 2::-2]

        # landm
        landms = landms.copy()
        landms = landms.reshape([-1, 5, 2])
        landms[:, :, 0] = width - landms[:, :, 0]
        tmp = landms[:, 1, :].copy()
        landms[:, 1, :] = landms[:, 0, :]
        landms[:, 0, :] = tmp
        tmp1 = landms[:, 4, :].copy()
        landms[:, 4, :] = landms[:, 3, :]
        landms[:, 3, :] = tmp1
        landms = landms.reshape([-1, 10])

    return image, boxes, landms


def _pad_to_square(image, rgb_mean, pad_image_flag):
    if not pad_image_flag:
        return image
    height, width, _ = image.shape
    long_side = max(width, height)
    image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
    image_t[:, :] = rgb_mean
    image_t[0:0 + height, 0:0 + width] = image
    return image_t


def _resize_subtract_mean(image, insize, rgb_mean):
    interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
    interp_method = interp_methods[random.randrange(5)]
    image = cv2.resize(image, (insize, insize), interpolation=interp_method)
    image = image.astype(np.float32)
    image -= rgb_mean
    return image.transpose(2, 0, 1)


class preproc(object):

    def __init__(self, img_dim, rgb_means):
        self.img_dim = img_dim
        self.rgb_means = rgb_means

    def __call__(self, image, targets):
        assert targets.shape[0] > 0, "this image does not have gt"

        boxes = targets[:, :4].copy()
        labels = targets[:, -1].copy()
        landm = targets[:, 4:-1].copy()

        image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim)
        image_t = _distort(image_t)
        image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag)
        image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
        height, width, _ = image_t.shape
        image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
        boxes_t[:, 0::2] /= width
        boxes_t[:, 1::2] /= height

        landm_t[:, 0::2] /= width
        landm_t[:, 1::2] /= height

        labels_t = np.expand_dims(labels_t, 1)
        targets_t = np.hstack((boxes_t, landm_t, labels_t))

        return image_t, targets_t
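
# --- Illustrative usage sketch (added for this document; not part of the original file). ---
# It shows the expected input/output contract of `preproc`: a BGR image and an (N, 15)
# target array of [x1, y1, x2, y2, 10 landmark coordinates, label]. The img_dim of 840 and
# the rgb_means of (104, 117, 123) are assumptions borrowed from common RetinaFace configs,
# not values read from this repository.
if __name__ == "__main__":
    _img = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)  # synthetic BGR image
    _targets = np.array([[100., 100., 200., 220.,
                          120., 130., 180., 130., 150., 160.,
                          130., 190., 170., 190., 1.]])  # one fake face annotation
    _p = preproc(img_dim=840, rgb_means=(104, 117, 123))
    _out_img, _out_targets = _p(_img, _targets)
    print(_out_img.shape, _out_targets.shape)  # expected: (3, 840, 840) and (1, 15)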
258
data/realtime_detect.py
Normal file
@ -0,0 +1,258 @@
import subprocess
import time
import cv2
import torch
import numpy as np
from skimage import transform as trans
from PIL import Image, ImageDraw, ImageFont
from data import cfg_mnet, cfg_re50
from face_api import load_arcface_model, load_npy
from layers.functions.prior_box import PriorBox
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
from utils.nms.py_cpu_nms import py_cpu_nms
from utils.box_utils import decode, decode_landm
import faiss

ppi = 1280
ppi2 = 640
step = 3


def detect_rtsp(rtsp, out_rtsp, net, arcface_model, k_v, args):
    tic_total = time.time()
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    device = torch.device("cpu" if args.cpu else "cuda")
    resize = 1

    # testing begin
    cap = cv2.VideoCapture(rtsp)
    ret, frame = cap.read()
    h, w = frame.shape[:2]

    factor = 0
    if (w > ppi):
        factor = h / w
        frame = cv2.resize(frame, (ppi, int(ppi * factor)))
        h, w = frame.shape[:2]
    arf = 1
    detect_h, detect_w = frame.shape[:2]
    frame_detect = frame
    factor2 = 0
    if (w > ppi2):
        factor2 = h / w
        frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
        detect_h, detect_w = frame_detect.shape[:2]
        arf = w / detect_w
    print(w, h)
    print(detect_w, detect_h)

    # fps = cap.get(cv2.CAP_PROP_FPS)
    # print(fps)
    size = (w, h)
    sizeStr = str(size[0]) + 'x' + str(size[1])
    if (out_rtsp.startswith("rtsp")):
        command = ['ffmpeg',
                   '-y', '-an',
                   '-f', 'rawvideo',
                   '-vcodec', 'rawvideo',
                   '-pix_fmt', 'bgr24',
                   '-s', sizeStr,
                   '-r', "25",
                   '-i', '-',
                   '-c:v', 'libx265',
                   '-b:v', '3000k',
                   '-pix_fmt', 'yuv420p',
                   '-preset', 'ultrafast',
                   '-f', 'rtsp',
                   out_rtsp]
        pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
    number = step
    dets = []
    name_list = []
    font = ImageFont.truetype("font.ttf", 22)
    priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
    scale = scale.to(device)
    scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h, detect_w, detect_h,
                           detect_w, detect_h])
    scale1 = scale1.to(device)

    src1 = np.array([
        [38.3814, 51.6963],
        [73.6186, 51.5014],
        [56.1120, 71.7366],
        [41.6361, 92.3655],
        [70.8167, 92.2041]], dtype=np.float32)
    tform = trans.SimilarityTransform()

    while ret:
        tic_all = time.time()
        if number == step:
            tic = time.time()
            img = np.float32(frame_detect)
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)

            loc, conf, landms = net(img)  # forward pass

            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])

            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K after NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            dets = np.concatenate((dets, landms), axis=1)
            face_list = []
            name_list = []
            print('net forward time: {:.4f}'.format(time.time() - tic))
            start_time_findall = time.time()
            for i, det in enumerate(dets[:1]):
                if det[4] < args.vis_thres:
                    continue
                # boxes, score = det[:4], det[4]
                dst = np.reshape(landms[i], (5, 2))
                dst = dst * arf

                tform.estimate(dst, src1)
                M = tform.params[0:2, :]
                frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
                img112 = frame2[0:112, 0:112, :]
                face_list.append(img112)

            if len(face_list) != 0:
                face_list = np.array(face_list)
                face_list = face_list.transpose((0, 3, 1, 2))
                face_list = np.array(face_list, dtype=np.float32)
                face_list -= 127.5
                face_list /= 127.5
                print(face_list.shape)
                print("warpALL time: " + str(time.time() - start_time_findall))
                # start_time = time.time()
                name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
            # print(name_list)

            # print("findOneframe time: " + str(time.time() - start_time_findall))
            # start_time = time.time()
            # if (len(dets) != 0):
            #     for i, det in enumerate(dets[:]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         boxes = boxes * arf
            #         name = name_list[i]
            #         cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
            #         cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
            start_time = time.time()
            if (len(dets) != 0):
                img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                draw = ImageDraw.Draw(img_PIL)
                for i, det in enumerate(dets[:1]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    boxes = boxes * arf
                    name = name_list[i]
                    if not isinstance(name, str):  # np.unicode is removed in recent NumPy; str is the py3 equivalent
                        name = name.decode('utf8')
                    draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
                    draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
                frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            pipe.stdin.write(frame.tobytes())  # tobytes() replaces the deprecated ndarray.tostring()
            print("drawOneframe time: " + str(time.time() - start_time))
            # start_time = time.time()
            ret, frame = cap.read()
            frame_detect = frame
            number = step
            if (ret != 0 and factor != 0):
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            if (ret != 0 and factor2 != 0):
                frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
            # print("readframe time: " + str(time.time() - start_time))
        else:
            number += 1
            if (len(dets) != 0):
                for i, det in enumerate(dets[:4]):
                    if det[4] < args.vis_thres:
                        continue
                    boxes, score = det[:4], det[4]
                    cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
            # if (len(dets) != 0):
            #     img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            #     draw = ImageDraw.Draw(img_PIL)
            #     for i, det in enumerate(dets[:4]):
            #         if det[4] < args.vis_thres:
            #             continue
            #         boxes, score = det[:4], det[4]
            #         name = name_list[i]
            #         if not isinstance(name, np.unicode):
            #             name = name.decode('utf8')
            #         draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
            #         draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
            #                        width=3)
            #     frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
            start_time = time.time()
            pipe.stdin.write(frame.tobytes())  # tobytes() replaces the deprecated ndarray.tostring()
            print("writeframe time: " + str(time.time() - start_time))
            start_time = time.time()
            ret, frame = cap.read()
            if (ret != 0 and factor != 0):
                frame = cv2.resize(frame, (ppi, int(ppi * factor)))
            print("readframe time: " + str(time.time() - start_time))
        print('all time: {:.4f}'.format(time.time() - tic_all))
    cap.release()
    pipe.terminate()
    print('total time: {:.4f}'.format(time.time() - tic_total))


if __name__ == "__main__":
    cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
    # load the face recognition (ArcFace) model
    arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
    # load the face detection (RetinaFace) model
    retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
    retinaface_model = load_retinaface_model(retinaface_args)
    k_v = load_npy("./Database/student.npy")
    # print(list(k_v.keys()))
    database_name_list = list(k_v.keys())
    vector_list = np.array(list(k_v.values()))
    print(vector_list.shape)
    index = faiss.IndexFlatL2(512)
    index.add(vector_list)

    # detect_rtsp("software.mp4", 'rtsp://localhost/test2', retinaface_model, arcface_model, index ,database_name_list, retinaface_args)
    detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)
101
data/wider_face.py
Normal file
@ -0,0 +1,101 @@
import os
import os.path
import sys
import torch
import torch.utils.data as data
import cv2
import numpy as np


class WiderFaceDetection(data.Dataset):
    def __init__(self, txt_path, preproc=None):
        self.preproc = preproc
        self.imgs_path = []
        self.words = []
        f = open(txt_path, 'r')
        lines = f.readlines()
        isFirst = True
        labels = []
        for line in lines:
            line = line.rstrip()
            if line.startswith('#'):
                if isFirst is True:
                    isFirst = False
                else:
                    labels_copy = labels.copy()
                    self.words.append(labels_copy)
                    labels.clear()
                path = line[2:]
                path = txt_path.replace('label.txt', 'images/') + path
                self.imgs_path.append(path)
            else:
                line = line.split(' ')
                label = [float(x) for x in line]
                labels.append(label)

        self.words.append(labels)

    def __len__(self):
        return len(self.imgs_path)

    def __getitem__(self, index):
        img = cv2.imread(self.imgs_path[index])
        height, width, _ = img.shape

        labels = self.words[index]
        annotations = np.zeros((0, 15))
        if len(labels) == 0:
            return annotations
        for idx, label in enumerate(labels):
            annotation = np.zeros((1, 15))
            # bbox
            annotation[0, 0] = label[0]  # x1
            annotation[0, 1] = label[1]  # y1
            annotation[0, 2] = label[0] + label[2]  # x2
            annotation[0, 3] = label[1] + label[3]  # y2

            # landmarks
            annotation[0, 4] = label[4]    # l0_x
            annotation[0, 5] = label[5]    # l0_y
            annotation[0, 6] = label[7]    # l1_x
            annotation[0, 7] = label[8]    # l1_y
            annotation[0, 8] = label[10]   # l2_x
            annotation[0, 9] = label[11]   # l2_y
            annotation[0, 10] = label[13]  # l3_x
            annotation[0, 11] = label[14]  # l3_y
            annotation[0, 12] = label[16]  # l4_x
            annotation[0, 13] = label[17]  # l4_y
            if (annotation[0, 4] < 0):
                annotation[0, 14] = -1
            else:
                annotation[0, 14] = 1

            annotations = np.append(annotations, annotation, axis=0)
        target = np.array(annotations)
        if self.preproc is not None:
            img, target = self.preproc(img, target)

        return torch.from_numpy(img), target


def detection_collate(batch):
    """Custom collate fn for dealing with batches of images that have a different
    number of associated object annotations (bounding boxes).

    Arguments:
        batch: (tuple) A tuple of tensor images and lists of annotations

    Return:
        A tuple containing:
            1) (tensor) batch of images stacked on their 0 dim
            2) (list of tensors) annotations for a given image are stacked on 0 dim
    """
    targets = []
    imgs = []
    for _, sample in enumerate(batch):
        for _, tup in enumerate(sample):
            if torch.is_tensor(tup):
                imgs.append(tup)
            elif isinstance(tup, type(np.empty(0))):
                annos = torch.from_numpy(tup).float()
                targets.append(annos)

    return (torch.stack(imgs, 0), targets)
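
# --- Illustrative usage sketch (added for this document; commented out because it needs the
# WIDER FACE data on disk). The label path, img_dim and batch size below are assumptions,
# not values taken from this repository's training script.
#
#   from torch.utils.data import DataLoader
#   from data.data_augment import preproc
#
#   dataset = WiderFaceDetection('./data/widerface/train/label.txt',
#                                preproc(840, (104, 117, 123)))
#   loader = DataLoader(dataset, batch_size=32, shuffle=True,
#                       num_workers=4, collate_fn=detection_collate)
#   images, targets = next(iter(loader))   # images: (32, 3, 840, 840), targets: list of (N_i, 15) tensors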
107
dataset.py
Normal file
@ -0,0 +1,107 @@
import numbers
import os
import queue as Queue
import threading

import mxnet as mx
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms


class BackgroundGenerator(threading.Thread):
    def __init__(self, generator, local_rank, max_prefetch=6):
        super(BackgroundGenerator, self).__init__()
        self.queue = Queue.Queue(max_prefetch)
        self.generator = generator
        self.local_rank = local_rank
        self.daemon = True
        self.start()

    def run(self):
        torch.cuda.set_device(self.local_rank)
        for item in self.generator:
            self.queue.put(item)
        self.queue.put(None)

    def next(self):
        next_item = self.queue.get()
        if next_item is None:
            raise StopIteration
        return next_item

    def __next__(self):
        return self.next()

    def __iter__(self):
        return self


class DataLoaderX(DataLoader):
    def __init__(self, local_rank, **kwargs):
        super(DataLoaderX, self).__init__(**kwargs)
        self.stream = torch.cuda.Stream(local_rank)
        self.local_rank = local_rank

    def __iter__(self):
        self.iter = super(DataLoaderX, self).__iter__()
        self.iter = BackgroundGenerator(self.iter, self.local_rank)
        self.preload()
        return self

    def preload(self):
        self.batch = next(self.iter, None)
        if self.batch is None:
            return None
        with torch.cuda.stream(self.stream):
            for k in range(len(self.batch)):
                self.batch[k] = self.batch[k].to(device=self.local_rank,
                                                 non_blocking=True)

    def __next__(self):
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.batch
        if batch is None:
            raise StopIteration
        self.preload()
        return batch


class MXFaceDataset(Dataset):
    def __init__(self, root_dir, local_rank):
        super(MXFaceDataset, self).__init__()
        self.transform = transforms.Compose(
            [transforms.ToPILImage(),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
             ])
        self.root_dir = root_dir
        self.local_rank = local_rank
        path_imgrec = os.path.join(root_dir, 'train.rec')
        path_imgidx = os.path.join(root_dir, 'train.idx')
        self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')
        s = self.imgrec.read_idx(0)
        header, _ = mx.recordio.unpack(s)
        if header.flag > 0:
            self.header0 = (int(header.label[0]), int(header.label[1]))
            self.imgidx = np.array(range(1, int(header.label[0])))
        else:
            self.imgidx = np.array(list(self.imgrec.keys))

    def __getitem__(self, index):
        idx = self.imgidx[index]
        s = self.imgrec.read_idx(idx)
        header, img = mx.recordio.unpack(s)
        label = header.label
        if not isinstance(label, numbers.Number):
            label = label[0]
        label = torch.tensor(label, dtype=torch.long)
        sample = mx.image.imdecode(img).asnumpy()
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label

    def __len__(self):
        return len(self.imgidx)
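
# --- Illustrative usage sketch (added for this document; commented out because it needs an
# MXNet-format train.rec/train.idx on disk). The rec path and batch size are assumptions.
#
#   local_rank = 0
#   trainset = MXFaceDataset(root_dir='./data/ms1m-retinaface', local_rank=local_rank)
#   loader = DataLoaderX(local_rank=local_rank, dataset=trainset,
#                        batch_size=128, shuffle=True, num_workers=4, drop_last=True)
#   for imgs, labels in loader:   # imgs already moved to GPU `local_rank` by preload()
#       pass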
0
eval/__init__.py
Normal file
409
eval/verification.py
Normal file
@ -0,0 +1,409 @@
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""

# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


import datetime
import os
import pickle

import mxnet as mx
import numpy as np
import sklearn
import torch
from mxnet import ndarray as nd
from scipy import interpolate
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold


class LFold:
    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if self.n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        if self.n_splits > 1:
            return self.k_fold.split(indices)
        else:
            return [(indices, indices)]


def calculate_roc(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  nrof_folds=10,
                  pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(
                threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(
                threshold, dist[test_set],
                actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(
            thresholds[best_threshold_index], dist[test_set],
            actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(
        np.logical_and(np.logical_not(predict_issame),
                       np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc


def calculate_val(thresholds,
                  embeddings1,
                  embeddings2,
                  actual_issame,
                  far_target,
                  nrof_folds=10):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(
                threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(
            threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(
        np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    # print(true_accept, false_accept)
    # print(n_same, n_diff)
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds,
                                       embeddings1,
                                       embeddings2,
                                       np.asarray(actual_issame),
                                       nrof_folds=nrof_folds,
                                       pca=pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds,
                                      embeddings1,
                                      embeddings2,
                                      np.asarray(actual_issame),
                                      1e-3,
                                      nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far


@torch.no_grad()
def load_bin(path, image_size):
    try:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f)  # py2
    except UnicodeDecodeError as e:
        with open(path, 'rb') as f:
            bins, issame_list = pickle.load(f, encoding='bytes')  # py3
    data_list = []
    for flip in [0, 1]:
        data = torch.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for idx in range(len(issame_list) * 2):
        _bin = bins[idx]
        img = mx.image.imdecode(_bin)
        if img.shape[1] != image_size[0]:
            img = mx.image.resize_short(img, image_size[0])
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][idx][:] = torch.from_numpy(img.asnumpy())
        if idx % 1000 == 0:
            print('loading bin', idx)
    print(data_list[0].shape)
    return data_list, issame_list


@torch.no_grad()
def test(data_set, backbone, batch_size, nfolds=10):
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            _data = data[bb - batch_size: bb]
            time0 = datetime.datetime.now()
            img = ((_data / 255) - 0.5) / 0.5
            net_out: torch.Tensor = backbone(img)
            _embeddings = net_out.detach().cpu().numpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    acc1 = 0.0
    std1 = 0.0
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list


def dumpR(data_set,
          backbone,
          batch_size,
          name='',
          data_extra=None,
          label_shape=None):
    print('dump verification embedding..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba

            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            time0 = datetime.datetime.now()
            if data_extra is None:
                db = mx.io.DataBatch(data=(_data,), label=(_label,))
            else:
                db = mx.io.DataBatch(data=(_data, _data_extra),
                                     label=(_label,))
            model.forward(db, is_train=False)
            net_out = model.get_outputs()
            _embeddings = net_out[0].asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    actual_issame = np.asarray(issame_list)
    outname = os.path.join('temp.bin')
    with open(outname, 'wb') as f:
        pickle.dump((embeddings, issame_list),
                    f,
                    protocol=pickle.HIGHEST_PROTOCOL)


# if __name__ == '__main__':
#
#     parser = argparse.ArgumentParser(description='do verification')
#     # general
#     parser.add_argument('--data-dir', default='', help='')
#     parser.add_argument('--model',
#                         default='../model/softmax,50',
#                         help='path to load model.')
#     parser.add_argument('--target',
#                         default='lfw,cfp_ff,cfp_fp,agedb_30',
#                         help='test targets.')
#     parser.add_argument('--gpu', default=0, type=int, help='gpu id')
#     parser.add_argument('--batch-size', default=32, type=int, help='')
#     parser.add_argument('--max', default='', type=str, help='')
#     parser.add_argument('--mode', default=0, type=int, help='')
#     parser.add_argument('--nfolds', default=10, type=int, help='')
#     args = parser.parse_args()
#     image_size = [112, 112]
#     print('image_size', image_size)
#     ctx = mx.gpu(args.gpu)
#     nets = []
#     vec = args.model.split(',')
#     prefix = args.model.split(',')[0]
#     epochs = []
#     if len(vec) == 1:
#         pdir = os.path.dirname(prefix)
#         for fname in os.listdir(pdir):
#             if not fname.endswith('.params'):
#                 continue
#             _file = os.path.join(pdir, fname)
#             if _file.startswith(prefix):
#                 epoch = int(fname.split('.')[0].split('-')[1])
#                 epochs.append(epoch)
#         epochs = sorted(epochs, reverse=True)
#         if len(args.max) > 0:
#             _max = [int(x) for x in args.max.split(',')]
#             assert len(_max) == 2
#             if len(epochs) > _max[1]:
#                 epochs = epochs[_max[0]:_max[1]]
#
#     else:
#         epochs = [int(x) for x in vec[1].split('|')]
#     print('model number', len(epochs))
#     time0 = datetime.datetime.now()
#     for epoch in epochs:
#         print('loading', prefix, epoch)
#         sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
#         # arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
#         all_layers = sym.get_internals()
#         sym = all_layers['fc1_output']
#         model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
#         # model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
#         model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
#                                           image_size[1]))])
#         model.set_params(arg_params, aux_params)
#         nets.append(model)
#     time_now = datetime.datetime.now()
#     diff = time_now - time0
#     print('model loading time', diff.total_seconds())
#
#     ver_list = []
#     ver_name_list = []
#     for name in args.target.split(','):
#         path = os.path.join(args.data_dir, name + ".bin")
#         if os.path.exists(path):
#             print('loading.. ', name)
#             data_set = load_bin(path, image_size)
#             ver_list.append(data_set)
#             ver_name_list.append(name)
#
#     if args.mode == 0:
#         for i in range(len(ver_list)):
#             results = []
#             for model in nets:
#                 acc1, std1, acc2, std2, xnorm, embeddings_list = test(
#                     ver_list[i], model, args.batch_size, args.nfolds)
#                 print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
#                 print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
#                 print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
#                 results.append(acc2)
#             print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
#     elif args.mode == 1:
#         raise ValueError
#     else:
#         model = nets[0]
#         dumpR(ver_list[0], model, args.batch_size, args.target)
483
eval_ijbc.py
Normal file
@ -0,0 +1,483 @@
# coding: utf-8

import os
import pickle

import matplotlib
import pandas as pd

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import timeit
import sklearn
import argparse
from sklearn.metrics import roc_curve, auc

from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap
from prettytable import PrettyTable
from pathlib import Path
import sys
import warnings

sys.path.insert(0, "../")
warnings.filterwarnings("ignore")

parser = argparse.ArgumentParser(description='do ijb test')
# general
parser.add_argument('--model-prefix', default='', help='path to load model.')
parser.add_argument('--image-path', default='', type=str, help='')
parser.add_argument('--result-dir', default='.', type=str, help='')
parser.add_argument('--batch-size', default=128, type=int, help='')
parser.add_argument('--network', default='iresnet50', type=str, help='')
parser.add_argument('--job', default='insightface', type=str, help='job name')
parser.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB')
args = parser.parse_args()

target = args.target
model_path = args.model_prefix
image_path = args.image_path
result_dir = args.result_dir
gpu_id = None
use_norm_score = True  # if True, TestMode(N1)
use_detector_score = True  # if True, TestMode(D1)
use_flip_test = True  # if True, TestMode(F1)
job = args.job
batch_size = args.batch_size

import cv2
import numpy as np
import torch
from skimage import transform as trans
import backbones


class Embedding(object):
    def __init__(self, prefix, data_shape, batch_size=1):
        image_size = (112, 112)
        self.image_size = image_size
        weight = torch.load(prefix)
        resnet = eval("backbones.{}".format(args.network))(False).cuda()
        resnet.load_state_dict(weight)
        model = torch.nn.DataParallel(resnet)
        self.model = model
        self.model.eval()
        src = np.array([
            [30.2946, 51.6963],
            [65.5318, 51.5014],
            [48.0252, 71.7366],
            [33.5493, 92.3655],
            [62.7299, 92.2041]], dtype=np.float32)
        src[:, 0] += 8.0
        self.src = src
        self.batch_size = batch_size
        self.data_shape = data_shape

    def get(self, rimg, landmark):

        assert landmark.shape[0] == 68 or landmark.shape[0] == 5
        assert landmark.shape[1] == 2
        if landmark.shape[0] == 68:
            landmark5 = np.zeros((5, 2), dtype=np.float32)
            landmark5[0] = (landmark[36] + landmark[39]) / 2
            landmark5[1] = (landmark[42] + landmark[45]) / 2
            landmark5[2] = landmark[30]
            landmark5[3] = landmark[48]
            landmark5[4] = landmark[54]
        else:
            landmark5 = landmark
        tform = trans.SimilarityTransform()
        tform.estimate(landmark5, self.src)
        M = tform.params[0:2, :]
        img = cv2.warpAffine(rimg,
                             M, (self.image_size[1], self.image_size[0]),
                             borderValue=0.0)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_flip = np.fliplr(img)
        img = np.transpose(img, (2, 0, 1))  # 3*112*112, RGB
        img_flip = np.transpose(img_flip, (2, 0, 1))
        input_blob = np.zeros((2, 3, self.image_size[1], self.image_size[0]), dtype=np.uint8)
        input_blob[0] = img
        input_blob[1] = img_flip
        return input_blob

    @torch.no_grad()
    def forward_db(self, batch_data):
        imgs = torch.Tensor(batch_data).cuda()
        imgs.div_(255).sub_(0.5).div_(0.5)
        feat = self.model(imgs)
        feat = feat.reshape([self.batch_size, 2 * feat.shape[1]])
        return feat.cpu().numpy()


# Split a list into n parts as evenly as possible (len(result) == n); if n exceeds the
# number of elements, the remaining slots are filled with empty lists.
def divideIntoNstrand(listTemp, n):
    twoList = [[] for i in range(n)]
    for i, e in enumerate(listTemp):
        twoList[i % n].append(e)
    return twoList


def read_template_media_list(path):
    # ijb_meta = np.loadtxt(path, dtype=str)
    ijb_meta = pd.read_csv(path, sep=' ', header=None).values
    templates = ijb_meta[:, 1].astype(int)  # np.int was removed in recent NumPy; plain int is equivalent here
    medias = ijb_meta[:, 2].astype(int)
    return templates, medias


# In[ ]:


def read_template_pair_list(path):
    # pairs = np.loadtxt(path, dtype=str)
    pairs = pd.read_csv(path, sep=' ', header=None).values
    # print(pairs.shape)
    # print(pairs[:, 0].astype(np.int))
    t1 = pairs[:, 0].astype(int)
    t2 = pairs[:, 1].astype(int)
    label = pairs[:, 2].astype(int)
    return t1, t2, label


# In[ ]:


def read_image_feature(path):
    with open(path, 'rb') as fid:
        img_feats = pickle.load(fid)
    return img_feats


# In[ ]:


def get_image_feature(img_path, files_list, model_path, epoch, gpu_id):
    batch_size = args.batch_size
    data_shape = (3, 112, 112)

    files = files_list
    print('files:', len(files))
    rare_size = len(files) % batch_size
    faceness_scores = []
    batch = 0
    img_feats = np.empty((len(files), 1024), dtype=np.float32)

    batch_data = np.empty((2 * batch_size, 3, 112, 112))
    embedding = Embedding(model_path, data_shape, batch_size)
    for img_index, each_line in enumerate(files[:len(files) - rare_size]):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        input_blob = embedding.get(img, lmk)

        batch_data[2 * (img_index - batch * batch_size)][:] = input_blob[0]
        batch_data[2 * (img_index - batch * batch_size) + 1][:] = input_blob[1]
        if (img_index + 1) % batch_size == 0:
            print('batch', batch)
            img_feats[batch * batch_size:batch * batch_size +
                      batch_size][:] = embedding.forward_db(batch_data)
            batch += 1
        faceness_scores.append(name_lmk_score[-1])

    batch_data = np.empty((2 * rare_size, 3, 112, 112))
    embedding = Embedding(model_path, data_shape, rare_size)
    for img_index, each_line in enumerate(files[len(files) - rare_size:]):
        name_lmk_score = each_line.strip().split(' ')
        img_name = os.path.join(img_path, name_lmk_score[0])
        img = cv2.imread(img_name)
        lmk = np.array([float(x) for x in name_lmk_score[1:-1]],
                       dtype=np.float32)
        lmk = lmk.reshape((5, 2))
        input_blob = embedding.get(img, lmk)
        batch_data[2 * img_index][:] = input_blob[0]
        batch_data[2 * img_index + 1][:] = input_blob[1]
        if (img_index + 1) % rare_size == 0:
            print('batch', batch)
            img_feats[len(files) -
                      rare_size:][:] = embedding.forward_db(batch_data)
            batch += 1
        faceness_scores.append(name_lmk_score[-1])
    faceness_scores = np.array(faceness_scores).astype(np.float32)
    # img_feats = np.ones( (len(files), 1024), dtype=np.float32) * 0.01
    # faceness_scores = np.ones( (len(files), ), dtype=np.float32 )
    return img_feats, faceness_scores


# In[ ]:


def image2template_feature(img_feats=None, templates=None, medias=None):
    # ==========================================================
    # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]
    # 2. compute media feature.
    # 3. compute template feature.
    # ==========================================================
    unique_templates = np.unique(templates)
    template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))

    for count_template, uqt in enumerate(unique_templates):

        (ind_t,) = np.where(templates == uqt)
        face_norm_feats = img_feats[ind_t]
        face_medias = medias[ind_t]
        unique_medias, unique_media_counts = np.unique(face_medias,
                                                       return_counts=True)
        media_norm_feats = []
        for u, ct in zip(unique_medias, unique_media_counts):
            (ind_m,) = np.where(face_medias == u)
            if ct == 1:
                media_norm_feats += [face_norm_feats[ind_m]]
            else:  # image features from the same video will be aggregated into one feature
                media_norm_feats += [
                    np.mean(face_norm_feats[ind_m], axis=0, keepdims=True)
                ]
        media_norm_feats = np.array(media_norm_feats)
        # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))
        template_feats[count_template] = np.sum(media_norm_feats, axis=0)
        if count_template % 2000 == 0:
            print('Finish Calculating {} template features.'.format(
                count_template))
    # template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))
    template_norm_feats = sklearn.preprocessing.normalize(template_feats)
    # print(template_norm_feats.shape)
    return template_norm_feats, unique_templates


# In[ ]:


def verification(template_norm_feats=None,
                 unique_templates=None,
                 p1=None,
                 p2=None):
    # ==========================================================
    #         Compute set-to-set Similarity Score.
    # ==========================================================
    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
    for count_template, uqt in enumerate(unique_templates):
        template2id[uqt] = count_template

    score = np.zeros((len(p1),))  # save cosine distance between pairs

    total_pairs = np.array(range(len(p1)))
    batchsize = 100000  # small batchsize instead of all pairs in one batch due to the memory limitation
    sublists = [
        total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
    ]
    total_sublists = len(sublists)
    for c, s in enumerate(sublists):
        feat1 = template_norm_feats[template2id[p1[s]]]
        feat2 = template_norm_feats[template2id[p2[s]]]
        similarity_score = np.sum(feat1 * feat2, -1)
        score[s] = similarity_score.flatten()
        if c % 10 == 0:
            print('Finish {}/{} pairs.'.format(c, total_sublists))
    return score


# In[ ]:
def verification2(template_norm_feats=None,
                  unique_templates=None,
                  p1=None,
                  p2=None):
    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
    for count_template, uqt in enumerate(unique_templates):
        template2id[uqt] = count_template
    score = np.zeros((len(p1),))  # save cosine distance between pairs
    total_pairs = np.array(range(len(p1)))
    batchsize = 100000  # small batchsize instead of all pairs in one batch due to the memory limitation
    sublists = [
        total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)
    ]
    total_sublists = len(sublists)
    for c, s in enumerate(sublists):
        feat1 = template_norm_feats[template2id[p1[s]]]
        feat2 = template_norm_feats[template2id[p2[s]]]
        similarity_score = np.sum(feat1 * feat2, -1)
        score[s] = similarity_score.flatten()
        if c % 10 == 0:
            print('Finish {}/{} pairs.'.format(c, total_sublists))
    return score


def read_score(path):
    with open(path, 'rb') as fid:
        img_feats = pickle.load(fid)
    return img_feats


# # Step1: Load Meta Data

# In[ ]:

assert target == 'IJBC' or target == 'IJBB'

# =============================================================
# load image and template relationships for template feature embedding
# tid --> template id,  mid --> media id
# format:
#           image_name tid mid
# =============================================================
start = timeit.default_timer()
templates, medias = read_template_media_list(
    os.path.join('%s/meta' % image_path,
                 '%s_face_tid_mid.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))

# In[ ]:

# =============================================================
# load template pairs for template-to-template verification
# tid : template id,  label : 1/0
# format:
#           tid_1 tid_2 label
# =============================================================
start = timeit.default_timer()
p1, p2, label = read_template_pair_list(
    os.path.join('%s/meta' % image_path,
                 '%s_template_pair_label.txt' % target.lower()))
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))

# # Step 2: Get Image Features

# In[ ]:

# =============================================================
# load image features
# format:
#           img_feats: [image_num x feats_dim] (227630, 512)
# =============================================================
start = timeit.default_timer()
img_path = '%s/loose_crop' % image_path
img_list_path = '%s/meta/%s_name_5pts_score.txt' % (image_path, target.lower())
img_list = open(img_list_path)
files = img_list.readlines()
# files_list = divideIntoNstrand(files, rank_size)
files_list = files

# img_feats
# for i in range(rank_size):
img_feats, faceness_scores = get_image_feature(img_path, files_list,
                                               model_path, 0, gpu_id)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))
print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0],
                                          img_feats.shape[1]))

# # Step3: Get Template Features

# In[ ]:

# =============================================================
# compute template features from image features.
# =============================================================
start = timeit.default_timer()
# ==========================================================
# Norm feature before aggregation into template feature?
# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).
# ==========================================================
# 1. FaceScore (Feature Norm)
# 2. FaceScore (Detector)

if use_flip_test:
    # concat --- F1
    # img_input_feats = img_feats
    # add --- F2
    img_input_feats = img_feats[:, 0:img_feats.shape[1] //
                                2] + img_feats[:, img_feats.shape[1] // 2:]
else:
    img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2]

if use_norm_score:
    img_input_feats = img_input_feats
else:
    # normalise features to remove norm information
    img_input_feats = img_input_feats / np.sqrt(
        np.sum(img_input_feats ** 2, -1, keepdims=True))

if use_detector_score:
    print(img_input_feats.shape, faceness_scores.shape)
    img_input_feats = img_input_feats * faceness_scores[:, np.newaxis]
else:
    img_input_feats = img_input_feats

template_norm_feats, unique_templates = image2template_feature(
    img_input_feats, templates, medias)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))

# # Step 4: Get Template Similarity Scores

# In[ ]:

# =============================================================
# compute verification scores between template pairs.
# =============================================================
start = timeit.default_timer()
score = verification(template_norm_feats, unique_templates, p1, p2)
stop = timeit.default_timer()
print('Time: %.2f s. ' % (stop - start))

# In[ ]:
save_path = os.path.join(result_dir, args.job)
# save_path = result_dir + '/%s_result' % target

if not os.path.exists(save_path):
    os.makedirs(save_path)

score_save_file = os.path.join(save_path, "%s.npy" % target.lower())
np.save(score_save_file, score)

# # Step 5: Get ROC Curves and TPR@FPR Table

# In[ ]:

files = [score_save_file]
methods = []
scores = []
for file in files:
    methods.append(Path(file).stem)
    scores.append(np.load(file))

methods = np.array(methods)
scores = dict(zip(methods, scores))
colours = dict(
    zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))
x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1]
tpr_fpr_table = PrettyTable(['Methods'] + [str(x) for x in x_labels])
fig = plt.figure()
for method in methods:
    fpr, tpr, _ = roc_curve(label, scores[method])
    roc_auc = auc(fpr, tpr)
    fpr = np.flipud(fpr)
    tpr = np.flipud(tpr)  # select largest tpr at same fpr
    plt.plot(fpr,
             tpr,
             color=colours[method],
             lw=1,
             label=('[%s (AUC = %0.4f %%)]' %
                    (method.split('-')[-1], roc_auc * 100)))
    tpr_fpr_row = []
    tpr_fpr_row.append("%s-%s" % (method, target))
    for fpr_iter in np.arange(len(x_labels)):
        _, min_index = min(
            list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr)))))
        tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100))
    tpr_fpr_table.add_row(tpr_fpr_row)
plt.xlim([10 ** -6, 0.1])
plt.ylim([0.3, 1.0])
plt.grid(linestyle='--', linewidth=1)
plt.xticks(x_labels)
plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True))
plt.xscale('log')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC on IJB')
plt.legend(loc="lower right")
fig.savefig(os.path.join(save_path, '%s.pdf' % target.lower()))
print(tpr_fpr_table)
377
face_api.py
Normal file
@ -0,0 +1,377 @@
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
import torch
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from anti import anti_spoofing, load_anti_model
|
||||
from backbones import iresnet50, iresnet18, iresnet100
|
||||
from retinaface_detect import load_retinaface_model, detect_one, detect_video, set_retinaface_conf
|
||||
from torch2trt import torch2trt, TRTModule
|
||||
|
||||
threshold = 0.7
|
||||
|
||||
|
||||
# 读取112x112的本地图片并变换通道位置归一化
|
||||
def load_image(img_path):
|
||||
img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)
|
||||
img = img.transpose((2, 0, 1))
|
||||
img = img[np.newaxis, :, :, :]
|
||||
img = np.array(img, dtype=np.float32)
|
||||
img -= 127.5
|
||||
img /= 127.5
|
||||
return img
|
||||
|
||||
|
||||
# 计算两个特征向量的欧式距离
|
||||
def findEuclideanDistance(source_representation, test_representation):
|
||||
euclidean_distance = source_representation - test_representation
|
||||
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
|
||||
euclidean_distance = np.sqrt(euclidean_distance)
|
||||
return euclidean_distance
|
||||
|
||||
|
||||
# 计算两个特征向量的余弦距离
|
||||
def findCosineDistance(source_representation, test_representation):
|
||||
a = np.matmul(np.transpose(source_representation), test_representation)
|
||||
b = np.sum(np.multiply(source_representation, source_representation))
|
||||
c = np.sum(np.multiply(test_representation, test_representation))
|
||||
return 1 - (a / (np.sqrt(b) * np.sqrt(c)))
|
||||
|
||||
|
||||
# 归一化欧氏距离
|
||||
def l2_normalize(x):
|
||||
return x / np.sqrt(np.sum(np.multiply(x, x)))
|
||||
|
||||
|
||||
# 归一化余弦距离
|
||||
def cosin_metric(x1, x2):
|
||||
return np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
|
||||
|
||||
|
||||
# 加载保存的姓名、人脸特征向量的人脸库
|
||||
def load_npy(path):
|
||||
data = np.load(path, allow_pickle=True)
|
||||
data = data.item()
|
||||
return data
|
||||
|
||||
|
||||
# 批量化生成人脸特征向量并保存到人脸库
|
||||
def create_database_batch(path, model, database_path):
|
||||
name_list = os.listdir(path)
|
||||
k_v = {}
|
||||
if os.path.exists(database_path):
|
||||
k_v = np.load(database_path, allow_pickle=True)
|
||||
k_v = k_v.item()
|
||||
batch = 256
|
||||
order_name = []
|
||||
order_path = []
|
||||
emb_list = []
|
||||
for name in name_list[:]:
|
||||
img_path = os.path.join(path, name)
|
||||
# for img_name in img_path[:1]:
|
||||
order_name.append(name[:-4])
|
||||
order_path.append(img_path)
|
||||
order_img = np.zeros((len(order_path), 3, 112, 112), dtype=np.float32)
|
||||
for index, img_path in enumerate(order_path):
|
||||
order_img[index] = load_image(img_path)
|
||||
print(order_img.shape)
|
||||
order_img = torch.from_numpy(order_img)
|
||||
order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
now = 0
|
||||
number = len(order_img)
|
||||
with torch.no_grad():
|
||||
while now < number:
|
||||
if now + batch < number:
|
||||
emb = model(order_img[now:now + batch])
|
||||
else:
|
||||
emb = model(order_img[now:])
|
||||
now = now + batch
|
||||
emb = emb.cpu().numpy()
|
||||
for em in emb:
|
||||
emb_list.append(em)
|
||||
print("batch" + str(now))
|
||||
|
||||
for i, emb in enumerate(emb_list):
|
||||
k_v[order_name[i]] = l2_normalize(emb)
|
||||
np.save(database_path, k_v)
|
||||
|
||||
def create_database_from_img(order_name, order_img, model, database_path, cpu_or_cuda):
|
||||
k_v = {}
|
||||
if os.path.exists(database_path):
|
||||
k_v = np.load(database_path, allow_pickle=True)
|
||||
k_v = k_v.item()
|
||||
batch = 256
|
||||
emb_list = []
|
||||
|
||||
print(order_img.shape)
|
||||
order_img = torch.from_numpy(order_img)
|
||||
order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
now = 0
|
||||
number = len(order_img)
|
||||
with torch.no_grad():
|
||||
while now < number:
|
||||
if now + batch < number:
|
||||
emb = model(order_img[now:now + batch])
|
||||
else:
|
||||
emb = model(order_img[now:])
|
||||
now = now + batch
|
||||
emb = emb.cpu().numpy()
|
||||
for em in emb:
|
||||
emb_list.append(em)
|
||||
print("batch" + str(now))
|
||||
for i, emb in enumerate(emb_list):
|
||||
k_v[order_name[i]] = l2_normalize(emb)
|
||||
np.save(database_path, k_v)
|
||||
|
||||
# Add one person's name and face feature vector to the database; create the database if it does not exist
|
||||
def add_one_to_database(img, model, name, database_path, cpu_or_cuda):
|
||||
img = torch.from_numpy(img)
|
||||
img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
with torch.no_grad():
|
||||
pred = model(img)
|
||||
pred = pred.cpu().numpy()
|
||||
k_v = {}
|
||||
if os.path.exists(database_path):
|
||||
k_v = np.load(database_path, allow_pickle=True)
|
||||
k_v = k_v.item()
|
||||
k_v[name] = l2_normalize(pred)
|
||||
np.save(database_path, k_v)
|
||||
|
||||
|
||||
# Find which face feature vector in the database is closest to the given one
|
||||
def findmindistance(pred, threshold, k_v):
|
||||
distance = 10
|
||||
most_like = ""
|
||||
for name in k_v.keys():
|
||||
tmp = findEuclideanDistance(k_v[name], pred)
|
||||
if distance > tmp:
|
||||
distance = tmp
|
||||
most_like = name
|
||||
if distance < threshold:
|
||||
return most_like, distance
|
||||
else:
|
||||
return -1, distance
|
||||
|
||||
|
||||
def faiss_find_face(pred, index, database_name_list):
|
||||
name_list = []
|
||||
start_time = time.time()
|
||||
D, I = index.search(pred, 1)
|
||||
end_time = time.time()
|
||||
# print("faiss cost %fs" % (end_time - start_time))
|
||||
# print(D, I)
|
||||
if len(pred) == 1:
|
||||
if D[0][0] < threshold:
|
||||
# print(database_name_list[I[0][0]])
|
||||
return database_name_list[I[0][0]], D[0][0]
|
||||
else:
|
||||
return "unknown", D[0][0]
|
||||
else:
|
||||
for i,index in enumerate(I):
|
||||
if D[i][0] < threshold:
|
||||
#print(database_name_list[I[0][0]])
|
||||
name_list.append(database_name_list[index[0]]+str(D[i][0]))
|
||||
else:
|
||||
name_list.append("unknown"+str(D[i][0]))
|
||||
return name_list
|
||||
|
||||
|
||||
# Identify a single face against the face database
|
||||
def findOne(img, model, index, database_name_list, cpu_or_cuda):
|
||||
img = torch.from_numpy(img)
|
||||
img = img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
with torch.no_grad():
|
||||
start_time = time.time()
|
||||
pred = model(img)
|
||||
end_time = time.time()
|
||||
print("predOne time: " + str(end_time - start_time))
|
||||
pred = pred.cpu().numpy()
|
||||
# start_time = time.time()
|
||||
# name, distance = findmindistance(l2_normalize(pred), threshold=threshold, k_v=k_v)
|
||||
# end_time = time.time()
|
||||
# print("baoli time: " + str(end_time - start_time))
|
||||
name, distance = faiss_find_face(l2_normalize(pred), index, database_name_list)
|
||||
print(pred.shape)
|
||||
if name != -1:
|
||||
mo = r'[\u4e00-\u9fa5_a-zA-Z0-9]*'
|
||||
name = re.match(mo, name)
|
||||
return name.group(0), distance
|
||||
else:
|
||||
return "unknown", distance
|
||||
|
||||
|
||||
# Identify every face in the given face list against the face database
|
||||
def findAll(imglist, model, index, database_name_list, cpu_or_cuda):
|
||||
imglist = torch.from_numpy(imglist)
|
||||
imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
with torch.no_grad():
|
||||
name_list =[]
|
||||
start_time = time.time()
|
||||
pred = model(imglist)
|
||||
end_time = time.time()
|
||||
print("predOne time: " + str(end_time - start_time))
|
||||
pred = pred.cpu().numpy()
|
||||
start_time = time.time()
|
||||
#name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
|
||||
for pr in pred:
|
||||
pr = np.expand_dims(l2_normalize(pr), 0)
|
||||
# #print(pr.shape)
|
||||
name, distance = faiss_find_face(l2_normalize(pr), index, database_name_list)
|
||||
#name_list.append(name+" "+str(distance))
|
||||
name_list.append(name)
|
||||
# for pr in pred:
|
||||
# name, distance = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
|
||||
# if name != -1:
|
||||
# mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
|
||||
# name = re.match(mo, name)
|
||||
# name_list.append(name.group(0) + str(distance))
|
||||
# else:
|
||||
# name_list.append("unknown" + str(distance))
|
||||
end_time = time.time()
|
||||
print("searchALL time: " + str(end_time - start_time))
|
||||
return name_list
|
||||
|
||||
|
||||
# Extract 512-dimensional feature vectors
|
||||
def embedding(order_img, model, cpu_or_cuda):
|
||||
number = len(order_img)
|
||||
order_img = torch.from_numpy(order_img)
|
||||
order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
batch = 64
|
||||
emb_list = []
|
||||
now = 0
|
||||
with torch.no_grad():
|
||||
while now < number:
|
||||
if now + batch < number:
|
||||
emb = model(order_img[now:now + batch])
|
||||
else:
|
||||
emb = model(order_img[now:])
|
||||
now = now + batch
|
||||
emb = emb.cpu().numpy()
|
||||
for em in emb:
|
||||
emb_list.append(l2_normalize(em))
|
||||
# print("batch" + str(now))
|
||||
emb_list = np.array(emb_list)
|
||||
return emb_list
|
||||
|
||||
|
||||
# Process the folder of faces to cluster; return the feature-vector list and the file-name list
|
||||
def get_claster_tmp_file_embedding(file_path, retinaface_model, retinaface_args, arcface_model, cpu_or_cuda):
|
||||
img_name = os.listdir(file_path)
|
||||
img_list = []
|
||||
for name in img_name:
|
||||
all_face, box_and_point = detect_one(os.path.join(file_path, name), retinaface_model, retinaface_args)
|
||||
img_list.append(all_face[0])
|
||||
img_list = np.array(img_list)
|
||||
# print(img_list.shape)
|
||||
emb_list = embedding(img_list, arcface_model, cpu_or_cuda)
|
||||
return emb_list, img_name
|
||||
|
||||
|
||||
# Group images of the same person into one cluster
|
||||
def cluster(emb_list, name_list):
|
||||
all_claster = []
|
||||
cla = []
|
||||
in_claster_name = []
|
||||
img_number = len(emb_list)
|
||||
for index, emb in enumerate(emb_list):
|
||||
if name_list[index] in in_claster_name:
|
||||
continue
|
||||
for j in range(img_number - index - 1):
|
||||
if findEuclideanDistance(emb, emb_list[index + 1 + j]) < threshold:
|
||||
if name_list[index + 1 + j] not in in_claster_name:
|
||||
cla.append(name_list[index + 1 + j])
|
||||
in_claster_name.append(name_list[index + 1 + j])
|
||||
cla.append(name_list[index])
|
||||
in_claster_name.append(name_list[index])
|
||||
all_claster.append(cla)
|
||||
cla = []
|
||||
return all_claster
|
||||
|
||||
|
||||
# Load the face recognition model
|
||||
def load_arcface_model(model_path, cpu_or_cuda):
|
||||
if cpu_or_cuda == "trt":
|
||||
model = TRTModule()
|
||||
model.load_state_dict(torch.load('./model/arcface_trt.pth'))
|
||||
elif cpu_or_cuda == "trt_new":
|
||||
model = iresnet100()
|
||||
model.load_state_dict(torch.load(model_path, map_location="cuda"))
|
||||
model = model.eval()
|
||||
model.to(torch.device("cuda"))
|
||||
x = torch.ones((1, 3, 112, 112)).to(torch.device("cuda"))
|
||||
model = torch2trt(model, [x], max_batch_size=4)
|
||||
torch.save(model.state_dict(), './model/arcface_trt.pth')
|
||||
else:
|
||||
model = iresnet100()
|
||||
model.load_state_dict(torch.load(model_path, map_location=cpu_or_cuda))
|
||||
model = model.eval()
|
||||
model.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
return model
|
||||
|
||||
|
||||
# Check whether two faces belong to the same person
|
||||
def face_verification(img1, img2, model, cpu_or_cuda):
|
||||
img_list = np.concatenate((img1, img2), axis=0)
|
||||
img_list = torch.from_numpy(img_list)
|
||||
img_list = img_list.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
with torch.no_grad():
|
||||
pred = model(img_list)
|
||||
pred = pred.cpu().numpy()
|
||||
distance = findEuclideanDistance(l2_normalize(pred[0]), l2_normalize(pred[1]))
|
||||
# print("EuclideanDistance is :" + str(distance))
|
||||
if distance < threshold:
|
||||
return 'same ',distance
|
||||
else:
|
||||
return 'different ', distance
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
|
||||
# retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
|
||||
# retinaface_model = load_retinaface_model(retinaface_args)
|
||||
#
|
||||
# anti_spoofing_model_path = "model/anti_spoof_models"
|
||||
# anti_model = load_anti_model(anti_spoofing_model_path, 0)
|
||||
#
|
||||
# k_v = load_npy("./Database/student.npy")
|
||||
# Compare two faces
|
||||
# img1, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0001.jpg", retinaface_model, retinaface_args)
|
||||
# img2, box_and_point = detect_one("D:\Download\lfw\lfw\Aaron_Peirsol\Aaron_Peirsol_0002.jpg", retinaface_model, retinaface_args)
|
||||
# print(face_verification(img1, img2, arcface_model))
|
||||
|
||||
# img3 = load_image(r"D:\Download\out\alig_students\student.jpg")
|
||||
# img3 = torch.from_numpy(img3)
|
||||
# Anti-spoofing check on a single face
|
||||
# img3, b_p = detect_one(r"C:\Users\ASUS\Desktop\face\IMG_20210525_113950.jpg", retinaface_model, retinaface_args)
|
||||
# b = b_p[0]
|
||||
# w = b[2] - b[0]
|
||||
# h = b[3] - b[1]
|
||||
# b[2] = w
|
||||
# b[3] = h
|
||||
# label, value = anti_spoofing("./img/recognition/000_0.bmp", "model/anti_spoof_models", 0, np.array(b[:4], int), anti_model)
|
||||
# print(label,value)
|
||||
# name = findOne(img3, arcface_model, k_v, cpu_or_cuda)
|
||||
# print(name)
|
||||
|
||||
# Face clustering
|
||||
# emb_list, name_list = get_claster_tmp_file_embedding("./img/cluster_tmp_file/face", retinaface_model,
|
||||
# retinaface_args, arcface_model, cpu_or_cuda)
|
||||
# print(cluster(emb_list, name_list))
|
||||
|
||||
# img3, box_and_point = detect_one("D:\Download\out\students\student.jpg", retinaface_model, retinaface_args)
|
||||
# print(embedding(img3,arcface_model).shape)
|
||||
|
||||
# Add one face to the face database
|
||||
# add_one_to_database(img1,arcface_model,"Aaron_Peirsol","./Database/student.npy")
|
||||
# name = findOne(img1, arcface_model, k_v)
|
||||
# print(name)
|
||||
|
||||
# Batch-add faces to the face database
|
||||
create_database_batch(r"D:\Download\out\alig_students_all", arcface_model, "./Database/sfz.npy")
|
||||
|
||||
# Recognize faces in a video
|
||||
# detect_video("software.mp4","out.avi",retinaface_model,arcface_model,k_v,retinaface_args)
|
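Note: faiss_find_face, findOne and findAll above expect a prebuilt faiss `index` plus a parallel `database_name_list`, but face_api.py itself never constructs them. The snippet below is a minimal sketch (not part of the original file, mirroring how realtime_detect.py later in this changeset builds its index) of one way to create both from a saved database.

import numpy as np
import faiss

from face_api import load_npy, l2_normalize

k_v = load_npy("./Database/student.npy")                     # dict: name -> 512-d embedding
database_name_list = list(k_v.keys())
vector_list = np.array(list(k_v.values()), dtype=np.float32).reshape(-1, 512)

index = faiss.IndexFlatL2(512)                               # exact squared-L2 search
index.add(vector_list)

# query with a single L2-normalized embedding of shape (1, 512):
# D, I = index.search(l2_normalize(embedding).astype(np.float32), 1)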
98
gender_age.py
Normal file
@ -0,0 +1,98 @@
|
||||
import datetime
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
from retinaface_detect import detect_one, load_retinaface_model, set_retinaface_conf
|
||||
|
||||
|
||||
# Gender/age model configuration
|
||||
class ConfGenderModel(object):
|
||||
def __init__(self, image_size, image, model, gpu, det):
|
||||
self.image_size = image_size
|
||||
self.image = image
|
||||
self.gpu = gpu
|
||||
self.model = model
|
||||
self.det = det
|
||||
|
||||
|
||||
# Instantiate a configuration
|
||||
def set_gender_conf():
|
||||
args = ConfGenderModel(image_size='112,112',
|
||||
image=r'C:\Users\ASUS\Desktop\man.png',
|
||||
gpu=-1,
|
||||
model='model/model,0',
|
||||
det=0)
|
||||
return args
|
||||
|
||||
|
||||
# Load the gender/age model
|
||||
def load_gender_model(args, layer):
|
||||
if args.gpu >= 0:
|
||||
ctx = mx.gpu(args.gpu)
|
||||
else:
|
||||
ctx = mx.cpu()
|
||||
_vec = args.image_size.split(',')
|
||||
assert len(_vec) == 2
|
||||
image_size = (int(_vec[0]), int(_vec[1]))
|
||||
|
||||
_vec = args.model.split(',')
|
||||
assert len(_vec) == 2
|
||||
prefix = _vec[0]
|
||||
epoch = int(_vec[1])
|
||||
print('loading', prefix, epoch)
|
||||
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
|
||||
all_layers = sym.get_internals()
|
||||
sym = all_layers[layer + '_output']
|
||||
model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
|
||||
model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
|
||||
model.set_params(arg_params, aux_params)
|
||||
return model
|
||||
|
||||
|
||||
# Forward inference
|
||||
def get_ga(model, img):
|
||||
# print(data)
|
||||
model.forward(img, is_train=False)
|
||||
ret = model.get_outputs()[0].asnumpy()
|
||||
g = ret[:, 0:2].flatten()
|
||||
gender = np.argmax(g)
|
||||
a = ret[:, 2:202].reshape((100, 2))
|
||||
a = np.argmax(a, axis=1)
|
||||
age = int(sum(a))
|
||||
return gender, age
|
||||
|
||||
|
||||
# Predict the gender and age of every face in the list
|
||||
def gender_age(img_list, gender_model):
|
||||
gender_list = []
|
||||
age_list = []
|
||||
if len(img_list) == 0:
|
||||
print("find no face")
|
||||
else:
|
||||
time_now = datetime.datetime.now()
|
||||
img_list *= 127.5
|
||||
img_list += 127.5
|
||||
|
||||
for img in img_list:
|
||||
img = np.expand_dims(img, axis=0)
|
||||
img = mx.nd.array(img)
|
||||
img = mx.io.DataBatch(data=(img,))
|
||||
gender, age = get_ga(gender_model, img)
|
||||
if gender == 1:
|
||||
gender_list.append("man")
|
||||
else:
|
||||
gender_list.append('woman')
|
||||
age_list.append(age)
|
||||
time_now2 = datetime.datetime.now()
|
||||
diff = time_now2 - time_now
|
||||
print('time cost', diff.total_seconds())
|
||||
return gender_list,age_list
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = set_gender_conf()
|
||||
retinaface_args = set_retinaface_conf()
|
||||
gender_model = load_gender_model(args, 'fc1')
|
||||
retinaface_model = load_retinaface_model(retinaface_args)
|
||||
img_list, box_and_point = detect_one(args.image, retinaface_model,retinaface_args)
|
||||
gender_list, age_list = gender_age(img_list, gender_model)
|
||||
print(gender_list)
|
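A small worked example (illustration only, with made-up numbers) of how get_ga decodes the 202-dimensional model output: the first two values are gender logits, and the remaining 200 values form 100 two-way age bins whose argmaxes are summed into an age.

import numpy as np

ret = np.zeros((1, 202), dtype=np.float32)
ret[0, 0:2] = [0.1, 0.9]                        # gender logits -> argmax = 1 -> "man"
ret[0, 2:2 + 2 * 23] = np.tile([0.2, 0.8], 23)  # the first 23 age bins vote "1"

g = ret[:, 0:2].flatten()
gender = np.argmax(g)                           # 1
a = ret[:, 2:202].reshape((100, 2))
age = int(np.sum(np.argmax(a, axis=1)))         # 23 = number of bins voting 1
print(gender, age)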
49
gender_model.py
Normal file
@ -0,0 +1,49 @@
|
||||
import numpy as np
|
||||
import mxnet as mx
|
||||
|
||||
|
||||
# Load the gender/age model
|
||||
def get_model(ctx, image_size, model_str, layer):
|
||||
_vec = model_str.split(',')
|
||||
assert len(_vec) == 2
|
||||
prefix = _vec[0]
|
||||
epoch = int(_vec[1])
|
||||
print('loading', prefix, epoch)
|
||||
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
|
||||
all_layers = sym.get_internals()
|
||||
sym = all_layers[layer + '_output']
|
||||
model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
|
||||
model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
|
||||
model.set_params(arg_params, aux_params)
|
||||
return model
|
||||
|
||||
|
||||
class GenderModel:
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
if args.gpu >= 0:
|
||||
ctx = mx.gpu(args.gpu)
|
||||
else:
|
||||
ctx = mx.cpu()
|
||||
_vec = args.image_size.split(',')
|
||||
assert len(_vec) == 2
|
||||
image_size = (int(_vec[0]), int(_vec[1]))
|
||||
self.model = None
|
||||
if len(args.model) > 0:
|
||||
self.model = get_model(ctx, image_size, args.model, 'fc1')
|
||||
|
||||
self.det_minsize = 50
|
||||
self.det_threshold = [0.6, 0.7, 0.8]
|
||||
# self.det_factor = 0.9
|
||||
self.image_size = image_size
|
||||
|
||||
def get_ga(self, data):
|
||||
# print(data)
|
||||
self.model.forward(data, is_train=False)
|
||||
ret = self.model.get_outputs()[0].asnumpy()
|
||||
g = ret[:, 0:2].flatten()
|
||||
gender = np.argmax(g)
|
||||
a = ret[:, 2:202].reshape((100, 2))
|
||||
a = np.argmax(a, axis=1)
|
||||
age = int(sum(a))
|
||||
return gender, age
|
BIN
img/search/000_1.bmp
Normal file
BIN
img/search/002_1.bmp
Normal file
BIN
img/search/377_3.bmp
Normal file
BIN
img/search/face/000_0.bmp
Normal file
BIN
img/search/face/000_1.bmp
Normal file
BIN
img/search/face/000_2.bmp
Normal file
BIN
img/search/face/000_3.bmp
Normal file
BIN
img/search/face/000_4.bmp
Normal file
BIN
img/search/face/001_0.bmp
Normal file
BIN
img/search/face/001_1.bmp
Normal file
BIN
img/search/face/001_2.bmp
Normal file
BIN
img/search/face/001_3.bmp
Normal file
BIN
img/search/face/001_4.bmp
Normal file
BIN
img/search/face/002_0.bmp
Normal file
BIN
img/search/face/002_1.bmp
Normal file
BIN
img/search/face/002_2.bmp
Normal file
BIN
img/search/face/002_3.bmp
Normal file
BIN
img/search/face/002_4.bmp
Normal file
BIN
img/search/face/003_0.bmp
Normal file
BIN
img/search/face/003_1.bmp
Normal file
BIN
img/search/face/003_2.bmp
Normal file
BIN
img/search/face/003_3.bmp
Normal file
BIN
img/search/face/003_4.bmp
Normal file
BIN
img/search/face/004_0.bmp
Normal file
BIN
img/search/face/004_1.bmp
Normal file
BIN
img/search/face/004_2.bmp
Normal file
BIN
img/search/face/004_3.bmp
Normal file
BIN
img/search/face/004_4.bmp
Normal file
2
layers/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
from .functions import *
|
||||
from .modules import *
|
BIN
layers/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
layers/functions/__pycache__/prior_box.cpython-38.pyc
Normal file
34
layers/functions/prior_box.py
Normal file
@ -0,0 +1,34 @@
|
||||
import torch
|
||||
from itertools import product as product
|
||||
import numpy as np
|
||||
from math import ceil
|
||||
|
||||
|
||||
class PriorBox(object):
|
||||
def __init__(self, cfg, image_size=None, phase='train'):
|
||||
super(PriorBox, self).__init__()
|
||||
self.min_sizes = cfg['min_sizes']
|
||||
self.steps = cfg['steps']
|
||||
self.clip = cfg['clip']
|
||||
self.image_size = image_size
|
||||
self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
|
||||
self.name = "s"
|
||||
|
||||
def forward(self):
|
||||
anchors = []
|
||||
for k, f in enumerate(self.feature_maps):
|
||||
min_sizes = self.min_sizes[k]
|
||||
for i, j in product(range(f[0]), range(f[1])):
|
||||
for min_size in min_sizes:
|
||||
s_kx = min_size / self.image_size[1]
|
||||
s_ky = min_size / self.image_size[0]
|
||||
dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]]
|
||||
dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]]
|
||||
for cy, cx in product(dense_cy, dense_cx):
|
||||
anchors += [cx, cy, s_kx, s_ky]
|
||||
|
||||
# back to torch land
|
||||
output = torch.Tensor(anchors).view(-1, 4)
|
||||
if self.clip:
|
||||
output.clamp_(max=1, min=0)
|
||||
return output
|
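For reference, a sketch of how many priors PriorBox.forward generates. The config values below are the ones commonly used for the mobile0.25 setup and are an assumption here, since cfg_mnet itself lives in data/ and is not shown in this changeset.

from math import ceil

cfg = {
    'min_sizes': [[16, 32], [64, 128], [256, 512]],   # assumed mobile0.25 values
    'steps': [8, 16, 32],
    'clip': False,
}
image_size = (640, 640)

total = 0
for k, step in enumerate(cfg['steps']):
    fm_h = ceil(image_size[0] / step)
    fm_w = ceil(image_size[1] / step)
    total += fm_h * fm_w * len(cfg['min_sizes'][k])

print(total)   # 80*80*2 + 40*40*2 + 20*20*2 = 16800 anchors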
3
layers/modules/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .multibox_loss import MultiBoxLoss
|
||||
|
||||
__all__ = ['MultiBoxLoss']
|
BIN
layers/modules/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
layers/modules/__pycache__/multibox_loss.cpython-38.pyc
Normal file
125
layers/modules/multibox_loss.py
Normal file
@ -0,0 +1,125 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.autograd import Variable
|
||||
from utils.box_utils import match, log_sum_exp
|
||||
from data import cfg_mnet
|
||||
GPU = cfg_mnet['gpu_train']
|
||||
|
||||
class MultiBoxLoss(nn.Module):
|
||||
"""SSD Weighted Loss Function
|
||||
Compute Targets:
|
||||
1) Produce Confidence Target Indices by matching ground truth boxes
|
||||
with (default) 'priorboxes' that have jaccard index > threshold parameter
|
||||
(default threshold: 0.5).
|
||||
2) Produce localization target by 'encoding' variance into offsets of ground
|
||||
truth boxes and their matched 'priorboxes'.
|
||||
3) Hard negative mining to filter the excessive number of negative examples
|
||||
that comes with using a large number of default bounding boxes.
|
||||
(default negative:positive ratio 3:1)
|
||||
Objective Loss:
|
||||
L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
|
||||
Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
|
||||
weighted by α which is set to 1 by cross val.
|
||||
Args:
|
||||
c: class confidences,
|
||||
l: predicted boxes,
|
||||
g: ground truth boxes
|
||||
N: number of matched default boxes
|
||||
See: https://arxiv.org/pdf/1512.02325.pdf for more details.
|
||||
"""
|
||||
|
||||
def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target):
|
||||
super(MultiBoxLoss, self).__init__()
|
||||
self.num_classes = num_classes
|
||||
self.threshold = overlap_thresh
|
||||
self.background_label = bkg_label
|
||||
self.encode_target = encode_target
|
||||
self.use_prior_for_matching = prior_for_matching
|
||||
self.do_neg_mining = neg_mining
|
||||
self.negpos_ratio = neg_pos
|
||||
self.neg_overlap = neg_overlap
|
||||
self.variance = [0.1, 0.2]
|
||||
|
||||
def forward(self, predictions, priors, targets):
|
||||
"""Multibox Loss
|
||||
Args:
|
||||
predictions (tuple): A tuple containing loc preds, conf preds,
|
||||
and prior boxes from SSD net.
|
||||
conf shape: torch.size(batch_size,num_priors,num_classes)
|
||||
loc shape: torch.size(batch_size,num_priors,4)
|
||||
priors shape: torch.size(num_priors,4)
|
||||
|
||||
ground_truth (tensor): Ground truth boxes and labels for a batch,
|
||||
shape: [batch_size,num_objs,5] (last idx is the label).
|
||||
"""
|
||||
|
||||
loc_data, conf_data, landm_data = predictions
|
||||
priors = priors
|
||||
num = loc_data.size(0)
|
||||
num_priors = (priors.size(0))
|
||||
|
||||
# match priors (default boxes) and ground truth boxes
|
||||
loc_t = torch.Tensor(num, num_priors, 4)
|
||||
landm_t = torch.Tensor(num, num_priors, 10)
|
||||
conf_t = torch.LongTensor(num, num_priors)
|
||||
for idx in range(num):
|
||||
truths = targets[idx][:, :4].data
|
||||
labels = targets[idx][:, -1].data
|
||||
landms = targets[idx][:, 4:14].data
|
||||
defaults = priors.data
|
||||
match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx)
|
||||
if GPU:
|
||||
loc_t = loc_t.cuda()
|
||||
conf_t = conf_t.cuda()
|
||||
landm_t = landm_t.cuda()
|
||||
|
||||
zeros = torch.tensor(0).cuda()
|
||||
# landm Loss (Smooth L1)
|
||||
# Shape: [batch,num_priors,10]
|
||||
pos1 = conf_t > zeros
|
||||
num_pos_landm = pos1.long().sum(1, keepdim=True)
|
||||
N1 = max(num_pos_landm.data.sum().float(), 1)
|
||||
pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
|
||||
landm_p = landm_data[pos_idx1].view(-1, 10)
|
||||
landm_t = landm_t[pos_idx1].view(-1, 10)
|
||||
loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum')
|
||||
|
||||
|
||||
pos = conf_t != zeros
|
||||
conf_t[pos] = 1
|
||||
|
||||
# Localization Loss (Smooth L1)
|
||||
# Shape: [batch,num_priors,4]
|
||||
pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
|
||||
loc_p = loc_data[pos_idx].view(-1, 4)
|
||||
loc_t = loc_t[pos_idx].view(-1, 4)
|
||||
loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')
|
||||
|
||||
# Compute max conf across batch for hard negative mining
|
||||
batch_conf = conf_data.view(-1, self.num_classes)
|
||||
loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
|
||||
|
||||
# Hard Negative Mining
|
||||
loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
|
||||
loss_c = loss_c.view(num, -1)
|
||||
_, loss_idx = loss_c.sort(1, descending=True)
|
||||
_, idx_rank = loss_idx.sort(1)
|
||||
num_pos = pos.long().sum(1, keepdim=True)
|
||||
num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
|
||||
neg = idx_rank < num_neg.expand_as(idx_rank)
|
||||
|
||||
# Confidence Loss Including Positive and Negative Examples
|
||||
pos_idx = pos.unsqueeze(2).expand_as(conf_data)
|
||||
neg_idx = neg.unsqueeze(2).expand_as(conf_data)
|
||||
conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
|
||||
targets_weighted = conf_t[(pos+neg).gt(0)]
|
||||
loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')
|
||||
|
||||
# Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
|
||||
N = max(num_pos.data.sum().float(), 1)
|
||||
loss_l /= N
|
||||
loss_c /= N
|
||||
loss_landm /= N1
|
||||
|
||||
return loss_l, loss_c, loss_landm
|
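The hard-negative-mining step above relies on a double argsort to rank every prior by its confidence loss and keep only the negpos_ratio hardest negatives per positive. A toy, simplified illustration (not part of the original file):

import torch

# per-prior confidence loss for one image; prior 2 is the only positive
loss_c = torch.tensor([[0.1, 0.9, 0.3, 0.7, 0.2]])
pos = torch.tensor([[False, False, True, False, False]])
negpos_ratio = 3

loss_c[pos] = 0                               # positives never compete as negatives
_, loss_idx = loss_c.sort(1, descending=True)
_, idx_rank = loss_idx.sort(1)                # idx_rank[i] = rank of prior i by loss
num_pos = pos.long().sum(1, keepdim=True)     # tensor([[1]])
num_neg = torch.clamp(negpos_ratio * num_pos, max=pos.size(1) - 1)   # tensor([[3]])
neg = idx_rank < num_neg.expand_as(idx_rank)

print(neg)   # tensor([[False,  True, False,  True,  True]]) -> priors 1, 3, 4 kept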
33
losses.py
Normal file
@ -0,0 +1,33 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
|
||||
class CosFace(nn.Module):
|
||||
def __init__(self, s=64.0, m=0.40):
|
||||
super(CosFace, self).__init__()
|
||||
self.s = s
|
||||
self.m = m
|
||||
|
||||
def forward(self, cosine, label):
|
||||
index = torch.where(label != -1)[0]
|
||||
m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
|
||||
m_hot.scatter_(1, label[index, None], self.m)
|
||||
cosine[index] -= m_hot
|
||||
ret = cosine * self.s
|
||||
return ret
|
||||
|
||||
|
||||
class ArcFace(nn.Module):
|
||||
def __init__(self, s=64.0, m=0.5):
|
||||
super(ArcFace, self).__init__()
|
||||
self.s = s
|
||||
self.m = m
|
||||
|
||||
def forward(self, cosine: torch.Tensor, label):
|
||||
index = torch.where(label != -1)[0]
|
||||
m_hot = torch.zeros(index.size()[0], cosine.size()[1], device=cosine.device)
|
||||
m_hot.scatter_(1, label[index, None], self.m)
|
||||
cosine.acos_()
|
||||
cosine[index] += m_hot
|
||||
cosine.cos_().mul_(self.s)
|
||||
return cosine
|
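ArcFace.forward applies its additive angular margin in place: acos, add m on the labelled column, cos, then scale by s. A small self-check (illustration only) that this equals s*cos(theta + m) on the target logit and s*cos(theta) elsewhere:

import math
import torch

s, m = 64.0, 0.5
theta = torch.tensor([[0.3, 1.0, 1.4]])   # angles between the feature and each class centre
label = torch.tensor([1])                 # class 1 is the ground truth

cosine = theta.cos()
index = torch.where(label != -1)[0]
m_hot = torch.zeros(index.size()[0], cosine.size()[1])
m_hot.scatter_(1, label[index, None], m)
cosine.acos_()
cosine[index] += m_hot
cosine.cos_().mul_(s)

expected = torch.tensor([[s * math.cos(0.3), s * math.cos(1.0 + m), s * math.cos(1.4)]])
print(torch.allclose(cosine, expected, atol=1e-4))   # True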
BIN
model/anti_spoof_models/2.7_80x80_MiniFASNetV2.pth
Normal file
BIN
model/anti_spoof_models/4_0_0_80x80_MiniFASNetV1SE.pth
Normal file
BIN
model/backbone100.pth
Normal file
BIN
model/model-0000.params
Normal file
2399
model/model-symbol.json
Normal file
BIN
model/onnx/centerface.onnx
Normal file
BIN
model/onnx/centerface_bnmerged.onnx
Normal file
0
models/__init__.py
Normal file
BIN
models/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
models/__pycache__/net.cpython-38.pyc
Normal file
BIN
models/__pycache__/retinaface.cpython-38.pyc
Normal file
137
models/net.py
Normal file
@ -0,0 +1,137 @@
|
||||
import time
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torchvision.models._utils as _utils
|
||||
import torchvision.models as models
|
||||
import torch.nn.functional as F
|
||||
from torch.autograd import Variable
|
||||
|
||||
def conv_bn(inp, oup, stride = 1, leaky = 0):
|
||||
return nn.Sequential(
|
||||
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
nn.LeakyReLU(negative_slope=leaky, inplace=True)
|
||||
)
|
||||
|
||||
def conv_bn_no_relu(inp, oup, stride):
|
||||
return nn.Sequential(
|
||||
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
)
|
||||
|
||||
def conv_bn1X1(inp, oup, stride, leaky=0):
|
||||
return nn.Sequential(
|
||||
nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
nn.LeakyReLU(negative_slope=leaky, inplace=True)
|
||||
)
|
||||
|
||||
def conv_dw(inp, oup, stride, leaky=0.1):
|
||||
return nn.Sequential(
|
||||
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
|
||||
nn.BatchNorm2d(inp),
|
||||
nn.LeakyReLU(negative_slope= leaky,inplace=True),
|
||||
|
||||
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
|
||||
nn.BatchNorm2d(oup),
|
||||
nn.LeakyReLU(negative_slope= leaky,inplace=True),
|
||||
)
|
||||
|
||||
class SSH(nn.Module):
|
||||
def __init__(self, in_channel, out_channel):
|
||||
super(SSH, self).__init__()
|
||||
assert out_channel % 4 == 0
|
||||
leaky = 0
|
||||
if (out_channel <= 64):
|
||||
leaky = 0.1
|
||||
self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1)
|
||||
|
||||
self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky)
|
||||
self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
|
||||
|
||||
self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky)
|
||||
self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1)
|
||||
|
||||
def forward(self, input):
|
||||
conv3X3 = self.conv3X3(input)
|
||||
|
||||
conv5X5_1 = self.conv5X5_1(input)
|
||||
conv5X5 = self.conv5X5_2(conv5X5_1)
|
||||
|
||||
conv7X7_2 = self.conv7X7_2(conv5X5_1)
|
||||
conv7X7 = self.conv7x7_3(conv7X7_2)
|
||||
|
||||
out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
|
||||
out = F.relu(out)
|
||||
return out
|
||||
|
||||
class FPN(nn.Module):
|
||||
def __init__(self,in_channels_list,out_channels):
|
||||
super(FPN,self).__init__()
|
||||
leaky = 0
|
||||
if (out_channels <= 64):
|
||||
leaky = 0.1
|
||||
self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky)
|
||||
self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky)
|
||||
self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky)
|
||||
|
||||
self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky)
|
||||
self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky)
|
||||
|
||||
def forward(self, input):
|
||||
# names = list(input.keys())
|
||||
input = list(input.values())
|
||||
|
||||
output1 = self.output1(input[0])
|
||||
output2 = self.output2(input[1])
|
||||
output3 = self.output3(input[2])
|
||||
|
||||
up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest")
|
||||
output2 = output2 + up3
|
||||
output2 = self.merge2(output2)
|
||||
|
||||
up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest")
|
||||
output1 = output1 + up2
|
||||
output1 = self.merge1(output1)
|
||||
|
||||
out = [output1, output2, output3]
|
||||
return out
|
||||
|
||||
|
||||
|
||||
class MobileNetV1(nn.Module):
|
||||
def __init__(self):
|
||||
super(MobileNetV1, self).__init__()
|
||||
self.stage1 = nn.Sequential(
|
||||
conv_bn(3, 8, 2, leaky = 0.1), # 3
|
||||
conv_dw(8, 16, 1), # 7
|
||||
conv_dw(16, 32, 2), # 11
|
||||
conv_dw(32, 32, 1), # 19
|
||||
conv_dw(32, 64, 2), # 27
|
||||
conv_dw(64, 64, 1), # 43
|
||||
)
|
||||
self.stage2 = nn.Sequential(
|
||||
conv_dw(64, 128, 2), # 43 + 16 = 59
|
||||
conv_dw(128, 128, 1), # 59 + 32 = 91
|
||||
conv_dw(128, 128, 1), # 91 + 32 = 123
|
||||
conv_dw(128, 128, 1), # 123 + 32 = 155
|
||||
conv_dw(128, 128, 1), # 155 + 32 = 187
|
||||
conv_dw(128, 128, 1), # 187 + 32 = 219
|
||||
)
|
||||
self.stage3 = nn.Sequential(
|
||||
conv_dw(128, 256, 2), # 219 +3 2 = 241
|
||||
conv_dw(256, 256, 1), # 241 + 64 = 301
|
||||
)
|
||||
self.avg = nn.AdaptiveAvgPool2d((1,1))
|
||||
self.fc = nn.Linear(256, 1000)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.stage1(x)
|
||||
x = self.stage2(x)
|
||||
x = self.stage3(x)
|
||||
x = self.avg(x)
|
||||
# x = self.model(x)
|
||||
x = x.view(-1, 256)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
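A quick shape check (not part of the original file): the three MobileNetV1 stages downsample by 8, 16 and 32 and emit 64/128/256 channels, which matches the in_channels_list RetinaFace builds as in_channel * {2, 4, 8} (with the mobile0.25 config's in_channel assumed to be 32).

import torch
from models.net import MobileNetV1

net = MobileNetV1().eval()
x = torch.randn(1, 3, 640, 640)
with torch.no_grad():
    f1 = net.stage1(x)    # torch.Size([1, 64, 80, 80])  - stride 8
    f2 = net.stage2(f1)   # torch.Size([1, 128, 40, 40]) - stride 16
    f3 = net.stage3(f2)   # torch.Size([1, 256, 20, 20]) - stride 32
print(f1.shape, f2.shape, f3.shape)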
127
models/retinaface.py
Normal file
@ -0,0 +1,127 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torchvision.models.detection.backbone_utils as backbone_utils
|
||||
import torchvision.models._utils as _utils
|
||||
import torch.nn.functional as F
|
||||
from collections import OrderedDict
|
||||
|
||||
from models.net import MobileNetV1 as MobileNetV1
|
||||
from models.net import FPN as FPN
|
||||
from models.net import SSH as SSH
|
||||
|
||||
|
||||
|
||||
class ClassHead(nn.Module):
|
||||
def __init__(self,inchannels=512,num_anchors=3):
|
||||
super(ClassHead,self).__init__()
|
||||
self.num_anchors = num_anchors
|
||||
self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0)
|
||||
|
||||
def forward(self,x):
|
||||
out = self.conv1x1(x)
|
||||
out = out.permute(0,2,3,1).contiguous()
|
||||
|
||||
return out.view(out.shape[0], -1, 2)
|
||||
|
||||
class BboxHead(nn.Module):
|
||||
def __init__(self,inchannels=512,num_anchors=3):
|
||||
super(BboxHead,self).__init__()
|
||||
self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0)
|
||||
|
||||
def forward(self,x):
|
||||
out = self.conv1x1(x)
|
||||
out = out.permute(0,2,3,1).contiguous()
|
||||
|
||||
return out.view(out.shape[0], -1, 4)
|
||||
|
||||
class LandmarkHead(nn.Module):
|
||||
def __init__(self,inchannels=512,num_anchors=3):
|
||||
super(LandmarkHead,self).__init__()
|
||||
self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0)
|
||||
|
||||
def forward(self,x):
|
||||
out = self.conv1x1(x)
|
||||
out = out.permute(0,2,3,1).contiguous()
|
||||
|
||||
return out.view(out.shape[0], -1, 10)
|
||||
|
||||
class RetinaFace(nn.Module):
|
||||
def __init__(self, cfg = None, phase = 'train'):
|
||||
"""
|
||||
:param cfg: Network related settings.
|
||||
:param phase: train or test.
|
||||
"""
|
||||
super(RetinaFace,self).__init__()
|
||||
self.phase = phase
|
||||
backbone = None
|
||||
if cfg['name'] == 'mobilenet0.25':
|
||||
backbone = MobileNetV1()
|
||||
if cfg['pretrain']:
|
||||
checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
|
||||
from collections import OrderedDict
|
||||
new_state_dict = OrderedDict()
|
||||
for k, v in checkpoint['state_dict'].items():
|
||||
name = k[7:] # remove module.
|
||||
new_state_dict[name] = v
|
||||
# load params
|
||||
backbone.load_state_dict(new_state_dict)
|
||||
elif cfg['name'] == 'Resnet50':
|
||||
import torchvision.models as models
|
||||
backbone = models.resnet50(pretrained=cfg['pretrain'])
|
||||
|
||||
self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers'])
|
||||
in_channels_stage2 = cfg['in_channel']
|
||||
in_channels_list = [
|
||||
in_channels_stage2 * 2,
|
||||
in_channels_stage2 * 4,
|
||||
in_channels_stage2 * 8,
|
||||
]
|
||||
out_channels = cfg['out_channel']
|
||||
self.fpn = FPN(in_channels_list,out_channels)
|
||||
self.ssh1 = SSH(out_channels, out_channels)
|
||||
self.ssh2 = SSH(out_channels, out_channels)
|
||||
self.ssh3 = SSH(out_channels, out_channels)
|
||||
|
||||
self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel'])
|
||||
self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel'])
|
||||
self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel'])
|
||||
|
||||
def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2):
|
||||
classhead = nn.ModuleList()
|
||||
for i in range(fpn_num):
|
||||
classhead.append(ClassHead(inchannels,anchor_num))
|
||||
return classhead
|
||||
|
||||
def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2):
|
||||
bboxhead = nn.ModuleList()
|
||||
for i in range(fpn_num):
|
||||
bboxhead.append(BboxHead(inchannels,anchor_num))
|
||||
return bboxhead
|
||||
|
||||
def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2):
|
||||
landmarkhead = nn.ModuleList()
|
||||
for i in range(fpn_num):
|
||||
landmarkhead.append(LandmarkHead(inchannels,anchor_num))
|
||||
return landmarkhead
|
||||
|
||||
def forward(self,inputs):
|
||||
out = self.body(inputs)
|
||||
|
||||
# FPN
|
||||
fpn = self.fpn(out)
|
||||
|
||||
# SSH
|
||||
feature1 = self.ssh1(fpn[0])
|
||||
feature2 = self.ssh2(fpn[1])
|
||||
feature3 = self.ssh3(fpn[2])
|
||||
features = [feature1, feature2, feature3]
|
||||
|
||||
bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1)
|
||||
classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1)
|
||||
ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1)
|
||||
|
||||
if self.phase == 'train':
|
||||
output = (bbox_regressions, classifications, ldm_regressions)
|
||||
else:
|
||||
output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions)
|
||||
return output
|
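For orientation, the head output shapes: each head is a 1x1 conv whose output is permuted to NHWC and flattened, so the second dimension is the total prior count over the three SSH levels. The sketch below assumes a 640x640 input and the default anchor_num of 2 from _make_class_head.

anchor_num = 2                                  # default in _make_class_head
levels = [(80, 80), (40, 40), (20, 20)]         # SSH feature maps at strides 8/16/32 for 640x640

num_priors = sum(h * w * anchor_num for h, w in levels)
print(num_priors)                               # 16800
print(("bbox", num_priors, 4), ("cls", num_priors, 2), ("ldm", num_priors, 10))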
161
partial_fc.py
Normal file
@ -0,0 +1,161 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
from torch.nn import Module
|
||||
from torch.nn.functional import normalize, linear
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
|
||||
class PartialFC(Module):
|
||||
"""
|
||||
Author: {Xiang An, Yang Xiao, XuHan Zhu} in DeepGlint,
|
||||
Partial FC: Training 10 Million Identities on a Single Machine
|
||||
See the original paper:
|
||||
https://arxiv.org/abs/2010.05222
|
||||
"""
|
||||
|
||||
@torch.no_grad()
|
||||
def __init__(self, rank, local_rank, world_size, batch_size, resume,
|
||||
margin_softmax, num_classes, sample_rate=1.0, embedding_size=512, prefix="./"):
|
||||
super(PartialFC, self).__init__()
|
||||
#
|
||||
self.num_classes: int = num_classes
|
||||
self.rank: int = rank
|
||||
self.local_rank: int = local_rank
|
||||
self.device: torch.device = torch.device("cuda:{}".format(self.local_rank))
|
||||
self.world_size: int = world_size
|
||||
self.batch_size: int = batch_size
|
||||
self.margin_softmax: callable = margin_softmax
|
||||
self.sample_rate: float = sample_rate
|
||||
self.embedding_size: int = embedding_size
|
||||
self.prefix: str = prefix
|
||||
self.num_local: int = num_classes // world_size + int(rank < num_classes % world_size)
|
||||
self.class_start: int = num_classes // world_size * rank + min(rank, num_classes % world_size)
|
||||
self.num_sample: int = int(self.sample_rate * self.num_local)
|
||||
|
||||
self.weight_name = os.path.join(self.prefix, "rank:{}_softmax_weight.pt".format(self.rank))
|
||||
self.weight_mom_name = os.path.join(self.prefix, "rank:{}_softmax_weight_mom.pt".format(self.rank))
|
||||
|
||||
if resume:
|
||||
try:
|
||||
self.weight: torch.Tensor = torch.load(self.weight_name)
|
||||
logging.info("softmax weight resume successfully!")
|
||||
except (FileNotFoundError, KeyError, IndexError):
|
||||
self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
|
||||
logging.info("softmax weight resume fail!")
|
||||
|
||||
try:
|
||||
self.weight_mom: torch.Tensor = torch.load(self.weight_mom_name)
|
||||
logging.info("softmax weight mom resume successfully!")
|
||||
except (FileNotFoundError, KeyError, IndexError):
|
||||
self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
|
||||
logging.info("softmax weight mom resume fail!")
|
||||
else:
|
||||
self.weight = torch.normal(0, 0.01, (self.num_local, self.embedding_size), device=self.device)
|
||||
self.weight_mom: torch.Tensor = torch.zeros_like(self.weight)
|
||||
logging.info("softmax weight init successfully!")
|
||||
logging.info("softmax weight mom init successfully!")
|
||||
self.stream: torch.cuda.Stream = torch.cuda.Stream(local_rank)
|
||||
|
||||
self.index = None
|
||||
if int(self.sample_rate) == 1:
|
||||
self.update = lambda: 0
|
||||
self.sub_weight = Parameter(self.weight)
|
||||
self.sub_weight_mom = self.weight_mom
|
||||
else:
|
||||
self.sub_weight = Parameter(torch.empty((0, 0)).cuda(local_rank))
|
||||
|
||||
def save_params(self):
|
||||
torch.save(self.weight.data, self.weight_name)
|
||||
torch.save(self.weight_mom, self.weight_mom_name)
|
||||
|
||||
@torch.no_grad()
|
||||
def sample(self, total_label):
|
||||
index_positive = (self.class_start <= total_label) & (total_label < self.class_start + self.num_local)
|
||||
total_label[~index_positive] = -1
|
||||
total_label[index_positive] -= self.class_start
|
||||
if int(self.sample_rate) != 1:
|
||||
positive = torch.unique(total_label[index_positive], sorted=True)
|
||||
if self.num_sample - positive.size(0) >= 0:
|
||||
perm = torch.rand(size=[self.num_local], device=self.device)
|
||||
perm[positive] = 2.0
|
||||
index = torch.topk(perm, k=self.num_sample)[1]
|
||||
index = index.sort()[0]
|
||||
else:
|
||||
index = positive
|
||||
self.index = index
|
||||
total_label[index_positive] = torch.searchsorted(index, total_label[index_positive])
|
||||
self.sub_weight = Parameter(self.weight[index])
|
||||
self.sub_weight_mom = self.weight_mom[index]
|
||||
|
||||
def forward(self, total_features, norm_weight):
|
||||
torch.cuda.current_stream().wait_stream(self.stream)
|
||||
logits = linear(total_features, norm_weight)
|
||||
return logits
|
||||
|
||||
@torch.no_grad()
|
||||
def update(self):
|
||||
self.weight_mom[self.index] = self.sub_weight_mom
|
||||
self.weight[self.index] = self.sub_weight
|
||||
|
||||
def prepare(self, label, optimizer):
|
||||
with torch.cuda.stream(self.stream):
|
||||
total_label = torch.zeros(
|
||||
size=[self.batch_size * self.world_size], device=self.device, dtype=torch.long)
|
||||
dist.all_gather(list(total_label.chunk(self.world_size, dim=0)), label)
|
||||
self.sample(total_label)
|
||||
optimizer.state.pop(optimizer.param_groups[-1]['params'][0], None)
|
||||
optimizer.param_groups[-1]['params'][0] = self.sub_weight
|
||||
optimizer.state[self.sub_weight]['momentum_buffer'] = self.sub_weight_mom
|
||||
norm_weight = normalize(self.sub_weight)
|
||||
return total_label, norm_weight
|
||||
|
||||
def forward_backward(self, label, features, optimizer):
|
||||
total_label, norm_weight = self.prepare(label, optimizer)
|
||||
total_features = torch.zeros(
|
||||
size=[self.batch_size * self.world_size, self.embedding_size], device=self.device)
|
||||
dist.all_gather(list(total_features.chunk(self.world_size, dim=0)), features.data)
|
||||
total_features.requires_grad = True
|
||||
|
||||
logits = self.forward(total_features, norm_weight)
|
||||
logits = self.margin_softmax(logits, total_label)
|
||||
|
||||
with torch.no_grad():
|
||||
max_fc = torch.max(logits, dim=1, keepdim=True)[0]
|
||||
dist.all_reduce(max_fc, dist.ReduceOp.MAX)
|
||||
|
||||
# calculate exp(logits) and all-reduce
|
||||
logits_exp = torch.exp(logits - max_fc)
|
||||
logits_sum_exp = logits_exp.sum(dim=1, keepdims=True)
|
||||
dist.all_reduce(logits_sum_exp, dist.ReduceOp.SUM)
|
||||
|
||||
# calculate prob
|
||||
logits_exp.div_(logits_sum_exp)
|
||||
|
||||
# get one-hot
|
||||
grad = logits_exp
|
||||
index = torch.where(total_label != -1)[0]
|
||||
one_hot = torch.zeros(size=[index.size()[0], grad.size()[1]], device=grad.device)
|
||||
one_hot.scatter_(1, total_label[index, None], 1)
|
||||
|
||||
# calculate loss
|
||||
loss = torch.zeros(grad.size()[0], 1, device=grad.device)
|
||||
loss[index] = grad[index].gather(1, total_label[index, None])
|
||||
dist.all_reduce(loss, dist.ReduceOp.SUM)
|
||||
loss_v = loss.clamp_min_(1e-30).log_().mean() * (-1)
|
||||
|
||||
# calculate grad
|
||||
grad[index] -= one_hot
|
||||
grad.div_(self.batch_size * self.world_size)
|
||||
|
||||
logits.backward(grad)
|
||||
if total_features.grad is not None:
|
||||
total_features.grad.detach_()
|
||||
x_grad: torch.Tensor = torch.zeros_like(features, requires_grad=True)
|
||||
# feature gradient all-reduce
|
||||
dist.reduce_scatter(x_grad, list(total_features.grad.chunk(self.world_size, dim=0)))
|
||||
x_grad = x_grad * self.world_size
|
||||
# backward backbone
|
||||
return x_grad, loss_v
|
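A worked example (illustration only) of how PartialFC splits the class centres across ranks via num_local and class_start: with num_classes = 10 and world_size = 4, the first two ranks receive one extra class.

num_classes, world_size = 10, 4

for rank in range(world_size):
    num_local = num_classes // world_size + int(rank < num_classes % world_size)
    class_start = num_classes // world_size * rank + min(rank, num_classes % world_size)
    print(rank, class_start, num_local)
# rank 0 -> classes [0, 3), rank 1 -> [3, 6), rank 2 -> [6, 8), rank 3 -> [8, 10)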
14
play.py
Normal file
@ -0,0 +1,14 @@
|
||||
import cv2
|
||||
cap = cv2.VideoCapture("rtsp://admin:2020@uestc@192.168.30.83:554/h264")
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
print("hight:"+str(h)+"with:"+str(w))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
print(fps)
|
||||
# while ret:
|
||||
# cv2.imshow('out', frame)
|
||||
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
# break
|
||||
# ret, frame = cap.read()
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
282
realtime_detect.py
Normal file
@ -0,0 +1,282 @@
|
||||
import argparse
|
||||
import subprocess
|
||||
import time
|
||||
import cv2
|
||||
import torch
|
||||
import numpy as np
|
||||
from skimage import transform as trans
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from data import cfg_mnet, cfg_re50
|
||||
from face_api import load_arcface_model, load_npy
|
||||
from layers.functions.prior_box import PriorBox
|
||||
from retinaface_detect import set_retinaface_conf, load_retinaface_model, findAll
|
||||
from utils.nms.py_cpu_nms import py_cpu_nms
|
||||
from utils.box_utils import decode, decode_landm
|
||||
import faiss
|
||||
|
||||
ppi = 1280
|
||||
ppi2 = 1100
|
||||
step = 3
|
||||
|
||||
def detect_rtsp(rtsp, out_rtsp, net, arcface_model, index, database_name_list, k_v, args):
|
||||
tic_total = time.time()
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
resize = 1
|
||||
|
||||
# testing begin
|
||||
cap = cv2.VideoCapture(rtsp)
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
factor = 0
|
||||
if (w > ppi):
|
||||
factor = h / w
|
||||
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
|
||||
h, w = frame.shape[:2]
|
||||
arf = 1
|
||||
detect_h, detect_w = frame.shape[:2]
|
||||
frame_detect = frame
|
||||
factor2 = 0
|
||||
if (w > ppi2):
|
||||
factor2 = h / w
|
||||
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
|
||||
detect_h, detect_w = frame_detect.shape[:2]
|
||||
arf = w/detect_w
|
||||
print(w,h)
|
||||
print(detect_w,detect_h)
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
#print(fps)
|
||||
size = (w, h)
|
||||
sizeStr = str(size[0]) + 'x' + str(size[1])
|
||||
if(out_rtsp.startswith("rtsp")):
|
||||
command = ['ffmpeg',
|
||||
'-y', '-an',
|
||||
'-f', 'rawvideo',
|
||||
'-vcodec', 'rawvideo',
|
||||
'-pix_fmt', 'bgr24',
|
||||
'-s', sizeStr,
|
||||
'-r', "25",
|
||||
'-i', '-',
|
||||
'-c:v', 'libx265',
|
||||
'-b:v', '3000k',
|
||||
'-pix_fmt', 'yuv420p',
|
||||
'-preset', 'ultrafast',
|
||||
'-f', 'rtsp',
|
||||
out_rtsp]
|
||||
pipe = subprocess.Popen(command, shell=False, stdin=subprocess.PIPE)
|
||||
#out = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*'XVID'), fps, size)
|
||||
number = step
|
||||
dets = []
|
||||
name_list = []
|
||||
font = ImageFont.truetype("font.ttf", 22)
|
||||
priorbox = PriorBox(cfg, image_size=(detect_h, detect_w))
|
||||
priors = priorbox.forward()
|
||||
priors = priors.to(device)
|
||||
prior_data = priors.data
|
||||
|
||||
scale = torch.Tensor([detect_w, detect_h, detect_w, detect_h])
|
||||
scale = scale.to(device)
|
||||
scale1 = torch.Tensor([detect_w, detect_h, detect_w, detect_h,
|
||||
detect_w, detect_h, detect_w, detect_h,
|
||||
detect_w, detect_h])
|
||||
scale1 = scale1.to(device)
|
||||
|
||||
src1 = np.array([
|
||||
[38.3814, 51.6963],
|
||||
[73.6186, 51.5014],
|
||||
[56.1120, 71.7366],
|
||||
[41.6361, 92.3655],
|
||||
[70.8167, 92.2041]], dtype=np.float32)
|
||||
tform = trans.SimilarityTransform()
|
||||
|
||||
while ret:
|
||||
tic_all = time.time()
|
||||
if number == step:
|
||||
tic = time.time()
|
||||
img = np.float32(frame_detect)
|
||||
img -= (104, 117, 123)
|
||||
img = img.transpose(2, 0, 1)
|
||||
img = torch.from_numpy(img).unsqueeze(0)
|
||||
img = img.to(device)
|
||||
|
||||
loc, conf, landms = net(img) # forward pass
|
||||
|
||||
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
boxes = boxes * scale / resize
|
||||
boxes = boxes.cpu().numpy()
|
||||
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
|
||||
landms = landms * scale1 / resize
|
||||
landms = landms.cpu().numpy()
|
||||
|
||||
# ignore low scores
|
||||
inds = np.where(scores > args.confidence_threshold)[0]
|
||||
boxes = boxes[inds]
|
||||
landms = landms[inds]
|
||||
scores = scores[inds]
|
||||
|
||||
# keep top-K before NMS
|
||||
order = scores.argsort()[::-1][:args.top_k]
|
||||
boxes = boxes[order]
|
||||
landms = landms[order]
|
||||
scores = scores[order]
|
||||
|
||||
# do NMS
|
||||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
dets = dets[keep, :]
|
||||
landms = landms[keep]
|
||||
|
||||
# keep top-K faster NMS
|
||||
dets = dets[:args.keep_top_k, :]
|
||||
landms = landms[:args.keep_top_k, :]
|
||||
|
||||
dets = np.concatenate((dets, landms), axis=1)
|
||||
face_list = []
|
||||
name_list = []
|
||||
print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
start_time_findall = time.time()
|
||||
for i, det in enumerate(dets[:4]):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
#boxes, score = det[:4], det[4]
|
||||
dst = np.reshape(landms[i], (5, 2))
|
||||
dst = dst * arf
|
||||
|
||||
tform.estimate(dst, src1)
|
||||
M = tform.params[0:2, :]
|
||||
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
img112 = frame2[0:112, 0:112, :]
|
||||
face_list.append(img112)
|
||||
|
||||
if len(face_list) != 0:
|
||||
face_list = np.array(face_list)
|
||||
face_list = face_list.transpose((0, 3, 1, 2))
|
||||
face_list = np.array(face_list, dtype=np.float32)
|
||||
face_list -= 127.5
|
||||
face_list /= 127.5
|
||||
print(face_list.shape)
|
||||
print("warpALL time: " + str(time.time() - start_time_findall ))
|
||||
#start_time = time.time()
|
||||
name_list = findAll(face_list, arcface_model, index ,database_name_list, k_v, "cpu" if args.cpu else "cuda")
|
||||
#print(name_list)
|
||||
|
||||
#print("findOneframe time: " + str(time.time() - start_time_findall))
|
||||
# start_time = time.time()
|
||||
# if (len(dets) != 0):
|
||||
# for i, det in enumerate(dets[:]):
|
||||
# if det[4] < args.vis_thres:
|
||||
# continue
|
||||
# boxes, score = det[:4], det[4]
|
||||
# boxes = boxes * arf
|
||||
# name = name_list[i]
|
||||
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (255, 0, 0), 2)
|
||||
# cv2.putText(frame, name, (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,(0, 225, 255), 1)
|
||||
start_time = time.time()
|
||||
if(len(dets) != 0):
|
||||
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
draw = ImageDraw.Draw(img_PIL)
|
||||
for i, det in enumerate(dets[:4]):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
boxes, score = det[:4], det[4]
|
||||
boxes = boxes * arf
|
||||
name = name_list[i]
|
||||
if not isinstance(name, str):
|
||||
name = name.decode('utf8')
|
||||
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
|
||||
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||
pipe.stdin.write(frame.tobytes())
|
||||
#out.write(frame)
|
||||
print("drawOneframe time: " + str(time.time() - start_time))
|
||||
start_time = time.time()
|
||||
ret, frame = cap.read()
|
||||
frame_detect = frame
|
||||
number = 0
|
||||
if (ret != 0 and factor != 0):
|
||||
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
|
||||
if (ret != 0 and factor2 != 0):
|
||||
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
|
||||
print("readframe time: " + str(time.time() - start_time))
|
||||
else:
|
||||
number += 1
|
||||
# if (len(dets) != 0):
|
||||
# for i, det in enumerate(dets[:4]):
|
||||
# if det[4] < args.vis_thres:
|
||||
# continue
|
||||
# boxes, score = det[:4], det[4]
|
||||
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
|
||||
if (len(dets) != 0):
|
||||
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
draw = ImageDraw.Draw(img_PIL)
|
||||
for i, det in enumerate(dets[:4]):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
boxes, score = det[:4], det[4]
|
||||
boxes = boxes * arf
|
||||
name = name_list[i]
|
||||
if not isinstance(name, str):
|
||||
name = name.decode('utf8')
|
||||
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
|
||||
width=3)
|
||||
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||
start_time = time.time()
|
||||
pipe.stdin.write(frame.tobytes())
|
||||
#out.write(frame)
|
||||
print("writeframe time: " + str(time.time() - start_time))
|
||||
start_time = time.time()
|
||||
ret, frame = cap.read()
|
||||
frame_detect = frame
|
||||
if (ret != 0 and factor != 0):
|
||||
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
|
||||
if (ret != 0 and factor2 != 0):
|
||||
frame_detect = cv2.resize(frame, (ppi2, int(ppi2 * factor2)))
|
||||
print("readframe time: " + str(time.time() - start_time))
|
||||
print('all time: {:.4f}'.format(time.time() - tic_all))
|
||||
cap.release()
|
||||
#out.release()
|
||||
pipe.terminate()
|
||||
print('total time: {:.4f}'.format(time.time() - tic_total))
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--rtsp",
|
||||
type=str,
|
||||
default="",
|
||||
dest="rtsp_path"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
# Load the face recognition model
|
||||
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
|
||||
# Load the face detection model
|
||||
retinaface_args = set_retinaface_conf(cpu_or_cuda=cpu_or_cuda)
|
||||
retinaface_model = load_retinaface_model(retinaface_args)
|
||||
k_v = load_npy("./Database/student.npy")
|
||||
#print(list(k_v.keys()))
|
||||
database_name_list = list(k_v.keys())
|
||||
vector_list = np.array(list(k_v.values()))
|
||||
print(vector_list.shape)
|
||||
nlist = 10
|
||||
quantizer = faiss.IndexFlatL2(512) # the other index
|
||||
index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
|
||||
index.train(vector_list)
|
||||
#index = faiss.IndexFlatL2(512)
|
||||
index.add(vector_list)
|
||||
index.nprobe=10
|
||||
|
||||
detect_rtsp(args.rtsp_path, 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index ,database_name_list, k_v, retinaface_args)
|
||||
|
||||
#detect_rtsp("rtsp://admin:2020@uestc@192.168.14.32:8557/h264", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, index ,database_name_list, k_v, retinaface_args)
|
||||
#detect_rtsp("cut.mp4", 'rtsp://localhost:5001/test2', retinaface_model, arcface_model, k_v, retinaface_args)
|
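A minimal sketch of the frame-to-RTSP pipe used by detect_rtsp, stripped of the detection logic. It assumes an external RTSP server (e.g. rtsp-simple-server) is already listening at the output URL; the ffmpeg options mirror the command list above, and cut.mp4 is the sample input mentioned in the commented-out call.

import subprocess
import cv2

cap = cv2.VideoCapture("cut.mp4")
ret, frame = cap.read()
h, w = frame.shape[:2]

command = ['ffmpeg', '-y', '-an',
           '-f', 'rawvideo', '-vcodec', 'rawvideo', '-pix_fmt', 'bgr24',
           '-s', '%dx%d' % (w, h), '-r', '25', '-i', '-',
           '-c:v', 'libx265', '-b:v', '3000k', '-pix_fmt', 'yuv420p',
           '-preset', 'ultrafast', '-f', 'rtsp', 'rtsp://localhost:5001/test2']
pipe = subprocess.Popen(command, stdin=subprocess.PIPE)

while ret:
    pipe.stdin.write(frame.tobytes())   # push raw BGR bytes, one frame at a time
    ret, frame = cap.read()

cap.release()
pipe.stdin.close()
pipe.terminate()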
283
recognition_video.py
Normal file
@ -0,0 +1,283 @@
|
||||
import time
|
||||
from centerface import CenterFace
|
||||
from skimage import transform as trans
|
||||
import numpy as np
|
||||
import torch
|
||||
import cv2
|
||||
from backbones import iresnet100, iresnet18
|
||||
from create_database import findOne, load_npy,findAll
|
||||
from PIL import Image, ImageDraw,ImageFont
|
||||
|
||||
def show():
|
||||
cap = cv2.VideoCapture("test.mp4")
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
centerface = CenterFace()
|
||||
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
||||
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
||||
out = cv2.VideoWriter('ccvt6.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, size)
|
||||
while ret:
|
||||
start_time = time.time()
|
||||
dets, lms = centerface(frame, h, w, threshold=0.35)
|
||||
end_time = time.time()
|
||||
print("findOne time: " + str(end_time - start_time))
|
||||
for det in dets:
|
||||
boxes, score = det[:4], det[4]
|
||||
cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
|
||||
for lm in lms:
|
||||
for i in range(0, 5):
|
||||
cv2.circle(frame, (int(lm[i * 2]), int(lm[i * 2 + 1])), 2, (0, 0, 255), -1)
|
||||
cv2.imshow('out', frame)
|
||||
out.write(frame)
|
||||
# Press Q on keyboard to stop recording
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
out.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
def video():
|
||||
model = iresnet100()
|
||||
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
|
||||
model.eval()
|
||||
k_v = load_npy("student.npy")
|
||||
count = 0
|
||||
#cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
|
||||
cap = cv2.VideoCapture("software.mp4")
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
||||
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
|
||||
centerface = CenterFace()
|
||||
while ret:
|
||||
start_time = time.time()
|
||||
dets, lms = centerface(frame, h, w, threshold=0.35)
|
||||
end_time = time.time()
|
||||
print("detectOneframe time: " + str(end_time - start_time))
|
||||
face_list = []
|
||||
name_list = []
|
||||
for i,det in enumerate(dets):
|
||||
boxes, score = det[:4], det[4]
|
||||
img_w = int(boxes[2] - boxes[0])
|
||||
img_h = int(boxes[3] - boxes[1])
|
||||
distace = int(abs(img_w - img_h) / 2)
|
||||
img_w1 = int(boxes[0]) - distace
|
||||
img_w2 = int(boxes[2]) + distace
|
||||
# print(img_w,img_h,distace,max_hw)
|
||||
if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
|
||||
img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
|
||||
img112 = cv2.resize(img112, (112, 112))
|
||||
# cv2.imwrite("./img/man"+str(count)+".jpg", img112)
|
||||
# count += 1
|
||||
face_list.append(img112)
|
||||
else:
|
||||
img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
|
||||
img112 = cv2.resize(img112, (112, 112))
|
||||
face_list.append(img112)
|
||||
if len(face_list) != 0:
|
||||
face_list = np.array(face_list)
|
||||
face_list = face_list.transpose((0,3,1,2))
|
||||
face_list = np.array(face_list, dtype=np.float32)
|
||||
face_list -= 127.5
|
||||
face_list /= 127.5
|
||||
print(face_list.shape)
|
||||
face_list = torch.from_numpy(face_list)
|
||||
start_time = time.time()
|
||||
|
||||
for face in face_list:
|
||||
face = face[np.newaxis, :, :, :]
|
||||
|
||||
name_list.append(findOne(face,model,k_v))
|
||||
end_time = time.time()
|
||||
print("findOneframe time: "+str(end_time-start_time))
|
||||
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
draw = ImageDraw.Draw(img_PIL)
|
||||
font = ImageFont.truetype("font.ttf",12)
|
||||
for i,det in enumerate(dets):
|
||||
boxes, score = det[:4], det[4]
|
||||
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
|
||||
# cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
|
||||
# (0, 225, 255), 1)
|
||||
name = name_list[i][:3]
|
||||
if not isinstance(name, np.unicode):
|
||||
name = name.decode('utf8')
|
||||
draw.text((int(boxes[0]), int(boxes[1])),name,fill=(0, 225, 255),font=font)
|
||||
draw.rectangle((int(boxes[0]), int(boxes[1]),int(boxes[2]), int(boxes[3])),outline="green",width=1)
|
||||
frame = cv2.cvtColor(np.asarray(img_PIL),cv2.COLOR_RGB2BGR)
|
||||
cv2.imshow('out', frame)
|
||||
out.write(frame)
|
||||
# Press Q on keyboard to stop recording
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
out.release()
|
||||
cv2.destroyAllWindows()
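# Editorial note: video() above calls findOne() once per detected face, so the ArcFace
# model runs N forward passes per frame. video_GPU() below stacks the crops into a single
# (N, 3, 112, 112) batch and calls findAll() once, which is much cheaper on GPU. A hedged
# sketch of the batched call, assuming `faces` is a list of 112x112 BGR crops:
#
#     batch = np.array(faces, dtype=np.float32).transpose((0, 3, 1, 2))
#     batch = (batch - 127.5) / 127.5
#     names = findAll(torch.from_numpy(batch), model, k_v)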
|
||||
def video_GPU():
|
||||
model = iresnet100()
|
||||
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
|
||||
model.eval()
|
||||
k_v = load_npy("student.npy")
|
||||
count = 0
|
||||
#cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
|
||||
cap = cv2.VideoCapture("software.mp4")
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
||||
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
|
||||
centerface = CenterFace()
|
||||
while ret:
|
||||
start_time = time.time()
|
||||
dets, lms = centerface(frame, h, w, threshold=0.35)
|
||||
end_time = time.time()
|
||||
print("detectOneframe time: " + str(end_time - start_time))
|
||||
face_list = []
|
||||
name_list = []
|
||||
for i,det in enumerate(dets):
|
||||
boxes, score = det[:4], det[4]
|
||||
img_w = int(boxes[2] - boxes[0])
|
||||
img_h = int(boxes[3] - boxes[1])
|
||||
distace = int(abs(img_w - img_h) / 2)
|
||||
img_w1 = int(boxes[0]) - distace
|
||||
img_w2 = int(boxes[2]) + distace
|
||||
# print(img_w,img_h,distace,max_hw)
|
||||
if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
|
||||
img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
|
||||
img112 = cv2.resize(img112, (112, 112))
|
||||
# cv2.imwrite("./img/man"+str(count)+".jpg", img112)
|
||||
# count += 1
|
||||
face_list.append(img112)
|
||||
else:
|
||||
img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
|
||||
img112 = cv2.resize(img112, (112, 112))
|
||||
face_list.append(img112)
|
||||
if len(face_list) != 0:
|
||||
face_list = np.array(face_list)
|
||||
face_list = face_list.transpose((0,3,1,2))
|
||||
face_list = np.array(face_list, dtype=np.float32)
|
||||
face_list -= 127.5
|
||||
face_list /= 127.5
|
||||
print(face_list.shape)
|
||||
face_list = torch.from_numpy(face_list)
|
||||
start_time = time.time()
|
||||
name_list = findAll(face_list, model, k_v)
|
||||
# for face in face_list:
|
||||
# face = face[np.newaxis, :, :, :]
|
||||
#
|
||||
# name_list.append(findOne(face,model,k_v))
|
||||
end_time = time.time()
|
||||
print("findOneframe time: "+str(end_time-start_time))
|
||||
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
draw = ImageDraw.Draw(img_PIL)
|
||||
font = ImageFont.truetype("font.ttf",18)
|
||||
for i,det in enumerate(dets):
|
||||
boxes, score = det[:4], det[4]
|
||||
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
|
||||
# cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
|
||||
# (0, 225, 255), 1)
|
||||
name = name_list[i][:3]
|
||||
if not isinstance(name, np.unicode):
|
||||
name = name.decode('utf8')
|
||||
draw.text((int(boxes[0]), int(boxes[1])),name,fill=(255, 0, 0),font=font)
|
||||
draw.rectangle((int(boxes[0]), int(boxes[1]),int(boxes[2]), int(boxes[3])),outline="green",width=2)
|
||||
frame = cv2.cvtColor(np.asarray(img_PIL),cv2.COLOR_RGB2BGR)
|
||||
cv2.imshow('out', frame)
|
||||
out.write(frame)
|
||||
# Press Q on keyboard to stop recording
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
out.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
def video_GPU_retinaface():
|
||||
model = iresnet100()
|
||||
model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
|
||||
model.eval()
|
||||
k_v = load_npy("student.npy")
|
||||
count = 0
|
||||
#cap = cv2.VideoCapture("http://ivi.bupt.edu.cn/hls/cctv6hd.m3u8")
|
||||
cap = cv2.VideoCapture("software.mp4")
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
||||
int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
out = cv2.VideoWriter('ttt.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
|
||||
centerface = CenterFace()
|
||||
while ret:
|
||||
start_time = time.time()
|
||||
dets, lms = centerface(frame, h, w, threshold=0.35)
|
||||
end_time = time.time()
|
||||
print("detectOneframe time: " + str(end_time - start_time))
|
||||
face_list = []
|
||||
name_list = []
|
||||
print(dets.shape)
|
||||
for i,det in enumerate(dets):
|
||||
boxes, score = det[:4], det[4]
|
||||
img_w = int(boxes[2] - boxes[0])
|
||||
img_h = int(boxes[3] - boxes[1])
|
||||
distace = int(abs(img_w - img_h) / 2)
|
||||
img_w1 = int(boxes[0]) - distace
|
||||
img_w2 = int(boxes[2]) + distace
|
||||
# print(img_w,img_h,distace,max_hw)
|
||||
if img_w <= img_h and img_w1 >= 0 and img_w2 <= frame.shape[1]:
|
||||
img112 = frame[int(boxes[1]):int(boxes[3]), img_w1:img_w2, :]
|
||||
img112 = cv2.resize(img112, (112, 112))
|
||||
# cv2.imwrite("./img/man"+str(count)+".jpg", img112)
|
||||
# count += 1
|
||||
face_list.append(img112)
|
||||
else:
|
||||
img112 = frame[int(boxes[1]):int(boxes[3]), int(boxes[0]):int(boxes[2]), :]
|
||||
img112 = cv2.resize(img112, (112, 112))
|
||||
face_list.append(img112)
|
||||
if len(face_list) != 0:
|
||||
face_list = np.array(face_list)
|
||||
face_list = face_list.transpose((0,3,1,2))
|
||||
face_list = np.array(face_list, dtype=np.float32)
|
||||
face_list -= 127.5
|
||||
face_list /= 127.5
|
||||
print(face_list.shape)
|
||||
face_list = torch.from_numpy(face_list)
|
||||
start_time = time.time()
|
||||
name_list = findAll(face_list, model, k_v)
|
||||
# for face in face_list:
|
||||
# face = face[np.newaxis, :, :, :]
|
||||
#
|
||||
# name_list.append(findOne(face,model,k_v))
|
||||
end_time = time.time()
|
||||
print("findOneframe time: "+str(end_time-start_time))
|
||||
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
draw = ImageDraw.Draw(img_PIL)
|
||||
font = ImageFont.truetype("font.ttf",18)
|
||||
for i,det in enumerate(dets):
|
||||
boxes, score = det[:4], det[4]
|
||||
# cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
|
||||
# cv2.putText(frame, name_list[i], (int(boxes[0]), int(boxes[1])), cv2.FONT_HERSHEY_COMPLEX, 0.4,
|
||||
# (0, 225, 255), 1)
|
||||
name = name_list[i][:3]
|
||||
if not isinstance(name, np.unicode):
|
||||
name = name.decode('utf8')
|
||||
draw.text((int(boxes[0]), int(boxes[1])),name,fill=(255, 0, 0),font=font)
|
||||
draw.rectangle((int(boxes[0]), int(boxes[1]),int(boxes[2]), int(boxes[3])),outline="green",width=2)
|
||||
frame = cv2.cvtColor(np.asarray(img_PIL),cv2.COLOR_RGB2BGR)
|
||||
cv2.imshow('out', frame)
|
||||
out.write(frame)
|
||||
# Press Q on keyboard to stop recording
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
ret, frame = cap.read()
|
||||
cap.release()
|
||||
out.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
video_GPU_retinaface()
|
||||
#video_GPU()
|
||||
#show()
|
111
requirements.txt
Normal file
@ -0,0 +1,111 @@
|
||||
Package Version
|
||||
---------------------- -----------
|
||||
appdirs 1.4.4
|
||||
attrs 21.2.0
|
||||
backcall 0.2.0
|
||||
beautifulsoup4 4.9.3
|
||||
certifi 2021.5.30
|
||||
cffi 1.14.0
|
||||
chardet 4.0.0
|
||||
click 8.0.1
|
||||
conda 4.9.1
|
||||
conda-build 3.20.5
|
||||
conda-package-handling 1.7.0
|
||||
cryptography 2.9.2
|
||||
cycler 0.10.0
|
||||
dataclasses 0.6
|
||||
decorator 4.4.2
|
||||
dnspython 2.0.0
|
||||
faiss-cpu 1.7.1
|
||||
filelock 3.0.12
|
||||
fire 0.4.0
|
||||
Flask 1.1.2
|
||||
future 0.18.2
|
||||
glob2 0.7
|
||||
graphsurgeon 0.4.5
|
||||
graphviz 0.8.4
|
||||
h5py 3.3.0
|
||||
idna 2.10
|
||||
imageio 2.9.0
|
||||
iniconfig 1.1.1
|
||||
ipython 7.18.1
|
||||
ipython-genutils 0.2.0
|
||||
itsdangerous 2.0.1
|
||||
jedi 0.17.2
|
||||
Jinja2 3.0.1
|
||||
joblib 1.0.1
|
||||
kiwisolver 1.3.1
|
||||
libarchive-c 2.9
|
||||
Mako 1.1.4
|
||||
MarkupSafe 2.0.1
|
||||
matplotlib 3.4.1
|
||||
mkl-fft 1.2.0
|
||||
mkl-random 1.1.1
|
||||
mkl-service 2.3.0
|
||||
mxnet 1.8.0.post0
|
||||
networkx 2.5.1
|
||||
nltk 3.6
|
||||
numpy 1.20.3
|
||||
olefile 0.46
|
||||
opencv-python 4.5.1.48
|
||||
packaging 21.0
|
||||
pandas 1.2.4
|
||||
parso 0.7.0
|
||||
pexpect 4.8.0
|
||||
pickleshare 0.7.5
|
||||
Pillow 8.0.0
|
||||
pip 20.0.2
|
||||
pkginfo 1.6.0
|
||||
pluggy 1.0.0
|
||||
prefetch-generator 1.0.1
|
||||
prompt-toolkit 3.0.8
|
||||
protobuf 3.15.8
|
||||
psutil 5.7.2
|
||||
ptyprocess 0.6.0
|
||||
py 1.9.0
|
||||
pycosat 0.6.3
|
||||
pycparser 2.20
|
||||
pycuda 2021.1
|
||||
Pygments 2.7.1
|
||||
pyOpenSSL 19.1.0
|
||||
pyparsing 2.4.7
|
||||
PySocks 1.7.1
|
||||
pytest 6.2.5
|
||||
python-dateutil 2.8.1
|
||||
python-etcd 0.4.5
|
||||
pytools 2021.2.6
|
||||
pytz 2020.1
|
||||
PyWavelets 1.1.1
|
||||
PyYAML 5.3.1
|
||||
pyzmq 22.1.0
|
||||
regex 2021.8.3
|
||||
requests 2.25.1
|
||||
ruamel-yaml 0.15.87
|
||||
scikit-image 0.18.1
|
||||
scipy 1.6.3
|
||||
seaborn 0.11.1
|
||||
setuptools 57.1.0
|
||||
six 1.14.0
|
||||
soupsieve 2.0.1
|
||||
tensorboard-logger 0.1.0
|
||||
tensorrt 7.2.3.4
|
||||
termcolor 1.1.0
|
||||
tifffile 2021.4.8
|
||||
toml 0.10.2
|
||||
torch 1.7.1
|
||||
torch2trt 0.2.0
|
||||
torchelastic 0.2.1
|
||||
torchfile 0.1.0
|
||||
torchtext 0.8.0
|
||||
torchvision 0.8.2
|
||||
tornado 6.1
|
||||
tqdm 4.46.0
|
||||
traitlets 5.0.5
|
||||
typing-extensions 3.7.4.3
|
||||
uff 0.6.9
|
||||
urllib3 1.26.5
|
||||
visdom 0.1.8
|
||||
wcwidth 0.2.5
|
||||
websocket-client 1.1.0
|
||||
Werkzeug 2.0.1
|
||||
wheel 0.34.2
|
762
retinaface_arcface.py
Normal file
@ -0,0 +1,762 @@
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import argparse
|
||||
import re
|
||||
|
||||
import faiss
|
||||
import torch
|
||||
import torch.backends.cudnn as cudnn
|
||||
import numpy as np
|
||||
from data import cfg_mnet, cfg_re50
|
||||
from face_api import create_database_from_img, load_arcface_model, findAll
|
||||
from layers.functions.prior_box import PriorBox
|
||||
from utils.nms.py_cpu_nms import py_cpu_nms
|
||||
import cv2
|
||||
from models.retinaface import RetinaFace
|
||||
from utils.box_utils import decode, decode_landm
|
||||
import time
|
||||
from face_api import load_arcface_model, load_npy
|
||||
from skimage import transform as trans
|
||||
from backbones import iresnet100, iresnet18
|
||||
#from create_database import findOne, load_npy,findAll
|
||||
from PIL import Image, ImageDraw,ImageFont
|
||||
|
||||
parser = argparse.ArgumentParser(description='Retinaface')
|
||||
|
||||
parser.add_argument('-m', '--trained_model', default='./weights/mobilenet0.25_Final.pth',
|
||||
type=str, help='Trained state_dict file path to open')
|
||||
parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
|
||||
parser.add_argument('--cpu', action="store_true", default=False if torch.cuda.is_available() else True, help='Use cpu inference')
|
||||
parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold')
|
||||
parser.add_argument('--top_k', default=5000, type=int, help='top_k')
|
||||
parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold')
|
||||
parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
|
||||
parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
|
||||
parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def check_keys(model, pretrained_state_dict):
|
||||
ckpt_keys = set(pretrained_state_dict.keys())
|
||||
model_keys = set(model.state_dict().keys())
|
||||
used_pretrained_keys = model_keys & ckpt_keys
|
||||
unused_pretrained_keys = ckpt_keys - model_keys
|
||||
missing_keys = model_keys - ckpt_keys
|
||||
print('Missing keys:{}'.format(len(missing_keys)))
|
||||
print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
|
||||
print('Used keys:{}'.format(len(used_pretrained_keys)))
|
||||
assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
|
||||
return True
|
||||
|
||||
|
||||
def remove_prefix(state_dict, prefix):
|
||||
''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
|
||||
print('remove prefix \'{}\''.format(prefix))
|
||||
f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
|
||||
return {f(key): value for key, value in state_dict.items()}
|
||||
|
||||
|
||||
def load_model(model, pretrained_path, load_to_cpu):
|
||||
print('Loading pretrained model from {}'.format(pretrained_path))
|
||||
if load_to_cpu:
|
||||
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
|
||||
else:
|
||||
device = torch.cuda.current_device()
|
||||
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
|
||||
if "state_dict" in pretrained_dict.keys():
|
||||
pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
|
||||
else:
|
||||
pretrained_dict = remove_prefix(pretrained_dict, 'module.')
|
||||
check_keys(model, pretrained_dict)
|
||||
model.load_state_dict(pretrained_dict, strict=False)
|
||||
return model
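# Editorial note: checkpoints saved from a torch.nn.DataParallel-wrapped model prefix every
# parameter name with "module.", which is why remove_prefix() is applied before
# load_state_dict(). A minimal illustration with a made-up state dict:
def remove_prefix_example():
    dummy = {"module.conv1.weight": 0, "module.bn1.bias": 1}
    return remove_prefix(dummy, "module.")   # -> {"conv1.weight": 0, "bn1.bias": 1}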
|
||||
|
||||
def image_to112x112_retinaface():
|
||||
torch.set_grad_enabled(False)
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
# net and model
|
||||
net = RetinaFace(cfg=cfg, phase = 'test')
|
||||
net = load_model(net, args.trained_model, args.cpu)
|
||||
net.eval()
|
||||
print('Finished loading model!')
|
||||
#print(net)
|
||||
cudnn.benchmark = True
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
net = net.to(device)
|
||||
|
||||
resize = 1
|
||||
input_path = r"D:\Download\out\cfp"
|
||||
output_path = "D:\Download\out\cfp_align"
|
||||
folder1 = os.listdir(input_path)
|
||||
count = 0
|
||||
count2 =0
|
||||
for f in folder1:
|
||||
output_name_path = os.path.join(output_path, f)
|
||||
if os.path.exists(output_name_path) == 0:
|
||||
os.makedirs(output_name_path)
|
||||
img_name_path = os.path.join(input_path, f)
|
||||
img_list = os.listdir(img_name_path)
|
||||
|
||||
for img in img_list:
|
||||
count2 +=1
|
||||
print(count2)
|
||||
path = os.path.join(img_name_path, img)
|
||||
align_img_path = os.path.join(output_name_path, img)
|
||||
# print(path)
|
||||
frame = cv2.imread(path)
|
||||
h, w = frame.shape[:2]
|
||||
img = np.float32(frame)
|
||||
im_height, im_width, _ = img.shape
|
||||
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
|
||||
img -= (104, 117, 123)
|
||||
img = img.transpose(2, 0, 1)
|
||||
img = torch.from_numpy(img).unsqueeze(0)
|
||||
img = img.to(device)
|
||||
scale = scale.to(device)
|
||||
|
||||
tic = time.time()
|
||||
loc, conf, landms = net(img) # forward pass
|
||||
print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
|
||||
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
|
||||
priors = priorbox.forward()
|
||||
priors = priors.to(device)
|
||||
prior_data = priors.data
|
||||
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
boxes = boxes * scale / resize
|
||||
boxes = boxes.cpu().numpy()
|
||||
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2]])
|
||||
scale1 = scale1.to(device)
|
||||
landms = landms * scale1 / resize
|
||||
landms = landms.cpu().numpy()
|
||||
|
||||
# ignore low scores
|
||||
inds = np.where(scores > args.confidence_threshold)[0]
|
||||
boxes = boxes[inds]
|
||||
landms = landms[inds]
|
||||
scores = scores[inds]
|
||||
|
||||
# keep top-K before NMS
|
||||
order = scores.argsort()[::-1][:args.top_k]
|
||||
boxes = boxes[order]
|
||||
landms = landms[order]
|
||||
scores = scores[order]
|
||||
|
||||
# do NMS
|
||||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
dets = dets[keep, :]
|
||||
landms = landms[keep]
|
||||
|
||||
# keep top-K faster NMS
|
||||
dets = dets[:args.keep_top_k, :]
|
||||
landms = landms[:args.keep_top_k, :]
|
||||
|
||||
dets = np.concatenate((dets, landms), axis=1)
|
||||
score = 500
|
||||
# show image
|
||||
if args.save_image:
|
||||
dst = []
|
||||
for i, det in enumerate(dets):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
center_x = (det[2] + det[0]) / 2
|
||||
center_y = (det[3] + det[1]) / 2
|
||||
if abs(center_x - 125) + abs(center_y - 125) < score:
|
||||
score = abs(center_x - 125) + abs(center_y - 125)
|
||||
dst = np.reshape(landms[i], (5, 2))
|
||||
if len(dst) > 0:
|
||||
src1 = np.array([
|
||||
[38.3814, 51.6963],
|
||||
[73.6186, 51.5014],
|
||||
[56.1120, 71.7366],
|
||||
[41.6361, 92.3655],
|
||||
[70.8167, 92.2041]], dtype=np.float32)
|
||||
tform = trans.SimilarityTransform()
|
||||
tform.estimate(dst, src1)
|
||||
M = tform.params[0:2, :]
|
||||
|
||||
if w < 112 or h < 112:
|
||||
count += 1
|
||||
#print(align_img_path)
|
||||
continue
|
||||
frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
img112 = frame[0:112, 0:112, :]
|
||||
cv2.imwrite(align_img_path, img112)
|
||||
print(">112 number"+str(count))
|
||||
|
||||
def sfz_to112x112_retinaface(arcface_model,cpu_or_cuda):
|
||||
torch.set_grad_enabled(False)
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
# net and model
|
||||
net = RetinaFace(cfg=cfg, phase = 'test')
|
||||
net = load_model(net, args.trained_model, args.cpu)
|
||||
net.eval()
|
||||
print('Finished loading model!')
|
||||
#print(net)
|
||||
cudnn.benchmark = True
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
net = net.to(device)
|
||||
|
||||
resize = 1
|
||||
input_path = r"D:\Download\out\alig_students_all"
|
||||
output_path = r"D:\Download\out\alig_students_all"
|
||||
folder1 = os.listdir(input_path)
|
||||
count = 0
|
||||
count2 =0
|
||||
print(len(folder1))
|
||||
# print(folder1[0][:-4])
|
||||
# return 0
|
||||
order_img = []
|
||||
order_name = []
|
||||
tic = time.time()
|
||||
for img_name in folder1[:2500]:
|
||||
# output_name_path = os.path.join(output_path, img_name)
|
||||
# if os.path.exists(output_name_path) == 0:
|
||||
# os.makedirs(output_name_path)
|
||||
img_name_path = os.path.join(input_path, img_name)
|
||||
#img_list = os.listdir(img_name_path)
|
||||
count2 += 1
|
||||
if (count2 % 1000 == 0):
|
||||
print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
print(count2)
|
||||
if len(order_img) > 0:
|
||||
order_img = np.array(order_img)
|
||||
order_img = order_img.transpose((0, 3, 1, 2))
|
||||
order_img = np.array(order_img, dtype=np.float32)
|
||||
order_img -= 127.5
|
||||
order_img /= 127.5
|
||||
# order_img = np.array(order_img)
|
||||
# print(order_img.shape)
|
||||
# print(len(order_name))
|
||||
create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
|
||||
order_img = []
|
||||
order_name = []
|
||||
tic = time.time()
|
||||
|
||||
# if img_name[19] != "1":
|
||||
# continue
|
||||
|
||||
#path = os.path.join(img_name_path, img)
|
||||
align_img_path = os.path.join(output_path, img_name)
|
||||
# print(path)
|
||||
#frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
|
||||
try:
|
||||
frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
|
||||
h, w, d = frame.shape
|
||||
except AttributeError:
|
||||
print(img_name)
|
||||
continue
|
||||
if d == 1:
|
||||
continue
|
||||
factor = h / w
|
||||
if (w > 1000):
|
||||
frame = cv2.resize(frame, (600, int(600 * factor)))
|
||||
h, w = frame.shape[:2]
|
||||
img = np.float32(frame)
|
||||
im_height, im_width, _ = img.shape
|
||||
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
|
||||
img -= (104, 117, 123)
|
||||
img = img.transpose(2, 0, 1)
|
||||
img = torch.from_numpy(img).unsqueeze(0)
|
||||
img = img.to(device)
|
||||
scale = scale.to(device)
|
||||
|
||||
#tic = time.time()
|
||||
loc, conf, landms = net(img) # forward pass
|
||||
#print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
|
||||
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
|
||||
priors = priorbox.forward()
|
||||
priors = priors.to(device)
|
||||
prior_data = priors.data
|
||||
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
boxes = boxes * scale / resize
|
||||
boxes = boxes.cpu().numpy()
|
||||
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2]])
|
||||
scale1 = scale1.to(device)
|
||||
landms = landms * scale1 / resize
|
||||
landms = landms.cpu().numpy()
|
||||
|
||||
# ignore low scores
|
||||
inds = np.where(scores > args.confidence_threshold)[0]
|
||||
boxes = boxes[inds]
|
||||
landms = landms[inds]
|
||||
scores = scores[inds]
|
||||
|
||||
# keep top-K before NMS
|
||||
order = scores.argsort()[::-1][:args.top_k]
|
||||
boxes = boxes[order]
|
||||
landms = landms[order]
|
||||
scores = scores[order]
|
||||
|
||||
# do NMS
|
||||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
dets = dets[keep, :]
|
||||
landms = landms[keep]
|
||||
|
||||
# keep top-K faster NMS
|
||||
dets = dets[:args.keep_top_k, :]
|
||||
landms = landms[:args.keep_top_k, :]
|
||||
|
||||
dets = np.concatenate((dets, landms), axis=1)
|
||||
score = 500
|
||||
# show image
|
||||
if args.save_image:
|
||||
dst = []
|
||||
for i, det in enumerate(dets):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
# center_x = (det[2] + det[0]) / 2
|
||||
# center_y = (det[3] + det[1]) / 2
|
||||
# if abs(center_x - 125) + abs(center_y - 125) < score:
|
||||
# score = abs(center_x - 125) + abs(center_y - 125)
|
||||
dst = np.reshape(landms[i], (5, 2))
|
||||
if len(dst) > 0:
|
||||
src1 = np.array([
|
||||
[38.3814, 51.6963],
|
||||
[73.6186, 51.5014],
|
||||
[56.1120, 71.7366],
|
||||
[41.6361, 92.3655],
|
||||
[70.8167, 92.2041]], dtype=np.float32)
|
||||
tform = trans.SimilarityTransform()
|
||||
tform.estimate(dst, src1)
|
||||
M = tform.params[0:2, :]
|
||||
|
||||
if w < 112 or h < 112:
|
||||
count += 1
|
||||
print(img_name_path)
|
||||
continue
|
||||
frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
img112 = frame[0:112, 0:112, :]
|
||||
order_img.append(img112)
|
||||
order_name.append(img_name[:-6])
|
||||
#cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
|
||||
#cv2.imwrite(align_img_path, img112)
|
||||
|
||||
print(">112 number"+str(count))
|
||||
if len(order_img) > 0:
|
||||
order_img = np.array(order_img)
|
||||
order_img = order_img.transpose((0, 3, 1, 2))
|
||||
order_img = np.array(order_img, dtype=np.float32)
|
||||
order_img -= 127.5
|
||||
order_img /= 127.5
|
||||
#order_img = np.array(order_img)
|
||||
# print(order_img.shape)
|
||||
# print(len(order_name))
|
||||
create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
|
||||
|
||||
def count_accuracy(arcface_model,cpu_or_cuda,index ,database_name_list):
|
||||
torch.set_grad_enabled(False)
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
# net and model
|
||||
net = RetinaFace(cfg=cfg, phase = 'test')
|
||||
net = load_model(net, args.trained_model, args.cpu)
|
||||
net.eval()
|
||||
print('Finished loading model!')
|
||||
#print(net)
|
||||
cudnn.benchmark = True
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
net = net.to(device)
|
||||
|
||||
resize = 1
|
||||
input_path = r"../face/czrkzp2"
|
||||
folder1 = os.listdir(input_path)
|
||||
count = 0
|
||||
count2 =0
|
||||
print(len(folder1))
|
||||
# print(folder1[0][:-4])
|
||||
# return 0
|
||||
order_img = []
|
||||
order_name = []
|
||||
tic = time.time()
|
||||
for img_name in folder1[:15000]:
|
||||
# output_name_path = os.path.join(output_path, img_name)
|
||||
# if os.path.exists(output_name_path) == 0:
|
||||
# os.makedirs(output_name_path)
|
||||
img_name_path = os.path.join(input_path, img_name)
|
||||
#img_list = os.listdir(img_name_path)
|
||||
count2 += 1
|
||||
if (count2 % 5000 == 0):
|
||||
print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
print(count2)
|
||||
# if len(order_img) > 0:
|
||||
# order_img = np.array(order_img)
|
||||
# order_img = order_img.transpose((0, 3, 1, 2))
|
||||
# order_img = np.array(order_img, dtype=np.float32)
|
||||
# order_img -= 127.5
|
||||
# order_img /= 127.5
|
||||
# # order_img = np.array(order_img)
|
||||
# # print(order_img.shape)
|
||||
# # print(len(order_name))
|
||||
# create_database_from_img(order_name, order_img, arcface_model, "./Database/sfz_test.npy", cpu_or_cuda)
|
||||
# order_img = []
|
||||
# order_name = []
|
||||
# tic = time.time()
|
||||
|
||||
if img_name[19] == "1":
|
||||
continue
|
||||
|
||||
#path = os.path.join(img_name_path, img)
|
||||
#align_img_path = os.path.join(output_path, img_name)
|
||||
# print(path)
|
||||
#frame = cv2.imdecode(np.fromfile(img_name_path, dtype=np.uint8), cv2.IMREAD_COLOR)
|
||||
try:
|
||||
frame = cv2.imread(img_name_path)
|
||||
h, w, d = frame.shape
|
||||
except AttributeError:
|
||||
print(img_name)
|
||||
continue
|
||||
if d == 1:
|
||||
continue
|
||||
factor = h / w
|
||||
if (w > 1000):
|
||||
frame = cv2.resize(frame, (600, int(600 * factor)))
|
||||
h, w = frame.shape[:2]
|
||||
img = np.float32(frame)
|
||||
im_height, im_width, _ = img.shape
|
||||
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
|
||||
img -= (104, 117, 123)
|
||||
img = img.transpose(2, 0, 1)
|
||||
img = torch.from_numpy(img).unsqueeze(0)
|
||||
img = img.to(device)
|
||||
scale = scale.to(device)
|
||||
|
||||
#tic = time.time()
|
||||
loc, conf, landms = net(img) # forward pass
|
||||
#print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
|
||||
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
|
||||
priors = priorbox.forward()
|
||||
priors = priors.to(device)
|
||||
prior_data = priors.data
|
||||
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
boxes = boxes * scale / resize
|
||||
boxes = boxes.cpu().numpy()
|
||||
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2]])
|
||||
scale1 = scale1.to(device)
|
||||
landms = landms * scale1 / resize
|
||||
landms = landms.cpu().numpy()
|
||||
|
||||
# ignore low scores
|
||||
inds = np.where(scores > args.confidence_threshold)[0]
|
||||
boxes = boxes[inds]
|
||||
landms = landms[inds]
|
||||
scores = scores[inds]
|
||||
|
||||
# keep top-K before NMS
|
||||
order = scores.argsort()[::-1][:args.top_k]
|
||||
boxes = boxes[order]
|
||||
landms = landms[order]
|
||||
scores = scores[order]
|
||||
|
||||
# do NMS
|
||||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
dets = dets[keep, :]
|
||||
landms = landms[keep]
|
||||
|
||||
# keep top-K faster NMS
|
||||
dets = dets[:args.keep_top_k, :]
|
||||
landms = landms[:args.keep_top_k, :]
|
||||
|
||||
dets = np.concatenate((dets, landms), axis=1)
|
||||
score = 500
|
||||
# show image
|
||||
if args.save_image:
|
||||
dst = []
|
||||
for i, det in enumerate(dets):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
# center_x = (det[2] + det[0]) / 2
|
||||
# center_y = (det[3] + det[1]) / 2
|
||||
# if abs(center_x - 125) + abs(center_y - 125) < score:
|
||||
# score = abs(center_x - 125) + abs(center_y - 125)
|
||||
dst = np.reshape(landms[i], (5, 2))
|
||||
if len(dst) > 0:
|
||||
src1 = np.array([
|
||||
[38.3814, 51.6963],
|
||||
[73.6186, 51.5014],
|
||||
[56.1120, 71.7366],
|
||||
[41.6361, 92.3655],
|
||||
[70.8167, 92.2041]], dtype=np.float32)
|
||||
tform = trans.SimilarityTransform()
|
||||
tform.estimate(dst, src1)
|
||||
M = tform.params[0:2, :]
|
||||
|
||||
if w < 112 or h < 112:
|
||||
count += 1
|
||||
print(img_name_path)
|
||||
continue
|
||||
frame = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
img112 = frame[0:112, 0:112, :]
|
||||
order_img.append(img112)
|
||||
order_name.append(img_name)
|
||||
#cv2.imencode('.jpg', img112)[1].tofile(align_img_path)
|
||||
#cv2.imwrite(align_img_path, img112)
|
||||
|
||||
print(">112 number"+str(count))
|
||||
if len(order_img) > 0:
|
||||
order_img = np.array(order_img)
|
||||
order_img = order_img.transpose((0, 3, 1, 2))
|
||||
order_img = np.array(order_img, dtype=np.float32)
|
||||
order_img -= 127.5
|
||||
order_img /= 127.5
|
||||
#order_img = np.array(order_img)
|
||||
# print(order_img.shape)
|
||||
# print(len(order_name))
|
||||
count_acc(order_name,order_img,arcface_model,index ,database_name_list,cpu_or_cuda)
|
||||
|
||||
def count_acc(order_name,order_img,model,index ,database_name_list,cpu_or_cuda):
|
||||
pred_name = []
|
||||
unknown = []
|
||||
print(order_img.shape)
|
||||
|
||||
start_time = time.time()
|
||||
# order_img = torch.from_numpy(order_img)
|
||||
# order_img = order_img.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
batch = 256
|
||||
now = 0
|
||||
number = len(order_img)
|
||||
# number = 1400
|
||||
for i in range(number):
|
||||
unknown.append("unknown")
|
||||
|
||||
while now < number:
|
||||
if now + batch < number:
|
||||
name = findAll(order_img[now:now + batch], model, index ,database_name_list, cpu_or_cuda)
|
||||
else:
|
||||
name = findAll(order_img[now:number], model, index ,database_name_list, cpu_or_cuda)
|
||||
now = now + batch
|
||||
for na in name:
|
||||
pred_name.append(na)
|
||||
print("batch" + str(now))
|
||||
end_time = time.time()
|
||||
print("findAll time: " + str(end_time - start_time))
|
||||
# print(len(pred_name))
|
||||
right = 0
|
||||
for i, name in enumerate(pred_name):
|
||||
if pred_name[i] == order_name[i][:-6]:
|
||||
right += 1
|
||||
filed = 0
|
||||
for i, name in enumerate(pred_name):
|
||||
if pred_name[i] == unknown[i]:
|
||||
filed += 1
|
||||
#print(order_name[i])
|
||||
error = 0
|
||||
print("----------------")
|
||||
for i, name in enumerate(pred_name):
|
||||
if pred_name[i] != order_name[i][:-6]:
|
||||
error += 1
|
||||
#print(order_name[i] + " " + pred_name[i] + " ")
|
||||
#print(order_name)
|
||||
#print(pred_name)
|
||||
print("total:" + str(number))
|
||||
print("right:" + str(right+filed) + " rate:" + str((filed+right) / number))
|
||||
#print("filed:" + str(filed) + " rate:" + str(filed / number))
|
||||
print("error:" + str(error - filed) + " rate:" + str((error - filed) / number))
|
||||
# if __name__ == '__main__':
|
||||
# torch.set_grad_enabled(False)
|
||||
# cfg = None
|
||||
# if args.network == "mobile0.25":
|
||||
# cfg = cfg_mnet
|
||||
# elif args.network == "resnet50":
|
||||
# cfg = cfg_re50
|
||||
# # net and model
|
||||
# net = RetinaFace(cfg=cfg, phase = 'test')
|
||||
# net = load_model(net, args.trained_model, args.cpu)
|
||||
# net.eval()
|
||||
# print('Finished loading model!')
|
||||
# #print(net)
|
||||
# cudnn.benchmark = True
|
||||
# device = torch.device("cpu" if args.cpu else "cuda")
|
||||
# net = net.to(device)
|
||||
#
|
||||
# resize = 1
|
||||
#
|
||||
# # testing begin
|
||||
# cap = cv2.VideoCapture("rtsp://47.108.74.82:8557/h264")
|
||||
# ret, frame = cap.read()
|
||||
# h, w = frame.shape[:2]
|
||||
# fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
# size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
||||
# int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
||||
# #out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
|
||||
# out = cv2.VideoWriter('ttttttt.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, size)
|
||||
# number = 0
|
||||
#
|
||||
# model = iresnet100()
|
||||
# model.load_state_dict(torch.load("./model/backbone100.pth", map_location="cpu"))
|
||||
# model.eval()
|
||||
# k_v = load_npy("./Database/student.npy")
|
||||
#
|
||||
# while ret:
|
||||
# tic = time.time()
|
||||
# img = np.float32(frame)
|
||||
# im_height, im_width, _ = img.shape
|
||||
# scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
|
||||
# img -= (104, 117, 123)
|
||||
# img = img.transpose(2, 0, 1)
|
||||
# img = torch.from_numpy(img).unsqueeze(0)
|
||||
# img = img.to(device)
|
||||
# scale = scale.to(device)
|
||||
#
|
||||
# loc, conf, landms = net(img) # forward pass
|
||||
#
|
||||
#
|
||||
# priorbox = PriorBox(cfg, image_size=(im_height, im_width))
|
||||
# priors = priorbox.forward()
|
||||
# priors = priors.to(device)
|
||||
# prior_data = priors.data
|
||||
# boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
# boxes = boxes * scale / resize
|
||||
# boxes = boxes.cpu().numpy()
|
||||
# scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
# landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
# scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
# img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
# img.shape[3], img.shape[2]])
|
||||
# scale1 = scale1.to(device)
|
||||
# landms = landms * scale1 / resize
|
||||
# landms = landms.cpu().numpy()
|
||||
#
|
||||
# # ignore low scores
|
||||
# inds = np.where(scores > args.confidence_threshold)[0]
|
||||
# boxes = boxes[inds]
|
||||
# landms = landms[inds]
|
||||
# scores = scores[inds]
|
||||
#
|
||||
# # keep top-K before NMS
|
||||
# order = scores.argsort()[::-1][:args.top_k]
|
||||
# boxes = boxes[order]
|
||||
# landms = landms[order]
|
||||
# scores = scores[order]
|
||||
#
|
||||
# # do NMS
|
||||
# dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
# keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
# dets = dets[keep, :]
|
||||
# landms = landms[keep]
|
||||
#
|
||||
# # keep top-K faster NMS
|
||||
# dets = dets[:args.keep_top_k, :]
|
||||
# landms = landms[:args.keep_top_k, :]
|
||||
#
|
||||
# dets = np.concatenate((dets, landms), axis=1)
|
||||
# face_list = []
|
||||
# name_list = []
|
||||
# #print(dets[:4])
|
||||
# print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
# start_time = time.time()
|
||||
# for i, det in enumerate(dets):
|
||||
# if det[4] < args.vis_thres:
|
||||
# continue
|
||||
# boxes, score = det[:4], det[4]
|
||||
# dst = np.reshape(landms[i],(5,2))
|
||||
# #print(dst.shape)
|
||||
# src1 = np.array([
|
||||
# [38.3814, 51.6963],
|
||||
# [73.6186, 51.5014],
|
||||
# [56.1120, 71.7366],
|
||||
# [41.6361, 92.3655],
|
||||
# [70.8167, 92.2041]], dtype=np.float32)
|
||||
# #print(src1.shape)
|
||||
# tform = trans.SimilarityTransform()
|
||||
# tform.estimate(dst, src1)
|
||||
# M = tform.params[0:2, :]
|
||||
# frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
# img112 = frame2[0:112, 0:112, :]
|
||||
# # cv2.imwrite("./img/man"+str(count)+".jpg", img112)
|
||||
# # count += 1
|
||||
# face_list.append(img112)
|
||||
#
|
||||
# if len(face_list) != 0:
|
||||
# face_list = np.array(face_list)
|
||||
# face_list = face_list.transpose((0, 3, 1, 2))
|
||||
# face_list = np.array(face_list, dtype=np.float32)
|
||||
# face_list -= 127.5
|
||||
# face_list /= 127.5
|
||||
# print(face_list.shape)
|
||||
# face_list = torch.from_numpy(face_list)
|
||||
#
|
||||
# name_list = findAll(face_list, model, k_v)
|
||||
# end_time = time.time()
|
||||
# print("findOneframe time: " + str(end_time - start_time))
|
||||
# start_time = time.time()
|
||||
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
# draw = ImageDraw.Draw(img_PIL)
|
||||
# font = ImageFont.truetype("font.ttf", 22)
|
||||
# for i, det in enumerate(dets):
|
||||
# if det[4] < args.vis_thres:
|
||||
# continue
|
||||
# boxes, score = det[:4], det[4]
|
||||
# #print(name_list)
|
||||
# name = name_list[i]
|
||||
# mo = r'[\u4e00-\u9fa5]*'
|
||||
# name = re.match(mo, name).group(0)
|
||||
# if not isinstance(name, np.unicode):
|
||||
# name = name.decode('utf8')
|
||||
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
|
||||
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||
# cv2.imshow('out', frame)
|
||||
# out.write(frame)
|
||||
# end_time = time.time()
|
||||
# print("drawOneframe time: " + str(end_time - start_time))
|
||||
# # Press Q on keyboard to stop recording
|
||||
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
# break
|
||||
# ret, frame = cap.read()
|
||||
# cap.release()
|
||||
# out.release()
|
||||
# cv2.destroyAllWindows()
|
||||
if __name__ == '__main__':
|
||||
cpu_or_cuda = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
arcface_model = load_arcface_model("./model/backbone100.pth", cpu_or_cuda=cpu_or_cuda)
|
||||
|
||||
k_v = load_npy("./Database/sfz_test.npy")
|
||||
database_name_list = list(k_v.keys())
|
||||
vector_list = np.array(list(k_v.values()))
|
||||
print(vector_list.shape)
|
||||
# print(database_name_list)
|
||||
nlist = 500
|
||||
quantizer = faiss.IndexFlatL2(512) # the other index
|
||||
index = faiss.IndexIVFFlat(quantizer, 512, nlist, faiss.METRIC_L2)
|
||||
index.train(vector_list)
|
||||
# index = faiss.IndexFlatL2(512)
|
||||
index.add(vector_list)
|
||||
index.nprobe = 50
|
||||
|
||||
count_accuracy(arcface_model, cpu_or_cuda, index, database_name_list)
|
||||
# sfz_to112x112_retinaface(arcface_model,cpu_or_cuda)
|
||||
|
||||
|
483
retinaface_detect.py
Normal file
@ -0,0 +1,483 @@
|
||||
from __future__ import print_function
|
||||
import re
|
||||
import time
|
||||
import cv2
|
||||
import torch
|
||||
import torch.backends.cudnn as cudnn
|
||||
import numpy as np
|
||||
from skimage import transform as trans
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from data import cfg_mnet, cfg_re50
|
||||
from layers.functions.prior_box import PriorBox
|
||||
from utils.nms.py_cpu_nms import py_cpu_nms
|
||||
from models.retinaface import RetinaFace
|
||||
from utils.box_utils import decode, decode_landm
|
||||
|
||||
threshold = 1.05
|
||||
ppi = 1280
|
||||
step = 3
|
||||
|
||||
class ConfRetinaface(object):
|
||||
def __init__(self, trained_model, network, cpu, confidence_threshold, top_k, nms_threshold, keep_top_k, vis_thres):
|
||||
self.trained_model = trained_model
|
||||
self.network = network
|
||||
self.cpu = cpu
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.top_k = top_k
|
||||
self.nms_threshold = nms_threshold
|
||||
self.keep_top_k = keep_top_k
|
||||
self.vis_thres = vis_thres
|
||||
|
||||
|
||||
def set_retinaface_conf(cpu_or_cuda):
|
||||
args = ConfRetinaface(trained_model='./weights/mobilenet0.25_Final.pth',
|
||||
network='mobile0.25',
|
||||
cpu=True if cpu_or_cuda == 'cpu' else False,
|
||||
confidence_threshold=0.02,
|
||||
top_k=5000,
|
||||
nms_threshold=0.4,
|
||||
keep_top_k=750,
|
||||
vis_thres=0.6)
|
||||
return args
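# Usage sketch (illustrative, not original code): the configuration and the detector are
# normally chained as below; load_retinaface_model() is defined further down in this file.
def load_detector_example():
    device = "cuda" if torch.cuda.is_available() else "cpu"
    conf = set_retinaface_conf(cpu_or_cuda=device)
    return conf, load_retinaface_model(conf)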
|
||||
|
||||
|
||||
def check_keys(model, pretrained_state_dict):
|
||||
ckpt_keys = set(pretrained_state_dict.keys())
|
||||
model_keys = set(model.state_dict().keys())
|
||||
used_pretrained_keys = model_keys & ckpt_keys
|
||||
unused_pretrained_keys = ckpt_keys - model_keys
|
||||
missing_keys = model_keys - ckpt_keys
|
||||
print('Missing keys:{}'.format(len(missing_keys)))
|
||||
print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
|
||||
print('Used keys:{}'.format(len(used_pretrained_keys)))
|
||||
assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
|
||||
return True
|
||||
|
||||
|
||||
def remove_prefix(state_dict, prefix):
|
||||
''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
|
||||
print('remove prefix \'{}\''.format(prefix))
|
||||
f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
|
||||
return {f(key): value for key, value in state_dict.items()}
|
||||
|
||||
|
||||
def load_model(model, pretrained_path, load_to_cpu):
|
||||
print('Loading pretrained model from {}'.format(pretrained_path))
|
||||
if load_to_cpu:
|
||||
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
|
||||
else:
|
||||
device = torch.cuda.current_device()
|
||||
pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
|
||||
if "state_dict" in pretrained_dict.keys():
|
||||
pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
|
||||
else:
|
||||
pretrained_dict = remove_prefix(pretrained_dict, 'module.')
|
||||
check_keys(model, pretrained_dict)
|
||||
model.load_state_dict(pretrained_dict, strict=False)
|
||||
return model
|
||||
|
||||
|
||||
# Load the RetinaFace detection model
|
||||
def load_retinaface_model(args):
|
||||
torch.set_grad_enabled(False)
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
# net and model
|
||||
net = RetinaFace(cfg=cfg, phase='test')
|
||||
net = load_model(net, args.trained_model, args.cpu)
|
||||
net.eval()
|
||||
cudnn.benchmark = True
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
net = net.to(device)
|
||||
print('Finished loading model!')
|
||||
return net
|
||||
|
||||
|
||||
# Compute the Euclidean distance between two feature vectors
|
||||
def findEuclideanDistance(source_representation, test_representation):
|
||||
euclidean_distance = source_representation - test_representation
|
||||
euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
|
||||
euclidean_distance = np.sqrt(euclidean_distance)
|
||||
return euclidean_distance
|
||||
|
||||
|
||||
# L2-normalize a feature vector so Euclidean distance tracks cosine similarity
|
||||
def l2_normalize(x):
|
||||
return x / np.sqrt(np.sum(np.multiply(x, x)))
|
||||
|
||||
|
||||
# Find the face in the database whose feature vector is closest to the given one
|
||||
def findmindistance(pred, threshold, k_v):
|
||||
distance = 10
|
||||
most_like = ""
|
||||
for name in k_v.keys():
|
||||
tmp = findEuclideanDistance(k_v[name], pred)
|
||||
if distance > tmp:
|
||||
distance = tmp
|
||||
most_like = name
|
||||
if distance < threshold:
|
||||
return most_like
|
||||
else:
|
||||
return "unknown"
|
||||
|
||||
# Look up the nearest database entry for each embedding with the faiss index
|
||||
def faiss_find_face(pred,index ,database_name_list):
|
||||
#print(len(database_name_list))
|
||||
start_time = time.time()
|
||||
D, I = index.search(pred, 1)
|
||||
name_list = []
|
||||
end_time = time.time()
|
||||
print("faiss cost %fs" % (end_time - start_time))
|
||||
print(D, I)
|
||||
# if D[0][0] < threshold:
|
||||
# print(database_name_list[I[0][0]])
|
||||
# return database_name_list[I[0][0]]
|
||||
# else:
|
||||
# return "unknown"
|
||||
for i, idx in enumerate(I):  # avoid shadowing the faiss index argument
|
||||
if D[i][0] < threshold:
|
||||
#print(database_name_list[I[0][0]])
|
||||
name_list.append(database_name_list[idx[0]])
|
||||
else:
|
||||
name_list.append("unknown")
|
||||
return name_list
|
||||
|
||||
# Match every face in the given batch against the face database
|
||||
def findAll(imglist, model, index ,database_name_list, k_v, cpu_or_cuda):
|
||||
start_time = time.time()
|
||||
imglist = torch.from_numpy(imglist)
|
||||
imglist = imglist.to(torch.device("cuda" if cpu_or_cuda == "cuda" else "cpu"))
|
||||
with torch.no_grad():
|
||||
name_list = []
|
||||
pred = model(imglist)
|
||||
pred = pred.cpu().numpy()
|
||||
print("predOne time: " + str(time.time() - start_time))
|
||||
#print(pred.shape)
|
||||
start_time = time.time()
|
||||
#name_list = faiss_find_face(l2_normalize(pred), index, database_name_list)
|
||||
for pr in pred:
|
||||
name = findmindistance(l2_normalize(pr), threshold=threshold, k_v=k_v)
|
||||
print(name)
|
||||
# print(l2_normalize(pr).shape)
|
||||
#pr = np.expand_dims(l2_normalize(pr), 0)
|
||||
#print(pr.shape)
|
||||
#name = faiss_find_face(pr,index ,database_name_list)
|
||||
if name != "unknown":
|
||||
mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
|
||||
name = re.match(mo, name)
|
||||
name_list.append(name.group(0))
|
||||
else:
|
||||
name_list.append("unknown")
|
||||
#name_list.append(name)
|
||||
print("findOne time: " + str(time.time() - start_time))
|
||||
return name_list
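# Editorial sketch: the regex in findAll() keeps only Chinese characters, underscores and
# Latin letters, which strips trailing id digits from a database key; the sample key below
# is hypothetical.
def strip_key_suffix_example():
    mo = r'[\u4e00-\u9fa5_a-zA-Z]*'
    return re.match(mo, "张三01").group(0)   # -> "张三"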
|
||||
|
||||
|
||||
# Detect faces in a single image and return aligned crops as an N x 3 x 112 x 112 array, plus their boxes and landmarks
|
||||
def detect_one(path, net, args):
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
resize = 1
|
||||
|
||||
# testing begin
|
||||
frame = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_COLOR)
|
||||
h, w = frame.shape[:2]
|
||||
factor = h / w
|
||||
if (w > 1000):
|
||||
frame = cv2.resize(frame, (600, int(600 * factor)))
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
tic = time.time()
|
||||
img = np.float32(frame)
|
||||
im_height, im_width, _ = img.shape
|
||||
scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
|
||||
img -= (104, 117, 123)
|
||||
img = img.transpose(2, 0, 1)
|
||||
img = torch.from_numpy(img).unsqueeze(0)
|
||||
img = img.to(device)
|
||||
scale = scale.to(device)
|
||||
|
||||
loc, conf, landms = net(img) # forward pass
|
||||
#print(loc.shape,landms.shape,conf.shape)
|
||||
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
|
||||
priors = priorbox.forward()
|
||||
priors = priors.to(device)
|
||||
prior_data = priors.data
|
||||
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
boxes = boxes * scale / resize
|
||||
boxes = boxes.cpu().numpy()
|
||||
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2], img.shape[3], img.shape[2],
|
||||
img.shape[3], img.shape[2]])
|
||||
scale1 = scale1.to(device)
|
||||
landms = landms * scale1 / resize
|
||||
landms = landms.cpu().numpy()
|
||||
|
||||
# ignore low scores
|
||||
inds = np.where(scores > args.confidence_threshold)[0]
|
||||
boxes = boxes[inds]
|
||||
landms = landms[inds]
|
||||
scores = scores[inds]
|
||||
|
||||
# keep top-K before NMS
|
||||
order = scores.argsort()[::-1][:args.top_k]
|
||||
boxes = boxes[order]
|
||||
landms = landms[order]
|
||||
scores = scores[order]
|
||||
|
||||
# do NMS
|
||||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
dets = dets[keep, :]
|
||||
landms = landms[keep]
|
||||
|
||||
# keep top-K faster NMS
|
||||
dets = dets[:args.keep_top_k, :]
|
||||
landms = landms[:args.keep_top_k, :]
|
||||
|
||||
dets = np.concatenate((dets, landms), axis=1)
|
||||
face_list = []
|
||||
box_and_point = []
|
||||
# print(dets[:4])
|
||||
# print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
print(len(dets))
|
||||
for i, det in enumerate(dets):
|
||||
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
box_and_point.append(det)
|
||||
dst = np.reshape(landms[i], (5, 2))
|
||||
# print(dst.shape)
|
||||
src1 = np.array([
|
||||
[38.3814, 51.6963],
|
||||
[73.6186, 51.5014],
|
||||
[56.1120, 71.7366],
|
||||
[41.6361, 92.3655],
|
||||
[70.8167, 92.2041]], dtype=np.float32)
|
||||
# print(src1.shape)
|
||||
tform = trans.SimilarityTransform()
|
||||
tform.estimate(dst, src1)
|
||||
M = tform.params[0:2, :]
|
||||
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
img112 = frame2[0:112, 0:112, :]
|
||||
# cv2.imshow('out', img112)
|
||||
# cv2.waitKey(0)
|
||||
face_list.append(img112)
|
||||
if len(face_list) > 0:
|
||||
face_list = np.array(face_list)
|
||||
face_list = face_list.transpose((0, 3, 1, 2))
|
||||
face_list = np.array(face_list, dtype=np.float32)
|
||||
face_list -= 127.5
|
||||
face_list /= 127.5
|
||||
box_and_point = np.array(box_and_point)
|
||||
# face_list = torch.from_numpy(face_list)
|
||||
# cv2.imshow('out', img112)
|
||||
# cv2.waitKey(0)
|
||||
return face_list, box_and_point
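# Usage sketch (illustrative; the image path is hypothetical): detect_one() already returns
# normalized 112x112 crops, so its output can be passed straight to findAll().
def recognize_image_example(net, arcface_model, index, database_name_list, k_v, args):
    faces, box_and_point = detect_one("./img/sample.jpg", net, args)
    if len(faces) == 0:
        return []
    device = "cpu" if args.cpu else "cuda"
    return findAll(faces, arcface_model, index, database_name_list, k_v, device)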
|
||||
|
||||
|
||||
# Detect and recognize faces in a video file
|
||||
def detect_video(video_path, output_path, net, arcface_model, k_v, args):
|
||||
tic_total = time.time()
|
||||
cfg = None
|
||||
if args.network == "mobile0.25":
|
||||
cfg = cfg_mnet
|
||||
elif args.network == "resnet50":
|
||||
cfg = cfg_re50
|
||||
device = torch.device("cpu" if args.cpu else "cuda")
|
||||
resize = 1
|
||||
|
||||
# testing begin
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
ret, frame = cap.read()
|
||||
h, w = frame.shape[:2]
|
||||
factor = 0
|
||||
if (w > ppi):
|
||||
factor = h / w
|
||||
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
|
||||
h, w = frame.shape[:2]
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
size = (w, h)
|
||||
# size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
|
||||
# int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
|
||||
# out = cv2.VideoWriter('out.mp4', cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
|
||||
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'XVID'), fps, size)
|
||||
number = step
|
||||
dets = []
|
||||
name_list = []
|
||||
font = ImageFont.truetype("font.ttf", 22)
|
||||
priorbox = PriorBox(cfg, image_size=(h, w))
|
||||
priors = priorbox.forward()
|
||||
priors = priors.to(device)
|
||||
prior_data = priors.data
|
||||
|
||||
scale = torch.Tensor([w, h, w, h])
|
||||
scale = scale.to(device)
|
||||
scale1 = torch.Tensor([w, h, w, h,
|
||||
w, h, w, h,
|
||||
w, h])
|
||||
scale1 = scale1.to(device)
|
||||
|
||||
src1 = np.array([
|
||||
[38.3814, 51.6963],
|
||||
[73.6186, 51.5014],
|
||||
[56.1120, 71.7366],
|
||||
[41.6361, 92.3655],
|
||||
[70.8167, 92.2041]], dtype=np.float32)
|
||||
# print(src1.shape)
|
||||
tform = trans.SimilarityTransform()
|
||||
|
||||
while ret:
|
||||
tic_all = time.time()
|
||||
if number == step:
|
||||
tic = time.time()
|
||||
img = np.float32(frame)
|
||||
img -= (104, 117, 123)
|
||||
img = img.transpose(2, 0, 1)
|
||||
img = torch.from_numpy(img).unsqueeze(0)
|
||||
img = img.to(device)
|
||||
|
||||
loc, conf, landms = net(img) # forward pass
|
||||
|
||||
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
|
||||
boxes = boxes * scale / resize
|
||||
boxes = boxes.cpu().numpy()
|
||||
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
|
||||
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
|
||||
|
||||
landms = landms * scale1 / resize
|
||||
landms = landms.cpu().numpy()
|
||||
|
||||
# ignore low scores
|
||||
inds = np.where(scores > args.confidence_threshold)[0]
|
||||
boxes = boxes[inds]
|
||||
landms = landms[inds]
|
||||
scores = scores[inds]
|
||||
|
||||
# keep top-K before NMS
|
||||
order = scores.argsort()[::-1][:args.top_k]
|
||||
boxes = boxes[order]
|
||||
landms = landms[order]
|
||||
scores = scores[order]
|
||||
|
||||
# do NMS
|
||||
dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = py_cpu_nms(dets, args.nms_threshold)
|
||||
# keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
|
||||
dets = dets[keep, :]
|
||||
landms = landms[keep]
|
||||
|
||||
# keep top-K faster NMS
|
||||
dets = dets[:args.keep_top_k, :]
|
||||
landms = landms[:args.keep_top_k, :]
|
||||
|
||||
dets = np.concatenate((dets, landms), axis=1)
|
||||
face_list = []
|
||||
name_list = []
|
||||
# print(dets[:4])
|
||||
print('net forward time: {:.4f}'.format(time.time() - tic))
|
||||
start_time = time.time()
|
||||
for i, det in enumerate(dets[:4]):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
boxes, score = det[:4], det[4]
|
||||
dst = np.reshape(landms[i], (5, 2))
|
||||
# print(dst.shape)
|
||||
|
||||
tform.estimate(dst, src1)
|
||||
M = tform.params[0:2, :]
|
||||
frame2 = cv2.warpAffine(frame, M, (w, h), borderValue=0.0)
|
||||
img112 = frame2[0:112, 0:112, :]
|
||||
face_list.append(img112)
|
||||
|
||||
if len(face_list) != 0:
|
||||
face_list = np.array(face_list)
|
||||
face_list = face_list.transpose((0, 3, 1, 2))
|
||||
face_list = np.array(face_list, dtype=np.float32)
|
||||
face_list -= 127.5
|
||||
face_list /= 127.5
|
||||
print(face_list.shape)
|
||||
# face_list = torch.from_numpy(face_list)
|
||||
name_list = findAll(face_list, arcface_model, k_v, "cpu" if args.cpu else "cuda")
|
||||
end_time = time.time()
|
||||
print("findOneframe time: " + str(end_time - start_time))
|
||||
start_time = time.time()
|
||||
if (len(dets) != 0):
|
||||
for i, det in enumerate(dets[:4]):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
boxes, score = det[:4], det[4]
|
||||
cv2.rectangle(frame, (int(boxes[0]), int(boxes[1])), (int(boxes[2]), int(boxes[3])), (2, 255, 0), 1)
|
||||
|
||||
# if (len(dets) != 0):
|
||||
# img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
# draw = ImageDraw.Draw(img_PIL)
|
||||
#
|
||||
# for i, det in enumerate(dets[:4]):
|
||||
# if det[4] < args.vis_thres:
|
||||
# continue
|
||||
# boxes, score = det[:4], det[4]
|
||||
# # print(name_list)
|
||||
# name = name_list[i]
|
||||
# if not isinstance(name, np.unicode):
|
||||
# name = name.decode('utf8')
|
||||
# draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||
# draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green", width=3)
|
||||
# frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||
#cv2.imshow('out', frame)
|
||||
#cv2.waitKey(0)
|
||||
out.write(frame)
|
||||
end_time = time.time()
|
||||
print("drawOneframe time: " + str(end_time - start_time))
|
||||
# Press Q on keyboard to stop recording
|
||||
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
# break
|
||||
ret, frame = cap.read()
|
||||
number = 0
|
||||
if (ret != 0 and factor != 0):
|
||||
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
|
||||
else:
|
||||
number += 1
|
||||
if (len(dets) != 0):
|
||||
img_PIL = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
||||
draw = ImageDraw.Draw(img_PIL)
|
||||
for i, det in enumerate(dets[:4]):
|
||||
if det[4] < args.vis_thres:
|
||||
continue
|
||||
boxes, score = det[:4], det[4]
|
||||
# print(name_list)
|
||||
name = name_list[i]
|
||||
if not isinstance(name, str):  # np.unicode was removed in NumPy >= 1.20; str is equivalent here
|
||||
name = name.decode('utf8')
|
||||
draw.text((int(boxes[0]), int(boxes[1])), name, fill=(255, 0, 0), font=font)
|
||||
draw.rectangle((int(boxes[0]), int(boxes[1]), int(boxes[2]), int(boxes[3])), outline="green",
|
||||
width=3)
|
||||
frame = cv2.cvtColor(np.asarray(img_PIL), cv2.COLOR_RGB2BGR)
|
||||
out.write(frame)
|
||||
start_time = time.time()
|
||||
ret, frame = cap.read()
|
||||
if (ret != 0 and factor != 0):
|
||||
frame = cv2.resize(frame, (ppi, int(ppi * factor)))
|
||||
print("readframe time: " + str(time.time() - start_time))
|
||||
print('all time: {:.4f}'.format(time.time() - tic_all))
|
||||
cap.release()
|
||||
out.release()
|
||||
print('total time: {:.4f}'.format(time.time() - tic_total))
|
||||
#cv2.destroyAllWindows()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = set_retinaface_conf()
|
||||
print(args.cpu)
|
BIN
src/__pycache__/generate_patches.cpython-38.pyc
Normal file
BIN
src/__pycache__/utility.cpython-38.pyc
Normal file
BIN
src/data_io/__pycache__/functional.cpython-38.pyc
Normal file
BIN
src/data_io/__pycache__/transform.cpython-38.pyc
Normal file
65
src/data_io/dataset_folder.py
Normal file
@ -0,0 +1,65 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-4 4:04 PM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : dataset_folder.py
|
||||
# @Software : PyCharm
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
from torchvision import datasets
|
||||
import numpy as np
|
||||
|
||||
|
||||
def opencv_loader(path):
|
||||
img = cv2.imread(path)
|
||||
return img
|
||||
|
||||
|
||||
class DatasetFolderFT(datasets.ImageFolder):
|
||||
def __init__(self, root, transform=None, target_transform=None,
|
||||
ft_width=10, ft_height=10, loader=opencv_loader):
|
||||
super(DatasetFolderFT, self).__init__(root, transform, target_transform, loader)
|
||||
self.root = root
|
||||
self.ft_width = ft_width
|
||||
self.ft_height = ft_height
|
||||
|
||||
def __getitem__(self, index):
|
||||
path, target = self.samples[index]
|
||||
sample = self.loader(path)
|
||||
# generate the FT picture of the sample
|
||||
ft_sample = generate_FT(sample)
|
||||
if sample is None:
|
||||
print('image is None --> ', path)
|
||||
if ft_sample is None:
|
||||
print('FT image is None -->', path)
|
||||
assert sample is not None
|
||||
|
||||
ft_sample = cv2.resize(ft_sample, (self.ft_width, self.ft_height))
|
||||
ft_sample = torch.from_numpy(ft_sample).float()
|
||||
ft_sample = torch.unsqueeze(ft_sample, 0)
|
||||
|
||||
if self.transform is not None:
|
||||
try:
|
||||
sample = self.transform(sample)
|
||||
except Exception as err:
|
||||
print('Error Occurred: %s' % err, path)
|
||||
if self.target_transform is not None:
|
||||
target = self.target_transform(target)
|
||||
return sample, ft_sample, target
|
||||
|
||||
|
||||
def generate_FT(image):
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
f = np.fft.fft2(image)
|
||||
fshift = np.fft.fftshift(f)
|
||||
fimg = np.log(np.abs(fshift)+1)
|
||||
maxx = -1
|
||||
minn = 100000
|
||||
for i in range(len(fimg)):
|
||||
if maxx < max(fimg[i]):
|
||||
maxx = max(fimg[i])
|
||||
if minn > min(fimg[i]):
|
||||
minn = min(fimg[i])
|
||||
fimg = (fimg - minn+1) / (maxx - minn+1)
|
||||
return fimg
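# Usage sketch (illustrative only; the dataset path is an assumption, not part of this file):
#   dataset = DatasetFolderFT('./datasets/rgb_image/<patch_info>', transform=None)
#   sample, ft_sample, target = dataset[0]
# `sample` is the loaded BGR patch, `ft_sample` a (1, ft_height, ft_width) Fourier
# magnitude map used as the auxiliary supervision signal, and `target` the class index.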
|
33
src/data_io/dataset_loader.py
Normal file
@ -0,0 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-4 3:40 PM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : dataset_loader.py
|
||||
# @Software : PyCharm
|
||||
|
||||
from torch.utils.data import DataLoader
|
||||
from src.data_io.dataset_folder import DatasetFolderFT
|
||||
from src.data_io import transform as trans
|
||||
|
||||
|
||||
def get_train_loader(conf):
|
||||
train_transform = trans.Compose([
|
||||
trans.ToPILImage(),
|
||||
trans.RandomResizedCrop(size=tuple(conf.input_size),
|
||||
scale=(0.9, 1.1)),
|
||||
trans.ColorJitter(brightness=0.4,
|
||||
contrast=0.4, saturation=0.4, hue=0.1),
|
||||
trans.RandomRotation(10),
|
||||
trans.RandomHorizontalFlip(),
|
||||
trans.ToTensor()
|
||||
])
|
||||
root_path = '{}/{}'.format(conf.train_root_path, conf.patch_info)
|
||||
trainset = DatasetFolderFT(root_path, train_transform,
|
||||
None, conf.ft_width, conf.ft_height)
|
||||
train_loader = DataLoader(
|
||||
trainset,
|
||||
batch_size=conf.batch_size,
|
||||
shuffle=True,
|
||||
pin_memory=True,
|
||||
num_workers=16)
|
||||
return train_loader
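# Sketch of the expected call site (``conf`` comes from src.default_config):
#   train_loader = get_train_loader(conf)
#   for sample, ft_sample, target in train_loader:
#       ...  # sample: (B, 3, H, W), ft_sample: (B, 1, ft_height, ft_width), target: (B,)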
|
589
src/data_io/functional.py
Normal file
@ -0,0 +1,589 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-4 6:18 PM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : functional.py
|
||||
# @Software : PyCharm
|
||||
|
||||
from __future__ import division
|
||||
import torch
|
||||
from PIL import Image, ImageOps, ImageEnhance
|
||||
try:
|
||||
import accimage
|
||||
except ImportError:
|
||||
accimage = None
|
||||
import numpy as np
|
||||
import numbers
|
||||
import types
|
||||
import collections.abc
|
||||
import warnings
|
||||
|
||||
|
||||
def _is_pil_image(img):
|
||||
if accimage is not None:
|
||||
return isinstance(img, (Image.Image, accimage.Image))
|
||||
else:
|
||||
return isinstance(img, Image.Image)
|
||||
|
||||
|
||||
def _is_tensor_image(img):
|
||||
return torch.is_tensor(img) and img.ndimension() == 3
|
||||
|
||||
|
||||
def _is_numpy_image(img):
|
||||
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
|
||||
|
||||
|
||||
def to_tensor(pic):
|
||||
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
|
||||
|
||||
See ``ToTensor`` for more details.
|
||||
|
||||
Args:
|
||||
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
|
||||
|
||||
Returns:
|
||||
Tensor: Converted image.
|
||||
"""
|
||||
if not(_is_pil_image(pic) or _is_numpy_image(pic)):
|
||||
raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))
|
||||
|
||||
if isinstance(pic, np.ndarray):
|
||||
# handle numpy array
|
||||
# IR image channel=1: modify by lzc --> 20190730
|
||||
if pic.ndim == 2:
|
||||
pic = pic.reshape((pic.shape[0], pic.shape[1], 1))
|
||||
|
||||
img = torch.from_numpy(pic.transpose((2, 0, 1)))
|
||||
# backward compatibility
|
||||
# return img.float().div(255) modify by zkx
|
||||
return img.float()
|
||||
if accimage is not None and isinstance(pic, accimage.Image):
|
||||
nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
|
||||
pic.copyto(nppic)
|
||||
return torch.from_numpy(nppic)
|
||||
|
||||
# handle PIL Image
|
||||
if pic.mode == 'I':
|
||||
img = torch.from_numpy(np.array(pic, np.int32, copy=False))
|
||||
elif pic.mode == 'I;16':
|
||||
img = torch.from_numpy(np.array(pic, np.int16, copy=False))
|
||||
else:
|
||||
img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
|
||||
# PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK
|
||||
if pic.mode == 'YCbCr':
|
||||
nchannel = 3
|
||||
elif pic.mode == 'I;16':
|
||||
nchannel = 1
|
||||
else:
|
||||
nchannel = len(pic.mode)
|
||||
img = img.view(pic.size[1], pic.size[0], nchannel)
|
||||
# put it from HWC to CHW format
|
||||
# yikes, this transpose takes 80% of the loading time/CPU
|
||||
img = img.transpose(0, 1).transpose(0, 2).contiguous()
|
||||
if isinstance(img, torch.ByteTensor):
|
||||
# return img.float().div(255) #modified by zkx
|
||||
return img.float()
|
||||
else:
|
||||
return img
|
||||
|
||||
|
||||
def to_pil_image(pic, mode=None):
|
||||
"""Convert a tensor or an ndarray to PIL Image.
|
||||
|
||||
See :class:`~torchvision.transforms.ToPILImage` for more details.
|
||||
|
||||
Args:
|
||||
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
|
||||
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
|
||||
|
||||
.. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes
|
||||
|
||||
Returns:
|
||||
PIL Image: Image converted to PIL Image.
|
||||
"""
|
||||
if not(_is_numpy_image(pic) or _is_tensor_image(pic)):
|
||||
raise TypeError('pic should be Tensor or ndarray. Got {}.'.format(type(pic)))
|
||||
|
||||
npimg = pic
|
||||
if isinstance(pic, torch.FloatTensor):
|
||||
pic = pic.mul(255).byte()
|
||||
if torch.is_tensor(pic):
|
||||
npimg = np.transpose(pic.numpy(), (1, 2, 0))
|
||||
|
||||
if not isinstance(npimg, np.ndarray):
|
||||
raise TypeError('Input pic must be a torch.Tensor or NumPy ndarray, ' +
|
||||
'not {}'.format(type(npimg)))
|
||||
|
||||
if npimg.shape[2] == 1:
|
||||
expected_mode = None
|
||||
npimg = npimg[:, :, 0]
|
||||
if npimg.dtype == np.uint8:
|
||||
expected_mode = 'L'
|
||||
if npimg.dtype == np.int16:
|
||||
expected_mode = 'I;16'
|
||||
if npimg.dtype == np.int32:
|
||||
expected_mode = 'I'
|
||||
elif npimg.dtype == np.float32:
|
||||
expected_mode = 'F'
|
||||
if mode is not None and mode != expected_mode:
|
||||
raise ValueError("Incorrect mode ({}) supplied for input type {}. Should be {}"
|
||||
.format(mode, npimg.dtype, expected_mode))
|
||||
mode = expected_mode
|
||||
|
||||
elif npimg.shape[2] == 4:
|
||||
permitted_4_channel_modes = ['RGBA', 'CMYK']
|
||||
if mode is not None and mode not in permitted_4_channel_modes:
|
||||
raise ValueError("Only modes {} are supported for 4D inputs".format(permitted_4_channel_modes))
|
||||
|
||||
if mode is None and npimg.dtype == np.uint8:
|
||||
mode = 'RGBA'
|
||||
else:
|
||||
permitted_3_channel_modes = ['RGB', 'YCbCr', 'HSV']
|
||||
if mode is not None and mode not in permitted_3_channel_modes:
|
||||
raise ValueError("Only modes {} are supported for 3D inputs".format(permitted_3_channel_modes))
|
||||
if mode is None and npimg.dtype == np.uint8:
|
||||
mode = 'RGB'
|
||||
|
||||
if mode is None:
|
||||
raise TypeError('Input type {} is not supported'.format(npimg.dtype))
|
||||
|
||||
return Image.fromarray(npimg, mode=mode)
|
||||
|
||||
|
||||
def normalize(tensor, mean, std):
|
||||
"""Normalize a tensor image with mean and standard deviation.
|
||||
|
||||
See ``Normalize`` for more details.
|
||||
|
||||
Args:
|
||||
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
|
||||
mean (sequence): Sequence of means for each channel.
|
||||
std (sequence): Sequence of standard deviations for each channel.
|
||||
|
||||
Returns:
|
||||
Tensor: Normalized Tensor image.
|
||||
"""
|
||||
if not _is_tensor_image(tensor):
|
||||
raise TypeError('tensor is not a torch image.')
|
||||
|
||||
for t, m, s in zip(tensor, mean, std):
|
||||
t.sub_(m).div_(s)
|
||||
return tensor
|
||||
|
||||
|
||||
def resize(img, size, interpolation=Image.BILINEAR):
|
||||
"""Resize the input PIL Image to the given size.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be resized.
|
||||
size (sequence or int): Desired output size. If size is a sequence like
|
||||
(h, w), the output size will be matched to this. If size is an int,
|
||||
the smaller edge of the image will be matched to this number, maintaining
|
||||
the aspect ratio, i.e. if height > width, then the image will be rescaled to
|
||||
(size * height / width, size)
|
||||
interpolation (int, optional): Desired interpolation. Default is
|
||||
``PIL.Image.BILINEAR``
|
||||
|
||||
Returns:
|
||||
PIL Image: Resized image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
if not (isinstance(size, int) or (isinstance(size, collections.abc.Iterable) and len(size) == 2)):
|
||||
raise TypeError('Got inappropriate size arg: {}'.format(size))
|
||||
|
||||
if isinstance(size, int):
|
||||
w, h = img.size
|
||||
if (w <= h and w == size) or (h <= w and h == size):
|
||||
return img
|
||||
if w < h:
|
||||
ow = size
|
||||
oh = int(size * h / w)
|
||||
return img.resize((ow, oh), interpolation)
|
||||
else:
|
||||
oh = size
|
||||
ow = int(size * w / h)
|
||||
return img.resize((ow, oh), interpolation)
|
||||
else:
|
||||
return img.resize(size[::-1], interpolation)
|
||||
|
||||
|
||||
def scale(*args, **kwargs):
|
||||
warnings.warn("The use of the transforms.Scale transform is deprecated, " +
|
||||
"please use transforms.Resize instead.")
|
||||
return resize(*args, **kwargs)
|
||||
|
||||
|
||||
def pad(img, padding, fill=0):
|
||||
"""Pad the given PIL Image on all sides with the given "pad" value.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be padded.
|
||||
padding (int or tuple): Padding on each border. If a single int is provided this
|
||||
is used to pad all borders. If tuple of length 2 is provided this is the padding
|
||||
on left/right and top/bottom respectively. If a tuple of length 4 is provided
|
||||
this is the padding for the left, top, right and bottom borders
|
||||
respectively.
|
||||
fill: Pixel fill value. Default is 0. If a tuple of
|
||||
length 3, it is used to fill R, G, B channels respectively.
|
||||
|
||||
Returns:
|
||||
PIL Image: Padded image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
if not isinstance(padding, (numbers.Number, tuple)):
|
||||
raise TypeError('Got inappropriate padding arg')
|
||||
if not isinstance(fill, (numbers.Number, str, tuple)):
|
||||
raise TypeError('Got inappropriate fill arg')
|
||||
|
||||
if isinstance(padding, collections.abc.Sequence) and len(padding) not in [2, 4]:
|
||||
raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " +
|
||||
"{} element tuple".format(len(padding)))
|
||||
|
||||
return ImageOps.expand(img, border=padding, fill=fill)
|
||||
|
||||
|
||||
def crop(img, i, j, h, w):
|
||||
"""Crop the given PIL Image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be cropped.
|
||||
i: Upper pixel coordinate.
|
||||
j: Left pixel coordinate.
|
||||
h: Height of the cropped image.
|
||||
w: Width of the cropped image.
|
||||
|
||||
Returns:
|
||||
PIL Image: Cropped image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
return img.crop((j, i, j + w, i + h))
|
||||
|
||||
|
||||
def center_crop(img, output_size):
|
||||
if isinstance(output_size, numbers.Number):
|
||||
output_size = (int(output_size), int(output_size))
|
||||
w, h = img.size
|
||||
th, tw = output_size
|
||||
i = int(round((h - th) / 2.))
|
||||
j = int(round((w - tw) / 2.))
|
||||
return crop(img, i, j, th, tw)
|
||||
|
||||
|
||||
def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
|
||||
"""Crop the given PIL Image and resize it to desired size.
|
||||
|
||||
Notably used in RandomResizedCrop.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be cropped.
|
||||
i: Upper pixel coordinate.
|
||||
j: Left pixel coordinate.
|
||||
h: Height of the cropped image.
|
||||
w: Width of the cropped image.
|
||||
size (sequence or int): Desired output size. Same semantics as ``scale``.
|
||||
interpolation (int, optional): Desired interpolation. Default is
|
||||
``PIL.Image.BILINEAR``.
|
||||
Returns:
|
||||
PIL Image: Cropped image.
|
||||
"""
|
||||
assert _is_pil_image(img), 'img should be PIL Image'
|
||||
img = crop(img, i, j, h, w)
|
||||
img = resize(img, size, interpolation)
|
||||
return img
|
||||
|
||||
|
||||
def hflip(img):
|
||||
"""Horizontally flip the given PIL Image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be flipped.
|
||||
|
||||
Returns:
|
||||
PIL Image: Horizontally flipped image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
return img.transpose(Image.FLIP_LEFT_RIGHT)
|
||||
|
||||
|
||||
def vflip(img):
|
||||
"""Vertically flip the given PIL Image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be flipped.
|
||||
|
||||
Returns:
|
||||
PIL Image: Vertically flipped image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
return img.transpose(Image.FLIP_TOP_BOTTOM)
|
||||
|
||||
|
||||
def five_crop(img, size):
|
||||
"""Crop the given PIL Image into four corners and the central crop.
|
||||
|
||||
.. Note::
|
||||
This transform returns a tuple of images and there may be a
|
||||
mismatch in the number of inputs and targets your ``Dataset`` returns.
|
||||
|
||||
Args:
|
||||
size (sequence or int): Desired output size of the crop. If size is an
|
||||
int instead of sequence like (h, w), a square crop (size, size) is
|
||||
made.
|
||||
Returns:
|
||||
tuple: tuple (tl, tr, bl, br, center) corresponding top left,
|
||||
top right, bottom left, bottom right and center crop.
|
||||
"""
|
||||
if isinstance(size, numbers.Number):
|
||||
size = (int(size), int(size))
|
||||
else:
|
||||
assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
|
||||
|
||||
w, h = img.size
|
||||
crop_h, crop_w = size
|
||||
if crop_w > w or crop_h > h:
|
||||
raise ValueError("Requested crop size {} is bigger than input size {}".format(size,
|
||||
(h, w)))
|
||||
tl = img.crop((0, 0, crop_w, crop_h))
|
||||
tr = img.crop((w - crop_w, 0, w, crop_h))
|
||||
bl = img.crop((0, h - crop_h, crop_w, h))
|
||||
br = img.crop((w - crop_w, h - crop_h, w, h))
|
||||
center = center_crop(img, (crop_h, crop_w))
|
||||
return (tl, tr, bl, br, center)
|
||||
|
||||
|
||||
def ten_crop(img, size, vertical_flip=False):
|
||||
"""Crop the given PIL Image into four corners and the central crop plus the
|
||||
flipped version of these (horizontal flipping is used by default).
|
||||
|
||||
.. Note::
|
||||
This transform returns a tuple of images and there may be a
|
||||
mismatch in the number of inputs and targets your ``Dataset`` returns.
|
||||
|
||||
Args:
|
||||
size (sequence or int): Desired output size of the crop. If size is an
|
||||
int instead of sequence like (h, w), a square crop (size, size) is
|
||||
made.
|
||||
vertical_flip (bool): Use vertical flipping instead of horizontal
|
||||
|
||||
Returns:
|
||||
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip,
|
||||
br_flip, center_flip) corresponding top left, top right,
|
||||
bottom left, bottom right and center crop and same for the
|
||||
flipped image.
|
||||
"""
|
||||
if isinstance(size, numbers.Number):
|
||||
size = (int(size), int(size))
|
||||
else:
|
||||
assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
|
||||
|
||||
first_five = five_crop(img, size)
|
||||
|
||||
if vertical_flip:
|
||||
img = vflip(img)
|
||||
else:
|
||||
img = hflip(img)
|
||||
|
||||
second_five = five_crop(img, size)
|
||||
return first_five + second_five
|
||||
|
||||
|
||||
def adjust_brightness(img, brightness_factor):
|
||||
"""Adjust brightness of an Image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): PIL Image to be adjusted.
|
||||
brightness_factor (float): How much to adjust the brightness. Can be
|
||||
any non-negative number. 0 gives a black image, 1 gives the
|
||||
original image while 2 increases the brightness by a factor of 2.
|
||||
|
||||
Returns:
|
||||
PIL Image: Brightness adjusted image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
enhancer = ImageEnhance.Brightness(img)
|
||||
img = enhancer.enhance(brightness_factor)
|
||||
return img
|
||||
|
||||
|
||||
def adjust_contrast(img, contrast_factor):
|
||||
"""Adjust contrast of an Image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): PIL Image to be adjusted.
|
||||
contrast_factor (float): How much to adjust the contrast. Can be any
|
||||
non-negative number. 0 gives a solid gray image, 1 gives the
|
||||
original image while 2 increases the contrast by a factor of 2.
|
||||
|
||||
Returns:
|
||||
PIL Image: Contrast adjusted image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
enhancer = ImageEnhance.Contrast(img)
|
||||
img = enhancer.enhance(contrast_factor)
|
||||
return img
|
||||
|
||||
|
||||
def adjust_saturation(img, saturation_factor):
|
||||
"""Adjust color saturation of an image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): PIL Image to be adjusted.
|
||||
saturation_factor (float): How much to adjust the saturation. 0 will
|
||||
give a black and white image, 1 will give the original image while
|
||||
2 will enhance the saturation by a factor of 2.
|
||||
|
||||
Returns:
|
||||
PIL Image: Saturation adjusted image.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
enhancer = ImageEnhance.Color(img)
|
||||
img = enhancer.enhance(saturation_factor)
|
||||
return img
|
||||
|
||||
|
||||
def adjust_hue(img, hue_factor):
|
||||
"""Adjust hue of an image.
|
||||
|
||||
The image hue is adjusted by converting the image to HSV and
|
||||
cyclically shifting the intensities in the hue channel (H).
|
||||
The image is then converted back to original image mode.
|
||||
|
||||
`hue_factor` is the amount of shift in H channel and must be in the
|
||||
interval `[-0.5, 0.5]`.
|
||||
|
||||
See https://en.wikipedia.org/wiki/Hue for more details on Hue.
|
||||
|
||||
Args:
|
||||
img (PIL Image): PIL Image to be adjusted.
|
||||
hue_factor (float): How much to shift the hue channel. Should be in
|
||||
[-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
|
||||
HSV space in positive and negative direction respectively.
|
||||
0 means no shift. Therefore, both -0.5 and 0.5 will give an image
|
||||
with complementary colors while 0 gives the original image.
|
||||
|
||||
Returns:
|
||||
PIL Image: Hue adjusted image.
|
||||
"""
|
||||
if not(-0.5 <= hue_factor <= 0.5):
|
||||
raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor))
|
||||
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
input_mode = img.mode
|
||||
if input_mode in {'L', '1', 'I', 'F'}:
|
||||
return img
|
||||
|
||||
h, s, v = img.convert('HSV').split()
|
||||
|
||||
np_h = np.array(h, dtype=np.uint8)
|
||||
# uint8 addition takes care of rotation across boundaries
|
||||
with np.errstate(over='ignore'):
|
||||
np_h += np.uint8(hue_factor * 255)
|
||||
h = Image.fromarray(np_h, 'L')
|
||||
|
||||
img = Image.merge('HSV', (h, s, v)).convert(input_mode)
|
||||
return img
|
||||
|
||||
|
||||
def adjust_gamma(img, gamma, gain=1):
|
||||
"""Perform gamma correction on an image.
|
||||
|
||||
Also known as Power Law Transform. Intensities in RGB mode are adjusted
|
||||
based on the following equation:
|
||||
|
||||
I_out = 255 * gain * ((I_in / 255) ** gamma)
|
||||
|
||||
See https://en.wikipedia.org/wiki/Gamma_correction for more details.
|
||||
|
||||
Args:
|
||||
img (PIL Image): PIL Image to be adjusted.
|
||||
gamma (float): Non-negative real number. gamma larger than 1 makes the
|
||||
shadows darker, while gamma smaller than 1 makes dark regions
|
||||
lighter.
|
||||
gain (float): The constant multiplier.
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
if gamma < 0:
|
||||
raise ValueError('Gamma should be a non-negative real number')
|
||||
|
||||
input_mode = img.mode
|
||||
img = img.convert('RGB')
|
||||
|
||||
np_img = np.array(img, dtype=np.float32)
|
||||
np_img = 255 * gain * ((np_img / 255) ** gamma)
|
||||
np_img = np.uint8(np.clip(np_img, 0, 255))
|
||||
|
||||
img = Image.fromarray(np_img, 'RGB').convert(input_mode)
|
||||
return img
|
||||
|
||||
|
||||
def rotate(img, angle, resample=False, expand=False, center=None):
|
||||
"""Rotate the image by angle and then (optionally) translate it by (n_columns, n_rows)
|
||||
|
||||
|
||||
Args:
|
||||
img (PIL Image): PIL Image to be rotated.
|
||||
angle ({float, int}): Rotation angle in degrees, counter-clockwise.
|
||||
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
|
||||
An optional resampling filter.
|
||||
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
|
||||
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
|
||||
expand (bool, optional): Optional expansion flag.
|
||||
If true, expands the output image to make it large enough to hold the entire rotated image.
|
||||
If false or omitted, make the output image the same size as the input image.
|
||||
Note that the expand flag assumes rotation around the center and no translation.
|
||||
center (2-tuple, optional): Optional center of rotation.
|
||||
Origin is the upper left corner.
|
||||
Default is the center of the image.
|
||||
"""
|
||||
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
return img.rotate(angle, resample, expand, center)
|
||||
|
||||
|
||||
def to_grayscale(img, num_output_channels=1):
|
||||
"""Convert image to grayscale version of image.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be converted to grayscale.
|
||||
|
||||
Returns:
|
||||
PIL Image: Grayscale version of the image.
|
||||
if num_output_channels == 1 : returned image is single channel
|
||||
if num_output_channels == 3 : returned image is 3 channel with r == g == b
|
||||
"""
|
||||
if not _is_pil_image(img):
|
||||
raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
|
||||
|
||||
if num_output_channels == 1:
|
||||
img = img.convert('L')
|
||||
elif num_output_channels == 3:
|
||||
img = img.convert('L')
|
||||
np_img = np.array(img, dtype=np.uint8)
|
||||
np_img = np.dstack([np_img, np_img, np_img])
|
||||
img = Image.fromarray(np_img, 'RGB')
|
||||
else:
|
||||
raise ValueError('num_output_channels should be either 1 or 3')
|
||||
|
||||
return img
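# Minimal sketch of calling these functional ops directly (the file name is hypothetical):
#   from PIL import Image
#   img = Image.open('face.jpg')
#   img = resize(img, 128)
#   img = adjust_brightness(img, 1.2)
#   tensor = to_tensor(img)   # note: this modified to_tensor does NOT divide by 255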
|
347
src/data_io/transform.py
Normal file
@ -0,0 +1,347 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-4 4:19 PM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : transform.py
|
||||
# @Software : PyCharm
|
||||
|
||||
from __future__ import division
|
||||
import math
|
||||
import random
|
||||
from PIL import Image
|
||||
try:
|
||||
import accimage
|
||||
except ImportError:
|
||||
accimage = None
|
||||
import numpy as np
|
||||
import numbers
|
||||
import types
|
||||
|
||||
from src.data_io import functional as F
|
||||
|
||||
__all__ = ["Compose", "ToTensor", "ToPILImage", "Normalize", "RandomHorizontalFlip",
|
||||
"Lambda", "RandomResizedCrop", "ColorJitter", "RandomRotation"]
|
||||
|
||||
|
||||
class Compose(object):
|
||||
"""Composes several transforms together.
|
||||
|
||||
Args:
|
||||
transforms (list of ``Transform`` objects): list of transforms to compose.
|
||||
|
||||
Example:
|
||||
>>> transforms.Compose([
|
||||
>>> transforms.CenterCrop(10),
|
||||
>>> transforms.ToTensor(),
|
||||
>>> ])
|
||||
"""
|
||||
|
||||
def __init__(self, transforms):
|
||||
self.transforms = transforms
|
||||
|
||||
def __call__(self, img):
|
||||
for t in self.transforms:
|
||||
img = t(img)
|
||||
return img
|
||||
|
||||
|
||||
class ToTensor(object):
|
||||
|
||||
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
|
||||
|
||||
Converts a PIL Image or numpy.ndarray (H x W x C) in the range
|
||||
[0, 255] to a torch.FloatTensor of shape (C x H x W). Note: unlike stock torchvision,
this project's ``to_tensor`` does not divide by 255, so values stay in [0, 255].
|
||||
"""
|
||||
|
||||
def __call__(self, pic):
|
||||
"""
|
||||
Args:
|
||||
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
|
||||
|
||||
Returns:
|
||||
Tensor: Converted image.
|
||||
"""
|
||||
return F.to_tensor(pic)
|
||||
|
||||
|
||||
class Lambda(object):
|
||||
"""Apply a user-defined lambda as a transform.
|
||||
|
||||
Args:
|
||||
lambd (function): Lambda/function to be used for transform.
|
||||
"""
|
||||
|
||||
def __init__(self, lambd):
|
||||
assert isinstance(lambd, types.LambdaType)
|
||||
self.lambd = lambd
|
||||
|
||||
def __call__(self, img):
|
||||
return self.lambd(img)
|
||||
|
||||
|
||||
class ToPILImage(object):
|
||||
"""Convert a tensor or an ndarray to PIL Image.
|
||||
|
||||
Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape
|
||||
H x W x C to a PIL Image while preserving the value range.
|
||||
|
||||
Args:
|
||||
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
|
||||
If ``mode`` is ``None`` (default) there are some assumptions made about the input data:
|
||||
1. If the input has 3 channels, the ``mode`` is assumed to be ``RGB``.
|
||||
2. If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``.
|
||||
3. If the input has 1 channel, the ``mode`` is determined by the data type (i.e.,
|
||||
``int``, ``float``, ``short``).
|
||||
|
||||
.. _PIL.Image mode: http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#modes
|
||||
"""
|
||||
def __init__(self, mode=None):
|
||||
self.mode = mode
|
||||
|
||||
def __call__(self, pic):
|
||||
"""
|
||||
Args:
|
||||
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
|
||||
|
||||
Returns:
|
||||
PIL Image: Image converted to PIL Image.
|
||||
|
||||
"""
|
||||
return F.to_pil_image(pic, self.mode)
|
||||
|
||||
|
||||
class Normalize(object):
|
||||
"""Normalize an tensor image with mean and standard deviation.
|
||||
Given mean: ``(M1,...,Mn)`` and std: ``(S1,..,Sn)`` for ``n`` channels, this transform
|
||||
will normalize each channel of the input ``torch.*Tensor`` i.e.
|
||||
``input[channel] = (input[channel] - mean[channel]) / std[channel]``
|
||||
|
||||
Args:
|
||||
mean (sequence): Sequence of means for each channel.
|
||||
std (sequence): Sequence of standard deviations for each channel.
|
||||
"""
|
||||
|
||||
def __init__(self, mean, std):
|
||||
self.mean = mean
|
||||
self.std = std
|
||||
|
||||
def __call__(self, tensor):
|
||||
"""
|
||||
Args:
|
||||
tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
|
||||
|
||||
Returns:
|
||||
Tensor: Normalized Tensor image.
|
||||
"""
|
||||
return F.normalize(tensor, self.mean, self.std)
|
||||
|
||||
|
||||
class RandomHorizontalFlip(object):
|
||||
"""Horizontally flip the given PIL Image randomly with a probability of 0.5."""
|
||||
|
||||
def __call__(self, img):
|
||||
"""
|
||||
Args:
|
||||
img (PIL Image): Image to be flipped.
|
||||
|
||||
Returns:
|
||||
PIL Image: Randomly flipped image.
|
||||
"""
|
||||
if random.random() < 0.5:
|
||||
return F.hflip(img)
|
||||
return img
|
||||
|
||||
|
||||
class RandomResizedCrop(object):
|
||||
"""Crop the given PIL Image to random size and aspect ratio.
|
||||
|
||||
A crop of random size (default: 0.08 to 1.0 of the original size) and a random
|
||||
aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
|
||||
is finally resized to given size.
|
||||
This is popularly used to train the Inception networks.
|
||||
|
||||
Args:
|
||||
size: expected output size of each edge
|
||||
scale: range of size of the origin size cropped
|
||||
ratio: range of aspect ratio of the origin aspect ratio cropped
|
||||
interpolation: Default: PIL.Image.BILINEAR
|
||||
"""
|
||||
|
||||
def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
|
||||
if isinstance(size, tuple):
|
||||
self.size = size
|
||||
else:
|
||||
self.size = (size, size)
|
||||
self.interpolation = interpolation
|
||||
self.scale = scale
|
||||
self.ratio = ratio
|
||||
|
||||
@staticmethod
|
||||
def get_params(img, scale, ratio):
|
||||
"""Get parameters for ``crop`` for a random sized crop.
|
||||
|
||||
Args:
|
||||
img (PIL Image): Image to be cropped.
|
||||
scale (tuple): range of size of the origin size cropped
|
||||
ratio (tuple): range of aspect ratio of the origin aspect ratio cropped
|
||||
|
||||
Returns:
|
||||
tuple: params (i, j, h, w) to be passed to ``crop`` for a random
|
||||
sized crop.
|
||||
"""
|
||||
for attempt in range(10):
|
||||
area = img.size[0] * img.size[1]
|
||||
target_area = random.uniform(*scale) * area
|
||||
aspect_ratio = random.uniform(*ratio)
|
||||
|
||||
w = int(round(math.sqrt(target_area * aspect_ratio)))
|
||||
h = int(round(math.sqrt(target_area / aspect_ratio)))
|
||||
|
||||
if random.random() < 0.5:
|
||||
w, h = h, w
|
||||
|
||||
if w <= img.size[0] and h <= img.size[1]:
|
||||
i = random.randint(0, img.size[1] - h)
|
||||
j = random.randint(0, img.size[0] - w)
|
||||
return i, j, h, w
|
||||
|
||||
# Fallback
|
||||
w = min(img.size[0], img.size[1])
|
||||
i = (img.size[1] - w) // 2
|
||||
j = (img.size[0] - w) // 2
|
||||
return i, j, w, w
|
||||
|
||||
def __call__(self, img):
|
||||
"""
|
||||
Args:
|
||||
img (PIL Image): Image to be flipped.
|
||||
|
||||
Returns:
|
||||
PIL Image: Randomly cropped and resize image.
|
||||
"""
|
||||
i, j, h, w = self.get_params(img, self.scale, self.ratio)
|
||||
return F.resized_crop(img, i, j, h, w, self.size, self.interpolation)
|
||||
|
||||
|
||||
class ColorJitter(object):
|
||||
"""Randomly change the brightness, contrast and saturation of an image.
|
||||
|
||||
Args:
|
||||
brightness (float): How much to jitter brightness. brightness_factor
|
||||
is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
|
||||
contrast (float): How much to jitter contrast. contrast_factor
|
||||
is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
|
||||
saturation (float): How much to jitter saturation. saturation_factor
|
||||
is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
|
||||
hue(float): How much to jitter hue. hue_factor is chosen uniformly from
|
||||
[-hue, hue]. Should be >=0 and <= 0.5.
|
||||
"""
|
||||
def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
|
||||
self.brightness = brightness
|
||||
self.contrast = contrast
|
||||
self.saturation = saturation
|
||||
self.hue = hue
|
||||
|
||||
@staticmethod
|
||||
def get_params(brightness, contrast, saturation, hue):
|
||||
"""Get a randomized transform to be applied on image.
|
||||
|
||||
Arguments are same as that of __init__.
|
||||
|
||||
Returns:
|
||||
Transform which randomly adjusts brightness, contrast and
|
||||
saturation in a random order.
|
||||
"""
|
||||
transforms = []
|
||||
if brightness > 0:
|
||||
brightness_factor = np.random.uniform(max(0, 1 - brightness), 1 + brightness)
|
||||
transforms.append(Lambda(lambda img: F.adjust_brightness(img, brightness_factor)))
|
||||
|
||||
if contrast > 0:
|
||||
contrast_factor = np.random.uniform(max(0, 1 - contrast), 1 + contrast)
|
||||
transforms.append(Lambda(lambda img: F.adjust_contrast(img, contrast_factor)))
|
||||
|
||||
if saturation > 0:
|
||||
saturation_factor = np.random.uniform(max(0, 1 - saturation), 1 + saturation)
|
||||
transforms.append(Lambda(lambda img: F.adjust_saturation(img, saturation_factor)))
|
||||
|
||||
if hue > 0:
|
||||
hue_factor = np.random.uniform(-hue, hue)
|
||||
transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
|
||||
|
||||
np.random.shuffle(transforms)
|
||||
transform = Compose(transforms)
|
||||
|
||||
return transform
|
||||
|
||||
def __call__(self, img):
|
||||
"""
|
||||
Args:
|
||||
img (PIL Image): Input image.
|
||||
|
||||
Returns:
|
||||
PIL Image: Color jittered image.
|
||||
"""
|
||||
transform = self.get_params(self.brightness, self.contrast,
|
||||
self.saturation, self.hue)
|
||||
return transform(img)
|
||||
|
||||
|
||||
class RandomRotation(object):
|
||||
"""Rotate the image by angle.
|
||||
|
||||
Args:
|
||||
degrees (sequence or float or int): Range of degrees to select from.
|
||||
If degrees is a number instead of sequence like (min, max), the range of degrees
|
||||
will be (-degrees, +degrees).
|
||||
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional):
|
||||
An optional resampling filter.
|
||||
See http://pillow.readthedocs.io/en/3.4.x/handbook/concepts.html#filters
|
||||
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST.
|
||||
expand (bool, optional): Optional expansion flag.
|
||||
If true, expands the output to make it large enough to hold the entire rotated image.
|
||||
If false or omitted, make the output image the same size as the input image.
|
||||
Note that the expand flag assumes rotation around the center and no translation.
|
||||
center (2-tuple, optional): Optional center of rotation.
|
||||
Origin is the upper left corner.
|
||||
Default is the center of the image.
|
||||
"""
|
||||
|
||||
def __init__(self, degrees, resample=False, expand=False, center=None):
|
||||
if isinstance(degrees, numbers.Number):
|
||||
if degrees < 0:
|
||||
raise ValueError("If degrees is a single number, it must be positive.")
|
||||
self.degrees = (-degrees, degrees)
|
||||
else:
|
||||
if len(degrees) != 2:
|
||||
raise ValueError("If degrees is a sequence, it must be of len 2.")
|
||||
self.degrees = degrees
|
||||
|
||||
self.resample = resample
|
||||
self.expand = expand
|
||||
self.center = center
|
||||
|
||||
@staticmethod
|
||||
def get_params(degrees):
|
||||
"""Get parameters for ``rotate`` for a random rotation.
|
||||
|
||||
Returns:
|
||||
sequence: params to be passed to ``rotate`` for random rotation.
|
||||
"""
|
||||
angle = np.random.uniform(degrees[0], degrees[1])
|
||||
|
||||
return angle
|
||||
|
||||
def __call__(self, img):
|
||||
"""
|
||||
img (PIL Image): Image to be rotated.
|
||||
|
||||
Returns:
|
||||
PIL Image: Rotated image.
|
||||
"""
|
||||
|
||||
angle = self.get_params(self.degrees)
|
||||
|
||||
return F.rotate(img, angle, self.resample, self.expand, self.center)
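# Example pipeline built from these classes (sizes are illustrative; this mirrors the
# composition used in src/data_io/dataset_loader.py):
#   transform = Compose([ToPILImage(), RandomResizedCrop((80, 80), scale=(0.9, 1.1)),
#                        ColorJitter(0.4, 0.4, 0.4, 0.1), RandomRotation(10),
#                        RandomHorizontalFlip(), ToTensor()])
#   out = transform(patch)    # patch: HxWxC uint8 numpy array or PIL Image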
|
||||
|
||||
|
73
src/default_config.py
Normal file
@ -0,0 +1,73 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-4 9:12 AM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : default_config.py
|
||||
# @Software : PyCharm
|
||||
# --*-- coding: utf-8 --*--
|
||||
"""
|
||||
default config for training
|
||||
"""
|
||||
|
||||
import torch
|
||||
from datetime import datetime
|
||||
from easydict import EasyDict
|
||||
from src.utility import make_if_not_exist, get_width_height, get_kernel
|
||||
|
||||
|
||||
def get_default_config():
|
||||
conf = EasyDict()
|
||||
|
||||
# ----------------------training---------------
|
||||
conf.lr = 1e-1
|
||||
# [9, 13, 15]
|
||||
conf.milestones = [10, 15, 22]  # epochs at which the learning rate is decayed
|
||||
conf.gamma = 0.1
|
||||
conf.epochs = 25
|
||||
conf.momentum = 0.9
|
||||
conf.batch_size = 1024
|
||||
|
||||
# model
|
||||
conf.num_classes = 3
|
||||
conf.input_channel = 3
|
||||
conf.embedding_size = 128
|
||||
|
||||
# dataset
|
||||
conf.train_root_path = './datasets/rgb_image'
|
||||
|
||||
# save file path
|
||||
conf.snapshot_dir_path = './saved_logs/snapshot'
|
||||
|
||||
# log path
|
||||
conf.log_path = './saved_logs/jobs'
|
||||
# tensorboard
|
||||
conf.board_loss_every = 10
|
||||
# save model/iter
|
||||
conf.save_every = 30
|
||||
|
||||
return conf
|
||||
|
||||
|
||||
def update_config(args, conf):
|
||||
conf.devices = args.devices
|
||||
conf.patch_info = args.patch_info
|
||||
w_input, h_input = get_width_height(args.patch_info)
|
||||
conf.input_size = [h_input, w_input]
|
||||
conf.kernel_size = get_kernel(h_input, w_input)
|
||||
conf.device = "cuda:{}".format(conf.devices[0]) if torch.cuda.is_available() else "cpu"
|
||||
|
||||
# resize fourier image size
|
||||
conf.ft_height = 2*conf.kernel_size[0]
|
||||
conf.ft_width = 2*conf.kernel_size[1]
|
||||
current_time = datetime.now().strftime('%b%d_%H-%M-%S')
|
||||
job_name = 'Anti_Spoofing_{}'.format(args.patch_info)
|
||||
log_path = '{}/{}/{} '.format(conf.log_path, job_name, current_time)
|
||||
snapshot_dir = '{}/{}'.format(conf.snapshot_dir_path, job_name)
|
||||
|
||||
make_if_not_exist(snapshot_dir)
|
||||
make_if_not_exist(log_path)
|
||||
|
||||
conf.model_path = snapshot_dir
|
||||
conf.log_path = log_path
|
||||
conf.job_name = job_name
|
||||
return conf
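# Sketch of the intended flow in a training script (``args`` is assumed to expose
# ``devices`` and ``patch_info``, e.g. from argparse; it is not defined in this file):
#   conf = get_default_config()
#   conf = update_config(args, conf)
#   # conf.input_size, conf.kernel_size and conf.ft_height / conf.ft_width are now set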
|
65
src/generate_patches.py
Normal file
@ -0,0 +1,65 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-9 3:06 PM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : test.py
|
||||
# @Software : PyCharm
|
||||
"""
|
||||
Create patch from original input image by using bbox coordinate
|
||||
"""
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
class CropImage:
|
||||
@staticmethod
|
||||
def _get_new_box(src_w, src_h, bbox, scale):
|
||||
x = bbox[0]
|
||||
y = bbox[1]
|
||||
box_w = bbox[2]
|
||||
box_h = bbox[3]
|
||||
|
||||
scale = min((src_h-1)/box_h, min((src_w-1)/box_w, scale))
|
||||
|
||||
new_width = box_w * scale
|
||||
new_height = box_h * scale
|
||||
center_x, center_y = box_w/2+x, box_h/2+y
|
||||
|
||||
left_top_x = center_x-new_width/2
|
||||
left_top_y = center_y-new_height/2
|
||||
right_bottom_x = center_x+new_width/2
|
||||
right_bottom_y = center_y+new_height/2
|
||||
|
||||
if left_top_x < 0:
|
||||
right_bottom_x -= left_top_x
|
||||
left_top_x = 0
|
||||
|
||||
if left_top_y < 0:
|
||||
right_bottom_y -= left_top_y
|
||||
left_top_y = 0
|
||||
|
||||
if right_bottom_x > src_w-1:
|
||||
left_top_x -= right_bottom_x-src_w+1
|
||||
right_bottom_x = src_w-1
|
||||
|
||||
if right_bottom_y > src_h-1:
|
||||
left_top_y -= right_bottom_y-src_h+1
|
||||
right_bottom_y = src_h-1
|
||||
|
||||
return int(left_top_x), int(left_top_y),\
|
||||
int(right_bottom_x), int(right_bottom_y)
|
||||
|
||||
def crop(self, org_img, bbox, scale, out_w, out_h, crop=True):
|
||||
|
||||
if not crop:
|
||||
dst_img = cv2.resize(org_img, (out_w, out_h))
|
||||
else:
|
||||
src_h, src_w, _ = np.shape(org_img)
|
||||
left_top_x, left_top_y, \
|
||||
right_bottom_x, right_bottom_y = self._get_new_box(src_w, src_h, bbox, scale)
|
||||
|
||||
img = org_img[left_top_y: right_bottom_y+1,
|
||||
left_top_x: right_bottom_x+1]
|
||||
dst_img = cv2.resize(img, (out_w, out_h))
|
||||
return dst_img
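# Usage sketch: crop a detected face box to the patch size the anti-spoofing models
# expect (the scale value 2.7 is an assumption, chosen only for illustration):
#   cropper = CropImage()
#   patch = cropper.crop(org_img, bbox=[x, y, box_w, box_h], scale=2.7, out_w=80, out_h=80)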
|
296
src/model_lib/MiniFASNet.py
Normal file
@ -0,0 +1,296 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Time : 20-6-3 4:45 PM
|
||||
# @Author : zhuying
|
||||
# @Company : Minivision
|
||||
# @File : MiniFASNet.py
|
||||
# @Software : PyCharm
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, \
|
||||
AdaptiveAvgPool2d, Sequential, Module
|
||||
|
||||
|
||||
class L2Norm(Module):
|
||||
def forward(self, input):
|
||||
return F.normalize(input)
|
||||
|
||||
|
||||
class Flatten(Module):
|
||||
def forward(self, input):
|
||||
return input.view(input.size(0), -1)
|
||||
|
||||
|
||||
class Conv_block(Module):
|
||||
def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
|
||||
super(Conv_block, self).__init__()
|
||||
self.conv = Conv2d(in_c, out_c, kernel_size=kernel, groups=groups,
|
||||
stride=stride, padding=padding, bias=False)
|
||||
self.bn = BatchNorm2d(out_c)
|
||||
self.prelu = PReLU(out_c)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
x = self.prelu(x)
|
||||
return x
|
||||
|
||||
|
||||
class Linear_block(Module):
|
||||
def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
|
||||
super(Linear_block, self).__init__()
|
||||
self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel,
|
||||
groups=groups, stride=stride, padding=padding, bias=False)
|
||||
self.bn = BatchNorm2d(out_c)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv(x)
|
||||
x = self.bn(x)
|
||||
return x
|
||||
|
||||
|
||||
class Depth_Wise(Module):
|
||||
def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
|
||||
super(Depth_Wise, self).__init__()
|
||||
c1_in, c1_out = c1
|
||||
c2_in, c2_out = c2
|
||||
c3_in, c3_out = c3
|
||||
self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
|
||||
self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
|
||||
self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
|
||||
self.residual = residual
|
||||
|
||||
def forward(self, x):
|
||||
if self.residual:
|
||||
short_cut = x
|
||||
x = self.conv(x)
|
||||
x = self.conv_dw(x)
|
||||
x = self.project(x)
|
||||
if self.residual:
|
||||
output = short_cut + x
|
||||
else:
|
||||
output = x
|
||||
return output
|
||||
|
||||
|
||||
class Residual(Module):
|
||||
def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
|
||||
super(Residual, self).__init__()
|
||||
modules = []
|
||||
for i in range(num_block):
|
||||
c1_tuple = c1[i]
|
||||
c2_tuple = c2[i]
|
||||
c3_tuple = c3[i]
|
||||
modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True,
|
||||
kernel=kernel, padding=padding, stride=stride, groups=groups))
|
||||
self.model = Sequential(*modules)
|
||||
|
||||
def forward(self, x):
|
||||
return self.model(x)
|
||||
|
||||
|
||||
class SEModule(Module):
|
||||
def __init__(self, channels, reduction):
|
||||
super(SEModule, self).__init__()
|
||||
self.avg_pool = AdaptiveAvgPool2d(1)
|
||||
self.fc1 = Conv2d(
|
||||
channels, channels // reduction, kernel_size=1, padding=0, bias=False)
|
||||
self.bn1 = BatchNorm2d(channels // reduction)
|
||||
self.relu = ReLU(inplace=True)
|
||||
self.fc2 = Conv2d(
|
||||
channels // reduction, channels, kernel_size=1, padding=0, bias=False)
|
||||
self.bn2 = BatchNorm2d(channels)
|
||||
self.sigmoid = Sigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
module_input = x
|
||||
x = self.avg_pool(x)
|
||||
x = self.fc1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
x = self.fc2(x)
|
||||
x = self.bn2(x)
|
||||
x = self.sigmoid(x)
|
||||
return module_input * x
|
||||
|
||||
|
||||
class ResidualSE(Module):
|
||||
def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1), se_reduct=4):
|
||||
super(ResidualSE, self).__init__()
|
||||
modules = []
|
||||
for i in range(num_block):
|
||||
c1_tuple = c1[i]
|
||||
c2_tuple = c2[i]
|
||||
c3_tuple = c3[i]
|
||||
if i == num_block-1:
|
||||
modules.append(
|
||||
Depth_Wise_SE(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding, stride=stride,
|
||||
groups=groups, se_reduct=se_reduct))
|
||||
else:
|
||||
modules.append(Depth_Wise(c1_tuple, c2_tuple, c3_tuple, residual=True, kernel=kernel, padding=padding,
|
||||
stride=stride, groups=groups))
|
||||
self.model = Sequential(*modules)
|
||||
|
||||
def forward(self, x):
|
||||
return self.model(x)
|
||||
|
||||
|
||||
class Depth_Wise_SE(Module):
|
||||
def __init__(self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1, se_reduct=8):
|
||||
super(Depth_Wise_SE, self).__init__()
|
||||
c1_in, c1_out = c1
|
||||
c2_in, c2_out = c2
|
||||
c3_in, c3_out = c3
|
||||
self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
|
||||
self.conv_dw = Conv_block(c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride)
|
||||
self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
|
||||
self.residual = residual
|
||||
self.se_module = SEModule(c3_out, se_reduct)
|
||||
|
||||
def forward(self, x):
|
||||
if self.residual:
|
||||
short_cut = x
|
||||
x = self.conv(x)
|
||||
x = self.conv_dw(x)
|
||||
x = self.project(x)
|
||||
if self.residual:
|
||||
x = self.se_module(x)
|
||||
output = short_cut + x
|
||||
else:
|
||||
output = x
|
||||
return output
|
||||
|
||||
|
||||
class MiniFASNet(Module):
|
||||
def __init__(self, keep, embedding_size, conv6_kernel=(7, 7),
|
||||
drop_p=0.0, num_classes=3, img_channel=3):
|
||||
super(MiniFASNet, self).__init__()
|
||||
self.embedding_size = embedding_size
|
||||
|
||||
self.conv1 = Conv_block(img_channel, keep[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1))
|
||||
self.conv2_dw = Conv_block(keep[0], keep[1], kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=keep[1])
|
||||
|
||||
c1 = [(keep[1], keep[2])]
|
||||
c2 = [(keep[2], keep[3])]
|
||||
c3 = [(keep[3], keep[4])]
|
||||
|
||||
self.conv_23 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[3])
|
||||
|
||||
c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
|
||||
c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
|
||||
c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]
|
||||
|
||||
self.conv_3 = Residual(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
|
||||
|
||||
c1 = [(keep[16], keep[17])]
|
||||
c2 = [(keep[17], keep[18])]
|
||||
c3 = [(keep[18], keep[19])]
|
||||
|
||||
self.conv_34 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[19])
|
||||
|
||||
c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
|
||||
(keep[31], keep[32]), (keep[34], keep[35])]
|
||||
c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
|
||||
(keep[32], keep[33]), (keep[35], keep[36])]
|
||||
c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
|
||||
(keep[33], keep[34]), (keep[36], keep[37])]
|
||||
|
||||
self.conv_4 = Residual(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
|
||||
|
||||
c1 = [(keep[37], keep[38])]
|
||||
c2 = [(keep[38], keep[39])]
|
||||
c3 = [(keep[39], keep[40])]
|
||||
|
||||
self.conv_45 = Depth_Wise(c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[40])
|
||||
|
||||
c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
|
||||
c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
|
||||
c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
|
||||
|
||||
self.conv_5 = Residual(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
|
||||
self.conv_6_sep = Conv_block(keep[46], keep[47], kernel=(1, 1), stride=(1, 1), padding=(0, 0))
|
||||
self.conv_6_dw = Linear_block(keep[47], keep[48], groups=keep[48], kernel=conv6_kernel, stride=(1, 1), padding=(0, 0))
|
||||
self.conv_6_flatten = Flatten()
|
||||
self.linear = Linear(512, embedding_size, bias=False)
|
||||
self.bn = BatchNorm1d(embedding_size)
|
||||
self.drop = torch.nn.Dropout(p=drop_p)
|
||||
self.prob = Linear(embedding_size, num_classes, bias=False)
|
||||
|
||||
def forward(self, x):
|
||||
out = self.conv1(x)
|
||||
out = self.conv2_dw(out)
|
||||
out = self.conv_23(out)
|
||||
out = self.conv_3(out)
|
||||
out = self.conv_34(out)
|
||||
out = self.conv_4(out)
|
||||
out = self.conv_45(out)
|
||||
out = self.conv_5(out)
|
||||
out = self.conv_6_sep(out)
|
||||
out = self.conv_6_dw(out)
|
||||
out = self.conv_6_flatten(out)
|
||||
if self.embedding_size != 512:
|
||||
out = self.linear(out)
|
||||
out = self.bn(out)
|
||||
out = self.drop(out)
|
||||
out = self.prob(out)
|
||||
return out
|
||||
|
||||
|
||||
class MiniFASNetSE(MiniFASNet):
|
||||
def __init__(self, keep, embedding_size, conv6_kernel=(7, 7), drop_p=0.75, num_classes=4, img_channel=3):
|
||||
super(MiniFASNetSE, self).__init__(keep=keep, embedding_size=embedding_size, conv6_kernel=conv6_kernel,
|
||||
drop_p=drop_p, num_classes=num_classes, img_channel=img_channel)
|
||||
|
||||
c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
|
||||
c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
|
||||
c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]
|
||||
|
||||
self.conv_3 = ResidualSE(c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
|
||||
|
||||
c1 = [(keep[19], keep[20]), (keep[22], keep[23]), (keep[25], keep[26]), (keep[28], keep[29]),
|
||||
(keep[31], keep[32]), (keep[34], keep[35])]
|
||||
c2 = [(keep[20], keep[21]), (keep[23], keep[24]), (keep[26], keep[27]), (keep[29], keep[30]),
|
||||
(keep[32], keep[33]), (keep[35], keep[36])]
|
||||
c3 = [(keep[21], keep[22]), (keep[24], keep[25]), (keep[27], keep[28]), (keep[30], keep[31]),
|
||||
(keep[33], keep[34]), (keep[36], keep[37])]
|
||||
|
||||
self.conv_4 = ResidualSE(c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
|
||||
|
||||
c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
|
||||
c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
|
||||
c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
|
||||
self.conv_5 = ResidualSE(c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1))
|
||||
|
||||
|
||||
|
||||
keep_dict = {'1.8M': [32, 32, 103, 103, 64, 13, 13, 64, 26, 26,
|
||||
64, 13, 13, 64, 52, 52, 64, 231, 231, 128,
|
||||
154, 154, 128, 52, 52, 128, 26, 26, 128, 52,
|
||||
52, 128, 26, 26, 128, 26, 26, 128, 308, 308,
|
||||
128, 26, 26, 128, 26, 26, 128, 512, 512],
|
||||
|
||||
'1.8M_': [32, 32, 103, 103, 64, 13, 13, 64, 13, 13, 64, 13,
|
||||
13, 64, 13, 13, 64, 231, 231, 128, 231, 231, 128, 52,
|
||||
52, 128, 26, 26, 128, 77, 77, 128, 26, 26, 128, 26, 26,
|
||||
128, 308, 308, 128, 26, 26, 128, 26, 26, 128, 512, 512]
|
||||
}
|
||||
|
||||
|
||||
# (80x80) flops: 0.044, params: 0.41
|
||||
def MiniFASNetV1(embedding_size=128, conv6_kernel=(7, 7),
|
||||
drop_p=0.2, num_classes=3, img_channel=3):
|
||||
return MiniFASNet(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
|
||||
|
||||
|
||||
# (80x80) flops: 0.044, params: 0.43
|
||||
def MiniFASNetV2(embedding_size=128, conv6_kernel=(7, 7),
|
||||
drop_p=0.2, num_classes=3, img_channel=3):
|
||||
return MiniFASNet(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
|
||||
|
||||
def MiniFASNetV1SE(embedding_size=128, conv6_kernel=(7, 7),
|
||||
drop_p=0.75, num_classes=3, img_channel=3):
|
||||
return MiniFASNetSE(keep_dict['1.8M'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
|
||||
|
||||
# (80x80) flops: 0.044, params: 0.43
|
||||
def MiniFASNetV2SE(embedding_size=128, conv6_kernel=(7, 7),
|
||||
drop_p=0.75, num_classes=4, img_channel=3):
|
||||
return MiniFASNetSE(keep_dict['1.8M_'], embedding_size, conv6_kernel, drop_p, num_classes, img_channel)
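# Forward-pass sketch (shapes are illustrative; conv6_kernel must match the input
# resolution so that conv_6_dw reduces the feature map to 1x1, e.g. (5, 5) for 80x80 inputs):
#   net = MiniFASNetV2(embedding_size=128, conv6_kernel=(5, 5), num_classes=3)
#   logits = net(torch.randn(1, 3, 80, 80))   # -> tensor of shape (1, 3)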
|