-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcleaning_with_face_recognition.py
107 lines (82 loc) · 3.39 KB
/
cleaning_with_face_recognition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import glob
import random
import threading
import time
import cv2
from sklearn.cluster import AgglomerativeClustering
import numpy as np
import face_recognition
from collections import Counter
from PIL import Image
import config
from utils import display
random.seed(1)
"""
bvzjkezkms.mp4 - is bad: Counter({1: 9, 0: 7})
bsqgziaylx.mp4 - two faces, 3 clusters: Counter({0: 16, 1: 15, 2: 1})
"""
def get_encoding(p_img):
    """Return the face encoding of an already-cropped face image.

    The image at ``p_img`` is loaded with ``face_recognition`` and encoded
    with the ``'large'`` landmark model. No face detection is run: a single
    box spanning the full image is passed as the known face location, so
    the whole picture is treated as one face.

    :param p_img: path to the image file.
    :return: the first (and only) encoding returned for the full-image box.
    """
    image = face_recognition.load_image_file(p_img)
    height, width, _ = image.shape
    # Box spanning the full frame, in the order the original call used:
    # 0, width, height, 0 (top, right, bottom, left in face_recognition terms).
    whole_image_box = [0, width, height, 0]
    encodings = face_recognition.face_encodings(
        image,
        known_face_locations=[whole_image_box],
        model='large',
    )
    return encodings[0]
def tmp_show_distance():
    """Debug helper: time and print the encoding distance between image pairs.

    Reads the module-level ``paths`` list (assigned in the ``__main__``
    section of this file). For every ``i`` in ``1..len(paths)-1`` and every
    ``j`` in ``0..len(paths)-1`` it loads both images, encodes each one using
    a box that spans its own full frame, prints the load / encode / distance
    timings and the distance itself, and shows the pair via ``display``.

    Note that ``i`` starts at 1, so the first path is never used as the
    left-hand image (kept as in the original loop).
    """
    for i in range(1, len(paths)):
        for j in range(0, len(paths)):
            t = time.time()
            img1 = face_recognition.load_image_file(paths[i])
            img2 = face_recognition.load_image_file(paths[j])
            print(f"Face load time: {time.time() - t}")

            h1, w1, _ = img1.shape
            h2, w2, _ = img2.shape

            t = time.time()
            encoding1 = face_recognition.face_encodings(
                img1, known_face_locations=[[0, w1, h1, 0]], model='large')[0]
            # Each image must be boxed with its OWN dimensions; the previous
            # code reused w1/h1 here, which mis-sized the box whenever the
            # two crops differed in size.
            encoding2 = face_recognition.face_encodings(
                img2, known_face_locations=[[0, w2, h2, 0]], model='large')[0]
            print(f'Encoding time: {time.time() - t}')

            t = time.time()
            distance = face_recognition.face_distance([encoding1], encoding2)
            print(f'Distance{time.time() - t}')

            print(f"Distance between : {os.path.basename(paths[i])} and {os.path.basename(paths[j])} is {distance}")
            display([img1, img2])
def cluster(paths_to_images):
    """Cluster face crops by encoding and save a cluster sheet in a thread.

    Each image is encoded with ``get_encoding`` and the encodings are grouped
    with average-linkage agglomerative clustering (no fixed cluster count,
    distance threshold 0.55). The resulting labels are handed to
    ``save_clusters`` on a background thread; this function does not wait
    for that thread and returns ``None``.

    :param paths_to_images: list of paths to face-crop images.

    Edge cases:
      * empty list -> nothing to do, returns immediately;
      * one image  -> it forms cluster 0 by itself (no clustering is run).
    """
    if not paths_to_images:
        return

    if len(paths_to_images) == 1:
        # scikit-learn's AgglomerativeClustering rejects inputs with fewer
        # than two samples, so a lone image is labelled directly.
        labels = np.zeros(1, dtype=int)
    else:
        encodings = [get_encoding(p) for p in paths_to_images]
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=0.55,
            linkage='average',
        ).fit(encodings)
        labels = clustering.labels_

    threading.Thread(target=save_clusters, args=(labels, paths_to_images)).start()
def save_clusters(cluster_labels, paths_to_images):
    """Write one contact-sheet image showing the members of every cluster.

    Every readable image is pasted into the top-left corner of a grey
    (value 127) tile that is one pixel taller and wider than the largest
    image. Tiles with the same label are stacked horizontally into a row,
    shorter rows are padded with blank grey tiles, and the rows are stacked
    vertically. The sheet is written to ``config.DIR_CLUSTERS`` under the
    file name of the first path in ``paths_to_images``.

    :param cluster_labels: one label per entry of ``paths_to_images``.
    :param paths_to_images: paths of the images that were clustered.
    :return: ``None``. Nothing is written when there are no paths or none of
        the images can be read.
    """
    if not paths_to_images:
        return

    name = f'{os.path.basename(paths_to_images[0])}'

    # Keep each label paired with its image so that skipping an unreadable
    # file cannot shift the labels of the images that follow it.
    labelled_images = []
    for label, path in zip(cluster_labels, paths_to_images):
        img = cv2.imread(path)
        if img is None:  # cv2.imread signals failure by returning None
            continue
        labelled_images.append((label, img))

    if not labelled_images:
        return

    max_h = max(img.shape[0] for _, img in labelled_images)
    max_w = max(img.shape[1] for _, img in labelled_images)
    empty_image = np.full((max_h + 1, max_w + 1, 3), 127, dtype='uint8')

    clusters = {}
    for label, img in labelled_images:
        h, w, _ = img.shape
        template = empty_image.copy()
        template[:h, :w] = img
        clusters.setdefault(label, []).append(template)

    max_size = max(len(tiles) for tiles in clusters.values())

    # Pad every row to the same number of tiles so the rows can be stacked.
    rows = [
        np.hstack(tiles + [empty_image] * (max_size - len(tiles)))
        for tiles in clusters.values()
    ]

    cv2.imwrite(f'{config.DIR_CLUSTERS}/{name}', np.vstack(rows))
if __name__ == '__main__':
    # Script entry point: cluster the face crops of every video folder found
    # in config.DIR_FACE_IMAGES.
    # For a quick spot check, `folders` can instead be limited to a few known
    # videos, e.g. ['bvzjkezkms.mp4', 'bsqgziaylx.mp4'] plus a random sample.
    folders = os.listdir(config.DIR_FACE_IMAGES)
    total = len(folders)

    for i, folder in enumerate(folders):
        # Only directories named after a video file are processed.
        if not folder.endswith('mp4'):
            continue

        # Progress report on every 100th directory entry.
        if i % 100 == 0:
            print(f'{i+1}/{total} processed')

        # `paths` is deliberately a module-level name: tmp_show_distance()
        # reads it. The precomputed mean face is not a sample, so skip it.
        paths = []
        for p in glob.glob(f'{config.DIR_FACE_IMAGES}/{folder}/*.jpg'):
            if p.endswith('mean_face.jpg'):
                continue
            paths.append(p)

        cluster(paths)