├── DownloadPhotosToJpg_2.py
├── FindFaceInEmbeddingsBin_6.py
├── FindIntoNpy_4.py
├── JpgToNpy_3.py
├── NpyToEmbeddingsBin_5.py
├── README.md
├── VkIdsParser_1.py
├── dlib_face_recognition_resnet_model_v1.dat
├── jpg
    └── README.MD
├── nofaces
    └── README.MD
└── npy
    └── README.MD


/DownloadPhotosToJpg_2.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | import os
 3 | import requests
 4 | import codecs
 5 | import vk_api
 6 | 
 7 | 
 8 | def load_file(name, url):
 9 |     if not os.path.exists('jpg/' + str(name) + '.jpg'):
10 |         r = requests.get(url, stream=True)
11 |         if r.status_code == 200:
12 |             with open('jpg/' + str(name) + '.jpg', 'wb') as f:
13 |                 r.raw.decode_content = True
14 |                 shutil.copyfileobj(r.raw, f)
15 | 
16 | 
17 | def get_photos_by_id(user_id_):
18 |     try:
19 |         request_result = vk.photos.getAll(owner_id=user_id_,
20 |                                           count=50,
21 |                                           no_service_albums=0)
22 |         prev = ''
23 |         flag = 0
24 |         photos = []
25 |         for item in request_result['items']:
26 |             for size in item['sizes']:
27 |                 url_ = str(size['url'])
28 |                 mas_ = url_.split('/')
29 |                 ident = mas_[4]
30 |                 if prev != ident:
31 |                     prev = ident
32 |                     flag = 0
33 |                 else:
34 |                     flag += 1
35 |                     if flag == 3:
36 |                         photos.append(url_)
37 |         max_flag = 0
38 |         for photo in photos:
39 |             max_flag += 1
40 |             if max_flag < 10:
41 |                 load_file(str(user_id_) + '_' + str(max_flag), photo)
42 |     except Exception as ex:
43 |         print(ex)
44 | 
45 | 
46 | if __name__ == '__main__':
47 |     vk_session = vk_api.VkApi('логин ВК', 'Пароль ВК')
48 |     vk_session.auth()
49 | 
50 |     vk = vk_session.get_api()
51 | 
52 |     ff = codecs.open(u'ids.txt', 'r', encoding='utf8')
53 | 
54 |     e = 0
55 |     for x in ff:
56 |         e += 1
57 |         print(e)
58 |         mas = x.split('|')
59 |         user_id = int(mas[0])
60 |         get_photos_by_id(user_id)
61 | 
62 |     ff.close()
63 | 


--------------------------------------------------------------------------------
/FindFaceInEmbeddingsBin_6.py:
--------------------------------------------------------------------------------
 1 | import dlib
 2 | import numpy as np
 3 | from skimage import io
 4 | import nmslib
 5 | 
 6 | 
 7 | def get_face_descriptor(filename):
 8 |     img = io.imread(filename)
 9 |     win1 = dlib.image_window()
10 |     win1.clear_overlay()
11 |     win1.set_image(img)
12 |     face_descriptor = None
13 |     shape = None
14 |     detected_faces = detector(img, 1)
15 |     for k, d in enumerate(detected_faces):
16 |         shape = sp(img, d)
17 |         win1.clear_overlay()
18 |         win1.add_overlay(d)
19 |         win1.add_overlay(shape)
20 |     try:
21 |         face_descriptor = face_rec.compute_face_descriptor(img, shape)
22 |         face_descriptor = np.asarray(face_descriptor)
23 |     except Exception as ex:
24 |         print(ex)
25 | 
26 |     return face_descriptor
27 | 
28 | 
29 | def print_id(n):
30 |     best_dx = ids[n]
31 |     s = ''
32 |     with open('associations.txt', 'r') as file_:
33 |         for line in file_:
34 |             w = str(best_dx) + '|'
35 |             if line.find(w) == 0:
36 |                 s = line.split('|')[1]
37 |     s = 'https://vk.com/id' + s.split('_')[0]
38 |     for bad_symbols in ['.txt', '.npy', '\n']:
39 |         s = s.replace(bad_symbols, '')
40 |     print(s)
41 | 
42 | 
43 | if __name__ == '__main__':
44 |     sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
45 |     face_rec = dlib.face_recognition_model_v1(
46 |         'dlib_face_recognition_resnet_model_v1.dat')
47 |     detector = dlib.get_frontal_face_detector()
48 |     index = nmslib.init(method='hnsw', space='l2',
49 |                         data_type=nmslib.DataType.DENSE_VECTOR)
50 |     index.loadIndex('embeddings.bin')
51 | 
52 |     query_time_params = {'efSearch': 400}
53 |     index.setQueryTimeParams(query_time_params)
54 | 
55 |     embedding = get_face_descriptor('1.jpg')
56 | 
57 |     ids, dists = index.knnQuery(embedding, k=5)
58 |     print_id(0)
59 |     print_id(1)
60 |     print_id(2)
61 |     print_id(3)
62 |     print_id(4)
63 | 


--------------------------------------------------------------------------------
/FindIntoNpy_4.py:
--------------------------------------------------------------------------------
 1 | import dlib
 2 | import os
 3 | import numpy as np
 4 | from skimage import io
 5 | from scipy.spatial import distance
 6 | 
 7 | 
 8 | def get_face_descriptor(filename):
 9 |     img = io.imread(filename)
10 |     win1 = dlib.image_window()
11 |     win1.clear_overlay()
12 |     win1.set_image(img)
13 |     detected_faces = detector(img, 1)
14 |     shape = None
15 |     face_descriptor = None
16 |     for k, d in enumerate(detected_faces):
17 |         shape = sp(img, d)
18 |         win1.clear_overlay()
19 |         win1.add_overlay(d)
20 |         win1.add_overlay(shape)
21 |     try:
22 |         face_descriptor = face_rec.compute_face_descriptor(img, shape)
23 |     except Exception as ex:
24 |         print(ex)
25 |     return face_descriptor
26 | 
27 | 
28 | if __name__ == '__main__':
29 |     sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
30 |     face_rec = dlib.face_recognition_model_v1(
31 |             'dlib_face_recognition_resnet_model_v1.dat')
32 |     detector = dlib.get_frontal_face_detector()
33 | 
34 |     min_distance_file = ''
35 |     min_distance = 2
36 |     files = os.listdir('npy')
37 |     f1 = get_face_descriptor('1.jpg')
38 | 
39 |     for file_ in files:
40 |         file_name = 'npy/' + file_
41 |         if os.path.exists(file_name):
42 |             f2 = np.load(file_name)
43 |             euc_distance = distance.euclidean(f1, f2)
44 |             if euc_distance < min_distance:
45 |                 min_distance = euc_distance
46 |                 min_distance_file = file_
47 |     min_distance_file = min_distance_file.split('_')[0]
48 |     print('https://vk.com/id' + min_distance_file.replace('.npy', ''))
49 |     print('Result: ' + str(min_distance) + ' (< 0,52 = Win!)')
50 | 


--------------------------------------------------------------------------------
/JpgToNpy_3.py:
--------------------------------------------------------------------------------
 1 | import dlib
 2 | import os
 3 | import numpy as np
 4 | from skimage import io
 5 | 
 6 | 
 7 | sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
 8 | face_rec = \
 9 |     dlib.face_recognition_model_v1('dlib_face_recognition_resnet_model_v1.dat')
10 | detector = dlib.get_frontal_face_detector()
11 | 
12 | 
13 | def get_face_descriptor(x):
14 |     img = io.imread('jpg/'+x)
15 |     detected_faces = detector(img, 1)
16 |     q = 0
17 |     for k, d in enumerate(detected_faces):
18 |         shape = sp(img, d)
19 |         try:
20 |             q += 1
21 |             f = face_rec.compute_face_descriptor(img, shape)
22 |             mas = np.array(f)
23 |             file_name_ = 'npy/' + x.replace('.jpg', '')
24 |             np.save(file_name_ + '_' + str(q), mas)
25 |         except Exception as ex:
26 |             print(ex)
27 | 
28 | 
29 | files = os.listdir('jpg')
30 | z = 0
31 | for x in files: 
32 |     z += 1
33 |     file_name = 'npy/' + (x.replace('.jpg', ''))
34 |     if not os.path.exists(file_name + '_1.npy'):
35 |         print(z)
36 |         get_face_descriptor(x)
37 | 


--------------------------------------------------------------------------------
/NpyToEmbeddingsBin_5.py:
--------------------------------------------------------------------------------
 1 | import dlib
 2 | import os
 3 | import numpy as np
 4 | import nmslib
 5 | 
 6 | 
 7 | if __name__ == '__main__':
 8 |     index = nmslib.init(method='hnsw',
 9 |                         space='l2',
10 |                         data_type=nmslib.DataType.DENSE_VECTOR)
11 | 
12 |     files = os.listdir('npy')
13 | 
14 |     es = []
15 |     e = 0
16 | 
17 |     with open('associations.txt', 'w') as embedding_file:
18 | 
19 |         for file in files:
20 |             e += 1
21 |             name, _ = os.path.splitext(file)
22 |             embedding = np.load('npy/' + file)
23 |             embedding_file.write(str(e) + '|' + file + '\n')
24 |             index.addDataPoint(e, embedding)
25 | 
26 |         index_time_params = {
27 |             'indexThreadQty': 4,
28 |             'skip_optimized_index': 0,
29 |             'post': 2,
30 |             'delaunay_type': 1,
31 |             'M': 100,
32 |             'efConstruction': 2000
33 |         }
34 | 
35 |         index.createIndex(index_time_params, print_progress=True)
36 |         index.saveIndex('embeddings.bin')
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Аналог Findface для небольших городов
 2 | 
 3 | Система состоит из нескольких скриптов - скачивает фото юзеров из профилей ВК в рамках одного указанного города, и создает базу биометрических данных этих лиц, связывая их с аккаунтами ВК.
 4 | 
 5 | Скрипты протестированы на Parrot OS и требуют установки модулей Python которые есть в разделах import у скриптов
 6 | 
 7 | Для того чтобы скрипты заработали клонируйте этот репозиторий и удалите из подкаталогов файлы README.MD - иначе они будут мешать работе скриптов
 8 | 
 9 | Скачайте этот файл https://cloud.mail.ru/public/2KGj/2pWSDbXZt и бросьте в папку с проектом
10 | 
11 | 1) VkIdsParser_1.py - впишите в скрипт ваш логин пароль ВК (с выключенной двухфакторной авторизацией)
12 | 
13 | Также поменяйте параметры аккаунтов которые надо парсить - пол, возраст, город
14 | 
15 | По итогам работы скрипта вы получите файлик ids.txt со списком ID ВК
16 | 
17 | 
18 | 2) DownloadPhotosToJpg_2.py - впишите в скрипт ваш логин пароль ВК (с выключенной двухфакторной авторизацией)
19 | 
20 | скрипт берет данные из списка ids.txt и скачивает в папку jpg по нескольку фото с каждого аккаунта 
21 | 
22 | Имена файлов ссответствуют id ВК
23 | 
24 | 3) JpgToNpy_3.py - Производит процесс сбора биометрических параметров лиц с фото в папке jpg и сохраняет их в паку npy в виде мелких файликов
25 | 
26 | При больших объемах фоток скрипт может работать достаточно долго (часы, дни)
27 | 
28 | Тем не менее его можно выключить в любой момент а при следующем запуске он начнет с того места где остановился
29 | 
30 | 4) FindIntoNpy_4.py - скрипт пытается найти среди файлов .npy в папке npy биометрию схожую с 1.jpg
31 | 
32 | Файл 1.jpg с искомым лицом нужно предварительно поместить в корневую директорию проекта
33 | 
34 | Данный скрипт ищет очень медленно из-за того что ему нужно перебрать все файлы .npy
35 | 
36 | Не рекомендуется использовать данный способ для поиска - только для проверки корректности создания .npy
37 | 
38 | 5) NpyToEmbeddingsBin_5.py - скрипт берет все файлы .npy из папки npy и создает на их основе файл embeddings.bin
39 | 
40 | Созданный файл по сути содержит биометрию всех ранее распознанных лиц и все файлы с папок jpg и npy после его создания уже не нужны
41 | 
42 | Помимо этого создается файл associations.txt в котором хранятся соответствия номеру записи в эмбединге и имени файла .npy из которого она была взята
43 | 
44 | 6) FinfFaceInEmbeddingsBin_6.py - пытается найти в базе лицо схожее с лицом на 1.jpg
45 | 
46 | Файл 1.jpg с искомым лицом нужно предварительно поместить в корневую директорию проекта
47 | 
48 | В отличии от скрипта номер 4 работает очень быстро. 
49 | 


--------------------------------------------------------------------------------
/VkIdsParser_1.py:
--------------------------------------------------------------------------------
 1 | import vk_api
 2 | import time
 3 | import codecs
 4 | 
 5 | 
 6 | if __name__ == '__main__':
 7 |     # Заходим ВКонтакте под своим логином
 8 |     vk_session = vk_api.VkApi('логин ВК', 'Пароль ВК')
 9 |     vk_session.auth()
10 |     vk = vk_session.get_api()
11 | 
12 |     # Пишем возраст от и до людей которых надо спарсить
13 |     age = 18
14 |     age_max = 30
15 | 
16 |     # Номер города
17 |     city_number = 104
18 | 
19 |     # 1 - девушки, 2 - парни
20 |     gender = 1
21 | 
22 |     # Открываем файл для записи результатов
23 |     ff = codecs.open('ids.txt', 'w', encoding='utf8')
24 | 
25 |     # Перебор возрастов
26 |     while age <= age_max:
27 |         month = 1
28 |         # Перебор месяцев рождения
29 |         while month <= 12:
30 |             # Пауза для API
31 |             time.sleep(4)
32 |             # Пишем какую группу людей качаем
33 |             print('Download ID: ' + str(age) + ' age, born in ' + str(month))
34 |             # Получаем 1000 юзеров - их ФИО, айди, и фотку
35 |             z = vk.users.search(count=1000,
36 |                                 fields='id, photo_max_orig, has_photo, '
37 |                                        'first_name, last_name',
38 |                                 city=city_number,
39 |                                 sex=gender,
40 |                                 age_from=age,
41 |                                 age_to=age,
42 |                                 birth_month=month)
43 |             month = month + 1
44 |             print('Peoples count: ' + str(z['count']))
45 |             for x in z['items']:
46 |                 if x['has_photo'] == 1:
47 |                     # Записываем данные о юзере в файл разделяя черточкой |
48 |                     s = str(x['id']) + '|' + str(x['photo_max_orig']) + '|' + str(
49 |                         x['first_name']) + ' ' + str(x['last_name']) + '\n'
50 |                     ff.write(s)
51 |         age = age + 1
52 | 
53 |     ff.close()
54 |     print('Done!')
55 | 


--------------------------------------------------------------------------------
/dlib_face_recognition_resnet_model_v1.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/solkogan/searchface/7e00d3dc44f9941e4cffe188b1ec09564ca0164b/dlib_face_recognition_resnet_model_v1.dat


--------------------------------------------------------------------------------
/jpg/README.MD:
--------------------------------------------------------------------------------
1 | В этой папке должны лежать фото с лицами - типа база лиц
2 | 
3 | Прежде чем запускать скрипты удалите с этой папке файл README.MD
4 | 


--------------------------------------------------------------------------------
/nofaces/README.MD:
--------------------------------------------------------------------------------
1 | В эту папку попадут фото на которых система не нашла лиц
2 | 


--------------------------------------------------------------------------------
/npy/README.MD:
--------------------------------------------------------------------------------
1 | 
2 | В этой папке появятся файлы .npy хранящие биометрию лиц
3 | Прежде чем запускать скрипты удалите с этой папки файл README.MD
4 | 


--------------------------------------------------------------------------------