# Repository layout (from the dump header):
# ├── Tools
#     ├── market_change.py
#     ├── ATRW_test.py
#     └── split_train_test.py
# └── README.md

# --------------------------------------------------------------------------------
# /Tools/market_change.py
# --------------------------------------------------------------------------------
# Renames "<id>_<rest>.jpg" images to a Market1501-style
# "<id>_c<camera>s<sequence>_<rest>.jpg" name.
import os
import random


# Folder whose images are renamed in place; must be filled in before running.
directory = ''
# Identity is zero-padded to four characters (e.g. "12" -> "0012"); the camera
# and sequence indices are drawn at random because the source data has none.
new_name_format = '{:0>4}_c{}s{}_{}'


def rename_to_market1501(directory, name_format=new_name_format, rng=random):
    """Rename every ``<id>_<rest>.jpg`` file in *directory* in place.

    Only names that end with ``.jpg`` and contain an underscore are touched.
    The text before the first underscore is the identity; everything after it
    (including the extension) is kept, so names with several underscores no
    longer lose their ``.jpg`` suffix.

    Args:
        directory: Folder containing the images.
        name_format: ``str.format`` template taking identity, camera index,
            sequence index and the remaining file name, in that order.
        rng: Object providing ``randint(a, b)``; defaults to the ``random``
            module.

    Returns:
        A list of ``(old_name, new_name)`` pairs for the files that were
        renamed.
    """
    renamed = []
    # Sorted so that a seeded ``rng`` gives the same names on every run.
    for filename in sorted(os.listdir(directory)):
        if not (filename.endswith(".jpg") and "_" in filename):
            continue

        stem, extension = os.path.splitext(filename)
        identity, _, remainder = stem.partition("_")
        new_name = name_format.format(
            identity, rng.randint(1, 6), rng.randint(1, 6), remainder + extension
        )

        old_path = os.path.join(directory, filename)
        new_path = os.path.join(directory, new_name)

        # os.rename may silently replace an existing file; never lose an image.
        if os.path.exists(new_path):
            print(f"skip {filename}: {new_name} already exists")
            continue

        os.rename(old_path, new_path)
        renamed.append((filename, new_name))
    return renamed


if __name__ == "__main__":
    rename_to_market1501(directory)

# --------------------------------------------------------------------------------
# /Tools/ATRW_test.py
# --------------------------------------------------------------------------------
# Copies the ATRW test images listed as "multi" queries in the ground-truth
# JSON to "<entityid>_<imgid>.jpg" files in an output folder.
import os
import json
import shutil

json_file_path = "gt_test_plain.json"
# Both folders must be filled in before running.
image_folder_path = ""
output_folder_path = ""


def copy_atrw_test_images(json_file_path, image_folder_path, output_folder_path):
    """Copy the images of all ``"multi"`` query entries to *output_folder_path*.

    Each JSON entry is expected to provide ``"entityid"``, ``"imgid"`` and
    ``"query"``. An image matches an entry when the integer value of its file
    stem equals the entry's ``imgid``. Entries without a matching image are
    skipped.

    Args:
        json_file_path: Path of the ground-truth JSON file (a list of entries).
        image_folder_path: Folder holding the ``<imgid>.jpg`` source images.
        output_folder_path: Destination folder; created if missing.

    Returns:
        The list of file names written to *output_folder_path*.
    """
    os.makedirs(output_folder_path, exist_ok=True)

    with open(json_file_path, "r") as file:
        data = json.load(file)

    # Index the image folder once instead of re-listing it for every entry.
    files_by_imgid = {}
    for file_name in os.listdir(image_folder_path):
        if not file_name.endswith(".jpg"):
            continue
        try:
            key = int(os.path.splitext(file_name)[0])
        except ValueError:
            # A .jpg whose name is not a number cannot match any imgid.
            continue
        # Keep the first file seen for an id, as the original lookup did.
        files_by_imgid.setdefault(key, file_name)

    copied = []
    for item in data:
        if item["query"] != "multi":
            continue
        entityid = int(item["entityid"])
        imgid = int(item["imgid"])

        source_name = files_by_imgid.get(imgid)
        if source_name is None:
            continue

        target_name = f"{entityid}_{imgid}.jpg"
        shutil.copy2(
            os.path.join(image_folder_path, source_name),
            os.path.join(output_folder_path, target_name),
        )
        copied.append(target_name)
    return copied


if __name__ == "__main__":
    copy_atrw_test_images(json_file_path, image_folder_path, output_folder_path)

# --------------------------------------------------------------------------------
# /README.md
# --------------------------------------------------------------------------------
# 1 | # Wildlife-dataset
# 2 | | Dataset | Dataset Download | Annotations |
3 | | ---- | ---- | ---- | 4 | | iPanda-50 | [iPanda-50](https://github.com/iPandaDateset/iPanda-50) | [label link](https://drive.google.com/drive/folders/1jhk8qgyWMbL1Ykd_GlAjh2Vn2e_wMJmc?usp=sharing) | 5 | | ATRW (Amur Tiger Re-identification) | [ATRW](https://www.kaggle.com/datasets/quadeer15sh/amur-tiger-reidentification) | [label link](https://drive.google.com/drive/folders/1HlFVl5SPcKFWElo9cwq7eTyL1qwEeSSD?usp=sharing) | 6 | | ELPephants | [ELPephants](https://cornell.app.box.com/s/qh9clpzm5e2vgsjmcaca0kqasj2vt1f6) | [label link](https://drive.google.com/drive/folders/ELPephants) | 7 | | SealID | [SealID](https://etsin.fairdata.fi/dataset/22b5191e-f24b-4457-93d3-95797c900fc0) | [label link](https://drive.google.com/drive/folders/SealID) | 8 | | GZGC-G | [GZGC-G](https://lila.science/datasets/great-zebra-giraffe-id) | [label link](https://drive.google.com/drive/folders/GZGC-G) | 9 | | GZGC-Z | [GZGC-Z](https://lila.science/datasets/great-zebra-giraffe-id) | [label link](https://drive.google.com/drive/folders/GZGC-Z) | 10 | | LeopardID | [LeopardID](https://lila.science/datasets/leopard-id-2022/) | [label link](https://drive.google.com/drive/folders/LeopardID) | 11 | 12 | 13 | ``` 14 | Wildlife dataset 15 | ├─Tools 16 | | (tools for processing the datasets and converting them to the Market1501 format) 17 | ├─README.md 18 | ``` 19 | 20 | # Tools 21 | Run **ATRW_test.py** to convert the format of the ATRW test set. 
# README.md (continued from the previous line of the dump):
# 21 (cont.) | You need to first download the ATRW test set and the gt_test_plain.json
# 22 | Run **market_change.py** to change the format to Market1501 format
# 23 | Run **split_train_test.py** to split the dataset to train_set and test_set
# 24 |
# 25 |
# 26 | # Our project will be constantly updated
# 27 |
# --------------------------------------------------------------------------------
# /Tools/split_train_test.py
# --------------------------------------------------------------------------------
import os
import random
import shutil
from collections import defaultdict


def split_dataset_by_id(dataset_folder, train_folder, test_folder, train_ratio=0.7, seed=None):
    """Move the ``.jpg`` files of *dataset_folder* into a train and a test folder.

    Files are grouped by identity, taken as the text before the first ``_`` in
    the file name. Whole identities are assigned to one side, so no identity
    ends up in both folders. The first ``int(n_ids * train_ratio)`` identities
    of the shuffled list go to *train_folder*; the rest go to *test_folder*.

    Args:
        dataset_folder: Folder containing the images to split.
        train_folder: Destination for the training identities; created if missing.
        test_folder: Destination for the test identities; created if missing.
        train_ratio: Fraction of identities (not images) assigned to training.
        seed: Optional seed; the same seed and file names give the same split.

    Returns:
        A ``(train_ids, test_ids)`` tuple of identity lists.

    Raises:
        ValueError: If *train_ratio* is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # shutil.move fails when the destination folder is missing, so do not rely
    # on the caller having created it.
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    id_groups = defaultdict(list)
    for image_file in os.listdir(dataset_folder):
        if image_file.endswith('.jpg'):
            id_groups[image_file.split('_')[0]].append(image_file)

    # os.listdir order is arbitrary; sort first so a seed fully fixes the split.
    id_list = sorted(id_groups)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(id_list)

    split_index = int(len(id_list) * train_ratio)
    train_ids = id_list[:split_index]
    test_ids = id_list[split_index:]

    for ids, target_folder in ((train_ids, train_folder), (test_ids, test_folder)):
        for id_part in ids:
            for image_file in id_groups[id_part]:
                shutil.move(
                    os.path.join(dataset_folder, image_file),
                    os.path.join(target_folder, image_file),
                )

    return train_ids, test_ids


if __name__ == "__main__":
    dataset_folder = "dataset"  # dataset_folder
    train_folder = "train"  # train_folder
    test_folder = "test"  # test_folder

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)
    train_ratio = 0.7  # train_set ratio
    split_dataset_by_id(dataset_folder, train_folder, test_folder, train_ratio)