├── LICENSE ├── README.md ├── demo ├── demo_hqsam.py ├── demo_hqsam_light.py ├── demo_hqsam_pip_example.py ├── demo_sam.py └── input_imgs │ ├── dog.jpg │ ├── example0.png │ ├── example1.png │ ├── example2.png │ ├── example3.png │ ├── example4.png │ ├── example5.png │ ├── example6.png │ ├── example7.png │ └── example8.png ├── figs ├── coco_vis_comp.png ├── davis.png ├── points_comp.png ├── sam-hf-framework.png ├── sam_variants_comp.png ├── sam_vs_hqsam_backbones.png └── ytvis.png ├── sam-hq2 ├── INSTALL.md ├── README.md ├── assets │ └── hq-sam2-results.png ├── checkpoints │ └── download_ckpts.sh ├── demo │ ├── demo_hqsam2.py │ └── input_images │ │ ├── example1.png │ │ ├── example2.png │ │ ├── example3.png │ │ └── example4.png ├── notebooks │ ├── image_predictor_example.ipynb │ ├── images │ │ ├── cars.jpg │ │ ├── groceries.jpg │ │ └── truck.jpg │ ├── video_predictor_example.ipynb │ └── videos │ │ ├── bedroom.mp4 │ │ └── bedroom │ │ ├── 00000.jpg │ │ ├── 00001.jpg │ │ ├── 00002.jpg │ │ ├── 00003.jpg │ │ ├── 00004.jpg │ │ ├── 00005.jpg │ │ ├── 00006.jpg │ │ ├── 00007.jpg │ │ ├── 00008.jpg │ │ ├── 00009.jpg │ │ ├── 00010.jpg │ │ ├── 00011.jpg │ │ ├── 00012.jpg │ │ ├── 00013.jpg │ │ ├── 00014.jpg │ │ ├── 00015.jpg │ │ ├── 00016.jpg │ │ ├── 00017.jpg │ │ ├── 00018.jpg │ │ ├── 00019.jpg │ │ ├── 00020.jpg │ │ ├── 00021.jpg │ │ ├── 00022.jpg │ │ ├── 00023.jpg │ │ ├── 00024.jpg │ │ ├── 00025.jpg │ │ ├── 00026.jpg │ │ ├── 00027.jpg │ │ ├── 00028.jpg │ │ ├── 00029.jpg │ │ ├── 00030.jpg │ │ ├── 00031.jpg │ │ ├── 00032.jpg │ │ ├── 00033.jpg │ │ ├── 00034.jpg │ │ ├── 00035.jpg │ │ ├── 00036.jpg │ │ ├── 00037.jpg │ │ ├── 00038.jpg │ │ ├── 00039.jpg │ │ ├── 00040.jpg │ │ ├── 00041.jpg │ │ ├── 00042.jpg │ │ ├── 00043.jpg │ │ ├── 00044.jpg │ │ ├── 00045.jpg │ │ ├── 00046.jpg │ │ ├── 00047.jpg │ │ ├── 00048.jpg │ │ ├── 00049.jpg │ │ ├── 00050.jpg │ │ ├── 00051.jpg │ │ ├── 00052.jpg │ │ ├── 00053.jpg │ │ ├── 00054.jpg │ │ ├── 00055.jpg │ │ ├── 00056.jpg │ │ ├── 00057.jpg │ │ ├── 00058.jpg │ │ ├── 00059.jpg │ │ ├── 00060.jpg │ │ ├── 00061.jpg │ │ ├── 00062.jpg │ │ ├── 00063.jpg │ │ ├── 00064.jpg │ │ ├── 00065.jpg │ │ ├── 00066.jpg │ │ ├── 00067.jpg │ │ ├── 00068.jpg │ │ ├── 00069.jpg │ │ ├── 00070.jpg │ │ ├── 00071.jpg │ │ ├── 00072.jpg │ │ ├── 00073.jpg │ │ ├── 00074.jpg │ │ ├── 00075.jpg │ │ ├── 00076.jpg │ │ ├── 00077.jpg │ │ ├── 00078.jpg │ │ ├── 00079.jpg │ │ ├── 00080.jpg │ │ ├── 00081.jpg │ │ ├── 00082.jpg │ │ ├── 00083.jpg │ │ ├── 00084.jpg │ │ ├── 00085.jpg │ │ ├── 00086.jpg │ │ ├── 00087.jpg │ │ ├── 00088.jpg │ │ ├── 00089.jpg │ │ ├── 00090.jpg │ │ ├── 00091.jpg │ │ ├── 00092.jpg │ │ ├── 00093.jpg │ │ ├── 00094.jpg │ │ ├── 00095.jpg │ │ ├── 00096.jpg │ │ ├── 00097.jpg │ │ ├── 00098.jpg │ │ ├── 00099.jpg │ │ ├── 00100.jpg │ │ ├── 00101.jpg │ │ ├── 00102.jpg │ │ ├── 00103.jpg │ │ ├── 00104.jpg │ │ ├── 00105.jpg │ │ ├── 00106.jpg │ │ ├── 00107.jpg │ │ ├── 00108.jpg │ │ ├── 00109.jpg │ │ ├── 00110.jpg │ │ ├── 00111.jpg │ │ ├── 00112.jpg │ │ ├── 00113.jpg │ │ ├── 00114.jpg │ │ ├── 00115.jpg │ │ ├── 00116.jpg │ │ ├── 00117.jpg │ │ ├── 00118.jpg │ │ ├── 00119.jpg │ │ ├── 00120.jpg │ │ ├── 00121.jpg │ │ ├── 00122.jpg │ │ ├── 00123.jpg │ │ ├── 00124.jpg │ │ ├── 00125.jpg │ │ ├── 00126.jpg │ │ ├── 00127.jpg │ │ ├── 00128.jpg │ │ ├── 00129.jpg │ │ ├── 00130.jpg │ │ ├── 00131.jpg │ │ ├── 00132.jpg │ │ ├── 00133.jpg │ │ ├── 00134.jpg │ │ ├── 00135.jpg │ │ ├── 00136.jpg │ │ ├── 00137.jpg │ │ ├── 00138.jpg │ │ ├── 00139.jpg │ │ ├── 00140.jpg │ │ ├── 00141.jpg │ │ ├── 00142.jpg │ │ ├── 00143.jpg │ │ ├── 00144.jpg │ │ 
├── 00145.jpg │ │ ├── 00146.jpg │ │ ├── 00147.jpg │ │ ├── 00148.jpg │ │ ├── 00149.jpg │ │ ├── 00150.jpg │ │ ├── 00151.jpg │ │ ├── 00152.jpg │ │ ├── 00153.jpg │ │ ├── 00154.jpg │ │ ├── 00155.jpg │ │ ├── 00156.jpg │ │ ├── 00157.jpg │ │ ├── 00158.jpg │ │ ├── 00159.jpg │ │ ├── 00160.jpg │ │ ├── 00161.jpg │ │ ├── 00162.jpg │ │ ├── 00163.jpg │ │ ├── 00164.jpg │ │ ├── 00165.jpg │ │ ├── 00166.jpg │ │ ├── 00167.jpg │ │ ├── 00168.jpg │ │ ├── 00169.jpg │ │ ├── 00170.jpg │ │ ├── 00171.jpg │ │ ├── 00172.jpg │ │ ├── 00173.jpg │ │ ├── 00174.jpg │ │ ├── 00175.jpg │ │ ├── 00176.jpg │ │ ├── 00177.jpg │ │ ├── 00178.jpg │ │ ├── 00179.jpg │ │ ├── 00180.jpg │ │ ├── 00181.jpg │ │ ├── 00182.jpg │ │ ├── 00183.jpg │ │ ├── 00184.jpg │ │ ├── 00185.jpg │ │ ├── 00186.jpg │ │ ├── 00187.jpg │ │ ├── 00188.jpg │ │ ├── 00189.jpg │ │ ├── 00190.jpg │ │ ├── 00191.jpg │ │ ├── 00192.jpg │ │ ├── 00193.jpg │ │ ├── 00194.jpg │ │ ├── 00195.jpg │ │ ├── 00196.jpg │ │ ├── 00197.jpg │ │ ├── 00198.jpg │ │ └── 00199.jpg ├── pyproject.toml ├── sam2 │ ├── __init__.py │ ├── automatic_mask_generator.py │ ├── build_sam.py │ ├── configs │ │ └── sam2.1 │ │ │ ├── sam2.1_hiera_b+.yaml │ │ │ ├── sam2.1_hiera_l.yaml │ │ │ ├── sam2.1_hiera_s.yaml │ │ │ ├── sam2.1_hiera_t.yaml │ │ │ └── sam2.1_hq_hiera_l.yaml │ ├── csrc │ │ └── connected_components.cu │ ├── modeling │ │ ├── __init__.py │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── hieradet.py │ │ │ ├── image_encoder.py │ │ │ └── utils.py │ │ ├── memory_attention.py │ │ ├── memory_encoder.py │ │ ├── position_encoding.py │ │ ├── sam │ │ │ ├── __init__.py │ │ │ ├── mask_decoder.py │ │ │ ├── mask_hq_decoder.py │ │ │ ├── prompt_encoder.py │ │ │ └── transformer.py │ │ ├── sam2_base.py │ │ ├── sam2_hq_base.py │ │ └── sam2_utils.py │ ├── sam2_hq_video_predictor.py │ ├── sam2_image_predictor.py │ ├── sam2_video_predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── misc.py │ │ └── transforms.py └── setup.py ├── scripts └── export_onnx_model.py ├── seginw ├── GroundingDINO │ ├── .asset │ │ ├── COCO.png │ │ ├── GD_GLIGEN.png │ │ ├── GD_SD.png │ │ ├── ODinW.png │ │ ├── arch.png │ │ ├── cats.png │ │ └── hero_figure.png │ ├── LICENSE │ ├── README.md │ ├── demo │ │ ├── gradio_app.py │ │ └── inference_on_a_image.py │ ├── groundingdino │ │ ├── __init__.py │ │ ├── config │ │ │ ├── GroundingDINO_SwinB.py │ │ │ └── GroundingDINO_SwinT_OGC.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── cocogrounding_eval.py │ │ │ └── transforms.py │ │ ├── models │ │ │ ├── GroundingDINO │ │ │ │ ├── __init__.py │ │ │ │ ├── backbone │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── backbone.py │ │ │ │ │ ├── position_encoding.py │ │ │ │ │ └── swin_transformer.py │ │ │ │ ├── bertwarper.py │ │ │ │ ├── csrc │ │ │ │ │ ├── MsDeformAttn │ │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ ├── ms_deform_attn_cpu.h │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── cuda_version.cu │ │ │ │ │ └── vision.cpp │ │ │ │ ├── fuse_modules.py │ │ │ │ ├── groundingdino.py │ │ │ │ ├── ms_deform_attn.py │ │ │ │ ├── transformer.py │ │ │ │ ├── transformer_vanilla.py │ │ │ │ └── utils.py │ │ │ ├── __init__.py │ │ │ └── registry.py │ │ ├── util │ │ │ ├── __init__.py │ │ │ ├── box_ops.py │ │ │ ├── get_tokenlizer.py │ │ │ ├── inference.py │ │ │ ├── logger.py │ │ │ ├── misc.py │ │ │ ├── slconfig.py │ │ │ ├── slio.py │ │ │ ├── time_counter.py │ │ │ ├── utils.py │ │ │ ├── visualizer.py │ │ │ └── vl_utils.py │ │ └── version.py │ ├── requirements.txt 
│ └── setup.py ├── README.md ├── logs │ ├── grounded_hqsam.log │ └── grounded_sam.log ├── sam2 ├── segment_anything │ ├── __init__.py │ ├── automatic_mask_generator.py │ ├── build_sam.py │ ├── build_sam_hq.py │ ├── modeling │ │ ├── __init__.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── mask_decoder.py │ │ ├── mask_decoder_hq.py │ │ ├── prompt_encoder.py │ │ ├── sam.py │ │ └── transformer.py │ ├── predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── onnx.py │ │ └── transforms.py ├── test_ap_on_seginw.py ├── test_ap_on_seginw_sam2.py ├── test_seginw.sh ├── test_seginw_hq.sh ├── test_seginw_sam2.sh └── test_seginw_sam_hq2.sh ├── segment_anything ├── __init__.py ├── automatic_mask_generator.py ├── build_sam.py ├── build_sam_baseline.py ├── modeling │ ├── __init__.py │ ├── common.py │ ├── image_encoder.py │ ├── mask_decoder.py │ ├── mask_decoder_hq.py │ ├── prompt_encoder.py │ ├── sam.py │ ├── tiny_vit_sam.py │ └── transformer.py ├── predictor.py └── utils │ ├── __init__.py │ ├── amg.py │ ├── onnx.py │ └── transforms.py ├── setup.cfg ├── setup.py ├── train ├── README.md ├── segment_anything_training │ ├── __init__.py │ ├── build_sam.py │ ├── modeling │ │ ├── __init__.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── mask_decoder.py │ │ ├── prompt_encoder.py │ │ ├── sam.py │ │ └── transformer.py │ └── utils │ │ ├── __init__.py │ │ └── transforms.py ├── train.py └── utils │ ├── dataloader.py │ ├── loss_mask.py │ └── misc.py └── visual_demo ├── 1.gif ├── 2.gif ├── 3.gif ├── 4.gif ├── 5.gif └── 6.gif /demo/demo_hqsam_light.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | from segment_anything import sam_model_registry, SamPredictor 6 | import os 7 | 8 | def show_mask(mask, ax, random_color=False): 9 | if random_color: 10 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 11 | else: 12 | color = np.array([30/255, 144/255, 255/255, 0.6]) 13 | h, w = mask.shape[-2:] 14 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 15 | ax.imshow(mask_image) 16 | 17 | def show_points(coords, labels, ax, marker_size=375): 18 | pos_points = coords[labels==1] 19 | neg_points = coords[labels==0] 20 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 21 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 22 | 23 | def show_box(box, ax): 24 | x0, y0 = box[0], box[1] 25 | w, h = box[2] - box[0], box[3] - box[1] 26 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 27 | 28 | 29 | def show_res(masks, scores, input_point, input_label, input_box, filename, image): 30 | for i, (mask, score) in enumerate(zip(masks, scores)): 31 | plt.figure(figsize=(10,10)) 32 | plt.imshow(image) 33 | show_mask(mask, plt.gca()) 34 | if input_box is not None: 35 | box = input_box[i] 36 | show_box(box, plt.gca()) 37 | if (input_point is not None) and (input_label is not None): 38 | show_points(input_point, input_label, plt.gca()) 39 | 40 | print(f"Score: {score:.3f}") 41 | plt.axis('off') 42 | plt.savefig(filename+'_'+str(i)+'.png',bbox_inches='tight',pad_inches=-0.1) 43 | plt.close() 44 | 45 | def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): 46 | plt.figure(figsize=(10, 10)) 47 | plt.imshow(image) 48 | for mask in masks: 49 | 
show_mask(mask, plt.gca(), random_color=True)
50 |     for box in input_box:
51 |         show_box(box, plt.gca())
52 |     for score in scores:
53 |         print(f"Score: {score:.3f}")
54 |     plt.axis('off')
55 |     plt.savefig(filename +'.png',bbox_inches='tight',pad_inches=-0.1)
56 |     plt.close()
57 |
58 |
59 | if __name__ == "__main__":
60 |     sam_checkpoint = "./pretrained_checkpoint/sam_hq_vit_tiny.pth"
61 |     model_type = "vit_tiny"
62 |
63 |     device = "cuda"
64 |     sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
65 |     sam.to(device=device)
66 |     sam.eval()
67 |     predictor = SamPredictor(sam)
68 |
69 |
70 |     image = cv2.imread('demo/input_imgs/dog.jpg')
71 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
72 |     predictor.set_image(image)
73 |     # hq_token_only: False means use the hq output to correct the SAM output.
74 |     # True means use the hq output only.
75 |     # Default: False
76 |     hq_token_only = False
77 |     # For the best visualization results on images containing multiple objects (like typical COCO images), we suggest setting hq_token_only=False.
78 |     # For images containing a single object, we suggest setting hq_token_only=True.
79 |     # For quantitative evaluation on COCO/YTVOS/DAVIS/UVO/LVIS etc., we set hq_token_only=False.
80 |
81 |     # box prompt
82 |     input_box = np.array([[784,500,1789,1000]])
83 |     input_point, input_label = None, None
84 |
85 |     masks, scores, logits = predictor.predict(
86 |         point_coords=input_point,
87 |         point_labels=input_label,
88 |         box = input_box,
89 |         multimask_output=False,
90 |         hq_token_only=hq_token_only,
91 |     )
92 |     result_path = 'demo/hq_sam_tiny_result/'
93 |     os.makedirs(result_path, exist_ok=True)
94 |     show_res(masks,scores,input_point, input_label, input_box, result_path + 'dog', image)
95 |
96 |
97 |
98 |     image = cv2.imread('demo/input_imgs/example3.png')
99 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
100 |     predictor.set_image(image)
101 |     hq_token_only = True
102 |     # point prompt
103 |     input_point = np.array([[221,482],[498,633],[750,379]])
104 |     input_label = np.ones(input_point.shape[0])
105 |     input_box = None
106 |
107 |     masks, scores, logits = predictor.predict(
108 |         point_coords=input_point,
109 |         point_labels=input_label,
110 |         box = input_box,
111 |         multimask_output=False,
112 |         hq_token_only=hq_token_only,
113 |     )
114 |     show_res(masks,scores,input_point, input_label, input_box, result_path + 'example3', image)
115 |
116 |
117 |     image = cv2.imread('demo/input_imgs/example7.png')
118 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
119 |     predictor.set_image(image)
120 |     hq_token_only = False
121 |     # multi box prompt
122 |     input_box = torch.tensor([[45,260,515,470], [310,228,424,296]],device=predictor.device)
123 |     transformed_box = predictor.transform.apply_boxes_torch(input_box, image.shape[:2])
124 |     input_point, input_label = None, None
125 |     masks, scores, logits = predictor.predict_torch(
126 |         point_coords=input_point,
127 |         point_labels=input_label,
128 |         boxes=transformed_box,
129 |         multimask_output=False,
130 |         hq_token_only=hq_token_only,
131 |     )
132 |     masks = masks.squeeze(1).cpu().numpy()
133 |     scores = scores.squeeze(1).cpu().numpy()
134 |     input_box = input_box.cpu().numpy()
135 |     show_res_multi(masks, scores, input_point, input_label, input_box, result_path + 'example7', image)
136 |
137 |
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/demo/demo_sam.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import
matplotlib.pyplot as plt 4 | import cv2 5 | from segment_anything import sam_model_registry_baseline, SamPredictor 6 | import os 7 | 8 | def show_mask(mask, ax, random_color=False): 9 | if random_color: 10 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 11 | else: 12 | color = np.array([30/255, 144/255, 255/255, 0.6]) 13 | h, w = mask.shape[-2:] 14 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 15 | ax.imshow(mask_image) 16 | 17 | def show_points(coords, labels, ax, marker_size=375): 18 | pos_points = coords[labels==1] 19 | neg_points = coords[labels==0] 20 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 21 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 22 | 23 | def show_box(box, ax): 24 | x0, y0 = box[0], box[1] 25 | w, h = box[2] - box[0], box[3] - box[1] 26 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 27 | 28 | 29 | def show_res(masks, scores, input_point, input_label, input_box, filename, image): 30 | for i, (mask, score) in enumerate(zip(masks, scores)): 31 | plt.figure(figsize=(10,10)) 32 | plt.imshow(image) 33 | show_mask(mask, plt.gca()) 34 | if input_box is not None: 35 | box = input_box[i] 36 | show_box(box, plt.gca()) 37 | if (input_point is not None) and (input_label is not None): 38 | show_points(input_point, input_label, plt.gca()) 39 | 40 | print(f"Score: {score:.3f}") 41 | plt.axis('off') 42 | plt.savefig(filename+'_'+str(i)+'.png',bbox_inches='tight',pad_inches=-0.1) 43 | plt.close() 44 | 45 | def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): 46 | plt.figure(figsize=(10, 10)) 47 | plt.imshow(image) 48 | for mask in masks: 49 | show_mask(mask, plt.gca(), random_color=True) 50 | for box in input_box: 51 | show_box(box, plt.gca()) 52 | for score in scores: 53 | print(f"Score: {score:.3f}") 54 | plt.axis('off') 55 | plt.savefig(filename +'.png',bbox_inches='tight',pad_inches=-0.1) 56 | plt.close() 57 | 58 | if __name__ == "__main__": 59 | sam_checkpoint = "./pretrained_checkpoint/sam_vit_l_0b3195.pth" 60 | model_type = "vit_l" 61 | device = "cuda" 62 | sam = sam_model_registry_baseline[model_type](checkpoint=sam_checkpoint) 63 | sam.to(device=device) 64 | predictor = SamPredictor(sam) 65 | 66 | for i in range(8): 67 | print("image: ",i) 68 | image = cv2.imread('demo/input_imgs/example'+str(i)+'.png') 69 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 70 | predictor.set_image(image) 71 | 72 | if i==0: 73 | input_box = np.array([[4,13,1007,1023]]) 74 | input_point, input_label = None, None 75 | elif i==1: 76 | input_box = np.array([[306, 132, 925, 893]]) 77 | input_point, input_label = None, None 78 | elif i==2: 79 | input_point = np.array([[495,518],[217,140]]) 80 | input_label = np.ones(input_point.shape[0]) 81 | input_box = None 82 | elif i==3: 83 | input_point = np.array([[221,482],[498,633],[750,379]]) 84 | input_label = np.ones(input_point.shape[0]) 85 | input_box = None 86 | elif i==4: 87 | input_box = np.array([[64,76,940,919]]) 88 | input_point, input_label = None, None 89 | elif i==5: 90 | input_point = np.array([[373,363], [452, 575]]) 91 | input_label = np.ones(input_point.shape[0]) 92 | input_box = None 93 | elif i==6: 94 | input_box = np.array([[181, 196, 757, 495]]) 95 | input_point, input_label = None, None 96 | elif i==7: 97 | # multi box input 98 | input_box = 
torch.tensor([[45,260,515,470], [310,228,424,296]],device=predictor.device) 99 | transformed_box = predictor.transform.apply_boxes_torch(input_box, image.shape[:2]) 100 | input_point, input_label = None, None 101 | 102 | batch_box = False if input_box is None else len(input_box)>1 103 | result_path = 'demo/baseline_sam_result/' 104 | os.makedirs(result_path, exist_ok=True) 105 | 106 | if not batch_box: 107 | masks, scores, logits = predictor.predict( 108 | point_coords=input_point, 109 | point_labels=input_label, 110 | box = input_box, 111 | multimask_output=False, 112 | ) 113 | show_res(masks,scores,input_point, input_label, input_box, result_path + 'example'+str(i), image) 114 | else: 115 | masks, scores, logits = predictor.predict_torch( 116 | point_coords=input_point, 117 | point_labels=input_label, 118 | boxes=transformed_box, 119 | multimask_output=False, 120 | ) 121 | masks = masks.squeeze(1).cpu().numpy() 122 | scores = scores.squeeze(1).cpu().numpy() 123 | input_box = input_box.cpu().numpy() 124 | show_res_multi(masks, scores, input_point, input_label, input_box, result_path + 'example'+str(i), image) 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /demo/input_imgs/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/dog.jpg -------------------------------------------------------------------------------- /demo/input_imgs/example0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example0.png -------------------------------------------------------------------------------- /demo/input_imgs/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example1.png -------------------------------------------------------------------------------- /demo/input_imgs/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example2.png -------------------------------------------------------------------------------- /demo/input_imgs/example3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example3.png -------------------------------------------------------------------------------- /demo/input_imgs/example4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example4.png -------------------------------------------------------------------------------- /demo/input_imgs/example5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example5.png -------------------------------------------------------------------------------- /demo/input_imgs/example6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example6.png -------------------------------------------------------------------------------- /demo/input_imgs/example7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example7.png -------------------------------------------------------------------------------- /demo/input_imgs/example8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example8.png -------------------------------------------------------------------------------- /figs/coco_vis_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/coco_vis_comp.png -------------------------------------------------------------------------------- /figs/davis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/davis.png -------------------------------------------------------------------------------- /figs/points_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/points_comp.png -------------------------------------------------------------------------------- /figs/sam-hf-framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/sam-hf-framework.png -------------------------------------------------------------------------------- /figs/sam_variants_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/sam_variants_comp.png -------------------------------------------------------------------------------- /figs/sam_vs_hqsam_backbones.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/sam_vs_hqsam_backbones.png -------------------------------------------------------------------------------- /figs/ytvis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/ytvis.png -------------------------------------------------------------------------------- /sam-hq2/assets/hq-sam2-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/assets/hq-sam2-results.png -------------------------------------------------------------------------------- /sam-hq2/checkpoints/download_ckpts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Meta Platforms, Inc. and affiliates. 4 | # All rights reserved. 
5 |
6 | # This source code is licensed under the license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | # Use either wget or curl to download the checkpoints
10 | if command -v wget &> /dev/null; then
11 |     CMD="wget"
12 | elif command -v curl &> /dev/null; then
13 |     CMD="curl -L -O"
14 | else
15 |     echo "Please install wget or curl to download the checkpoints."
16 |     exit 1
17 | fi
18 |
19 | # Define the URLs for SAM 2 checkpoints
20 | # SAM2_BASE_URL="https://dl.fbaipublicfiles.com/segment_anything_2/072824"
21 | # sam2_hiera_t_url="${SAM2_BASE_URL}/sam2_hiera_tiny.pt"
22 | # sam2_hiera_s_url="${SAM2_BASE_URL}/sam2_hiera_small.pt"
23 | # sam2_hiera_b_plus_url="${SAM2_BASE_URL}/sam2_hiera_base_plus.pt"
24 | # sam2_hiera_l_url="${SAM2_BASE_URL}/sam2_hiera_large.pt"
25 |
26 | # Download each of the four checkpoints using wget
27 | # echo "Downloading sam2_hiera_tiny.pt checkpoint..."
28 | # $CMD $sam2_hiera_t_url || { echo "Failed to download checkpoint from $sam2_hiera_t_url"; exit 1; }
29 |
30 | # echo "Downloading sam2_hiera_small.pt checkpoint..."
31 | # $CMD $sam2_hiera_s_url || { echo "Failed to download checkpoint from $sam2_hiera_s_url"; exit 1; }
32 |
33 | # echo "Downloading sam2_hiera_base_plus.pt checkpoint..."
34 | # $CMD $sam2_hiera_b_plus_url || { echo "Failed to download checkpoint from $sam2_hiera_b_plus_url"; exit 1; }
35 |
36 | # echo "Downloading sam2_hiera_large.pt checkpoint..."
37 | # $CMD $sam2_hiera_l_url || { echo "Failed to download checkpoint from $sam2_hiera_l_url"; exit 1; }
38 |
39 | # Define the URLs for SAM 2.1 checkpoints
40 | #SAM2p1_BASE_URL="https://dl.fbaipublicfiles.com/segment_anything_2/092824"
41 | #sam2p1_hiera_t_url="${SAM2p1_BASE_URL}/sam2.1_hiera_tiny.pt"
42 | #sam2p1_hiera_s_url="${SAM2p1_BASE_URL}/sam2.1_hiera_small.pt"
43 | #sam2p1_hiera_b_plus_url="${SAM2p1_BASE_URL}/sam2.1_hiera_base_plus.pt"
44 | #sam2p1_hiera_l_url="${SAM2p1_BASE_URL}/sam2.1_hiera_large.pt"
45 | # sam2p1_hq_hiera_l_url="https://huggingface.co/mqye/sam-hq2/resolve/main/sam2.1_hq_hiera_large.pt?download=true"
46 | sam2p1_hq_hiera_l_url="https://huggingface.co/lkeab/hq-sam/resolve/main/sam2.1_hq_hiera_large.pt?download=true"
47 | # SAM 2.1 checkpoints
48 |
49 | echo "Downloading sam2.1_hq_hiera_large.pt checkpoint..."
50 | $CMD $sam2p1_hq_hiera_l_url || { echo "Failed to download checkpoint from $sam2p1_hq_hiera_l_url"; exit 1; }
51 |
52 | mv "sam2.1_hq_hiera_large.pt?download=true" sam2.1_hq_hiera_large.pt
53 |
54 | echo "HQ-SAM-2 checkpoint downloaded successfully."
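
# Alternative (untested sketch): the same checkpoint can also be fetched with the
# Hugging Face CLI, assuming huggingface_hub[cli] is installed:
#   pip install -U "huggingface_hub[cli]"
#   huggingface-cli download lkeab/hq-sam sam2.1_hq_hiera_large.pt --local-dir .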
55 | -------------------------------------------------------------------------------- /sam-hq2/demo/demo_hqsam2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | from sam2.build_sam import build_sam2 6 | from sam2.sam2_image_predictor import SAM2ImagePredictor 7 | import os 8 | 9 | def show_mask(mask, ax, random_color=False): 10 | if random_color: 11 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 12 | else: 13 | color = np.array([30/255, 144/255, 255/255, 0.6]) 14 | h, w = mask.shape[-2:] 15 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 16 | ax.imshow(mask_image) 17 | 18 | def show_points(coords, labels, ax, marker_size=375): 19 | pos_points = coords[labels==1] 20 | neg_points = coords[labels==0] 21 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 22 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 23 | 24 | def show_box(box, ax): 25 | x0, y0 = box[0], box[1] 26 | w, h = box[2] - box[0], box[3] - box[1] 27 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 28 | 29 | 30 | def show_res(masks, scores, input_point, input_label, input_box, filename, image): 31 | for i, (mask, score) in enumerate(zip(masks, scores)): 32 | plt.figure(figsize=(10,10)) 33 | plt.imshow(image) 34 | show_mask(mask, plt.gca()) 35 | if input_box is not None: 36 | box = input_box[i] 37 | show_box(box, plt.gca()) 38 | if (input_point is not None) and (input_label is not None): 39 | show_points(input_point, input_label, plt.gca()) 40 | 41 | print(f"Score: {score:.3f}") 42 | plt.axis('off') 43 | plt.savefig(filename+'_'+str(i)+'.png',bbox_inches='tight',pad_inches=-0.1) 44 | plt.close() 45 | 46 | def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): 47 | plt.figure(figsize=(10, 10)) 48 | plt.imshow(image) 49 | for mask in masks: 50 | show_mask(mask, plt.gca(), random_color=True) 51 | for box in input_box: 52 | show_box(box, plt.gca()) 53 | for score in scores: 54 | print(f"Score: {score:.3f}") 55 | plt.axis('off') 56 | plt.savefig(filename +'.png',bbox_inches='tight',pad_inches=-0.1) 57 | plt.close() 58 | 59 | 60 | if __name__ == "__main__": 61 | checkpoint = "./checkpoints/sam2.1_hq_hiera_large.pt" 62 | model_cfg = "configs/sam2.1/sam2.1_hq_hiera_l.yaml" 63 | predictor = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint)) 64 | 65 | for i in range(1,5): 66 | print("image: ",i) 67 | # hq_token_only: False means use hq output to correct SAM output. 68 | # True means use hq output only. 
69 |         # Default: False
70 |         hq_token_only = False
71 |         # For the best visualization results on images containing multiple objects (like typical COCO images), we suggest setting hq_token_only=False.
72 |         # For images containing a single object, we suggest setting hq_token_only=True.
73 |         # For quantitative evaluation on COCO/YTVOS/DAVIS/UVO/LVIS etc., we set hq_token_only=False.
74 |
75 |         image = cv2.imread('./demo/input_images/example'+str(i)+'.png')
76 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
77 |         predictor.set_image(image)
78 |
79 |         if i==1:
80 |             input_box = np.array([[306, 132, 925, 893]])
81 |             input_point, input_label = None, None
82 |         elif i==2:
83 |             input_point = np.array([[495,518],[217,140]])
84 |             input_label = np.ones(input_point.shape[0])
85 |             input_box = None
86 |         elif i==3:
87 |             input_box = np.array([[64,76,940,919]])
88 |             input_point, input_label = None, None
89 |         elif i==4:
90 |             # multi box input
91 |             input_box = torch.tensor([[45,260,515,470], [310,228,424,296]],device=predictor.device)
92 |             # transformed_box = predictor.transform.apply_boxes_torch(input_box, image.shape[:2])
93 |             input_point, input_label = None, None
94 |
95 |         batch_box = False if input_box is None else len(input_box)>1
96 |         result_path = 'demo/hq_sam_result_vis/'
97 |         os.makedirs(result_path, exist_ok=True)
98 |
99 |         with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
100 |             masks, scores, logits = predictor.predict(point_coords=input_point,
101 |                 point_labels=input_label,
102 |                 box=input_box,
103 |                 multimask_output=False, hq_token_only=hq_token_only)
104 |
105 |         if not batch_box:
106 |             show_res(masks,scores,input_point, input_label, input_box, result_path + 'example'+str(i), image)
107 |         else:
108 |             masks = masks.squeeze(1)
109 |             scores = scores.squeeze(1)
110 |             input_box = input_box.cpu().numpy()
111 |             show_res_multi(masks, scores, input_point, input_label, input_box, result_path + 'example'+str(i), image)
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example1.png
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example2.png
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example3.png
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example4.png
--------------------------------------------------------------------------------
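The demo_hqsam2.py script above only saves matplotlib overlays. If the raw masks are also needed on disk (e.g. for later evaluation), a minimal sketch along the following lines should work; the helper name and output naming are illustrative, and it assumes masks is the (N, H, W) array returned by predictor.predict with multimask_output=False:

import cv2
import numpy as np

def save_binary_masks(masks: np.ndarray, prefix: str) -> None:
    # Write each (H, W) mask as an 8-bit black-and-white PNG,
    # scaling boolean / {0, 1} values up to {0, 255}.
    for idx, mask in enumerate(masks):
        cv2.imwrite(f"{prefix}_{idx}_mask.png", mask.astype(np.uint8) * 255)

--------------------------------------------------------------------------------
/sam-hq2/notebooks/images/cars.jpg:
--------------------------------------------------------------------------------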
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/images/cars.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/images/groceries.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/images/groceries.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/images/truck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/images/truck.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom.mp4 -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00000.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00001.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00002.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00003.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00004.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00005.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00006.jpg -------------------------------------------------------------------------------- 
/sam-hq2/notebooks/videos/bedroom/00007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00007.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00008.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00008.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00009.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00009.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00010.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00011.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00012.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00012.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00013.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00013.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00014.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00014.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00015.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00015.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00016.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00016.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00017.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00017.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00018.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00018.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00019.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00019.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00020.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00020.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00021.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00021.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00022.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00023.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00023.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00024.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00024.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00025.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00025.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00026.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00026.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00027.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00027.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00028.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00028.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00029.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00029.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00030.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00030.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00031.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00031.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00032.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00032.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00033.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00033.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00034.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00034.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00035.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00035.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00036.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00036.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00037.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00037.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00038.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00038.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00039.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00040.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00040.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00041.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00041.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00042.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00042.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00043.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00043.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00044.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00044.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00045.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00045.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00046.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00047.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00047.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00048.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00048.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00049.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00049.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00050.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00050.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00051.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00051.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00052.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00052.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00053.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00053.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00054.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00054.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00055.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00055.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00056.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00056.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00057.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00057.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00058.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00058.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00059.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00059.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00060.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00060.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00061.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00061.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00062.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00062.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00063.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00063.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00064.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00064.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00065.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00065.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00066.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00066.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00067.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00067.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00068.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00068.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00069.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00069.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00070.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00070.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00071.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00071.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00072.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00072.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00073.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00073.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00074.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00074.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00075.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00075.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00076.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00076.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00077.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00077.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00078.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00078.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00079.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00079.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00080.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00080.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00081.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00081.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00082.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00082.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00083.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00083.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00084.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00084.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00085.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00085.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00086.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00086.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00087.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00087.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00088.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00088.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00089.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00089.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00090.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00090.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00091.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00091.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00092.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00092.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00093.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00093.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00094.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00094.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00095.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00095.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00096.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00096.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00097.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00097.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00098.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00098.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00099.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00099.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00100.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00100.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00101.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00102.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00102.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00103.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00103.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00104.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00104.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00105.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00105.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00106.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00107.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00107.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00108.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00108.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00109.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00109.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00110.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00110.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00111.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00111.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00112.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00112.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00113.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00113.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00114.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00114.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00115.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00115.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00116.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00116.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00117.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00117.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00118.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00118.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00119.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00119.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00120.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00120.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00121.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00121.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00122.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00122.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00123.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00123.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00124.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00124.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00125.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00125.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00126.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00127.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00127.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00128.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00129.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00129.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00130.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00130.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00131.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00131.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00132.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00132.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00133.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00133.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00134.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00134.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00135.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00135.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00136.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00136.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00137.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00137.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00138.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00138.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00139.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00139.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00140.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00140.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00141.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00141.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00142.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00142.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00143.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00143.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00144.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00144.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00145.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00145.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00146.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00146.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00147.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00147.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00148.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00149.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00149.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00150.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00151.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00151.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00152.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00152.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00153.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00153.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00154.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00154.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00155.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00155.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00156.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00156.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00157.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00157.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00158.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00158.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00159.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00159.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00160.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00160.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00161.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00161.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00162.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00162.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00163.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00163.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00164.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00164.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00165.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00165.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00166.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00167.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00167.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00168.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00168.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00169.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00169.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00170.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00170.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00171.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00171.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00172.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00172.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00173.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00173.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00174.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00174.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00175.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00175.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00176.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00176.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00177.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00177.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00178.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00178.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00179.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00179.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00180.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00180.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00181.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00181.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00182.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00182.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00183.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00184.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00184.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00185.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00185.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00186.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00186.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00187.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00187.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00188.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00188.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00189.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00189.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00190.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00190.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00191.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00191.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00192.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00192.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00193.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00193.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00194.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00194.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00195.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00195.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00196.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00196.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00197.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00197.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00198.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00198.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00199.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00199.jpg -------------------------------------------------------------------------------- /sam-hq2/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=61.0", 4 | "torch>=2.3.1", 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /sam-hq2/sam2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from hydra import initialize_config_module 8 | from hydra.core.global_hydra import GlobalHydra 9 | 10 | if not GlobalHydra.instance().is_initialized(): 11 | initialize_config_module("sam2", version_base="1.2") 12 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_b+.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 112 12 | num_heads: 2 13 | neck: 14 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 15 | position_encoding: 16 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 17 | num_pos_feats: 256 18 | normalize: true 19 | scale: null 20 | temperature: 10000 21 | d_model: 256 22 | backbone_channel_list: [896, 448, 224, 112] 23 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 24 | fpn_interp_model: nearest 25 | 26 | memory_attention: 27 | _target_: sam2.modeling.memory_attention.MemoryAttention 28 | d_model: 256 29 | pos_enc_at_input: true 30 | layer: 31 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 32 | activation: relu 33 | dim_feedforward: 2048 34 | dropout: 0.1 35 | pos_enc_at_attn: false 36 | self_attention: 37 | _target_: sam2.modeling.sam.transformer.RoPEAttention 38 | rope_theta: 10000.0 39 | feat_sizes: [32, 32] 40 | embedding_dim: 256 41 | num_heads: 1 42 | downsample_rate: 1 43 | dropout: 0.1 44 | d_model: 256 45 | pos_enc_at_cross_attn_keys: true 46 | pos_enc_at_cross_attn_queries: false 47 | cross_attention: 48 | _target_: sam2.modeling.sam.transformer.RoPEAttention 49 | rope_theta: 10000.0 50 | feat_sizes: [32, 32] 51 | rope_k_repeat: True 52 | embedding_dim: 256 53 | num_heads: 1 54 | downsample_rate: 1 55 | dropout: 0.1 56 | kv_in_dim: 64 57 | num_layers: 4 58 | 59 | memory_encoder: 60 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 61 | out_dim: 64 62 | position_encoding: 63 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 64 | num_pos_feats: 64 65 | normalize: true 66 | scale: null 67 | temperature: 10000 68 | mask_downsampler: 69 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 70 | kernel_size: 3 71 | stride: 2 72 | padding: 1 73 | fuser: 74 | _target_: sam2.modeling.memory_encoder.Fuser 75 | layer: 76 | _target_: sam2.modeling.memory_encoder.CXBlock 77 | dim: 256 78 | kernel_size: 7 79 | padding: 3 80 | layer_scale_init_value: 1e-6 81 | use_dwconv: True # depth-wise convs 82 | num_layers: 2 83 | 84 | num_maskmem: 7 85 | image_size: 1024 86 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 87 | sigmoid_scale_for_mem_enc: 20.0 88 | sigmoid_bias_for_mem_enc: -10.0 89 | use_mask_input_as_output_without_sam: true 90 | # Memory 91 | directly_add_no_mem_embed: true 92 | no_obj_embed_spatial: true 93 | # use high-resolution feature map in the SAM mask decoder 94 | use_high_res_features_in_sam: true 95 | # output 3 masks on the first click on initial conditioning frames 96 | multimask_output_in_sam: true 97 | # SAM heads 98 | iou_prediction_use_sigmoid: True 99 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 100 | use_obj_ptrs_in_encoder: true 101 | add_tpos_enc_to_obj_ptrs: 
true 102 | proj_tpos_enc_in_obj_ptrs: true 103 | use_signed_tpos_enc_to_obj_ptrs: true 104 | only_obj_ptrs_in_the_past_for_eval: true 105 | # object occlusion prediction 106 | pred_obj_scores: true 107 | pred_obj_scores_mlp: true 108 | fixed_no_obj_ptr: true 109 | # multimask tracking settings 110 | multimask_output_for_tracking: true 111 | use_multimask_token_for_obj_ptr: true 112 | multimask_min_pt_num: 0 113 | multimask_max_pt_num: 1 114 | use_mlp_for_obj_ptr_proj: true 115 | # Compilation flag 116 | compile_image_encoder: False 117 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_l.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 144 12 | num_heads: 2 13 | stages: [2, 6, 36, 4] 14 | global_att_blocks: [23, 33, 43] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | window_spec: [8, 4, 16, 8] 17 | neck: 18 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 19 | position_encoding: 20 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 21 | num_pos_feats: 256 22 | normalize: true 23 | scale: null 24 | temperature: 10000 25 | d_model: 256 26 | backbone_channel_list: [1152, 576, 288, 144] 27 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 28 | fpn_interp_model: nearest 29 | 30 | memory_attention: 31 | _target_: sam2.modeling.memory_attention.MemoryAttention 32 | d_model: 256 33 | pos_enc_at_input: true 34 | layer: 35 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 36 | activation: relu 37 | dim_feedforward: 2048 38 | dropout: 0.1 39 | pos_enc_at_attn: false 40 | self_attention: 41 | _target_: sam2.modeling.sam.transformer.RoPEAttention 42 | rope_theta: 10000.0 43 | feat_sizes: [32, 32] 44 | embedding_dim: 256 45 | num_heads: 1 46 | downsample_rate: 1 47 | dropout: 0.1 48 | d_model: 256 49 | pos_enc_at_cross_attn_keys: true 50 | pos_enc_at_cross_attn_queries: false 51 | cross_attention: 52 | _target_: sam2.modeling.sam.transformer.RoPEAttention 53 | rope_theta: 10000.0 54 | feat_sizes: [32, 32] 55 | rope_k_repeat: True 56 | embedding_dim: 256 57 | num_heads: 1 58 | downsample_rate: 1 59 | dropout: 0.1 60 | kv_in_dim: 64 61 | num_layers: 4 62 | 63 | memory_encoder: 64 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 65 | out_dim: 64 66 | position_encoding: 67 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 68 | num_pos_feats: 64 69 | normalize: true 70 | scale: null 71 | temperature: 10000 72 | mask_downsampler: 73 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 74 | kernel_size: 3 75 | stride: 2 76 | padding: 1 77 | fuser: 78 | _target_: sam2.modeling.memory_encoder.Fuser 79 | layer: 80 | _target_: sam2.modeling.memory_encoder.CXBlock 81 | dim: 256 82 | kernel_size: 7 83 | padding: 3 84 | layer_scale_init_value: 1e-6 85 | use_dwconv: True # depth-wise convs 86 | num_layers: 2 87 | 88 | num_maskmem: 7 89 | image_size: 1024 90 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 91 | sigmoid_scale_for_mem_enc: 20.0 92 | sigmoid_bias_for_mem_enc: -10.0 93 | use_mask_input_as_output_without_sam: true 94 | # Memory 95 | directly_add_no_mem_embed: true 96 | 
no_obj_embed_spatial: true 97 | # use high-resolution feature map in the SAM mask decoder 98 | use_high_res_features_in_sam: true 99 | # output 3 masks on the first click on initial conditioning frames 100 | multimask_output_in_sam: true 101 | # SAM heads 102 | iou_prediction_use_sigmoid: True 103 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 104 | use_obj_ptrs_in_encoder: true 105 | add_tpos_enc_to_obj_ptrs: true 106 | proj_tpos_enc_in_obj_ptrs: true 107 | use_signed_tpos_enc_to_obj_ptrs: true 108 | only_obj_ptrs_in_the_past_for_eval: true 109 | # object occlusion prediction 110 | pred_obj_scores: true 111 | pred_obj_scores_mlp: true 112 | fixed_no_obj_ptr: true 113 | # multimask tracking settings 114 | multimask_output_for_tracking: true 115 | use_multimask_token_for_obj_ptr: true 116 | multimask_min_pt_num: 0 117 | multimask_max_pt_num: 1 118 | use_mlp_for_obj_ptr_proj: true 119 | # Compilation flag 120 | compile_image_encoder: False 121 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_s.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 96 12 | num_heads: 1 13 | stages: [1, 2, 11, 2] 14 | global_att_blocks: [7, 10, 13] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | neck: 17 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 18 | position_encoding: 19 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 20 | num_pos_feats: 256 21 | normalize: true 22 | scale: null 23 | temperature: 10000 24 | d_model: 256 25 | backbone_channel_list: [768, 384, 192, 96] 26 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 27 | fpn_interp_model: nearest 28 | 29 | memory_attention: 30 | _target_: sam2.modeling.memory_attention.MemoryAttention 31 | d_model: 256 32 | pos_enc_at_input: true 33 | layer: 34 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 35 | activation: relu 36 | dim_feedforward: 2048 37 | dropout: 0.1 38 | pos_enc_at_attn: false 39 | self_attention: 40 | _target_: sam2.modeling.sam.transformer.RoPEAttention 41 | rope_theta: 10000.0 42 | feat_sizes: [32, 32] 43 | embedding_dim: 256 44 | num_heads: 1 45 | downsample_rate: 1 46 | dropout: 0.1 47 | d_model: 256 48 | pos_enc_at_cross_attn_keys: true 49 | pos_enc_at_cross_attn_queries: false 50 | cross_attention: 51 | _target_: sam2.modeling.sam.transformer.RoPEAttention 52 | rope_theta: 10000.0 53 | feat_sizes: [32, 32] 54 | rope_k_repeat: True 55 | embedding_dim: 256 56 | num_heads: 1 57 | downsample_rate: 1 58 | dropout: 0.1 59 | kv_in_dim: 64 60 | num_layers: 4 61 | 62 | memory_encoder: 63 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 64 | out_dim: 64 65 | position_encoding: 66 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 67 | num_pos_feats: 64 68 | normalize: true 69 | scale: null 70 | temperature: 10000 71 | mask_downsampler: 72 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 73 | kernel_size: 3 74 | stride: 2 75 | padding: 1 76 | fuser: 77 | _target_: sam2.modeling.memory_encoder.Fuser 78 | layer: 79 | _target_: sam2.modeling.memory_encoder.CXBlock 80 | dim: 256 81 | kernel_size: 7 82 | 
padding: 3 83 | layer_scale_init_value: 1e-6 84 | use_dwconv: True # depth-wise convs 85 | num_layers: 2 86 | 87 | num_maskmem: 7 88 | image_size: 1024 89 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 90 | sigmoid_scale_for_mem_enc: 20.0 91 | sigmoid_bias_for_mem_enc: -10.0 92 | use_mask_input_as_output_without_sam: true 93 | # Memory 94 | directly_add_no_mem_embed: true 95 | no_obj_embed_spatial: true 96 | # use high-resolution feature map in the SAM mask decoder 97 | use_high_res_features_in_sam: true 98 | # output 3 masks on the first click on initial conditioning frames 99 | multimask_output_in_sam: true 100 | # SAM heads 101 | iou_prediction_use_sigmoid: True 102 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 103 | use_obj_ptrs_in_encoder: true 104 | add_tpos_enc_to_obj_ptrs: true 105 | proj_tpos_enc_in_obj_ptrs: true 106 | use_signed_tpos_enc_to_obj_ptrs: true 107 | only_obj_ptrs_in_the_past_for_eval: true 108 | # object occlusion prediction 109 | pred_obj_scores: true 110 | pred_obj_scores_mlp: true 111 | fixed_no_obj_ptr: true 112 | # multimask tracking settings 113 | multimask_output_for_tracking: true 114 | use_multimask_token_for_obj_ptr: true 115 | multimask_min_pt_num: 0 116 | multimask_max_pt_num: 1 117 | use_mlp_for_obj_ptr_proj: true 118 | # Compilation flag 119 | compile_image_encoder: False 120 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_t.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 96 12 | num_heads: 1 13 | stages: [1, 2, 7, 2] 14 | global_att_blocks: [5, 7, 9] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | neck: 17 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 18 | position_encoding: 19 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 20 | num_pos_feats: 256 21 | normalize: true 22 | scale: null 23 | temperature: 10000 24 | d_model: 256 25 | backbone_channel_list: [768, 384, 192, 96] 26 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 27 | fpn_interp_model: nearest 28 | 29 | memory_attention: 30 | _target_: sam2.modeling.memory_attention.MemoryAttention 31 | d_model: 256 32 | pos_enc_at_input: true 33 | layer: 34 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 35 | activation: relu 36 | dim_feedforward: 2048 37 | dropout: 0.1 38 | pos_enc_at_attn: false 39 | self_attention: 40 | _target_: sam2.modeling.sam.transformer.RoPEAttention 41 | rope_theta: 10000.0 42 | feat_sizes: [32, 32] 43 | embedding_dim: 256 44 | num_heads: 1 45 | downsample_rate: 1 46 | dropout: 0.1 47 | d_model: 256 48 | pos_enc_at_cross_attn_keys: true 49 | pos_enc_at_cross_attn_queries: false 50 | cross_attention: 51 | _target_: sam2.modeling.sam.transformer.RoPEAttention 52 | rope_theta: 10000.0 53 | feat_sizes: [32, 32] 54 | rope_k_repeat: True 55 | embedding_dim: 256 56 | num_heads: 1 57 | downsample_rate: 1 58 | dropout: 0.1 59 | kv_in_dim: 64 60 | num_layers: 4 61 | 62 | memory_encoder: 63 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 64 | out_dim: 64 65 | position_encoding: 66 | _target_: 
sam2.modeling.position_encoding.PositionEmbeddingSine 67 | num_pos_feats: 64 68 | normalize: true 69 | scale: null 70 | temperature: 10000 71 | mask_downsampler: 72 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 73 | kernel_size: 3 74 | stride: 2 75 | padding: 1 76 | fuser: 77 | _target_: sam2.modeling.memory_encoder.Fuser 78 | layer: 79 | _target_: sam2.modeling.memory_encoder.CXBlock 80 | dim: 256 81 | kernel_size: 7 82 | padding: 3 83 | layer_scale_init_value: 1e-6 84 | use_dwconv: True # depth-wise convs 85 | num_layers: 2 86 | 87 | num_maskmem: 7 88 | image_size: 1024 89 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 90 | # SAM decoder 91 | sigmoid_scale_for_mem_enc: 20.0 92 | sigmoid_bias_for_mem_enc: -10.0 93 | use_mask_input_as_output_without_sam: true 94 | # Memory 95 | directly_add_no_mem_embed: true 96 | no_obj_embed_spatial: true 97 | # use high-resolution feature map in the SAM mask decoder 98 | use_high_res_features_in_sam: true 99 | # output 3 masks on the first click on initial conditioning frames 100 | multimask_output_in_sam: true 101 | # SAM heads 102 | iou_prediction_use_sigmoid: True 103 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 104 | use_obj_ptrs_in_encoder: true 105 | add_tpos_enc_to_obj_ptrs: true 106 | proj_tpos_enc_in_obj_ptrs: true 107 | use_signed_tpos_enc_to_obj_ptrs: true 108 | only_obj_ptrs_in_the_past_for_eval: true 109 | # object occlusion prediction 110 | pred_obj_scores: true 111 | pred_obj_scores_mlp: true 112 | fixed_no_obj_ptr: true 113 | # multimask tracking settings 114 | multimask_output_for_tracking: true 115 | use_multimask_token_for_obj_ptr: true 116 | multimask_min_pt_num: 0 117 | multimask_max_pt_num: 1 118 | use_mlp_for_obj_ptr_proj: true 119 | # Compilation flag 120 | # HieraT does not currently support compilation, should always be set to False 121 | compile_image_encoder: False 122 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hq_hiera_l.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_hq_base.SAM2HQBase 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 144 12 | num_heads: 2 13 | stages: [2, 6, 36, 4] 14 | global_att_blocks: [23, 33, 43] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | window_spec: [8, 4, 16, 8] 17 | neck: 18 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 19 | position_encoding: 20 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 21 | num_pos_feats: 256 22 | normalize: true 23 | scale: null 24 | temperature: 10000 25 | d_model: 256 26 | backbone_channel_list: [1152, 576, 288, 144] 27 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 28 | fpn_interp_model: nearest 29 | 30 | memory_attention: 31 | _target_: sam2.modeling.memory_attention.MemoryAttention 32 | d_model: 256 33 | pos_enc_at_input: true 34 | layer: 35 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 36 | activation: relu 37 | dim_feedforward: 2048 38 | dropout: 0.1 39 | pos_enc_at_attn: false 40 | self_attention: 41 | _target_: sam2.modeling.sam.transformer.RoPEAttention 42 | rope_theta: 10000.0 43 | feat_sizes: [32, 32] 44 | embedding_dim: 256 
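        # RoPE self-attention inside the memory-attention layers; the values here match the
        # non-HQ sam2.1 configs above, since the HQ variant swaps the model class (SAM2HQBase)
        # rather than these attention hyper-parameters.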
45 | num_heads: 1 46 | downsample_rate: 1 47 | dropout: 0.1 48 | d_model: 256 49 | pos_enc_at_cross_attn_keys: true 50 | pos_enc_at_cross_attn_queries: false 51 | cross_attention: 52 | _target_: sam2.modeling.sam.transformer.RoPEAttention 53 | rope_theta: 10000.0 54 | feat_sizes: [32, 32] 55 | rope_k_repeat: True 56 | embedding_dim: 256 57 | num_heads: 1 58 | downsample_rate: 1 59 | dropout: 0.1 60 | kv_in_dim: 64 61 | num_layers: 4 62 | 63 | memory_encoder: 64 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 65 | out_dim: 64 66 | position_encoding: 67 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 68 | num_pos_feats: 64 69 | normalize: true 70 | scale: null 71 | temperature: 10000 72 | mask_downsampler: 73 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 74 | kernel_size: 3 75 | stride: 2 76 | padding: 1 77 | fuser: 78 | _target_: sam2.modeling.memory_encoder.Fuser 79 | layer: 80 | _target_: sam2.modeling.memory_encoder.CXBlock 81 | dim: 256 82 | kernel_size: 7 83 | padding: 3 84 | layer_scale_init_value: 1e-6 85 | use_dwconv: True # depth-wise convs 86 | num_layers: 2 87 | 88 | num_maskmem: 7 89 | image_size: 1024 90 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 91 | sigmoid_scale_for_mem_enc: 20.0 92 | sigmoid_bias_for_mem_enc: -10.0 93 | use_mask_input_as_output_without_sam: true 94 | # Memory 95 | directly_add_no_mem_embed: true 96 | no_obj_embed_spatial: true 97 | # use high-resolution feature map in the SAM mask decoder 98 | use_high_res_features_in_sam: true 99 | # output 3 masks on the first click on initial conditioning frames 100 | multimask_output_in_sam: true 101 | # SAM heads 102 | iou_prediction_use_sigmoid: True 103 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 104 | use_obj_ptrs_in_encoder: true 105 | add_tpos_enc_to_obj_ptrs: true 106 | proj_tpos_enc_in_obj_ptrs: true 107 | use_signed_tpos_enc_to_obj_ptrs: true 108 | only_obj_ptrs_in_the_past_for_eval: true 109 | # object occlusion prediction 110 | pred_obj_scores: true 111 | pred_obj_scores_mlp: true 112 | fixed_no_obj_ptr: true 113 | # multimask tracking settings 114 | multimask_output_for_tracking: true 115 | use_multimask_token_for_obj_ptr: true 116 | multimask_min_pt_num: 0 117 | multimask_max_pt_num: 1 118 | use_mlp_for_obj_ptr_proj: true 119 | # Compilation flag 120 | compile_image_encoder: False 121 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
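# Usage sketch for the HQ-SAM 2 configs above. This is a hypothetical example: it assumes the
# fork keeps the upstream SAM 2 entry points `build_sam2` and `SAM2ImagePredictor`, and the
# checkpoint filename is illustrative (see checkpoints/download_ckpts.sh for the real names).
#
#   import torch
#   from sam2.build_sam import build_sam2
#   from sam2.sam2_image_predictor import SAM2ImagePredictor
#
#   model = build_sam2("configs/sam2.1/sam2.1_hq_hiera_l.yaml",
#                      "checkpoints/sam2.1_hq_hiera_large.pt")   # illustrative filename
#   predictor = SAM2ImagePredictor(model)
#   with torch.inference_mode():
#       predictor.set_image(image)                # HWC uint8 array or PIL image
#       masks, scores, low_res = predictor.predict(box=box)   # box/point prompts as in SAM 2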
6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/backbones/image_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import List, Optional 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ImageEncoder(nn.Module): 15 | def __init__( 16 | self, 17 | trunk: nn.Module, 18 | neck: nn.Module, 19 | scalp: int = 0, 20 | ): 21 | super().__init__() 22 | self.trunk = trunk 23 | self.neck = neck 24 | self.scalp = scalp 25 | assert ( 26 | self.trunk.channel_list == self.neck.backbone_channel_list 27 | ), f"Channel dims of trunk and neck do not match. Trunk: {self.trunk.channel_list}, neck: {self.neck.backbone_channel_list}" 28 | 29 | def forward(self, sample: torch.Tensor): 30 | # Forward through backbone 31 | features, pos = self.neck(self.trunk(sample)) 32 | if self.scalp > 0: 33 | # Discard the lowest resolution features 34 | features, pos = features[: -self.scalp], pos[: -self.scalp] 35 | 36 | src = features[-1] 37 | output = { 38 | "vision_features": src, 39 | "vision_pos_enc": pos, 40 | "backbone_fpn": features, 41 | } 42 | return output 43 | 44 | 45 | class FpnNeck(nn.Module): 46 | """ 47 | A modified variant of Feature Pyramid Network (FPN) neck 48 | (we remove output conv and also do bicubic interpolation similar to ViT 49 | pos embed interpolation) 50 | """ 51 | 52 | def __init__( 53 | self, 54 | position_encoding: nn.Module, 55 | d_model: int, 56 | backbone_channel_list: List[int], 57 | kernel_size: int = 1, 58 | stride: int = 1, 59 | padding: int = 0, 60 | fpn_interp_model: str = "bilinear", 61 | fuse_type: str = "sum", 62 | fpn_top_down_levels: Optional[List[int]] = None, 63 | ): 64 | """Initialize the neck 65 | :param trunk: the backbone 66 | :param position_encoding: the positional encoding to use 67 | :param d_model: the dimension of the model 68 | :param neck_norm: the normalization to use 69 | """ 70 | super().__init__() 71 | self.position_encoding = position_encoding 72 | self.convs = nn.ModuleList() 73 | self.backbone_channel_list = backbone_channel_list 74 | self.d_model = d_model 75 | for dim in backbone_channel_list: 76 | current = nn.Sequential() 77 | current.add_module( 78 | "conv", 79 | nn.Conv2d( 80 | in_channels=dim, 81 | out_channels=d_model, 82 | kernel_size=kernel_size, 83 | stride=stride, 84 | padding=padding, 85 | ), 86 | ) 87 | 88 | self.convs.append(current) 89 | self.fpn_interp_model = fpn_interp_model 90 | assert fuse_type in ["sum", "avg"] 91 | self.fuse_type = fuse_type 92 | 93 | # levels to have top-down features in its outputs 94 | # e.g. if fpn_top_down_levels is [2, 3], then only outputs of level 2 and 3 95 | # have top-down propagation, while outputs of level 0 and level 1 have only 96 | # lateral features from the same backbone level. 
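        # (In the SAM 2 / HQ-SAM 2 configs above this is [2, 3]: the two highest-resolution
        # outputs, levels 0 and 1, stay purely lateral and are the ones used as high-res
        # features by the mask decoder when use_high_res_features_in_sam is enabled.)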
97 | if fpn_top_down_levels is None: 98 | # default is to have top-down features on all levels 99 | fpn_top_down_levels = range(len(self.convs)) 100 | self.fpn_top_down_levels = list(fpn_top_down_levels) 101 | 102 | def forward(self, xs: List[torch.Tensor]): 103 | 104 | out = [None] * len(self.convs) 105 | pos = [None] * len(self.convs) 106 | assert len(xs) == len(self.convs) 107 | # fpn forward pass 108 | # see https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/fpn.py 109 | prev_features = None 110 | # forward in top-down order (from low to high resolution) 111 | n = len(self.convs) - 1 112 | for i in range(n, -1, -1): 113 | x = xs[i] 114 | lateral_features = self.convs[n - i](x) 115 | if i in self.fpn_top_down_levels and prev_features is not None: 116 | top_down_features = F.interpolate( 117 | prev_features.to(dtype=torch.float32), 118 | scale_factor=2.0, 119 | mode=self.fpn_interp_model, 120 | align_corners=( 121 | None if self.fpn_interp_model == "nearest" else False 122 | ), 123 | antialias=False, 124 | ) 125 | prev_features = lateral_features + top_down_features 126 | if self.fuse_type == "avg": 127 | prev_features /= 2 128 | else: 129 | prev_features = lateral_features 130 | x_out = prev_features 131 | out[i] = x_out 132 | pos[i] = self.position_encoding(x_out).to(x_out.dtype) 133 | 134 | return out, pos 135 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/backbones/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """Some utilities for backbones, in particular for windowing""" 8 | 9 | from typing import Tuple 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | def window_partition(x, window_size): 17 | """ 18 | Partition into non-overlapping windows with padding if needed. 19 | Args: 20 | x (tensor): input tokens with [B, H, W, C]. 21 | window_size (int): window size. 22 | Returns: 23 | windows: windows after partition with [B * num_windows, window_size, window_size, C]. 24 | (Hp, Wp): padded height and width before partition 25 | """ 26 | B, H, W, C = x.shape 27 | 28 | pad_h = (window_size - H % window_size) % window_size 29 | pad_w = (window_size - W % window_size) % window_size 30 | if pad_h > 0 or pad_w > 0: 31 | x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) 32 | Hp, Wp = H + pad_h, W + pad_w 33 | 34 | x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) 35 | windows = ( 36 | x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) 37 | ) 38 | return windows, (Hp, Wp) 39 | 40 | 41 | def window_unpartition(windows, window_size, pad_hw, hw): 42 | """ 43 | Window unpartition into original sequences and removing padding. 44 | Args: 45 | x (tensor): input tokens with [B * num_windows, window_size, window_size, C]. 46 | window_size (int): window size. 47 | pad_hw (Tuple): padded height and width (Hp, Wp). 48 | hw (Tuple): original height and width (H, W) before padding. 49 | Returns: 50 | x: unpartitioned sequences with [B, H, W, C]. 
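        Example: with window_size=16, an input of hw=(56, 60) was padded to pad_hw=(64, 64) by
            window_partition, giving B * 16 windows; this function reshapes them back into
            [B, 64, 64, C] and crops to (56, 60).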
51 | """ 52 | Hp, Wp = pad_hw 53 | H, W = hw 54 | B = windows.shape[0] // (Hp * Wp // window_size // window_size) 55 | x = windows.view( 56 | B, Hp // window_size, Wp // window_size, window_size, window_size, -1 57 | ) 58 | x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) 59 | 60 | if Hp > H or Wp > W: 61 | x = x[:, :H, :W, :].contiguous() 62 | return x 63 | 64 | 65 | class PatchEmbed(nn.Module): 66 | """ 67 | Image to Patch Embedding. 68 | """ 69 | 70 | def __init__( 71 | self, 72 | kernel_size: Tuple[int, ...] = (7, 7), 73 | stride: Tuple[int, ...] = (4, 4), 74 | padding: Tuple[int, ...] = (3, 3), 75 | in_chans: int = 3, 76 | embed_dim: int = 768, 77 | ): 78 | """ 79 | Args: 80 | kernel_size (Tuple): kernel size of the projection layer. 81 | stride (Tuple): stride of the projection layer. 82 | padding (Tuple): padding size of the projection layer. 83 | in_chans (int): Number of input image channels. 84 | embed_dim (int): embed_dim (int): Patch embedding dimension. 85 | """ 86 | super().__init__() 87 | self.proj = nn.Conv2d( 88 | in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding 89 | ) 90 | 91 | def forward(self, x: torch.Tensor) -> torch.Tensor: 92 | x = self.proj(x) 93 | # B C H W -> B H W C 94 | x = x.permute(0, 2, 3, 1) 95 | return x 96 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/sam/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import warnings 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from torchvision.transforms import Normalize, Resize, ToTensor 13 | 14 | 15 | class SAM2Transforms(nn.Module): 16 | def __init__( 17 | self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0 18 | ): 19 | """ 20 | Transforms for SAM2. 
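        Resizes images to a square `resolution` x `resolution` input and normalizes with the
        standard ImageNet mean/std; `mask_threshold`, `max_hole_area` and `max_sprinkle_area`
        control the mask post-processing in `postprocess_masks`.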
21 | """ 22 | super().__init__() 23 | self.resolution = resolution 24 | self.mask_threshold = mask_threshold 25 | self.max_hole_area = max_hole_area 26 | self.max_sprinkle_area = max_sprinkle_area 27 | self.mean = [0.485, 0.456, 0.406] 28 | self.std = [0.229, 0.224, 0.225] 29 | self.to_tensor = ToTensor() 30 | self.transforms = torch.jit.script( 31 | nn.Sequential( 32 | Resize((self.resolution, self.resolution)), 33 | Normalize(self.mean, self.std), 34 | ) 35 | ) 36 | 37 | def __call__(self, x): 38 | x = self.to_tensor(x) 39 | return self.transforms(x) 40 | 41 | def forward_batch(self, img_list): 42 | img_batch = [self.transforms(self.to_tensor(img)) for img in img_list] 43 | img_batch = torch.stack(img_batch, dim=0) 44 | return img_batch 45 | 46 | def transform_coords( 47 | self, coords: torch.Tensor, normalize=False, orig_hw=None 48 | ) -> torch.Tensor: 49 | """ 50 | Expects a torch tensor with length 2 in the last dimension. The coordinates can be in absolute image or normalized coordinates, 51 | If the coords are in absolute image coordinates, normalize should be set to True and original image size is required. 52 | 53 | Returns 54 | Un-normalized coordinates in the range of [0, 1] which is expected by the SAM2 model. 55 | """ 56 | if normalize: 57 | assert orig_hw is not None 58 | h, w = orig_hw 59 | coords = coords.clone() 60 | coords[..., 0] = coords[..., 0] / w 61 | coords[..., 1] = coords[..., 1] / h 62 | 63 | coords = coords * self.resolution # unnormalize coords 64 | return coords 65 | 66 | def transform_boxes( 67 | self, boxes: torch.Tensor, normalize=False, orig_hw=None 68 | ) -> torch.Tensor: 69 | """ 70 | Expects a tensor of shape Bx4. The coordinates can be in absolute image or normalized coordinates, 71 | if the coords are in absolute image coordinates, normalize should be set to True and original image size is required. 72 | """ 73 | boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw) 74 | return boxes 75 | 76 | def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor: 77 | """ 78 | Perform PostProcessing on output masks. 79 | """ 80 | from sam2.utils.misc import get_connected_components 81 | 82 | masks = masks.float() 83 | input_masks = masks 84 | mask_flat = masks.flatten(0, 1).unsqueeze(1) # flatten as 1-channel image 85 | try: 86 | if self.max_hole_area > 0: 87 | # Holes are those connected components in background with area <= self.fill_hole_area 88 | # (background regions are those with mask scores <= self.mask_threshold) 89 | labels, areas = get_connected_components( 90 | mask_flat <= self.mask_threshold 91 | ) 92 | is_hole = (labels > 0) & (areas <= self.max_hole_area) 93 | is_hole = is_hole.reshape_as(masks) 94 | # We fill holes with a small positive mask score (10.0) to change them to foreground. 95 | masks = torch.where(is_hole, self.mask_threshold + 10.0, masks) 96 | 97 | if self.max_sprinkle_area > 0: 98 | labels, areas = get_connected_components( 99 | mask_flat > self.mask_threshold 100 | ) 101 | is_hole = (labels > 0) & (areas <= self.max_sprinkle_area) 102 | is_hole = is_hole.reshape_as(masks) 103 | # We fill holes with negative mask score (-10.0) to change them to background. 104 | masks = torch.where(is_hole, self.mask_threshold - 10.0, masks) 105 | except Exception as e: 106 | # Skip the post-processing step if the CUDA kernel fails 107 | warnings.warn( 108 | f"{e}\n\nSkipping the post-processing step due to the error above. 
You can " 109 | "still use SAM 2 and it's OK to ignore the error above, although some post-processing " 110 | "functionality may be limited (which doesn't affect the results in most cases; see " 111 | "https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).", 112 | category=UserWarning, 113 | stacklevel=2, 114 | ) 115 | masks = input_masks 116 | 117 | masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False) 118 | return masks 119 | -------------------------------------------------------------------------------- /sam-hq2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | import os 7 | 8 | from setuptools import find_packages, setup 9 | 10 | # Package metadata 11 | NAME = "HQ-SAM-2" 12 | VERSION = "1.0" 13 | DESCRIPTION = "SAM-HQ 2: Segment Anything in High Quality for Images and Videos" 14 | URL = "https://github.com/SysCV/sam-hq" 15 | AUTHOR = "HQ-SAM Team" 16 | AUTHOR_EMAIL = "None" 17 | LICENSE = "Apache 2.0" 18 | 19 | # Read the contents of README file 20 | with open("README.md", "r", encoding="utf-8") as f: 21 | LONG_DESCRIPTION = f.read() 22 | 23 | # Required dependencies 24 | REQUIRED_PACKAGES = [ 25 | "torch>=2.3.1", 26 | "torchvision>=0.18.1", 27 | "numpy>=1.24.4", 28 | "tqdm>=4.66.1", 29 | "hydra-core>=1.3.2", 30 | "iopath>=0.1.10", 31 | "pillow>=9.4.0", 32 | "matplotlib>=3.9.1", 33 | "opencv-python>=4.7.0", 34 | ] 35 | 36 | EXTRA_PACKAGES = { 37 | "notebooks": [ 38 | "matplotlib>=3.9.1", 39 | "jupyter>=1.0.0", 40 | "opencv-python>=4.7.0", 41 | "eva-decord>=0.6.1", 42 | ], 43 | "interactive-demo": [ 44 | "Flask>=3.0.3", 45 | "Flask-Cors>=5.0.0", 46 | "av>=13.0.0", 47 | "dataclasses-json>=0.6.7", 48 | "eva-decord>=0.6.1", 49 | "gunicorn>=23.0.0", 50 | "imagesize>=1.4.1", 51 | "pycocotools>=2.0.8", 52 | "strawberry-graphql>=0.239.2", 53 | ], 54 | "dev": [ 55 | "black==24.2.0", 56 | "usort==1.0.2", 57 | "ufmt==2.0.0b2", 58 | "fvcore>=0.1.5.post20221221", 59 | "pandas>=2.2.2", 60 | "scikit-image>=0.24.0", 61 | "tensorboard>=2.17.0", 62 | "pycocotools>=2.0.8", 63 | "tensordict>=0.5.0", 64 | "opencv-python>=4.7.0", 65 | "submitit>=1.5.1", 66 | ], 67 | } 68 | 69 | # By default, we also build the SAM 2 CUDA extension. 70 | # You may turn off CUDA build with `export SAM2_BUILD_CUDA=0`. 71 | BUILD_CUDA = os.getenv("SAM2_BUILD_CUDA", "1") == "1" 72 | # By default, we allow SAM 2 installation to proceed even with build errors. 73 | # You may force stopping on errors with `export SAM2_BUILD_ALLOW_ERRORS=0`. 74 | BUILD_ALLOW_ERRORS = os.getenv("SAM2_BUILD_ALLOW_ERRORS", "1") == "1" 75 | 76 | # Catch and skip errors during extension building and print a warning message 77 | # (note that this message only shows up under verbose build mode 78 | # "pip install -v -e ." or "python setup.py build_ext -v") 79 | CUDA_ERROR_MSG = ( 80 | "{}\n\n" 81 | "Failed to build the SAM 2 CUDA extension due to the error above. 
" 82 | "You can still use SAM 2 and it's OK to ignore the error above, although some " 83 | "post-processing functionality may be limited (which doesn't affect the results in most cases; " 84 | "(see https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).\n" 85 | ) 86 | 87 | 88 | def get_extensions(): 89 | if not BUILD_CUDA: 90 | return [] 91 | 92 | try: 93 | from torch.utils.cpp_extension import CUDAExtension 94 | 95 | srcs = ["sam2/csrc/connected_components.cu"] 96 | compile_args = { 97 | "cxx": [], 98 | "nvcc": [ 99 | "-DCUDA_HAS_FP16=1", 100 | "-D__CUDA_NO_HALF_OPERATORS__", 101 | "-D__CUDA_NO_HALF_CONVERSIONS__", 102 | "-D__CUDA_NO_HALF2_OPERATORS__", 103 | ], 104 | } 105 | ext_modules = [CUDAExtension("sam2._C", srcs, extra_compile_args=compile_args)] 106 | except Exception as e: 107 | if BUILD_ALLOW_ERRORS: 108 | print(CUDA_ERROR_MSG.format(e)) 109 | ext_modules = [] 110 | else: 111 | raise e 112 | 113 | return ext_modules 114 | 115 | 116 | try: 117 | from torch.utils.cpp_extension import BuildExtension 118 | 119 | class BuildExtensionIgnoreErrors(BuildExtension): 120 | 121 | def finalize_options(self): 122 | try: 123 | super().finalize_options() 124 | except Exception as e: 125 | print(CUDA_ERROR_MSG.format(e)) 126 | self.extensions = [] 127 | 128 | def build_extensions(self): 129 | try: 130 | super().build_extensions() 131 | except Exception as e: 132 | print(CUDA_ERROR_MSG.format(e)) 133 | self.extensions = [] 134 | 135 | def get_ext_filename(self, ext_name): 136 | try: 137 | return super().get_ext_filename(ext_name) 138 | except Exception as e: 139 | print(CUDA_ERROR_MSG.format(e)) 140 | self.extensions = [] 141 | return "_C.so" 142 | 143 | cmdclass = { 144 | "build_ext": ( 145 | BuildExtensionIgnoreErrors.with_options(no_python_abi_suffix=True) 146 | if BUILD_ALLOW_ERRORS 147 | else BuildExtension.with_options(no_python_abi_suffix=True) 148 | ) 149 | } 150 | except Exception as e: 151 | cmdclass = {} 152 | if BUILD_ALLOW_ERRORS: 153 | print(CUDA_ERROR_MSG.format(e)) 154 | else: 155 | raise e 156 | 157 | 158 | # Setup configuration 159 | setup( 160 | name=NAME, 161 | version=VERSION, 162 | description=DESCRIPTION, 163 | long_description=LONG_DESCRIPTION, 164 | long_description_content_type="text/markdown", 165 | url=URL, 166 | author=AUTHOR, 167 | author_email=AUTHOR_EMAIL, 168 | license=LICENSE, 169 | packages=find_packages(exclude="notebooks"), 170 | include_package_data=True, 171 | install_requires=REQUIRED_PACKAGES, 172 | extras_require=EXTRA_PACKAGES, 173 | python_requires=">=3.10.0", 174 | ext_modules=get_extensions(), 175 | cmdclass=cmdclass, 176 | ) 177 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/COCO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/COCO.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/GD_GLIGEN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/GD_GLIGEN.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/GD_SD.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/GD_SD.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/ODinW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/ODinW.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/arch.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/cats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/cats.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/hero_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/hero_figure.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/demo/gradio_app.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from functools import partial 3 | import cv2 4 | import requests 5 | import os 6 | from io import BytesIO 7 | from PIL import Image 8 | import numpy as np 9 | from pathlib import Path 10 | 11 | 12 | import warnings 13 | 14 | import torch 15 | 16 | # prepare the environment 17 | os.system("python setup.py build develop --user") 18 | os.system("pip install packaging==21.3") 19 | os.system("pip install gradio") 20 | 21 | 22 | warnings.filterwarnings("ignore") 23 | 24 | import gradio as gr 25 | 26 | from groundingdino.models import build_model 27 | from groundingdino.util.slconfig import SLConfig 28 | from groundingdino.util.utils import clean_state_dict 29 | from groundingdino.util.inference import annotate, load_image, predict 30 | import groundingdino.datasets.transforms as T 31 | 32 | from huggingface_hub import hf_hub_download 33 | 34 | 35 | 36 | # Use this command for evaluate the GLIP-T model 37 | config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py" 38 | ckpt_repo_id = "ShilongLiu/GroundingDINO" 39 | ckpt_filenmae = "groundingdino_swint_ogc.pth" 40 | 41 | 42 | def load_model_hf(model_config_path, repo_id, filename, device='cpu'): 43 | args = SLConfig.fromfile(model_config_path) 44 | model = build_model(args) 45 | args.device = device 46 | 47 | cache_file = hf_hub_download(repo_id=repo_id, filename=filename) 48 | checkpoint = torch.load(cache_file, map_location='cpu') 49 | log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False) 50 | print("Model loaded from {} \n => {}".format(cache_file, log)) 51 | _ = model.eval() 52 | return model 53 | 54 | def image_transform_grounding(init_image): 55 | transform = T.Compose([ 56 | T.RandomResize([800], max_size=1333), 57 | T.ToTensor(), 58 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 59 | ]) 60 | image, _ = transform(init_image, None) # 3, h, w 61 | return 
init_image, image 62 | 63 | def image_transform_grounding_for_vis(init_image): 64 | transform = T.Compose([ 65 | T.RandomResize([800], max_size=1333), 66 | ]) 67 | image, _ = transform(init_image, None) # 3, h, w 68 | return image 69 | 70 | model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae) 71 | 72 | def run_grounding(input_image, grounding_caption, box_threshold, text_threshold): 73 | init_image = input_image.convert("RGB") 74 | original_size = init_image.size 75 | 76 | _, image_tensor = image_transform_grounding(init_image) 77 | image_pil: Image = image_transform_grounding_for_vis(init_image) 78 | 79 | # run grounidng 80 | boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu') 81 | annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases) 82 | image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)) 83 | 84 | 85 | return image_with_box 86 | 87 | if __name__ == "__main__": 88 | 89 | parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True) 90 | parser.add_argument("--debug", action="store_true", help="using debug mode") 91 | parser.add_argument("--share", action="store_true", help="share the app") 92 | args = parser.parse_args() 93 | 94 | block = gr.Blocks().queue() 95 | with block: 96 | gr.Markdown("# [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)") 97 | gr.Markdown("### Open-World Detection with Grounding DINO") 98 | 99 | with gr.Row(): 100 | with gr.Column(): 101 | input_image = gr.Image(source='upload', type="pil") 102 | grounding_caption = gr.Textbox(label="Detection Prompt") 103 | run_button = gr.Button(label="Run") 104 | with gr.Accordion("Advanced options", open=False): 105 | box_threshold = gr.Slider( 106 | label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001 107 | ) 108 | text_threshold = gr.Slider( 109 | label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001 110 | ) 111 | 112 | with gr.Column(): 113 | gallery = gr.outputs.Image( 114 | type="pil", 115 | # label="grounding results" 116 | ).style(full_width=True, full_height=True) 117 | # gallery = gr.Gallery(label="Generated images", show_label=False).style( 118 | # grid=[1], height="auto", container=True, full_width=True, full_height=True) 119 | 120 | run_button.click(fn=run_grounding, inputs=[ 121 | input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery]) 122 | 123 | 124 | block.launch(server_name='0.0.0.0', server_port=7579, debug=args.debug, share=args.share) 125 | 126 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/groundingdino/__init__.py -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_B_384_22k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | 
dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_T_224_1k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/groundingdino/datasets/__init__.py -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Conditional DETR 8 | # Copyright (c) 2021 Microsoft. All Rights Reserved. 
9 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 10 | # ------------------------------------------------------------------------ 11 | # Copied from DETR (https://github.com/facebookresearch/detr) 12 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 13 | # ------------------------------------------------------------------------ 14 | 15 | from .groundingdino import build_groundingdino 16 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone 2 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | namespace groundingdino { 20 | 21 | at::Tensor 22 | ms_deform_attn_forward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const int im2col_step) 29 | { 30 | if (value.type().is_cuda()) 31 | { 32 | #ifdef WITH_CUDA 33 | return ms_deform_attn_cuda_forward( 34 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 35 | #else 36 | AT_ERROR("Not compiled with GPU support"); 37 | #endif 38 | } 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | 42 | std::vector 43 | ms_deform_attn_backward( 44 | const at::Tensor &value, 45 | const at::Tensor &spatial_shapes, 46 | const at::Tensor &level_start_index, 47 | const at::Tensor &sampling_loc, 48 | const at::Tensor &attn_weight, 49 | const at::Tensor &grad_output, 50 | const int im2col_step) 51 | { 52 | if (value.type().is_cuda()) 53 | { 54 | #ifdef WITH_CUDA 55 | return ms_deform_attn_cuda_backward( 56 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 57 | #else 58 | AT_ERROR("Not compiled with GPU support"); 59 | #endif 60 | } 61 | AT_ERROR("Not implemented on the CPU"); 62 | } 63 | 64 | } // namespace groundingdino -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace groundingdino { 17 | 18 | at::Tensor 19 | ms_deform_attn_cpu_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step) 26 | { 27 | AT_ERROR("Not implement on cpu"); 28 | } 29 | 30 | std::vector 31 | ms_deform_attn_cpu_backward( 32 | const at::Tensor &value, 33 | const at::Tensor &spatial_shapes, 34 | const at::Tensor &level_start_index, 35 | const at::Tensor &sampling_loc, 36 | const at::Tensor &attn_weight, 37 | const at::Tensor &grad_output, 38 | const int im2col_step) 39 | { 40 | AT_ERROR("Not implement on cpu"); 41 | } 42 | 43 | } // namespace groundingdino 44 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor 17 | ms_deform_attn_cpu_forward( 18 | const at::Tensor &value, 19 | const at::Tensor &spatial_shapes, 20 | const at::Tensor &level_start_index, 21 | const at::Tensor &sampling_loc, 22 | const at::Tensor &attn_weight, 23 | const int im2col_step); 24 | 25 | std::vector 26 | ms_deform_attn_cpu_backward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const at::Tensor &grad_output, 33 | const int im2col_step); 34 | 35 | } // namespace groundingdino 36 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor ms_deform_attn_cuda_forward( 17 | const at::Tensor &value, 18 | const at::Tensor &spatial_shapes, 19 | const at::Tensor &level_start_index, 20 | const at::Tensor &sampling_loc, 21 | const at::Tensor &attn_weight, 22 | const int im2col_step); 23 | 24 | std::vector ms_deform_attn_cuda_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | } // namespace groundingdino -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace groundingdino { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace groundingdino 8 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "MsDeformAttn/ms_deform_attn.h" 4 | 5 | namespace groundingdino { 6 | 7 | #ifdef WITH_CUDA 8 | extern int get_cudart_version(); 9 | #endif 10 | 11 | std::string get_cuda_version() { 12 | #ifdef WITH_CUDA 13 | std::ostringstream oss; 14 | 15 | // copied from 16 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 17 | auto printCudaStyleVersion = [&](int v) { 18 | oss << (v / 1000) << "." << (v / 10 % 100); 19 | if (v % 10 != 0) { 20 | oss << "." << (v % 10); 21 | } 22 | }; 23 | printCudaStyleVersion(get_cudart_version()); 24 | return oss.str(); 25 | #else 26 | return std::string("not available"); 27 | #endif 28 | } 29 | 30 | // similar to 31 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 32 | std::string get_compiler_version() { 33 | std::ostringstream ss; 34 | #if defined(__GNUC__) 35 | #ifndef __clang__ 36 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 37 | #endif 38 | #endif 39 | 40 | #if defined(__clang_major__) 41 | { 42 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
43 | << __clang_patchlevel__; 44 | } 45 | #endif 46 | 47 | #if defined(_MSC_VER) 48 | { ss << "MSVC " << _MSC_FULL_VER; } 49 | #endif 50 | return ss.str(); 51 | } 52 | 53 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 54 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 55 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 56 | } 57 | 58 | } // namespace groundingdino -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/transformer_vanilla.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved 8 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 9 | """ 10 | DETR Transformer class. 11 | 12 | Copy-paste from torch.nn.Transformer with modifications: 13 | * positional encodings are passed in MHattention 14 | * extra LN at the end of encoder is removed 15 | * decoder returns a stack of activations from all decoding layers 16 | """ 17 | from typing import Optional 18 | 19 | import torch 20 | import torch.nn.functional as F 21 | from torch import Tensor, nn 22 | 23 | from .utils import ( 24 | MLP, 25 | _get_activation_fn, 26 | _get_clones, 27 | gen_encoder_output_proposals, 28 | gen_sineembed_for_position, 29 | sigmoid_focal_loss, 30 | ) 31 | 32 | 33 | class TextTransformer(nn.Module): 34 | def __init__(self, num_layers, d_model=256, nheads=8, dim_feedforward=2048, dropout=0.1): 35 | super().__init__() 36 | self.num_layers = num_layers 37 | self.d_model = d_model 38 | self.nheads = nheads 39 | self.dim_feedforward = dim_feedforward 40 | self.norm = None 41 | 42 | single_encoder_layer = TransformerEncoderLayer( 43 | d_model=d_model, nhead=nheads, dim_feedforward=dim_feedforward, dropout=dropout 44 | ) 45 | self.layers = _get_clones(single_encoder_layer, num_layers) 46 | 47 | def forward(self, memory_text: torch.Tensor, text_attention_mask: torch.Tensor): 48 | """ 49 | 50 | Args: 51 | text_attention_mask: bs, num_token 52 | memory_text: bs, num_token, d_model 53 | 54 | Raises: 55 | RuntimeError: _description_ 56 | 57 | Returns: 58 | output: bs, num_token, d_model 59 | """ 60 | 61 | output = memory_text.transpose(0, 1) 62 | 63 | for layer in self.layers: 64 | output = layer(output, src_key_padding_mask=text_attention_mask) 65 | 66 | if self.norm is not None: 67 | output = self.norm(output) 68 | 69 | return output.transpose(0, 1) 70 | 71 | 72 | class TransformerEncoderLayer(nn.Module): 73 | def __init__( 74 | self, 75 | d_model, 76 | nhead, 77 | dim_feedforward=2048, 78 | dropout=0.1, 79 | activation="relu", 80 | normalize_before=False, 81 | ): 82 | super().__init__() 83 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 84 | # Implementation of Feedforward model 85 | self.linear1 = nn.Linear(d_model, dim_feedforward) 86 | self.dropout = nn.Dropout(dropout) 87 | self.linear2 = nn.Linear(dim_feedforward, d_model) 88 | 89 | self.norm1 = nn.LayerNorm(d_model) 90 | self.norm2 = nn.LayerNorm(d_model) 91 
| self.dropout1 = nn.Dropout(dropout) 92 | self.dropout2 = nn.Dropout(dropout) 93 | 94 | self.activation = _get_activation_fn(activation) 95 | self.normalize_before = normalize_before 96 | self.nhead = nhead 97 | 98 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 99 | return tensor if pos is None else tensor + pos 100 | 101 | def forward( 102 | self, 103 | src, 104 | src_mask: Optional[Tensor] = None, 105 | src_key_padding_mask: Optional[Tensor] = None, 106 | pos: Optional[Tensor] = None, 107 | ): 108 | # repeat attn mask 109 | if src_mask.dim() == 3 and src_mask.shape[0] == src.shape[1]: 110 | # bs, num_q, num_k 111 | src_mask = src_mask.repeat(self.nhead, 1, 1) 112 | 113 | q = k = self.with_pos_embed(src, pos) 114 | 115 | src2 = self.self_attn(q, k, value=src, attn_mask=src_mask)[0] 116 | 117 | # src2 = self.self_attn(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] 118 | src = src + self.dropout1(src2) 119 | src = self.norm1(src) 120 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 121 | src = src + self.dropout2(src2) 122 | src = self.norm2(src) 123 | return src 124 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | from .GroundingDINO import build_groundingdino 9 | 10 | 11 | def build_model(args): 12 | # we use register to maintain models from catdet6 on. 13 | from .registry import MODULE_BUILD_FUNCS 14 | 15 | assert args.modelname in MODULE_BUILD_FUNCS._module_dict 16 | build_func = MODULE_BUILD_FUNCS.get(args.modelname) 17 | model = build_func(args) 18 | return model 19 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 
5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # -*- coding: utf-8 -*- 8 | # @Author: Yihao Chen 9 | # @Date: 2021-08-16 16:03:17 10 | # @Last Modified by: Shilong Liu 11 | # @Last Modified time: 2022-01-23 15:26 12 | # modified from mmcv 13 | 14 | import inspect 15 | from functools import partial 16 | 17 | 18 | class Registry(object): 19 | def __init__(self, name): 20 | self._name = name 21 | self._module_dict = dict() 22 | 23 | def __repr__(self): 24 | format_str = self.__class__.__name__ + "(name={}, items={})".format( 25 | self._name, list(self._module_dict.keys()) 26 | ) 27 | return format_str 28 | 29 | def __len__(self): 30 | return len(self._module_dict) 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def module_dict(self): 38 | return self._module_dict 39 | 40 | def get(self, key): 41 | return self._module_dict.get(key, None) 42 | 43 | def registe_with_name(self, module_name=None, force=False): 44 | return partial(self.register, module_name=module_name, force=force) 45 | 46 | def register(self, module_build_function, module_name=None, force=False): 47 | """Register a module build function. 48 | Args: 49 | module (:obj:`nn.Module`): Module to be registered. 50 | """ 51 | if not inspect.isfunction(module_build_function): 52 | raise TypeError( 53 | "module_build_function must be a function, but got {}".format( 54 | type(module_build_function) 55 | ) 56 | ) 57 | if module_name is None: 58 | module_name = module_build_function.__name__ 59 | if not force and module_name in self._module_dict: 60 | raise KeyError("{} is already registered in {}".format(module_name, self.name)) 61 | self._module_dict[module_name] = module_build_function 62 | 63 | return module_build_function 64 | 65 | 66 | MODULE_BUILD_FUNCS = Registry("model build functions") 67 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 
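Boxes are handled both in (cx, cy, w, h) and in (x0, y0, x1, y1) format; conversion helpers,
all-pairs and pairwise IoU/GIoU, and a mask-to-box utility are defined below.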
4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)] 12 | return torch.stack(b, dim=-1) 13 | 14 | 15 | def box_xyxy_to_cxcywh(x): 16 | x0, y0, x1, y1 = x.unbind(-1) 17 | b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)] 18 | return torch.stack(b, dim=-1) 19 | 20 | 21 | # modified from torchvision to also return the union 22 | def box_iou(boxes1, boxes2): 23 | area1 = box_area(boxes1) 24 | area2 = box_area(boxes2) 25 | 26 | # import ipdb; ipdb.set_trace() 27 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 28 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 29 | 30 | wh = (rb - lt).clamp(min=0) # [N,M,2] 31 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 32 | 33 | union = area1[:, None] + area2 - inter 34 | 35 | iou = inter / (union + 1e-6) 36 | return iou, union 37 | 38 | 39 | def generalized_box_iou(boxes1, boxes2): 40 | """ 41 | Generalized IoU from https://giou.stanford.edu/ 42 | 43 | The boxes should be in [x0, y0, x1, y1] format 44 | 45 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 46 | and M = len(boxes2) 47 | """ 48 | # degenerate boxes gives inf / nan results 49 | # so do an early check 50 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 51 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 52 | # except: 53 | # import ipdb; ipdb.set_trace() 54 | iou, union = box_iou(boxes1, boxes2) 55 | 56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 58 | 59 | wh = (rb - lt).clamp(min=0) # [N,M,2] 60 | area = wh[:, :, 0] * wh[:, :, 1] 61 | 62 | return iou - (area - union) / (area + 1e-6) 63 | 64 | 65 | # modified from torchvision to also return the union 66 | def box_iou_pairwise(boxes1, boxes2): 67 | area1 = box_area(boxes1) 68 | area2 = box_area(boxes2) 69 | 70 | lt = torch.max(boxes1[:, :2], boxes2[:, :2]) # [N,2] 71 | rb = torch.min(boxes1[:, 2:], boxes2[:, 2:]) # [N,2] 72 | 73 | wh = (rb - lt).clamp(min=0) # [N,2] 74 | inter = wh[:, 0] * wh[:, 1] # [N] 75 | 76 | union = area1 + area2 - inter 77 | 78 | iou = inter / union 79 | return iou, union 80 | 81 | 82 | def generalized_box_iou_pairwise(boxes1, boxes2): 83 | """ 84 | Generalized IoU from https://giou.stanford.edu/ 85 | 86 | Input: 87 | - boxes1, boxes2: N,4 88 | Output: 89 | - giou: N, 4 90 | """ 91 | # degenerate boxes gives inf / nan results 92 | # so do an early check 93 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 94 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 95 | assert boxes1.shape == boxes2.shape 96 | iou, union = box_iou_pairwise(boxes1, boxes2) # N, 4 97 | 98 | lt = torch.min(boxes1[:, :2], boxes2[:, :2]) 99 | rb = torch.max(boxes1[:, 2:], boxes2[:, 2:]) 100 | 101 | wh = (rb - lt).clamp(min=0) # [N,2] 102 | area = wh[:, 0] * wh[:, 1] 103 | 104 | return iou - (area - union) / area 105 | 106 | 107 | def masks_to_boxes(masks): 108 | """Compute the bounding boxes around the provided masks 109 | 110 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
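    Masks are expected to be binary; an all-zero mask yields a degenerate box, since no
    foreground pixel constrains the min/max reductions below.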
111 | 112 | Returns a [N, 4] tensors, with the boxes in xyxy format 113 | """ 114 | if masks.numel() == 0: 115 | return torch.zeros((0, 4), device=masks.device) 116 | 117 | h, w = masks.shape[-2:] 118 | 119 | y = torch.arange(0, h, dtype=torch.float) 120 | x = torch.arange(0, w, dtype=torch.float) 121 | y, x = torch.meshgrid(y, x) 122 | 123 | x_mask = masks * x.unsqueeze(0) 124 | x_max = x_mask.flatten(1).max(-1)[0] 125 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 126 | 127 | y_mask = masks * y.unsqueeze(0) 128 | y_max = y_mask.flatten(1).max(-1)[0] 129 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 130 | 131 | return torch.stack([x_min, y_min, x_max, y_max], 1) 132 | 133 | 134 | if __name__ == "__main__": 135 | x = torch.rand(5, 4) 136 | y = torch.rand(3, 4) 137 | iou, union = box_iou(x, y) 138 | import ipdb 139 | 140 | ipdb.set_trace() 141 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/get_tokenlizer.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizerFast 2 | 3 | 4 | def get_tokenlizer(text_encoder_type): 5 | if not isinstance(text_encoder_type, str): 6 | # print("text_encoder_type is not a str") 7 | if hasattr(text_encoder_type, "text_encoder_type"): 8 | text_encoder_type = text_encoder_type.text_encoder_type 9 | elif text_encoder_type.get("text_encoder_type", False): 10 | text_encoder_type = text_encoder_type.get("text_encoder_type") 11 | else: 12 | raise ValueError( 13 | "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) 14 | ) 15 | print("final text_encoder_type: {}".format(text_encoder_type)) 16 | 17 | tokenizer = AutoTokenizer.from_pretrained(text_encoder_type) 18 | return tokenizer 19 | 20 | 21 | def get_pretrained_language_model(text_encoder_type): 22 | if text_encoder_type == "bert-base-uncased": 23 | return BertModel.from_pretrained(text_encoder_type) 24 | if text_encoder_type == "roberta-base": 25 | return RobertaModel.from_pretrained(text_encoder_type) 26 | raise ValueError("Unknown text_encoder_type {}".format(text_encoder_type)) 27 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import functools 3 | import logging 4 | import os 5 | import sys 6 | 7 | from termcolor import colored 8 | 9 | 10 | class _ColorfulFormatter(logging.Formatter): 11 | def __init__(self, *args, **kwargs): 12 | self._root_name = kwargs.pop("root_name") + "." 13 | self._abbrev_name = kwargs.pop("abbrev_name", "") 14 | if len(self._abbrev_name): 15 | self._abbrev_name = self._abbrev_name + "." 
16 | super(_ColorfulFormatter, self).__init__(*args, **kwargs) 17 | 18 | def formatMessage(self, record): 19 | record.name = record.name.replace(self._root_name, self._abbrev_name) 20 | log = super(_ColorfulFormatter, self).formatMessage(record) 21 | if record.levelno == logging.WARNING: 22 | prefix = colored("WARNING", "red", attrs=["blink"]) 23 | elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: 24 | prefix = colored("ERROR", "red", attrs=["blink", "underline"]) 25 | else: 26 | return log 27 | return prefix + " " + log 28 | 29 | 30 | # so that calling setup_logger multiple times won't add many handlers 31 | @functools.lru_cache() 32 | def setup_logger(output=None, distributed_rank=0, *, color=True, name="imagenet", abbrev_name=None): 33 | """ 34 | Initialize the detectron2 logger and set its verbosity level to "INFO". 35 | 36 | Args: 37 | output (str): a file name or a directory to save log. If None, will not save log file. 38 | If ends with ".txt" or ".log", assumed to be a file name. 39 | Otherwise, logs will be saved to `output/log.txt`. 40 | name (str): the root module name of this logger 41 | 42 | Returns: 43 | logging.Logger: a logger 44 | """ 45 | logger = logging.getLogger(name) 46 | logger.setLevel(logging.DEBUG) 47 | logger.propagate = False 48 | 49 | if abbrev_name is None: 50 | abbrev_name = name 51 | 52 | plain_formatter = logging.Formatter( 53 | "[%(asctime)s.%(msecs)03d]: %(message)s", datefmt="%m/%d %H:%M:%S" 54 | ) 55 | # stdout logging: master only 56 | if distributed_rank == 0: 57 | ch = logging.StreamHandler(stream=sys.stdout) 58 | ch.setLevel(logging.DEBUG) 59 | if color: 60 | formatter = _ColorfulFormatter( 61 | colored("[%(asctime)s.%(msecs)03d]: ", "green") + "%(message)s", 62 | datefmt="%m/%d %H:%M:%S", 63 | root_name=name, 64 | abbrev_name=str(abbrev_name), 65 | ) 66 | else: 67 | formatter = plain_formatter 68 | ch.setFormatter(formatter) 69 | logger.addHandler(ch) 70 | 71 | # file logging: all workers 72 | if output is not None: 73 | if output.endswith(".txt") or output.endswith(".log"): 74 | filename = output 75 | else: 76 | filename = os.path.join(output, "log.txt") 77 | if distributed_rank > 0: 78 | filename = filename + f".rank{distributed_rank}" 79 | os.makedirs(os.path.dirname(filename), exist_ok=True) 80 | 81 | fh = logging.StreamHandler(_cached_log_stream(filename)) 82 | fh.setLevel(logging.DEBUG) 83 | fh.setFormatter(plain_formatter) 84 | logger.addHandler(fh) 85 | 86 | return logger 87 | 88 | 89 | # cache the opened file object, so that different calls to `setup_logger` 90 | # with the same file name can safely write to the same file. 
91 | @functools.lru_cache(maxsize=None) 92 | def _cached_log_stream(filename): 93 | return open(filename, "a") 94 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/time_counter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | 5 | class TimeCounter: 6 | def __init__(self) -> None: 7 | pass 8 | 9 | def clear(self): 10 | self.timedict = {} 11 | self.basetime = time.perf_counter() 12 | 13 | def timeit(self, name): 14 | nowtime = time.perf_counter() - self.basetime 15 | self.timedict[name] = nowtime 16 | self.basetime = time.perf_counter() 17 | 18 | 19 | class TimeHolder: 20 | def __init__(self) -> None: 21 | self.timedict = {} 22 | 23 | def update(self, _timedict: dict): 24 | for k, v in _timedict.items(): 25 | if k not in self.timedict: 26 | self.timedict[k] = AverageMeter(name=k, val_only=True) 27 | self.timedict[k].update(val=v) 28 | 29 | def final_res(self): 30 | return {k: v.avg for k, v in self.timedict.items()} 31 | 32 | def __str__(self): 33 | return json.dumps(self.final_res(), indent=2) 34 | 35 | 36 | class AverageMeter(object): 37 | """Computes and stores the average and current value""" 38 | 39 | def __init__(self, name, fmt=":f", val_only=False): 40 | self.name = name 41 | self.fmt = fmt 42 | self.val_only = val_only 43 | self.reset() 44 | 45 | def reset(self): 46 | self.val = 0 47 | self.avg = 0 48 | self.sum = 0 49 | self.count = 0 50 | 51 | def update(self, val, n=1): 52 | self.val = val 53 | self.sum += val * n 54 | self.count += n 55 | self.avg = self.sum / self.count 56 | 57 | def __str__(self): 58 | if self.val_only: 59 | fmtstr = "{name} {val" + self.fmt + "}" 60 | else: 61 | fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" 62 | return fmtstr.format(**self.__dict__) 63 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/vl_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from typing import List 4 | 5 | import torch 6 | 7 | 8 | def create_positive_map_from_span(tokenized, token_span, max_text_len=256): 9 | """construct a map such that positive_map[i,j] = True iff box i is associated to token j 10 | Input: 11 | - tokenized: 12 | - input_ids: Tensor[1, ntokens] 13 | - attention_mask: Tensor[1, ntokens] 14 | - token_span: list with length num_boxes. 
15 | - each item: [start_idx, end_idx] 16 | """ 17 | positive_map = torch.zeros((len(token_span), max_text_len), dtype=torch.float) 18 | for j, tok_list in enumerate(token_span): 19 | for (beg, end) in tok_list: 20 | beg_pos = tokenized.char_to_token(beg) 21 | end_pos = tokenized.char_to_token(end - 1) 22 | if beg_pos is None: 23 | try: 24 | beg_pos = tokenized.char_to_token(beg + 1) 25 | if beg_pos is None: 26 | beg_pos = tokenized.char_to_token(beg + 2) 27 | except: 28 | beg_pos = None 29 | if end_pos is None: 30 | try: 31 | end_pos = tokenized.char_to_token(end - 2) 32 | if end_pos is None: 33 | end_pos = tokenized.char_to_token(end - 3) 34 | except: 35 | end_pos = None 36 | if beg_pos is None or end_pos is None: 37 | continue 38 | 39 | assert beg_pos is not None and end_pos is not None 40 | if os.environ.get("SHILONG_DEBUG_ONLY_ONE_POS", None) == "TRUE": 41 | positive_map[j, beg_pos] = 1 42 | break 43 | else: 44 | positive_map[j, beg_pos : end_pos + 1].fill_(1) 45 | 46 | return positive_map / (positive_map.sum(-1)[:, None] + 1e-6) 47 | 48 | 49 | def build_captions_and_token_span(cat_list, force_lowercase): 50 | """ 51 | Return: 52 | captions: str 53 | cat2tokenspan: dict 54 | { 55 | 'dog': [[0, 2]], 56 | ... 57 | } 58 | """ 59 | 60 | cat2tokenspan = {} 61 | captions = "" 62 | for catname in cat_list: 63 | class_name = catname 64 | if force_lowercase: 65 | class_name = class_name.lower() 66 | if "/" in class_name: 67 | class_name_list: List = class_name.strip().split("/") 68 | class_name_list.append(class_name) 69 | class_name: str = random.choice(class_name_list) 70 | 71 | tokens_positive_i = [] 72 | subnamelist = [i.strip() for i in class_name.strip().split(" ")] 73 | for subname in subnamelist: 74 | if len(subname) == 0: 75 | continue 76 | if len(captions) > 0: 77 | captions = captions + " " 78 | strat_idx = len(captions) 79 | end_idx = strat_idx + len(subname) 80 | tokens_positive_i.append([strat_idx, end_idx]) 81 | captions = captions + subname 82 | 83 | if len(tokens_positive_i) > 0: 84 | captions = captions + " ." 
85 | cat2tokenspan[class_name] = tokens_positive_i 86 | 87 | return captions, cat2tokenspan 88 | 89 | 90 | def build_id2posspan_and_caption(category_dict: dict): 91 | """Build id2pos_span and caption from category_dict 92 | 93 | Args: 94 | category_dict (dict): category_dict 95 | """ 96 | cat_list = [item["name"].lower() for item in category_dict] 97 | id2catname = {item["id"]: item["name"].lower() for item in category_dict} 98 | caption, cat2posspan = build_captions_and_token_span(cat_list, force_lowercase=True) 99 | id2posspan = {catid: cat2posspan[catname] for catid, catname in id2catname.items()} 100 | return id2posspan, caption 101 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | transformers 4 | addict 5 | yapf 6 | timm 7 | numpy 8 | opencv-python 9 | supervision 10 | pycocotools -------------------------------------------------------------------------------- /seginw/sam2: -------------------------------------------------------------------------------- 1 | ../sam-hq2/sam2/ -------------------------------------------------------------------------------- /seginw/segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .build_sam_hq import ( 15 | build_sam_hq, 16 | build_sam_hq_vit_h, 17 | build_sam_hq_vit_l, 18 | build_sam_hq_vit_b, 19 | sam_hq_model_registry, 20 | ) 21 | from .predictor import SamPredictor 22 | from .automatic_mask_generator import SamAutomaticMaskGenerator 23 | -------------------------------------------------------------------------------- /seginw/segment_anything/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /seginw/segment_anything/build_sam_hq.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoderHQ, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_hq_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam_hq = build_sam_hq_vit_h 25 | 26 | 27 | def build_sam_hq_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_hq_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_hq_model_registry = { 48 | "default": build_sam_hq_vit_h, 49 | "vit_h": build_sam_hq_vit_h, 50 | "vit_l": build_sam_hq_vit_l, 51 | "vit_b": build_sam_hq_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoderHQ( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | vit_dim=encoder_embed_dim, 99 | ), 100 | pixel_mean=[123.675, 116.28, 103.53], 101 | pixel_std=[58.395, 57.12, 57.375], 102 | ) 103 | # sam.eval() 104 | if checkpoint is not None: 105 | with open(checkpoint, "rb") as f: 106 | state_dict = torch.load(f) 107 | info = sam.load_state_dict(state_dict, strict=False) 108 | print(info) 109 | for n, p in sam.named_parameters(): 110 | if 'hf_token' not in n and 'hf_mlp' not in n and 'compress_vit_feat' not in n and 'embedding_encoder' not in n and 'embedding_maskfeature' not in n: 111 | p.requires_grad = False 112 | 113 | return sam -------------------------------------------------------------------------------- /seginw/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder_hq import MaskDecoderHQ 10 | from .mask_decoder import MaskDecoder 11 | from .prompt_encoder import PromptEncoder 12 | from .transformer import TwoWayTransformer 13 | -------------------------------------------------------------------------------- /seginw/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /seginw/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /seginw/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 
21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /seginw/test_seginw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! 
; 6 | 7 | python test_ap_on_seginw.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ 8 | done -------------------------------------------------------------------------------- /seginw/test_seginw_hq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! ; 6 | 7 | python test_ap_on_seginw.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ --use_sam_hq 8 | done -------------------------------------------------------------------------------- /seginw/test_seginw_sam2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! ; 6 | 7 | python test_ap_on_seginw_sam2.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ 8 | done -------------------------------------------------------------------------------- /seginw/test_seginw_sam_hq2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! ; 6 | 7 | python test_ap_on_seginw_sam2.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ --use_sam_hq 8 | done -------------------------------------------------------------------------------- /segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .build_sam_baseline import sam_model_registry_baseline 15 | from .predictor import SamPredictor 16 | from .automatic_mask_generator import SamAutomaticMaskGenerator 17 | -------------------------------------------------------------------------------- /segment_anything/build_sam_baseline.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer, TinyViT 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | def build_sam_vit_t(checkpoint=None): 48 | prompt_embed_dim = 256 49 | image_size = 1024 50 | vit_patch_size = 16 51 | image_embedding_size = image_size // vit_patch_size 52 | mobile_sam = Sam( 53 | image_encoder=TinyViT(img_size=1024, in_chans=3, num_classes=1000, 54 | embed_dims=[64, 128, 160, 320], 55 | depths=[2, 2, 6, 2], 56 | num_heads=[2, 4, 5, 10], 57 | window_sizes=[7, 7, 14, 7], 58 | mlp_ratio=4., 59 | drop_rate=0., 60 | drop_path_rate=0.0, 61 | use_checkpoint=False, 62 | mbconv_expand_ratio=4.0, 63 | local_conv_size=3, 64 | layer_lr_decay=0.8 65 | ), 66 | prompt_encoder=PromptEncoder( 67 | embed_dim=prompt_embed_dim, 68 | image_embedding_size=(image_embedding_size, image_embedding_size), 69 | input_image_size=(image_size, image_size), 70 | mask_in_chans=16, 71 | ), 72 | mask_decoder=MaskDecoder( 73 | num_multimask_outputs=3, 74 | transformer=TwoWayTransformer( 75 | depth=2, 76 | embedding_dim=prompt_embed_dim, 77 | mlp_dim=2048, 78 | num_heads=8, 79 | ), 80 | transformer_dim=prompt_embed_dim, 81 | iou_head_depth=3, 82 | iou_head_hidden_dim=256, 83 | ), 84 | pixel_mean=[123.675, 116.28, 103.53], 85 | pixel_std=[58.395, 57.12, 57.375], 86 | ) 87 | 88 | mobile_sam.eval() 89 | if checkpoint is not None: 90 | with open(checkpoint, "rb") as f: 91 | state_dict = torch.load(f) 92 | mobile_sam.load_state_dict(state_dict) 93 | return mobile_sam 94 | 95 | sam_model_registry_baseline = { 96 | "default": build_sam_vit_h, 97 | "vit_h": build_sam_vit_h, 98 | "vit_l": build_sam_vit_l, 99 | "vit_b": build_sam_vit_b, 100 | "vit_tiny": build_sam_vit_t 101 | } 102 | 103 | 104 | def _build_sam( 105 | encoder_embed_dim, 106 | encoder_depth, 107 | encoder_num_heads, 108 | encoder_global_attn_indexes, 109 | checkpoint=None, 110 | ): 111 | prompt_embed_dim = 256 112 | image_size = 1024 113 | vit_patch_size = 16 114 | image_embedding_size = image_size // vit_patch_size 115 | sam = Sam( 116 | image_encoder=ImageEncoderViT( 117 | depth=encoder_depth, 118 | embed_dim=encoder_embed_dim, 119 | img_size=image_size, 120 | mlp_ratio=4, 121 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 122 | num_heads=encoder_num_heads, 123 | patch_size=vit_patch_size, 124 | qkv_bias=True, 125 | use_rel_pos=True, 126 | global_attn_indexes=encoder_global_attn_indexes, 127 | window_size=14, 128 | out_chans=prompt_embed_dim, 129 | ), 130 | prompt_encoder=PromptEncoder( 131 | embed_dim=prompt_embed_dim, 132 | image_embedding_size=(image_embedding_size, image_embedding_size), 133 | input_image_size=(image_size, image_size), 134 | mask_in_chans=16, 135 | ), 136 | mask_decoder=MaskDecoder( 137 | 
num_multimask_outputs=3, 138 | transformer=TwoWayTransformer( 139 | depth=2, 140 | embedding_dim=prompt_embed_dim, 141 | mlp_dim=2048, 142 | num_heads=8, 143 | ), 144 | transformer_dim=prompt_embed_dim, 145 | iou_head_depth=3, 146 | iou_head_hidden_dim=256, 147 | ), 148 | pixel_mean=[123.675, 116.28, 103.53], 149 | pixel_std=[58.395, 57.12, 57.375], 150 | ) 151 | sam.eval() 152 | if checkpoint is not None: 153 | with open(checkpoint, "rb") as f: 154 | state_dict = torch.load(f) 155 | sam.load_state_dict(state_dict) 156 | return sam -------------------------------------------------------------------------------- /segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder_hq import MaskDecoderHQ 10 | from .mask_decoder import MaskDecoder 11 | from .prompt_encoder import PromptEncoder 12 | from .transformer import TwoWayTransformer 13 | from .tiny_vit_sam import TinyViT 14 | -------------------------------------------------------------------------------- /segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to the longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. 
Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=3 4 | include_trailing_comma=True 5 | known_standard_library=numpy,setuptools 6 | skip_glob=*/__init__.py 7 | known_myself=segment_anything 8 | known_third_party=matplotlib,cv2,torch,torchvision,pycocotools,onnx,black,isort 9 | no_lines_before=STDLIB,THIRDPARTY 10 | sections=FUTURE,STDLIB,THIRDPARTY,MYSELF,FIRSTPARTY,LOCALFOLDER 11 | default_section=FIRSTPARTY 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import find_packages, setup 8 | 9 | setup( 10 | name="segment_anything", 11 | version="1.0", 12 | install_requires=[], 13 | packages=find_packages(exclude="notebooks"), 14 | extras_require={ 15 | "all": ["matplotlib", "pycocotools", "opencv-python", "onnx", "onnxruntime", "timm"], 16 | "dev": ["flake8", "isort", "black", "mypy"], 17 | }, 18 | ) 19 | -------------------------------------------------------------------------------- /train/README.md: -------------------------------------------------------------------------------- 1 | # Training instruction for HQ-SAM 2 | 3 | > [**Segment Anything in High Quality**](https://arxiv.org/abs/2306.01567) 4 | > Lei Ke, Mingqiao Ye, Martin Danelljan, Yifan Liu, Yu-Wing Tai, Chi-Keung Tang, Fisher Yu \ 5 | > ETH Zurich & HKUST 6 | 7 | We organize the training folder as follows. 8 | ``` 9 | train 10 | |____data 11 | |____pretrained_checkpoint 12 | |____train.py 13 | |____utils 14 | | |____dataloader.py 15 | | |____misc.py 16 | | |____loss_mask.py 17 | |____segment_anything_training 18 | |____work_dirs 19 | ``` 20 | 21 | ## 1. Data Preparation 22 | 23 | HQSeg-44K can be downloaded from [hugging face link](https://huggingface.co/sam-hq-team/sam-hq-training/tree/main/data) 24 | 25 | ### Expected dataset structure for HQSeg-44K 26 | 27 | ``` 28 | data 29 | |____DIS5K 30 | |____cascade_psp 31 | | |____DUTS-TE 32 | | |____DUTS-TR 33 | | |____ecssd 34 | | |____fss_all 35 | | |____MSRA_10K 36 | |____thin_object_detection 37 | | |____COIFT 38 | | |____HRSOD 39 | | |____ThinObject5K 40 | 41 | ``` 42 | 43 | ## 2. 
Init Checkpoint 44 | Init checkpoint can be downloaded from [hugging face link](https://huggingface.co/sam-hq-team/sam-hq-training/tree/main/pretrained_checkpoint) 45 | 46 | ### Expected checkpoint 47 | 48 | ``` 49 | pretrained_checkpoint 50 | |____sam_vit_b_maskdecoder.pth 51 | |____sam_vit_b_01ec64.pth 52 | |____sam_vit_l_maskdecoder.pth 53 | |____sam_vit_l_0b3195.pth 54 | |____sam_vit_h_maskdecoder.pth 55 | |____sam_vit_h_4b8939.pth 56 | 57 | ``` 58 | 59 | ## 3. Training 60 | To train HQ-SAM on HQSeg-44K dataset 61 | 62 | ``` 63 | python -m torch.distributed.launch --nproc_per_node= train.py --checkpoint --model-type --output 64 | ``` 65 | 66 | ### Example HQ-SAM-L training script 67 | ``` 68 | python -m torch.distributed.launch --nproc_per_node=8 train.py --checkpoint ./pretrained_checkpoint/sam_vit_l_0b3195.pth --model-type vit_l --output work_dirs/hq_sam_l 69 | ``` 70 | 71 | ### Example HQ-SAM-B training script 72 | ``` 73 | python -m torch.distributed.launch --nproc_per_node=8 train.py --checkpoint ./pretrained_checkpoint/sam_vit_b_01ec64.pth --model-type vit_b --output work_dirs/hq_sam_b 74 | ``` 75 | 76 | ### Example HQ-SAM-H training script 77 | ``` 78 | python -m torch.distributed.launch --nproc_per_node=8 train.py --checkpoint ./pretrained_checkpoint/sam_vit_h_4b8939.pth --model-type vit_h --output work_dirs/hq_sam_h 79 | ``` 80 | 81 | ## 4. Evaluation 82 | To evaluate on 4 HQ-datasets 83 | 84 | ``` 85 | python -m torch.distributed.launch --nproc_per_node= train.py --checkpoint --model-type --output --eval --restore-model 86 | ``` 87 | 88 | ### Example HQ-SAM-L evaluation script 89 | ``` 90 | python -m torch.distributed.launch --nproc_per_node=1 train.py --checkpoint ./pretrained_checkpoint/sam_vit_l_0b3195.pth --model-type vit_l --output work_dirs/hq_sam_l --eval --restore-model work_dirs/hq_sam_l/epoch_11.pth 91 | ``` 92 | 93 | ### Example HQ-SAM-L visualization script 94 | ``` 95 | python -m torch.distributed.launch --nproc_per_node=1 train.py --checkpoint ./pretrained_checkpoint/sam_vit_l_0b3195.pth --model-type vit_l --output work_dirs/hq_sam_l --eval --restore-model work_dirs/hq_sam_l/epoch_11.pth --visualize 96 | ``` -------------------------------------------------------------------------------- /train/segment_anything_training/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | -------------------------------------------------------------------------------- /train/segment_anything_training/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /train/segment_anything_training/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder import MaskDecoder 10 | from .prompt_encoder import PromptEncoder 11 | from .transformer import TwoWayTransformer 12 | -------------------------------------------------------------------------------- /train/segment_anything_training/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /train/segment_anything_training/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /train/segment_anything_training/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 
37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 
97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /visual_demo/1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/1.gif -------------------------------------------------------------------------------- /visual_demo/2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/2.gif -------------------------------------------------------------------------------- /visual_demo/3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/3.gif -------------------------------------------------------------------------------- /visual_demo/4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/4.gif -------------------------------------------------------------------------------- /visual_demo/5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/5.gif -------------------------------------------------------------------------------- /visual_demo/6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/6.gif --------------------------------------------------------------------------------