├── LICENSE ├── README.md ├── demo ├── demo_hqsam.py ├── demo_hqsam_light.py ├── demo_hqsam_pip_example.py ├── demo_sam.py └── input_imgs │ ├── dog.jpg │ ├── example0.png │ ├── example1.png │ ├── example2.png │ ├── example3.png │ ├── example4.png │ ├── example5.png │ ├── example6.png │ ├── example7.png │ └── example8.png ├── figs ├── coco_vis_comp.png ├── davis.png ├── points_comp.png ├── sam-hf-framework.png ├── sam_variants_comp.png ├── sam_vs_hqsam_backbones.png └── ytvis.png ├── sam-hq2 ├── INSTALL.md ├── README.md ├── assets │ └── hq-sam2-results.png ├── checkpoints │ └── download_ckpts.sh ├── demo │ ├── demo_hqsam2.py │ └── input_images │ │ ├── example1.png │ │ ├── example2.png │ │ ├── example3.png │ │ └── example4.png ├── notebooks │ ├── image_predictor_example.ipynb │ ├── images │ │ ├── cars.jpg │ │ ├── groceries.jpg │ │ └── truck.jpg │ ├── video_predictor_example.ipynb │ └── videos │ │ ├── bedroom.mp4 │ │ └── bedroom │ │ ├── 00000.jpg │ │ ├── 00001.jpg │ │ ├── 00002.jpg │ │ ├── 00003.jpg │ │ ├── 00004.jpg │ │ ├── 00005.jpg │ │ ├── 00006.jpg │ │ ├── 00007.jpg │ │ ├── 00008.jpg │ │ ├── 00009.jpg │ │ ├── 00010.jpg │ │ ├── 00011.jpg │ │ ├── 00012.jpg │ │ ├── 00013.jpg │ │ ├── 00014.jpg │ │ ├── 00015.jpg │ │ ├── 00016.jpg │ │ ├── 00017.jpg │ │ ├── 00018.jpg │ │ ├── 00019.jpg │ │ ├── 00020.jpg │ │ ├── 00021.jpg │ │ ├── 00022.jpg │ │ ├── 00023.jpg │ │ ├── 00024.jpg │ │ ├── 00025.jpg │ │ ├── 00026.jpg │ │ ├── 00027.jpg │ │ ├── 00028.jpg │ │ ├── 00029.jpg │ │ ├── 00030.jpg │ │ ├── 00031.jpg │ │ ├── 00032.jpg │ │ ├── 00033.jpg │ │ ├── 00034.jpg │ │ ├── 00035.jpg │ │ ├── 00036.jpg │ │ ├── 00037.jpg │ │ ├── 00038.jpg │ │ ├── 00039.jpg │ │ ├── 00040.jpg │ │ ├── 00041.jpg │ │ ├── 00042.jpg │ │ ├── 00043.jpg │ │ ├── 00044.jpg │ │ ├── 00045.jpg │ │ ├── 00046.jpg │ │ ├── 00047.jpg │ │ ├── 00048.jpg │ │ ├── 00049.jpg │ │ ├── 00050.jpg │ │ ├── 00051.jpg │ │ ├── 00052.jpg │ │ ├── 00053.jpg │ │ ├── 00054.jpg │ │ ├── 00055.jpg │ │ ├── 00056.jpg │ │ ├── 00057.jpg │ │ ├── 00058.jpg │ │ ├── 00059.jpg │ │ ├── 00060.jpg │ │ ├── 00061.jpg │ │ ├── 00062.jpg │ │ ├── 00063.jpg │ │ ├── 00064.jpg │ │ ├── 00065.jpg │ │ ├── 00066.jpg │ │ ├── 00067.jpg │ │ ├── 00068.jpg │ │ ├── 00069.jpg │ │ ├── 00070.jpg │ │ ├── 00071.jpg │ │ ├── 00072.jpg │ │ ├── 00073.jpg │ │ ├── 00074.jpg │ │ ├── 00075.jpg │ │ ├── 00076.jpg │ │ ├── 00077.jpg │ │ ├── 00078.jpg │ │ ├── 00079.jpg │ │ ├── 00080.jpg │ │ ├── 00081.jpg │ │ ├── 00082.jpg │ │ ├── 00083.jpg │ │ ├── 00084.jpg │ │ ├── 00085.jpg │ │ ├── 00086.jpg │ │ ├── 00087.jpg │ │ ├── 00088.jpg │ │ ├── 00089.jpg │ │ ├── 00090.jpg │ │ ├── 00091.jpg │ │ ├── 00092.jpg │ │ ├── 00093.jpg │ │ ├── 00094.jpg │ │ ├── 00095.jpg │ │ ├── 00096.jpg │ │ ├── 00097.jpg │ │ ├── 00098.jpg │ │ ├── 00099.jpg │ │ ├── 00100.jpg │ │ ├── 00101.jpg │ │ ├── 00102.jpg │ │ ├── 00103.jpg │ │ ├── 00104.jpg │ │ ├── 00105.jpg │ │ ├── 00106.jpg │ │ ├── 00107.jpg │ │ ├── 00108.jpg │ │ ├── 00109.jpg │ │ ├── 00110.jpg │ │ ├── 00111.jpg │ │ ├── 00112.jpg │ │ ├── 00113.jpg │ │ ├── 00114.jpg │ │ ├── 00115.jpg │ │ ├── 00116.jpg │ │ ├── 00117.jpg │ │ ├── 00118.jpg │ │ ├── 00119.jpg │ │ ├── 00120.jpg │ │ ├── 00121.jpg │ │ ├── 00122.jpg │ │ ├── 00123.jpg │ │ ├── 00124.jpg │ │ ├── 00125.jpg │ │ ├── 00126.jpg │ │ ├── 00127.jpg │ │ ├── 00128.jpg │ │ ├── 00129.jpg │ │ ├── 00130.jpg │ │ ├── 00131.jpg │ │ ├── 00132.jpg │ │ ├── 00133.jpg │ │ ├── 00134.jpg │ │ ├── 00135.jpg │ │ ├── 00136.jpg │ │ ├── 00137.jpg │ │ ├── 00138.jpg │ │ ├── 00139.jpg │ │ ├── 00140.jpg │ │ ├── 00141.jpg │ │ ├── 00142.jpg │ │ ├── 00143.jpg │ │ ├── 00144.jpg │ │ 
├── 00145.jpg │ │ ├── 00146.jpg │ │ ├── 00147.jpg │ │ ├── 00148.jpg │ │ ├── 00149.jpg │ │ ├── 00150.jpg │ │ ├── 00151.jpg │ │ ├── 00152.jpg │ │ ├── 00153.jpg │ │ ├── 00154.jpg │ │ ├── 00155.jpg │ │ ├── 00156.jpg │ │ ├── 00157.jpg │ │ ├── 00158.jpg │ │ ├── 00159.jpg │ │ ├── 00160.jpg │ │ ├── 00161.jpg │ │ ├── 00162.jpg │ │ ├── 00163.jpg │ │ ├── 00164.jpg │ │ ├── 00165.jpg │ │ ├── 00166.jpg │ │ ├── 00167.jpg │ │ ├── 00168.jpg │ │ ├── 00169.jpg │ │ ├── 00170.jpg │ │ ├── 00171.jpg │ │ ├── 00172.jpg │ │ ├── 00173.jpg │ │ ├── 00174.jpg │ │ ├── 00175.jpg │ │ ├── 00176.jpg │ │ ├── 00177.jpg │ │ ├── 00178.jpg │ │ ├── 00179.jpg │ │ ├── 00180.jpg │ │ ├── 00181.jpg │ │ ├── 00182.jpg │ │ ├── 00183.jpg │ │ ├── 00184.jpg │ │ ├── 00185.jpg │ │ ├── 00186.jpg │ │ ├── 00187.jpg │ │ ├── 00188.jpg │ │ ├── 00189.jpg │ │ ├── 00190.jpg │ │ ├── 00191.jpg │ │ ├── 00192.jpg │ │ ├── 00193.jpg │ │ ├── 00194.jpg │ │ ├── 00195.jpg │ │ ├── 00196.jpg │ │ ├── 00197.jpg │ │ ├── 00198.jpg │ │ └── 00199.jpg ├── pyproject.toml ├── sam2 │ ├── __init__.py │ ├── automatic_mask_generator.py │ ├── build_sam.py │ ├── configs │ │ └── sam2.1 │ │ │ ├── sam2.1_hiera_b+.yaml │ │ │ ├── sam2.1_hiera_l.yaml │ │ │ ├── sam2.1_hiera_s.yaml │ │ │ ├── sam2.1_hiera_t.yaml │ │ │ └── sam2.1_hq_hiera_l.yaml │ ├── csrc │ │ └── connected_components.cu │ ├── modeling │ │ ├── __init__.py │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── hieradet.py │ │ │ ├── image_encoder.py │ │ │ └── utils.py │ │ ├── memory_attention.py │ │ ├── memory_encoder.py │ │ ├── position_encoding.py │ │ ├── sam │ │ │ ├── __init__.py │ │ │ ├── mask_decoder.py │ │ │ ├── mask_hq_decoder.py │ │ │ ├── prompt_encoder.py │ │ │ └── transformer.py │ │ ├── sam2_base.py │ │ ├── sam2_hq_base.py │ │ └── sam2_utils.py │ ├── sam2_hq_video_predictor.py │ ├── sam2_image_predictor.py │ ├── sam2_video_predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── misc.py │ │ └── transforms.py └── setup.py ├── scripts └── export_onnx_model.py ├── seginw ├── GroundingDINO │ ├── .asset │ │ ├── COCO.png │ │ ├── GD_GLIGEN.png │ │ ├── GD_SD.png │ │ ├── ODinW.png │ │ ├── arch.png │ │ ├── cats.png │ │ └── hero_figure.png │ ├── LICENSE │ ├── README.md │ ├── demo │ │ ├── gradio_app.py │ │ └── inference_on_a_image.py │ ├── groundingdino │ │ ├── __init__.py │ │ ├── config │ │ │ ├── GroundingDINO_SwinB.py │ │ │ └── GroundingDINO_SwinT_OGC.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── cocogrounding_eval.py │ │ │ └── transforms.py │ │ ├── models │ │ │ ├── GroundingDINO │ │ │ │ ├── __init__.py │ │ │ │ ├── backbone │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── backbone.py │ │ │ │ │ ├── position_encoding.py │ │ │ │ │ └── swin_transformer.py │ │ │ │ ├── bertwarper.py │ │ │ │ ├── csrc │ │ │ │ │ ├── MsDeformAttn │ │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ ├── ms_deform_attn_cpu.h │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── cuda_version.cu │ │ │ │ │ └── vision.cpp │ │ │ │ ├── fuse_modules.py │ │ │ │ ├── groundingdino.py │ │ │ │ ├── ms_deform_attn.py │ │ │ │ ├── transformer.py │ │ │ │ ├── transformer_vanilla.py │ │ │ │ └── utils.py │ │ │ ├── __init__.py │ │ │ └── registry.py │ │ ├── util │ │ │ ├── __init__.py │ │ │ ├── box_ops.py │ │ │ ├── get_tokenlizer.py │ │ │ ├── inference.py │ │ │ ├── logger.py │ │ │ ├── misc.py │ │ │ ├── slconfig.py │ │ │ ├── slio.py │ │ │ ├── time_counter.py │ │ │ ├── utils.py │ │ │ ├── visualizer.py │ │ │ └── vl_utils.py │ │ └── version.py │ ├── requirements.txt 
│ └── setup.py ├── README.md ├── logs │ ├── grounded_hqsam.log │ └── grounded_sam.log ├── sam2 ├── segment_anything │ ├── __init__.py │ ├── automatic_mask_generator.py │ ├── build_sam.py │ ├── build_sam_hq.py │ ├── modeling │ │ ├── __init__.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── mask_decoder.py │ │ ├── mask_decoder_hq.py │ │ ├── prompt_encoder.py │ │ ├── sam.py │ │ └── transformer.py │ ├── predictor.py │ └── utils │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── onnx.py │ │ └── transforms.py ├── test_ap_on_seginw.py ├── test_ap_on_seginw_sam2.py ├── test_seginw.sh ├── test_seginw_hq.sh ├── test_seginw_sam2.sh └── test_seginw_sam_hq2.sh ├── segment_anything ├── __init__.py ├── automatic_mask_generator.py ├── build_sam.py ├── build_sam_baseline.py ├── modeling │ ├── __init__.py │ ├── common.py │ ├── image_encoder.py │ ├── mask_decoder.py │ ├── mask_decoder_hq.py │ ├── prompt_encoder.py │ ├── sam.py │ ├── tiny_vit_sam.py │ └── transformer.py ├── predictor.py └── utils │ ├── __init__.py │ ├── amg.py │ ├── onnx.py │ └── transforms.py ├── setup.cfg ├── setup.py ├── train ├── README.md ├── segment_anything_training │ ├── __init__.py │ ├── build_sam.py │ ├── modeling │ │ ├── __init__.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── mask_decoder.py │ │ ├── prompt_encoder.py │ │ ├── sam.py │ │ └── transformer.py │ └── utils │ │ ├── __init__.py │ │ └── transforms.py ├── train.py └── utils │ ├── dataloader.py │ ├── loss_mask.py │ └── misc.py └── visual_demo ├── 1.gif ├── 2.gif ├── 3.gif ├── 4.gif ├── 5.gif └── 6.gif /demo/demo_hqsam_light.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | from segment_anything import sam_model_registry, SamPredictor 6 | import os 7 | 8 | def show_mask(mask, ax, random_color=False): 9 | if random_color: 10 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 11 | else: 12 | color = np.array([30/255, 144/255, 255/255, 0.6]) 13 | h, w = mask.shape[-2:] 14 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 15 | ax.imshow(mask_image) 16 | 17 | def show_points(coords, labels, ax, marker_size=375): 18 | pos_points = coords[labels==1] 19 | neg_points = coords[labels==0] 20 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 21 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 22 | 23 | def show_box(box, ax): 24 | x0, y0 = box[0], box[1] 25 | w, h = box[2] - box[0], box[3] - box[1] 26 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 27 | 28 | 29 | def show_res(masks, scores, input_point, input_label, input_box, filename, image): 30 | for i, (mask, score) in enumerate(zip(masks, scores)): 31 | plt.figure(figsize=(10,10)) 32 | plt.imshow(image) 33 | show_mask(mask, plt.gca()) 34 | if input_box is not None: 35 | box = input_box[i] 36 | show_box(box, plt.gca()) 37 | if (input_point is not None) and (input_label is not None): 38 | show_points(input_point, input_label, plt.gca()) 39 | 40 | print(f"Score: {score:.3f}") 41 | plt.axis('off') 42 | plt.savefig(filename+'_'+str(i)+'.png',bbox_inches='tight',pad_inches=-0.1) 43 | plt.close() 44 | 45 | def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): 46 | plt.figure(figsize=(10, 10)) 47 | plt.imshow(image) 48 | for mask in masks: 49 | 
show_mask(mask, plt.gca(), random_color=True)
50 |     for box in input_box:
51 |         show_box(box, plt.gca())
52 |     for score in scores:
53 |         print(f"Score: {score:.3f}")
54 |     plt.axis('off')
55 |     plt.savefig(filename +'.png',bbox_inches='tight',pad_inches=-0.1)
56 |     plt.close()
57 |
58 |
59 | if __name__ == "__main__":
60 |     sam_checkpoint = "./pretrained_checkpoint/sam_hq_vit_tiny.pth"
61 |     model_type = "vit_tiny"
62 |
63 |     device = "cuda"
64 |     sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
65 |     sam.to(device=device)
66 |     sam.eval()
67 |     predictor = SamPredictor(sam)
68 |
69 |
70 |     image = cv2.imread('demo/input_imgs/dog.jpg')
71 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
72 |     predictor.set_image(image)
73 |     # hq_token_only: False means use the hq output to correct the SAM output.
74 |     # True means use the hq output only.
75 |     # Default: False
76 |     hq_token_only = False
77 |     # For the best visualization results on images containing multiple objects (like typical COCO images), we suggest setting hq_token_only=False.
78 |     # For images containing a single object, we suggest setting hq_token_only=True.
79 |     # For quantitative evaluation on COCO/YTVOS/DAVIS/UVO/LVIS etc., we set hq_token_only=False.
80 |
81 |     # box prompt
82 |     input_box = np.array([[784,500,1789,1000]])
83 |     input_point, input_label = None, None
84 |
85 |     masks, scores, logits = predictor.predict(
86 |         point_coords=input_point,
87 |         point_labels=input_label,
88 |         box = input_box,
89 |         multimask_output=False,
90 |         hq_token_only=hq_token_only,
91 |     )
92 |     result_path = 'demo/hq_sam_tiny_result/'
93 |     os.makedirs(result_path, exist_ok=True)
94 |     show_res(masks,scores,input_point, input_label, input_box, result_path + 'dog', image)
95 |
96 |
97 |
98 |     image = cv2.imread('demo/input_imgs/example3.png')
99 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
100 |     predictor.set_image(image)
101 |     hq_token_only = True
102 |     # point prompt
103 |     input_point = np.array([[221,482],[498,633],[750,379]])
104 |     input_label = np.ones(input_point.shape[0])
105 |     input_box = None
106 |
107 |     masks, scores, logits = predictor.predict(
108 |         point_coords=input_point,
109 |         point_labels=input_label,
110 |         box = input_box,
111 |         multimask_output=False,
112 |         hq_token_only=hq_token_only,
113 |     )
114 |     show_res(masks,scores,input_point, input_label, input_box, result_path + 'example3', image)
115 |
116 |
117 |     image = cv2.imread('demo/input_imgs/example7.png')
118 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
119 |     predictor.set_image(image)
120 |     hq_token_only = False
121 |     # multi box prompt
122 |     input_box = torch.tensor([[45,260,515,470], [310,228,424,296]],device=predictor.device)
123 |     transformed_box = predictor.transform.apply_boxes_torch(input_box, image.shape[:2])
124 |     input_point, input_label = None, None
125 |     masks, scores, logits = predictor.predict_torch(
126 |         point_coords=input_point,
127 |         point_labels=input_label,
128 |         boxes=transformed_box,
129 |         multimask_output=False,
130 |         hq_token_only=hq_token_only,
131 |     )
132 |     masks = masks.squeeze(1).cpu().numpy()
133 |     scores = scores.squeeze(1).cpu().numpy()
134 |     input_box = input_box.cpu().numpy()
135 |     show_res_multi(masks, scores, input_point, input_label, input_box, result_path + 'example7', image)
136 |
137 |
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/demo/demo_sam.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import
matplotlib.pyplot as plt 4 | import cv2 5 | from segment_anything import sam_model_registry_baseline, SamPredictor 6 | import os 7 | 8 | def show_mask(mask, ax, random_color=False): 9 | if random_color: 10 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 11 | else: 12 | color = np.array([30/255, 144/255, 255/255, 0.6]) 13 | h, w = mask.shape[-2:] 14 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 15 | ax.imshow(mask_image) 16 | 17 | def show_points(coords, labels, ax, marker_size=375): 18 | pos_points = coords[labels==1] 19 | neg_points = coords[labels==0] 20 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 21 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 22 | 23 | def show_box(box, ax): 24 | x0, y0 = box[0], box[1] 25 | w, h = box[2] - box[0], box[3] - box[1] 26 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 27 | 28 | 29 | def show_res(masks, scores, input_point, input_label, input_box, filename, image): 30 | for i, (mask, score) in enumerate(zip(masks, scores)): 31 | plt.figure(figsize=(10,10)) 32 | plt.imshow(image) 33 | show_mask(mask, plt.gca()) 34 | if input_box is not None: 35 | box = input_box[i] 36 | show_box(box, plt.gca()) 37 | if (input_point is not None) and (input_label is not None): 38 | show_points(input_point, input_label, plt.gca()) 39 | 40 | print(f"Score: {score:.3f}") 41 | plt.axis('off') 42 | plt.savefig(filename+'_'+str(i)+'.png',bbox_inches='tight',pad_inches=-0.1) 43 | plt.close() 44 | 45 | def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): 46 | plt.figure(figsize=(10, 10)) 47 | plt.imshow(image) 48 | for mask in masks: 49 | show_mask(mask, plt.gca(), random_color=True) 50 | for box in input_box: 51 | show_box(box, plt.gca()) 52 | for score in scores: 53 | print(f"Score: {score:.3f}") 54 | plt.axis('off') 55 | plt.savefig(filename +'.png',bbox_inches='tight',pad_inches=-0.1) 56 | plt.close() 57 | 58 | if __name__ == "__main__": 59 | sam_checkpoint = "./pretrained_checkpoint/sam_vit_l_0b3195.pth" 60 | model_type = "vit_l" 61 | device = "cuda" 62 | sam = sam_model_registry_baseline[model_type](checkpoint=sam_checkpoint) 63 | sam.to(device=device) 64 | predictor = SamPredictor(sam) 65 | 66 | for i in range(8): 67 | print("image: ",i) 68 | image = cv2.imread('demo/input_imgs/example'+str(i)+'.png') 69 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 70 | predictor.set_image(image) 71 | 72 | if i==0: 73 | input_box = np.array([[4,13,1007,1023]]) 74 | input_point, input_label = None, None 75 | elif i==1: 76 | input_box = np.array([[306, 132, 925, 893]]) 77 | input_point, input_label = None, None 78 | elif i==2: 79 | input_point = np.array([[495,518],[217,140]]) 80 | input_label = np.ones(input_point.shape[0]) 81 | input_box = None 82 | elif i==3: 83 | input_point = np.array([[221,482],[498,633],[750,379]]) 84 | input_label = np.ones(input_point.shape[0]) 85 | input_box = None 86 | elif i==4: 87 | input_box = np.array([[64,76,940,919]]) 88 | input_point, input_label = None, None 89 | elif i==5: 90 | input_point = np.array([[373,363], [452, 575]]) 91 | input_label = np.ones(input_point.shape[0]) 92 | input_box = None 93 | elif i==6: 94 | input_box = np.array([[181, 196, 757, 495]]) 95 | input_point, input_label = None, None 96 | elif i==7: 97 | # multi box input 98 | input_box = 
torch.tensor([[45,260,515,470], [310,228,424,296]],device=predictor.device) 99 | transformed_box = predictor.transform.apply_boxes_torch(input_box, image.shape[:2]) 100 | input_point, input_label = None, None 101 | 102 | batch_box = False if input_box is None else len(input_box)>1 103 | result_path = 'demo/baseline_sam_result/' 104 | os.makedirs(result_path, exist_ok=True) 105 | 106 | if not batch_box: 107 | masks, scores, logits = predictor.predict( 108 | point_coords=input_point, 109 | point_labels=input_label, 110 | box = input_box, 111 | multimask_output=False, 112 | ) 113 | show_res(masks,scores,input_point, input_label, input_box, result_path + 'example'+str(i), image) 114 | else: 115 | masks, scores, logits = predictor.predict_torch( 116 | point_coords=input_point, 117 | point_labels=input_label, 118 | boxes=transformed_box, 119 | multimask_output=False, 120 | ) 121 | masks = masks.squeeze(1).cpu().numpy() 122 | scores = scores.squeeze(1).cpu().numpy() 123 | input_box = input_box.cpu().numpy() 124 | show_res_multi(masks, scores, input_point, input_label, input_box, result_path + 'example'+str(i), image) 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /demo/input_imgs/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/dog.jpg -------------------------------------------------------------------------------- /demo/input_imgs/example0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example0.png -------------------------------------------------------------------------------- /demo/input_imgs/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example1.png -------------------------------------------------------------------------------- /demo/input_imgs/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example2.png -------------------------------------------------------------------------------- /demo/input_imgs/example3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example3.png -------------------------------------------------------------------------------- /demo/input_imgs/example4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example4.png -------------------------------------------------------------------------------- /demo/input_imgs/example5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example5.png -------------------------------------------------------------------------------- /demo/input_imgs/example6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example6.png -------------------------------------------------------------------------------- /demo/input_imgs/example7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example7.png -------------------------------------------------------------------------------- /demo/input_imgs/example8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/demo/input_imgs/example8.png -------------------------------------------------------------------------------- /figs/coco_vis_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/coco_vis_comp.png -------------------------------------------------------------------------------- /figs/davis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/davis.png -------------------------------------------------------------------------------- /figs/points_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/points_comp.png -------------------------------------------------------------------------------- /figs/sam-hf-framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/sam-hf-framework.png -------------------------------------------------------------------------------- /figs/sam_variants_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/sam_variants_comp.png -------------------------------------------------------------------------------- /figs/sam_vs_hqsam_backbones.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/sam_vs_hqsam_backbones.png -------------------------------------------------------------------------------- /figs/ytvis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/figs/ytvis.png -------------------------------------------------------------------------------- /sam-hq2/assets/hq-sam2-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/assets/hq-sam2-results.png -------------------------------------------------------------------------------- /sam-hq2/checkpoints/download_ckpts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Meta Platforms, Inc. and affiliates. 4 | # All rights reserved. 
5 |
6 | # This source code is licensed under the license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | # Use either wget or curl to download the checkpoints
10 | if command -v wget &> /dev/null; then
11 |     CMD="wget"
12 | elif command -v curl &> /dev/null; then
13 |     CMD="curl -L -O"
14 | else
15 |     echo "Please install wget or curl to download the checkpoints."
16 |     exit 1
17 | fi
18 |
19 | # Define the URLs for SAM 2 checkpoints
20 | # SAM2_BASE_URL="https://dl.fbaipublicfiles.com/segment_anything_2/072824"
21 | # sam2_hiera_t_url="${SAM2_BASE_URL}/sam2_hiera_tiny.pt"
22 | # sam2_hiera_s_url="${SAM2_BASE_URL}/sam2_hiera_small.pt"
23 | # sam2_hiera_b_plus_url="${SAM2_BASE_URL}/sam2_hiera_base_plus.pt"
24 | # sam2_hiera_l_url="${SAM2_BASE_URL}/sam2_hiera_large.pt"
25 |
26 | # Download each of the four checkpoints using wget
27 | # echo "Downloading sam2_hiera_tiny.pt checkpoint..."
28 | # $CMD $sam2_hiera_t_url || { echo "Failed to download checkpoint from $sam2_hiera_t_url"; exit 1; }
29 |
30 | # echo "Downloading sam2_hiera_small.pt checkpoint..."
31 | # $CMD $sam2_hiera_s_url || { echo "Failed to download checkpoint from $sam2_hiera_s_url"; exit 1; }
32 |
33 | # echo "Downloading sam2_hiera_base_plus.pt checkpoint..."
34 | # $CMD $sam2_hiera_b_plus_url || { echo "Failed to download checkpoint from $sam2_hiera_b_plus_url"; exit 1; }
35 |
36 | # echo "Downloading sam2_hiera_large.pt checkpoint..."
37 | # $CMD $sam2_hiera_l_url || { echo "Failed to download checkpoint from $sam2_hiera_l_url"; exit 1; }
38 |
39 | # Define the URLs for SAM 2.1 checkpoints
40 | #SAM2p1_BASE_URL="https://dl.fbaipublicfiles.com/segment_anything_2/092824"
41 | #sam2p1_hiera_t_url="${SAM2p1_BASE_URL}/sam2.1_hiera_tiny.pt"
42 | #sam2p1_hiera_s_url="${SAM2p1_BASE_URL}/sam2.1_hiera_small.pt"
43 | #sam2p1_hiera_b_plus_url="${SAM2p1_BASE_URL}/sam2.1_hiera_base_plus.pt"
44 | #sam2p1_hiera_l_url="${SAM2p1_BASE_URL}/sam2.1_hiera_large.pt"
45 | # sam2p1_hq_hiera_l_url="https://huggingface.co/mqye/sam-hq2/resolve/main/sam2.1_hq_hiera_large.pt?download=true"
46 | sam2p1_hq_hiera_l_url="https://huggingface.co/lkeab/hq-sam/resolve/main/sam2.1_hq_hiera_large.pt?download=true"
47 | # SAM 2.1 checkpoints
48 |
49 | echo "Downloading sam2.1_hq_hiera_large.pt checkpoint..."
50 | $CMD $sam2p1_hq_hiera_l_url || { echo "Failed to download checkpoint from $sam2p1_hq_hiera_l_url"; exit 1; }
51 |
52 | mv "sam2.1_hq_hiera_large.pt?download=true" sam2.1_hq_hiera_large.pt
53 |
54 | echo "HQ-SAM-2 checkpoint downloaded successfully."
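
# Alternative (untested sketch): the same checkpoint can also be fetched with the
# Hugging Face CLI, assuming huggingface_hub[cli] is installed:
#   pip install -U "huggingface_hub[cli]"
#   huggingface-cli download lkeab/hq-sam sam2.1_hq_hiera_large.pt --local-dir .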
55 | -------------------------------------------------------------------------------- /sam-hq2/demo/demo_hqsam2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import cv2 5 | from sam2.build_sam import build_sam2 6 | from sam2.sam2_image_predictor import SAM2ImagePredictor 7 | import os 8 | 9 | def show_mask(mask, ax, random_color=False): 10 | if random_color: 11 | color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) 12 | else: 13 | color = np.array([30/255, 144/255, 255/255, 0.6]) 14 | h, w = mask.shape[-2:] 15 | mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) 16 | ax.imshow(mask_image) 17 | 18 | def show_points(coords, labels, ax, marker_size=375): 19 | pos_points = coords[labels==1] 20 | neg_points = coords[labels==0] 21 | ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 22 | ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25) 23 | 24 | def show_box(box, ax): 25 | x0, y0 = box[0], box[1] 26 | w, h = box[2] - box[0], box[3] - box[1] 27 | ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2)) 28 | 29 | 30 | def show_res(masks, scores, input_point, input_label, input_box, filename, image): 31 | for i, (mask, score) in enumerate(zip(masks, scores)): 32 | plt.figure(figsize=(10,10)) 33 | plt.imshow(image) 34 | show_mask(mask, plt.gca()) 35 | if input_box is not None: 36 | box = input_box[i] 37 | show_box(box, plt.gca()) 38 | if (input_point is not None) and (input_label is not None): 39 | show_points(input_point, input_label, plt.gca()) 40 | 41 | print(f"Score: {score:.3f}") 42 | plt.axis('off') 43 | plt.savefig(filename+'_'+str(i)+'.png',bbox_inches='tight',pad_inches=-0.1) 44 | plt.close() 45 | 46 | def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): 47 | plt.figure(figsize=(10, 10)) 48 | plt.imshow(image) 49 | for mask in masks: 50 | show_mask(mask, plt.gca(), random_color=True) 51 | for box in input_box: 52 | show_box(box, plt.gca()) 53 | for score in scores: 54 | print(f"Score: {score:.3f}") 55 | plt.axis('off') 56 | plt.savefig(filename +'.png',bbox_inches='tight',pad_inches=-0.1) 57 | plt.close() 58 | 59 | 60 | if __name__ == "__main__": 61 | checkpoint = "./checkpoints/sam2.1_hq_hiera_large.pt" 62 | model_cfg = "configs/sam2.1/sam2.1_hq_hiera_l.yaml" 63 | predictor = SAM2ImagePredictor(build_sam2(model_cfg, checkpoint)) 64 | 65 | for i in range(1,5): 66 | print("image: ",i) 67 | # hq_token_only: False means use hq output to correct SAM output. 68 | # True means use hq output only. 
69 |         # Default: False
70 |         hq_token_only = False
71 |         # For the best visualization results on images containing multiple objects (like typical COCO images), we suggest setting hq_token_only=False.
72 |         # For images containing a single object, we suggest setting hq_token_only=True.
73 |         # For quantitative evaluation on COCO/YTVOS/DAVIS/UVO/LVIS etc., we set hq_token_only=False.
74 |
75 |         image = cv2.imread('./demo/input_images/example'+str(i)+'.png')
76 |         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
77 |         predictor.set_image(image)
78 |
79 |         if i==1:
80 |             input_box = np.array([[306, 132, 925, 893]])
81 |             input_point, input_label = None, None
82 |         elif i==2:
83 |             input_point = np.array([[495,518],[217,140]])
84 |             input_label = np.ones(input_point.shape[0])
85 |             input_box = None
86 |         elif i==3:
87 |             input_box = np.array([[64,76,940,919]])
88 |             input_point, input_label = None, None
89 |         elif i==4:
90 |             # multi box input
91 |             input_box = torch.tensor([[45,260,515,470], [310,228,424,296]],device=predictor.device)
92 |             # transformed_box = predictor.transform.apply_boxes_torch(input_box, image.shape[:2])
93 |             input_point, input_label = None, None
94 |
95 |         batch_box = False if input_box is None else len(input_box)>1
96 |         result_path = 'demo/hq_sam_result_vis/'
97 |         os.makedirs(result_path, exist_ok=True)
98 |
99 |         with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
100 |             masks, scores, logits = predictor.predict(point_coords=input_point,
101 |                 point_labels=input_label,
102 |                 box=input_box,
103 |                 multimask_output=False, hq_token_only=hq_token_only)
104 |
105 |         if not batch_box:
106 |             show_res(masks,scores,input_point, input_label, input_box, result_path + 'example'+str(i), image)
107 |         else:
108 |             masks = masks.squeeze(1)
109 |             scores = scores.squeeze(1)
110 |             input_box = input_box.cpu().numpy()
111 |             show_res_multi(masks, scores, input_point, input_label, input_box, result_path + 'example'+str(i), image)
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example1.png
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example2.png
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example3.png
--------------------------------------------------------------------------------
/sam-hq2/demo/input_images/example4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/demo/input_images/example4.png
--------------------------------------------------------------------------------
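The demo_hqsam2.py script above only saves matplotlib overlays. If the raw masks are also needed on disk (e.g. for later evaluation), a minimal sketch along the following lines should work; the helper name and output naming are illustrative, and it assumes masks is the (N, H, W) array returned by predictor.predict with multimask_output=False:

import cv2
import numpy as np

def save_binary_masks(masks: np.ndarray, prefix: str) -> None:
    # Write each (H, W) mask as an 8-bit black-and-white PNG,
    # scaling boolean / {0, 1} values up to {0, 255}.
    for idx, mask in enumerate(masks):
        cv2.imwrite(f"{prefix}_{idx}_mask.png", mask.astype(np.uint8) * 255)

--------------------------------------------------------------------------------
/sam-hq2/notebooks/images/cars.jpg:
--------------------------------------------------------------------------------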
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/images/cars.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/images/groceries.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/images/groceries.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/images/truck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/images/truck.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom.mp4 -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00000.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00000.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00001.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00002.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00003.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00004.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00005.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00006.jpg -------------------------------------------------------------------------------- 
/sam-hq2/notebooks/videos/bedroom/00007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00007.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00008.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00008.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00009.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00009.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00010.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00011.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00011.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00012.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00012.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00013.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00013.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00014.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00014.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00015.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00015.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00016.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00016.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00017.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00017.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00018.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00018.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00019.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00019.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00020.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00020.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00021.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00021.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00022.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00022.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00023.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00023.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00024.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00024.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00025.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00025.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00026.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00026.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00027.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00027.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00028.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00028.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00029.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00029.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00030.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00030.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00031.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00031.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00032.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00032.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00033.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00033.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00034.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00034.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00035.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00035.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00036.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00036.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00037.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00037.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00038.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00038.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00039.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00039.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00040.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00040.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00041.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00041.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00042.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00042.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00043.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00043.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00044.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00044.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00045.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00045.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00046.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00046.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00047.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00047.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00048.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00048.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00049.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00049.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00050.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00050.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00051.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00051.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00052.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00052.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00053.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00053.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00054.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00054.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00055.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00055.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00056.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00056.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00057.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00057.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00058.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00058.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00059.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00059.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00060.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00060.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00061.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00061.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00062.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00062.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00063.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00063.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00064.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00064.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00065.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00065.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00066.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00066.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00067.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00067.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00068.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00068.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00069.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00069.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00070.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00070.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00071.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00071.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00072.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00072.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00073.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00073.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00074.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00074.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00075.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00075.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00076.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00076.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00077.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00077.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00078.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00078.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00079.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00079.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00080.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00080.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00081.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00081.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00082.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00082.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00083.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00083.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00084.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00084.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00085.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00085.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00086.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00086.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00087.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00087.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00088.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00088.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00089.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00089.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00090.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00090.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00091.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00091.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00092.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00092.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00093.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00093.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00094.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00094.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00095.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00095.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00096.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00096.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00097.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00097.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00098.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00098.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00099.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00099.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00100.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00100.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00101.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00102.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00102.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00103.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00103.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00104.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00104.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00105.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00105.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00106.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00107.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00107.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00108.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00108.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00109.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00109.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00110.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00110.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00111.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00111.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00112.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00112.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00113.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00113.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00114.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00114.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00115.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00115.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00116.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00116.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00117.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00117.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00118.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00118.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00119.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00119.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00120.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00120.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00121.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00121.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00122.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00122.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00123.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00123.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00124.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00124.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00125.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00125.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00126.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00126.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00127.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00127.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00128.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00129.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00129.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00130.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00130.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00131.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00131.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00132.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00132.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00133.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00133.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00134.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00134.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00135.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00135.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00136.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00136.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00137.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00137.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00138.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00138.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00139.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00139.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00140.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00140.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00141.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00141.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00142.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00142.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00143.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00143.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00144.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00144.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00145.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00145.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00146.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00146.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00147.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00147.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00148.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00148.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00149.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00149.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00150.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00151.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00151.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00152.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00152.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00153.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00153.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00154.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00154.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00155.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00155.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00156.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00156.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00157.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00157.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00158.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00158.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00159.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00159.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00160.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00160.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00161.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00161.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00162.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00162.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00163.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00163.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00164.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00164.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00165.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00165.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00166.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00167.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00167.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00168.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00168.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00169.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00169.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00170.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00170.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00171.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00171.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00172.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00172.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00173.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00173.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00174.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00174.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00175.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00175.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00176.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00176.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00177.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00177.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00178.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00178.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00179.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00179.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00180.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00180.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00181.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00181.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00182.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00182.jpg 
-------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00183.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00183.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00184.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00184.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00185.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00185.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00186.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00186.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00187.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00187.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00188.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00188.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00189.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00189.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00190.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00190.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00191.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00191.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00192.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00192.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00193.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00193.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00194.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00194.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00195.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00195.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00196.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00196.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00197.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00197.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00198.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00198.jpg -------------------------------------------------------------------------------- /sam-hq2/notebooks/videos/bedroom/00199.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/sam-hq2/notebooks/videos/bedroom/00199.jpg -------------------------------------------------------------------------------- /sam-hq2/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=61.0", 4 | "torch>=2.3.1", 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /sam-hq2/sam2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from hydra import initialize_config_module 8 | from hydra.core.global_hydra import GlobalHydra 9 | 10 | if not GlobalHydra.instance().is_initialized(): 11 | initialize_config_module("sam2", version_base="1.2") 12 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_b+.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 112 12 | num_heads: 2 13 | neck: 14 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 15 | position_encoding: 16 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 17 | num_pos_feats: 256 18 | normalize: true 19 | scale: null 20 | temperature: 10000 21 | d_model: 256 22 | backbone_channel_list: [896, 448, 224, 112] 23 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 24 | fpn_interp_model: nearest 25 | 26 | memory_attention: 27 | _target_: sam2.modeling.memory_attention.MemoryAttention 28 | d_model: 256 29 | pos_enc_at_input: true 30 | layer: 31 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 32 | activation: relu 33 | dim_feedforward: 2048 34 | dropout: 0.1 35 | pos_enc_at_attn: false 36 | self_attention: 37 | _target_: sam2.modeling.sam.transformer.RoPEAttention 38 | rope_theta: 10000.0 39 | feat_sizes: [32, 32] 40 | embedding_dim: 256 41 | num_heads: 1 42 | downsample_rate: 1 43 | dropout: 0.1 44 | d_model: 256 45 | pos_enc_at_cross_attn_keys: true 46 | pos_enc_at_cross_attn_queries: false 47 | cross_attention: 48 | _target_: sam2.modeling.sam.transformer.RoPEAttention 49 | rope_theta: 10000.0 50 | feat_sizes: [32, 32] 51 | rope_k_repeat: True 52 | embedding_dim: 256 53 | num_heads: 1 54 | downsample_rate: 1 55 | dropout: 0.1 56 | kv_in_dim: 64 57 | num_layers: 4 58 | 59 | memory_encoder: 60 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 61 | out_dim: 64 62 | position_encoding: 63 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 64 | num_pos_feats: 64 65 | normalize: true 66 | scale: null 67 | temperature: 10000 68 | mask_downsampler: 69 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 70 | kernel_size: 3 71 | stride: 2 72 | padding: 1 73 | fuser: 74 | _target_: sam2.modeling.memory_encoder.Fuser 75 | layer: 76 | _target_: sam2.modeling.memory_encoder.CXBlock 77 | dim: 256 78 | kernel_size: 7 79 | padding: 3 80 | layer_scale_init_value: 1e-6 81 | use_dwconv: True # depth-wise convs 82 | num_layers: 2 83 | 84 | num_maskmem: 7 85 | image_size: 1024 86 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 87 | sigmoid_scale_for_mem_enc: 20.0 88 | sigmoid_bias_for_mem_enc: -10.0 89 | use_mask_input_as_output_without_sam: true 90 | # Memory 91 | directly_add_no_mem_embed: true 92 | no_obj_embed_spatial: true 93 | # use high-resolution feature map in the SAM mask decoder 94 | use_high_res_features_in_sam: true 95 | # output 3 masks on the first click on initial conditioning frames 96 | multimask_output_in_sam: true 97 | # SAM heads 98 | iou_prediction_use_sigmoid: True 99 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 100 | use_obj_ptrs_in_encoder: true 101 | add_tpos_enc_to_obj_ptrs: 
true 102 | proj_tpos_enc_in_obj_ptrs: true 103 | use_signed_tpos_enc_to_obj_ptrs: true 104 | only_obj_ptrs_in_the_past_for_eval: true 105 | # object occlusion prediction 106 | pred_obj_scores: true 107 | pred_obj_scores_mlp: true 108 | fixed_no_obj_ptr: true 109 | # multimask tracking settings 110 | multimask_output_for_tracking: true 111 | use_multimask_token_for_obj_ptr: true 112 | multimask_min_pt_num: 0 113 | multimask_max_pt_num: 1 114 | use_mlp_for_obj_ptr_proj: true 115 | # Compilation flag 116 | compile_image_encoder: False 117 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_l.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 144 12 | num_heads: 2 13 | stages: [2, 6, 36, 4] 14 | global_att_blocks: [23, 33, 43] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | window_spec: [8, 4, 16, 8] 17 | neck: 18 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 19 | position_encoding: 20 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 21 | num_pos_feats: 256 22 | normalize: true 23 | scale: null 24 | temperature: 10000 25 | d_model: 256 26 | backbone_channel_list: [1152, 576, 288, 144] 27 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 28 | fpn_interp_model: nearest 29 | 30 | memory_attention: 31 | _target_: sam2.modeling.memory_attention.MemoryAttention 32 | d_model: 256 33 | pos_enc_at_input: true 34 | layer: 35 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 36 | activation: relu 37 | dim_feedforward: 2048 38 | dropout: 0.1 39 | pos_enc_at_attn: false 40 | self_attention: 41 | _target_: sam2.modeling.sam.transformer.RoPEAttention 42 | rope_theta: 10000.0 43 | feat_sizes: [32, 32] 44 | embedding_dim: 256 45 | num_heads: 1 46 | downsample_rate: 1 47 | dropout: 0.1 48 | d_model: 256 49 | pos_enc_at_cross_attn_keys: true 50 | pos_enc_at_cross_attn_queries: false 51 | cross_attention: 52 | _target_: sam2.modeling.sam.transformer.RoPEAttention 53 | rope_theta: 10000.0 54 | feat_sizes: [32, 32] 55 | rope_k_repeat: True 56 | embedding_dim: 256 57 | num_heads: 1 58 | downsample_rate: 1 59 | dropout: 0.1 60 | kv_in_dim: 64 61 | num_layers: 4 62 | 63 | memory_encoder: 64 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 65 | out_dim: 64 66 | position_encoding: 67 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 68 | num_pos_feats: 64 69 | normalize: true 70 | scale: null 71 | temperature: 10000 72 | mask_downsampler: 73 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 74 | kernel_size: 3 75 | stride: 2 76 | padding: 1 77 | fuser: 78 | _target_: sam2.modeling.memory_encoder.Fuser 79 | layer: 80 | _target_: sam2.modeling.memory_encoder.CXBlock 81 | dim: 256 82 | kernel_size: 7 83 | padding: 3 84 | layer_scale_init_value: 1e-6 85 | use_dwconv: True # depth-wise convs 86 | num_layers: 2 87 | 88 | num_maskmem: 7 89 | image_size: 1024 90 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 91 | sigmoid_scale_for_mem_enc: 20.0 92 | sigmoid_bias_for_mem_enc: -10.0 93 | use_mask_input_as_output_without_sam: true 94 | # Memory 95 | directly_add_no_mem_embed: true 96 | 
no_obj_embed_spatial: true 97 | # use high-resolution feature map in the SAM mask decoder 98 | use_high_res_features_in_sam: true 99 | # output 3 masks on the first click on initial conditioning frames 100 | multimask_output_in_sam: true 101 | # SAM heads 102 | iou_prediction_use_sigmoid: True 103 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 104 | use_obj_ptrs_in_encoder: true 105 | add_tpos_enc_to_obj_ptrs: true 106 | proj_tpos_enc_in_obj_ptrs: true 107 | use_signed_tpos_enc_to_obj_ptrs: true 108 | only_obj_ptrs_in_the_past_for_eval: true 109 | # object occlusion prediction 110 | pred_obj_scores: true 111 | pred_obj_scores_mlp: true 112 | fixed_no_obj_ptr: true 113 | # multimask tracking settings 114 | multimask_output_for_tracking: true 115 | use_multimask_token_for_obj_ptr: true 116 | multimask_min_pt_num: 0 117 | multimask_max_pt_num: 1 118 | use_mlp_for_obj_ptr_proj: true 119 | # Compilation flag 120 | compile_image_encoder: False 121 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_s.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 96 12 | num_heads: 1 13 | stages: [1, 2, 11, 2] 14 | global_att_blocks: [7, 10, 13] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | neck: 17 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 18 | position_encoding: 19 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 20 | num_pos_feats: 256 21 | normalize: true 22 | scale: null 23 | temperature: 10000 24 | d_model: 256 25 | backbone_channel_list: [768, 384, 192, 96] 26 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 27 | fpn_interp_model: nearest 28 | 29 | memory_attention: 30 | _target_: sam2.modeling.memory_attention.MemoryAttention 31 | d_model: 256 32 | pos_enc_at_input: true 33 | layer: 34 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 35 | activation: relu 36 | dim_feedforward: 2048 37 | dropout: 0.1 38 | pos_enc_at_attn: false 39 | self_attention: 40 | _target_: sam2.modeling.sam.transformer.RoPEAttention 41 | rope_theta: 10000.0 42 | feat_sizes: [32, 32] 43 | embedding_dim: 256 44 | num_heads: 1 45 | downsample_rate: 1 46 | dropout: 0.1 47 | d_model: 256 48 | pos_enc_at_cross_attn_keys: true 49 | pos_enc_at_cross_attn_queries: false 50 | cross_attention: 51 | _target_: sam2.modeling.sam.transformer.RoPEAttention 52 | rope_theta: 10000.0 53 | feat_sizes: [32, 32] 54 | rope_k_repeat: True 55 | embedding_dim: 256 56 | num_heads: 1 57 | downsample_rate: 1 58 | dropout: 0.1 59 | kv_in_dim: 64 60 | num_layers: 4 61 | 62 | memory_encoder: 63 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 64 | out_dim: 64 65 | position_encoding: 66 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 67 | num_pos_feats: 64 68 | normalize: true 69 | scale: null 70 | temperature: 10000 71 | mask_downsampler: 72 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 73 | kernel_size: 3 74 | stride: 2 75 | padding: 1 76 | fuser: 77 | _target_: sam2.modeling.memory_encoder.Fuser 78 | layer: 79 | _target_: sam2.modeling.memory_encoder.CXBlock 80 | dim: 256 81 | kernel_size: 7 82 | 
padding: 3 83 | layer_scale_init_value: 1e-6 84 | use_dwconv: True # depth-wise convs 85 | num_layers: 2 86 | 87 | num_maskmem: 7 88 | image_size: 1024 89 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 90 | sigmoid_scale_for_mem_enc: 20.0 91 | sigmoid_bias_for_mem_enc: -10.0 92 | use_mask_input_as_output_without_sam: true 93 | # Memory 94 | directly_add_no_mem_embed: true 95 | no_obj_embed_spatial: true 96 | # use high-resolution feature map in the SAM mask decoder 97 | use_high_res_features_in_sam: true 98 | # output 3 masks on the first click on initial conditioning frames 99 | multimask_output_in_sam: true 100 | # SAM heads 101 | iou_prediction_use_sigmoid: True 102 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 103 | use_obj_ptrs_in_encoder: true 104 | add_tpos_enc_to_obj_ptrs: true 105 | proj_tpos_enc_in_obj_ptrs: true 106 | use_signed_tpos_enc_to_obj_ptrs: true 107 | only_obj_ptrs_in_the_past_for_eval: true 108 | # object occlusion prediction 109 | pred_obj_scores: true 110 | pred_obj_scores_mlp: true 111 | fixed_no_obj_ptr: true 112 | # multimask tracking settings 113 | multimask_output_for_tracking: true 114 | use_multimask_token_for_obj_ptr: true 115 | multimask_min_pt_num: 0 116 | multimask_max_pt_num: 1 117 | use_mlp_for_obj_ptr_proj: true 118 | # Compilation flag 119 | compile_image_encoder: False 120 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hiera_t.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_base.SAM2Base 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 96 12 | num_heads: 1 13 | stages: [1, 2, 7, 2] 14 | global_att_blocks: [5, 7, 9] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | neck: 17 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 18 | position_encoding: 19 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 20 | num_pos_feats: 256 21 | normalize: true 22 | scale: null 23 | temperature: 10000 24 | d_model: 256 25 | backbone_channel_list: [768, 384, 192, 96] 26 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 27 | fpn_interp_model: nearest 28 | 29 | memory_attention: 30 | _target_: sam2.modeling.memory_attention.MemoryAttention 31 | d_model: 256 32 | pos_enc_at_input: true 33 | layer: 34 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 35 | activation: relu 36 | dim_feedforward: 2048 37 | dropout: 0.1 38 | pos_enc_at_attn: false 39 | self_attention: 40 | _target_: sam2.modeling.sam.transformer.RoPEAttention 41 | rope_theta: 10000.0 42 | feat_sizes: [32, 32] 43 | embedding_dim: 256 44 | num_heads: 1 45 | downsample_rate: 1 46 | dropout: 0.1 47 | d_model: 256 48 | pos_enc_at_cross_attn_keys: true 49 | pos_enc_at_cross_attn_queries: false 50 | cross_attention: 51 | _target_: sam2.modeling.sam.transformer.RoPEAttention 52 | rope_theta: 10000.0 53 | feat_sizes: [32, 32] 54 | rope_k_repeat: True 55 | embedding_dim: 256 56 | num_heads: 1 57 | downsample_rate: 1 58 | dropout: 0.1 59 | kv_in_dim: 64 60 | num_layers: 4 61 | 62 | memory_encoder: 63 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 64 | out_dim: 64 65 | position_encoding: 66 | _target_: 
sam2.modeling.position_encoding.PositionEmbeddingSine 67 | num_pos_feats: 64 68 | normalize: true 69 | scale: null 70 | temperature: 10000 71 | mask_downsampler: 72 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 73 | kernel_size: 3 74 | stride: 2 75 | padding: 1 76 | fuser: 77 | _target_: sam2.modeling.memory_encoder.Fuser 78 | layer: 79 | _target_: sam2.modeling.memory_encoder.CXBlock 80 | dim: 256 81 | kernel_size: 7 82 | padding: 3 83 | layer_scale_init_value: 1e-6 84 | use_dwconv: True # depth-wise convs 85 | num_layers: 2 86 | 87 | num_maskmem: 7 88 | image_size: 1024 89 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 90 | # SAM decoder 91 | sigmoid_scale_for_mem_enc: 20.0 92 | sigmoid_bias_for_mem_enc: -10.0 93 | use_mask_input_as_output_without_sam: true 94 | # Memory 95 | directly_add_no_mem_embed: true 96 | no_obj_embed_spatial: true 97 | # use high-resolution feature map in the SAM mask decoder 98 | use_high_res_features_in_sam: true 99 | # output 3 masks on the first click on initial conditioning frames 100 | multimask_output_in_sam: true 101 | # SAM heads 102 | iou_prediction_use_sigmoid: True 103 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 104 | use_obj_ptrs_in_encoder: true 105 | add_tpos_enc_to_obj_ptrs: true 106 | proj_tpos_enc_in_obj_ptrs: true 107 | use_signed_tpos_enc_to_obj_ptrs: true 108 | only_obj_ptrs_in_the_past_for_eval: true 109 | # object occlusion prediction 110 | pred_obj_scores: true 111 | pred_obj_scores_mlp: true 112 | fixed_no_obj_ptr: true 113 | # multimask tracking settings 114 | multimask_output_for_tracking: true 115 | use_multimask_token_for_obj_ptr: true 116 | multimask_min_pt_num: 0 117 | multimask_max_pt_num: 1 118 | use_mlp_for_obj_ptr_proj: true 119 | # Compilation flag 120 | # HieraT does not currently support compilation, should always be set to False 121 | compile_image_encoder: False 122 | -------------------------------------------------------------------------------- /sam-hq2/sam2/configs/sam2.1/sam2.1_hq_hiera_l.yaml: -------------------------------------------------------------------------------- 1 | # @package _global_ 2 | 3 | # Model 4 | model: 5 | _target_: sam2.modeling.sam2_hq_base.SAM2HQBase 6 | image_encoder: 7 | _target_: sam2.modeling.backbones.image_encoder.ImageEncoder 8 | scalp: 1 9 | trunk: 10 | _target_: sam2.modeling.backbones.hieradet.Hiera 11 | embed_dim: 144 12 | num_heads: 2 13 | stages: [2, 6, 36, 4] 14 | global_att_blocks: [23, 33, 43] 15 | window_pos_embed_bkg_spatial_size: [7, 7] 16 | window_spec: [8, 4, 16, 8] 17 | neck: 18 | _target_: sam2.modeling.backbones.image_encoder.FpnNeck 19 | position_encoding: 20 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 21 | num_pos_feats: 256 22 | normalize: true 23 | scale: null 24 | temperature: 10000 25 | d_model: 256 26 | backbone_channel_list: [1152, 576, 288, 144] 27 | fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features 28 | fpn_interp_model: nearest 29 | 30 | memory_attention: 31 | _target_: sam2.modeling.memory_attention.MemoryAttention 32 | d_model: 256 33 | pos_enc_at_input: true 34 | layer: 35 | _target_: sam2.modeling.memory_attention.MemoryAttentionLayer 36 | activation: relu 37 | dim_feedforward: 2048 38 | dropout: 0.1 39 | pos_enc_at_attn: false 40 | self_attention: 41 | _target_: sam2.modeling.sam.transformer.RoPEAttention 42 | rope_theta: 10000.0 43 | feat_sizes: [32, 32] 44 | embedding_dim: 256 
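        # RoPE self-attention inside the memory-attention layers; the values here match the
        # non-HQ sam2.1 configs above, since the HQ variant swaps the model class (SAM2HQBase)
        # rather than these attention hyper-parameters.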
45 | num_heads: 1 46 | downsample_rate: 1 47 | dropout: 0.1 48 | d_model: 256 49 | pos_enc_at_cross_attn_keys: true 50 | pos_enc_at_cross_attn_queries: false 51 | cross_attention: 52 | _target_: sam2.modeling.sam.transformer.RoPEAttention 53 | rope_theta: 10000.0 54 | feat_sizes: [32, 32] 55 | rope_k_repeat: True 56 | embedding_dim: 256 57 | num_heads: 1 58 | downsample_rate: 1 59 | dropout: 0.1 60 | kv_in_dim: 64 61 | num_layers: 4 62 | 63 | memory_encoder: 64 | _target_: sam2.modeling.memory_encoder.MemoryEncoder 65 | out_dim: 64 66 | position_encoding: 67 | _target_: sam2.modeling.position_encoding.PositionEmbeddingSine 68 | num_pos_feats: 64 69 | normalize: true 70 | scale: null 71 | temperature: 10000 72 | mask_downsampler: 73 | _target_: sam2.modeling.memory_encoder.MaskDownSampler 74 | kernel_size: 3 75 | stride: 2 76 | padding: 1 77 | fuser: 78 | _target_: sam2.modeling.memory_encoder.Fuser 79 | layer: 80 | _target_: sam2.modeling.memory_encoder.CXBlock 81 | dim: 256 82 | kernel_size: 7 83 | padding: 3 84 | layer_scale_init_value: 1e-6 85 | use_dwconv: True # depth-wise convs 86 | num_layers: 2 87 | 88 | num_maskmem: 7 89 | image_size: 1024 90 | # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask 91 | sigmoid_scale_for_mem_enc: 20.0 92 | sigmoid_bias_for_mem_enc: -10.0 93 | use_mask_input_as_output_without_sam: true 94 | # Memory 95 | directly_add_no_mem_embed: true 96 | no_obj_embed_spatial: true 97 | # use high-resolution feature map in the SAM mask decoder 98 | use_high_res_features_in_sam: true 99 | # output 3 masks on the first click on initial conditioning frames 100 | multimask_output_in_sam: true 101 | # SAM heads 102 | iou_prediction_use_sigmoid: True 103 | # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder 104 | use_obj_ptrs_in_encoder: true 105 | add_tpos_enc_to_obj_ptrs: true 106 | proj_tpos_enc_in_obj_ptrs: true 107 | use_signed_tpos_enc_to_obj_ptrs: true 108 | only_obj_ptrs_in_the_past_for_eval: true 109 | # object occlusion prediction 110 | pred_obj_scores: true 111 | pred_obj_scores_mlp: true 112 | fixed_no_obj_ptr: true 113 | # multimask tracking settings 114 | multimask_output_for_tracking: true 115 | use_multimask_token_for_obj_ptr: true 116 | multimask_min_pt_num: 0 117 | multimask_max_pt_num: 1 118 | use_mlp_for_obj_ptr_proj: true 119 | # Compilation flag 120 | compile_image_encoder: False 121 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
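# Usage sketch for the HQ-SAM 2 configs above. This is a hypothetical example: it assumes the
# fork keeps the upstream SAM 2 entry points `build_sam2` and `SAM2ImagePredictor`, and the
# checkpoint filename is illustrative (see checkpoints/download_ckpts.sh for the real names).
#
#   import torch
#   from sam2.build_sam import build_sam2
#   from sam2.sam2_image_predictor import SAM2ImagePredictor
#
#   model = build_sam2("configs/sam2.1/sam2.1_hq_hiera_l.yaml",
#                      "checkpoints/sam2.1_hq_hiera_large.pt")   # illustrative filename
#   predictor = SAM2ImagePredictor(model)
#   with torch.inference_mode():
#       predictor.set_image(image)                # HWC uint8 array or PIL image
#       masks, scores, low_res = predictor.predict(box=box)   # box/point prompts as in SAM 2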
6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/backbones/image_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import List, Optional 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ImageEncoder(nn.Module): 15 | def __init__( 16 | self, 17 | trunk: nn.Module, 18 | neck: nn.Module, 19 | scalp: int = 0, 20 | ): 21 | super().__init__() 22 | self.trunk = trunk 23 | self.neck = neck 24 | self.scalp = scalp 25 | assert ( 26 | self.trunk.channel_list == self.neck.backbone_channel_list 27 | ), f"Channel dims of trunk and neck do not match. Trunk: {self.trunk.channel_list}, neck: {self.neck.backbone_channel_list}" 28 | 29 | def forward(self, sample: torch.Tensor): 30 | # Forward through backbone 31 | features, pos = self.neck(self.trunk(sample)) 32 | if self.scalp > 0: 33 | # Discard the lowest resolution features 34 | features, pos = features[: -self.scalp], pos[: -self.scalp] 35 | 36 | src = features[-1] 37 | output = { 38 | "vision_features": src, 39 | "vision_pos_enc": pos, 40 | "backbone_fpn": features, 41 | } 42 | return output 43 | 44 | 45 | class FpnNeck(nn.Module): 46 | """ 47 | A modified variant of Feature Pyramid Network (FPN) neck 48 | (we remove output conv and also do bicubic interpolation similar to ViT 49 | pos embed interpolation) 50 | """ 51 | 52 | def __init__( 53 | self, 54 | position_encoding: nn.Module, 55 | d_model: int, 56 | backbone_channel_list: List[int], 57 | kernel_size: int = 1, 58 | stride: int = 1, 59 | padding: int = 0, 60 | fpn_interp_model: str = "bilinear", 61 | fuse_type: str = "sum", 62 | fpn_top_down_levels: Optional[List[int]] = None, 63 | ): 64 | """Initialize the neck 65 | :param trunk: the backbone 66 | :param position_encoding: the positional encoding to use 67 | :param d_model: the dimension of the model 68 | :param neck_norm: the normalization to use 69 | """ 70 | super().__init__() 71 | self.position_encoding = position_encoding 72 | self.convs = nn.ModuleList() 73 | self.backbone_channel_list = backbone_channel_list 74 | self.d_model = d_model 75 | for dim in backbone_channel_list: 76 | current = nn.Sequential() 77 | current.add_module( 78 | "conv", 79 | nn.Conv2d( 80 | in_channels=dim, 81 | out_channels=d_model, 82 | kernel_size=kernel_size, 83 | stride=stride, 84 | padding=padding, 85 | ), 86 | ) 87 | 88 | self.convs.append(current) 89 | self.fpn_interp_model = fpn_interp_model 90 | assert fuse_type in ["sum", "avg"] 91 | self.fuse_type = fuse_type 92 | 93 | # levels to have top-down features in its outputs 94 | # e.g. if fpn_top_down_levels is [2, 3], then only outputs of level 2 and 3 95 | # have top-down propagation, while outputs of level 0 and level 1 have only 96 | # lateral features from the same backbone level. 
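        # (In the SAM 2 / HQ-SAM 2 configs above this is [2, 3]: the two highest-resolution
        # outputs, levels 0 and 1, stay purely lateral and are the ones used as high-res
        # features by the mask decoder when use_high_res_features_in_sam is enabled.)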
97 | if fpn_top_down_levels is None: 98 | # default is to have top-down features on all levels 99 | fpn_top_down_levels = range(len(self.convs)) 100 | self.fpn_top_down_levels = list(fpn_top_down_levels) 101 | 102 | def forward(self, xs: List[torch.Tensor]): 103 | 104 | out = [None] * len(self.convs) 105 | pos = [None] * len(self.convs) 106 | assert len(xs) == len(self.convs) 107 | # fpn forward pass 108 | # see https://github.com/facebookresearch/detectron2/blob/main/detectron2/modeling/backbone/fpn.py 109 | prev_features = None 110 | # forward in top-down order (from low to high resolution) 111 | n = len(self.convs) - 1 112 | for i in range(n, -1, -1): 113 | x = xs[i] 114 | lateral_features = self.convs[n - i](x) 115 | if i in self.fpn_top_down_levels and prev_features is not None: 116 | top_down_features = F.interpolate( 117 | prev_features.to(dtype=torch.float32), 118 | scale_factor=2.0, 119 | mode=self.fpn_interp_model, 120 | align_corners=( 121 | None if self.fpn_interp_model == "nearest" else False 122 | ), 123 | antialias=False, 124 | ) 125 | prev_features = lateral_features + top_down_features 126 | if self.fuse_type == "avg": 127 | prev_features /= 2 128 | else: 129 | prev_features = lateral_features 130 | x_out = prev_features 131 | out[i] = x_out 132 | pos[i] = self.position_encoding(x_out).to(x_out.dtype) 133 | 134 | return out, pos 135 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/backbones/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """Some utilities for backbones, in particular for windowing""" 8 | 9 | from typing import Tuple 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | 16 | def window_partition(x, window_size): 17 | """ 18 | Partition into non-overlapping windows with padding if needed. 19 | Args: 20 | x (tensor): input tokens with [B, H, W, C]. 21 | window_size (int): window size. 22 | Returns: 23 | windows: windows after partition with [B * num_windows, window_size, window_size, C]. 24 | (Hp, Wp): padded height and width before partition 25 | """ 26 | B, H, W, C = x.shape 27 | 28 | pad_h = (window_size - H % window_size) % window_size 29 | pad_w = (window_size - W % window_size) % window_size 30 | if pad_h > 0 or pad_w > 0: 31 | x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h)) 32 | Hp, Wp = H + pad_h, W + pad_w 33 | 34 | x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C) 35 | windows = ( 36 | x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) 37 | ) 38 | return windows, (Hp, Wp) 39 | 40 | 41 | def window_unpartition(windows, window_size, pad_hw, hw): 42 | """ 43 | Window unpartition into original sequences and removing padding. 44 | Args: 45 | x (tensor): input tokens with [B * num_windows, window_size, window_size, C]. 46 | window_size (int): window size. 47 | pad_hw (Tuple): padded height and width (Hp, Wp). 48 | hw (Tuple): original height and width (H, W) before padding. 49 | Returns: 50 | x: unpartitioned sequences with [B, H, W, C]. 
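        Example: with window_size=16, an input of hw=(56, 60) was padded to pad_hw=(64, 64) by
            window_partition, giving B * 16 windows; this function reshapes them back into
            [B, 64, 64, C] and crops to (56, 60).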
51 | """ 52 | Hp, Wp = pad_hw 53 | H, W = hw 54 | B = windows.shape[0] // (Hp * Wp // window_size // window_size) 55 | x = windows.view( 56 | B, Hp // window_size, Wp // window_size, window_size, window_size, -1 57 | ) 58 | x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1) 59 | 60 | if Hp > H or Wp > W: 61 | x = x[:, :H, :W, :].contiguous() 62 | return x 63 | 64 | 65 | class PatchEmbed(nn.Module): 66 | """ 67 | Image to Patch Embedding. 68 | """ 69 | 70 | def __init__( 71 | self, 72 | kernel_size: Tuple[int, ...] = (7, 7), 73 | stride: Tuple[int, ...] = (4, 4), 74 | padding: Tuple[int, ...] = (3, 3), 75 | in_chans: int = 3, 76 | embed_dim: int = 768, 77 | ): 78 | """ 79 | Args: 80 | kernel_size (Tuple): kernel size of the projection layer. 81 | stride (Tuple): stride of the projection layer. 82 | padding (Tuple): padding size of the projection layer. 83 | in_chans (int): Number of input image channels. 84 | embed_dim (int): embed_dim (int): Patch embedding dimension. 85 | """ 86 | super().__init__() 87 | self.proj = nn.Conv2d( 88 | in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding 89 | ) 90 | 91 | def forward(self, x: torch.Tensor) -> torch.Tensor: 92 | x = self.proj(x) 93 | # B C H W -> B H W C 94 | x = x.permute(0, 2, 3, 1) 95 | return x 96 | -------------------------------------------------------------------------------- /sam-hq2/sam2/modeling/sam/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /sam-hq2/sam2/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import warnings 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from torchvision.transforms import Normalize, Resize, ToTensor 13 | 14 | 15 | class SAM2Transforms(nn.Module): 16 | def __init__( 17 | self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0 18 | ): 19 | """ 20 | Transforms for SAM2. 
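        Resizes images to a square `resolution` x `resolution` input and normalizes with the
        standard ImageNet mean/std; `mask_threshold`, `max_hole_area` and `max_sprinkle_area`
        control the mask post-processing in `postprocess_masks`.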
21 | """ 22 | super().__init__() 23 | self.resolution = resolution 24 | self.mask_threshold = mask_threshold 25 | self.max_hole_area = max_hole_area 26 | self.max_sprinkle_area = max_sprinkle_area 27 | self.mean = [0.485, 0.456, 0.406] 28 | self.std = [0.229, 0.224, 0.225] 29 | self.to_tensor = ToTensor() 30 | self.transforms = torch.jit.script( 31 | nn.Sequential( 32 | Resize((self.resolution, self.resolution)), 33 | Normalize(self.mean, self.std), 34 | ) 35 | ) 36 | 37 | def __call__(self, x): 38 | x = self.to_tensor(x) 39 | return self.transforms(x) 40 | 41 | def forward_batch(self, img_list): 42 | img_batch = [self.transforms(self.to_tensor(img)) for img in img_list] 43 | img_batch = torch.stack(img_batch, dim=0) 44 | return img_batch 45 | 46 | def transform_coords( 47 | self, coords: torch.Tensor, normalize=False, orig_hw=None 48 | ) -> torch.Tensor: 49 | """ 50 | Expects a torch tensor with length 2 in the last dimension. The coordinates can be in absolute image or normalized coordinates, 51 | If the coords are in absolute image coordinates, normalize should be set to True and original image size is required. 52 | 53 | Returns 54 | Un-normalized coordinates in the range of [0, 1] which is expected by the SAM2 model. 55 | """ 56 | if normalize: 57 | assert orig_hw is not None 58 | h, w = orig_hw 59 | coords = coords.clone() 60 | coords[..., 0] = coords[..., 0] / w 61 | coords[..., 1] = coords[..., 1] / h 62 | 63 | coords = coords * self.resolution # unnormalize coords 64 | return coords 65 | 66 | def transform_boxes( 67 | self, boxes: torch.Tensor, normalize=False, orig_hw=None 68 | ) -> torch.Tensor: 69 | """ 70 | Expects a tensor of shape Bx4. The coordinates can be in absolute image or normalized coordinates, 71 | if the coords are in absolute image coordinates, normalize should be set to True and original image size is required. 72 | """ 73 | boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw) 74 | return boxes 75 | 76 | def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor: 77 | """ 78 | Perform PostProcessing on output masks. 79 | """ 80 | from sam2.utils.misc import get_connected_components 81 | 82 | masks = masks.float() 83 | input_masks = masks 84 | mask_flat = masks.flatten(0, 1).unsqueeze(1) # flatten as 1-channel image 85 | try: 86 | if self.max_hole_area > 0: 87 | # Holes are those connected components in background with area <= self.fill_hole_area 88 | # (background regions are those with mask scores <= self.mask_threshold) 89 | labels, areas = get_connected_components( 90 | mask_flat <= self.mask_threshold 91 | ) 92 | is_hole = (labels > 0) & (areas <= self.max_hole_area) 93 | is_hole = is_hole.reshape_as(masks) 94 | # We fill holes with a small positive mask score (10.0) to change them to foreground. 95 | masks = torch.where(is_hole, self.mask_threshold + 10.0, masks) 96 | 97 | if self.max_sprinkle_area > 0: 98 | labels, areas = get_connected_components( 99 | mask_flat > self.mask_threshold 100 | ) 101 | is_hole = (labels > 0) & (areas <= self.max_sprinkle_area) 102 | is_hole = is_hole.reshape_as(masks) 103 | # We fill holes with negative mask score (-10.0) to change them to background. 104 | masks = torch.where(is_hole, self.mask_threshold - 10.0, masks) 105 | except Exception as e: 106 | # Skip the post-processing step if the CUDA kernel fails 107 | warnings.warn( 108 | f"{e}\n\nSkipping the post-processing step due to the error above. 
You can " 109 | "still use SAM 2 and it's OK to ignore the error above, although some post-processing " 110 | "functionality may be limited (which doesn't affect the results in most cases; see " 111 | "https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).", 112 | category=UserWarning, 113 | stacklevel=2, 114 | ) 115 | masks = input_masks 116 | 117 | masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False) 118 | return masks 119 | -------------------------------------------------------------------------------- /sam-hq2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | import os 7 | 8 | from setuptools import find_packages, setup 9 | 10 | # Package metadata 11 | NAME = "HQ-SAM-2" 12 | VERSION = "1.0" 13 | DESCRIPTION = "SAM-HQ 2: Segment Anything in High Quality for Images and Videos" 14 | URL = "https://github.com/SysCV/sam-hq" 15 | AUTHOR = "HQ-SAM Team" 16 | AUTHOR_EMAIL = "None" 17 | LICENSE = "Apache 2.0" 18 | 19 | # Read the contents of README file 20 | with open("README.md", "r", encoding="utf-8") as f: 21 | LONG_DESCRIPTION = f.read() 22 | 23 | # Required dependencies 24 | REQUIRED_PACKAGES = [ 25 | "torch>=2.3.1", 26 | "torchvision>=0.18.1", 27 | "numpy>=1.24.4", 28 | "tqdm>=4.66.1", 29 | "hydra-core>=1.3.2", 30 | "iopath>=0.1.10", 31 | "pillow>=9.4.0", 32 | "matplotlib>=3.9.1", 33 | "opencv-python>=4.7.0", 34 | ] 35 | 36 | EXTRA_PACKAGES = { 37 | "notebooks": [ 38 | "matplotlib>=3.9.1", 39 | "jupyter>=1.0.0", 40 | "opencv-python>=4.7.0", 41 | "eva-decord>=0.6.1", 42 | ], 43 | "interactive-demo": [ 44 | "Flask>=3.0.3", 45 | "Flask-Cors>=5.0.0", 46 | "av>=13.0.0", 47 | "dataclasses-json>=0.6.7", 48 | "eva-decord>=0.6.1", 49 | "gunicorn>=23.0.0", 50 | "imagesize>=1.4.1", 51 | "pycocotools>=2.0.8", 52 | "strawberry-graphql>=0.239.2", 53 | ], 54 | "dev": [ 55 | "black==24.2.0", 56 | "usort==1.0.2", 57 | "ufmt==2.0.0b2", 58 | "fvcore>=0.1.5.post20221221", 59 | "pandas>=2.2.2", 60 | "scikit-image>=0.24.0", 61 | "tensorboard>=2.17.0", 62 | "pycocotools>=2.0.8", 63 | "tensordict>=0.5.0", 64 | "opencv-python>=4.7.0", 65 | "submitit>=1.5.1", 66 | ], 67 | } 68 | 69 | # By default, we also build the SAM 2 CUDA extension. 70 | # You may turn off CUDA build with `export SAM2_BUILD_CUDA=0`. 71 | BUILD_CUDA = os.getenv("SAM2_BUILD_CUDA", "1") == "1" 72 | # By default, we allow SAM 2 installation to proceed even with build errors. 73 | # You may force stopping on errors with `export SAM2_BUILD_ALLOW_ERRORS=0`. 74 | BUILD_ALLOW_ERRORS = os.getenv("SAM2_BUILD_ALLOW_ERRORS", "1") == "1" 75 | 76 | # Catch and skip errors during extension building and print a warning message 77 | # (note that this message only shows up under verbose build mode 78 | # "pip install -v -e ." or "python setup.py build_ext -v") 79 | CUDA_ERROR_MSG = ( 80 | "{}\n\n" 81 | "Failed to build the SAM 2 CUDA extension due to the error above. 
" 82 | "You can still use SAM 2 and it's OK to ignore the error above, although some " 83 | "post-processing functionality may be limited (which doesn't affect the results in most cases; " 84 | "(see https://github.com/facebookresearch/sam2/blob/main/INSTALL.md).\n" 85 | ) 86 | 87 | 88 | def get_extensions(): 89 | if not BUILD_CUDA: 90 | return [] 91 | 92 | try: 93 | from torch.utils.cpp_extension import CUDAExtension 94 | 95 | srcs = ["sam2/csrc/connected_components.cu"] 96 | compile_args = { 97 | "cxx": [], 98 | "nvcc": [ 99 | "-DCUDA_HAS_FP16=1", 100 | "-D__CUDA_NO_HALF_OPERATORS__", 101 | "-D__CUDA_NO_HALF_CONVERSIONS__", 102 | "-D__CUDA_NO_HALF2_OPERATORS__", 103 | ], 104 | } 105 | ext_modules = [CUDAExtension("sam2._C", srcs, extra_compile_args=compile_args)] 106 | except Exception as e: 107 | if BUILD_ALLOW_ERRORS: 108 | print(CUDA_ERROR_MSG.format(e)) 109 | ext_modules = [] 110 | else: 111 | raise e 112 | 113 | return ext_modules 114 | 115 | 116 | try: 117 | from torch.utils.cpp_extension import BuildExtension 118 | 119 | class BuildExtensionIgnoreErrors(BuildExtension): 120 | 121 | def finalize_options(self): 122 | try: 123 | super().finalize_options() 124 | except Exception as e: 125 | print(CUDA_ERROR_MSG.format(e)) 126 | self.extensions = [] 127 | 128 | def build_extensions(self): 129 | try: 130 | super().build_extensions() 131 | except Exception as e: 132 | print(CUDA_ERROR_MSG.format(e)) 133 | self.extensions = [] 134 | 135 | def get_ext_filename(self, ext_name): 136 | try: 137 | return super().get_ext_filename(ext_name) 138 | except Exception as e: 139 | print(CUDA_ERROR_MSG.format(e)) 140 | self.extensions = [] 141 | return "_C.so" 142 | 143 | cmdclass = { 144 | "build_ext": ( 145 | BuildExtensionIgnoreErrors.with_options(no_python_abi_suffix=True) 146 | if BUILD_ALLOW_ERRORS 147 | else BuildExtension.with_options(no_python_abi_suffix=True) 148 | ) 149 | } 150 | except Exception as e: 151 | cmdclass = {} 152 | if BUILD_ALLOW_ERRORS: 153 | print(CUDA_ERROR_MSG.format(e)) 154 | else: 155 | raise e 156 | 157 | 158 | # Setup configuration 159 | setup( 160 | name=NAME, 161 | version=VERSION, 162 | description=DESCRIPTION, 163 | long_description=LONG_DESCRIPTION, 164 | long_description_content_type="text/markdown", 165 | url=URL, 166 | author=AUTHOR, 167 | author_email=AUTHOR_EMAIL, 168 | license=LICENSE, 169 | packages=find_packages(exclude="notebooks"), 170 | include_package_data=True, 171 | install_requires=REQUIRED_PACKAGES, 172 | extras_require=EXTRA_PACKAGES, 173 | python_requires=">=3.10.0", 174 | ext_modules=get_extensions(), 175 | cmdclass=cmdclass, 176 | ) 177 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/COCO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/COCO.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/GD_GLIGEN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/GD_GLIGEN.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/GD_SD.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/GD_SD.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/ODinW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/ODinW.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/arch.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/cats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/cats.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/.asset/hero_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/.asset/hero_figure.png -------------------------------------------------------------------------------- /seginw/GroundingDINO/demo/gradio_app.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from functools import partial 3 | import cv2 4 | import requests 5 | import os 6 | from io import BytesIO 7 | from PIL import Image 8 | import numpy as np 9 | from pathlib import Path 10 | 11 | 12 | import warnings 13 | 14 | import torch 15 | 16 | # prepare the environment 17 | os.system("python setup.py build develop --user") 18 | os.system("pip install packaging==21.3") 19 | os.system("pip install gradio") 20 | 21 | 22 | warnings.filterwarnings("ignore") 23 | 24 | import gradio as gr 25 | 26 | from groundingdino.models import build_model 27 | from groundingdino.util.slconfig import SLConfig 28 | from groundingdino.util.utils import clean_state_dict 29 | from groundingdino.util.inference import annotate, load_image, predict 30 | import groundingdino.datasets.transforms as T 31 | 32 | from huggingface_hub import hf_hub_download 33 | 34 | 35 | 36 | # Use this command for evaluate the GLIP-T model 37 | config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py" 38 | ckpt_repo_id = "ShilongLiu/GroundingDINO" 39 | ckpt_filenmae = "groundingdino_swint_ogc.pth" 40 | 41 | 42 | def load_model_hf(model_config_path, repo_id, filename, device='cpu'): 43 | args = SLConfig.fromfile(model_config_path) 44 | model = build_model(args) 45 | args.device = device 46 | 47 | cache_file = hf_hub_download(repo_id=repo_id, filename=filename) 48 | checkpoint = torch.load(cache_file, map_location='cpu') 49 | log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False) 50 | print("Model loaded from {} \n => {}".format(cache_file, log)) 51 | _ = model.eval() 52 | return model 53 | 54 | def image_transform_grounding(init_image): 55 | transform = T.Compose([ 56 | T.RandomResize([800], max_size=1333), 57 | T.ToTensor(), 58 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 59 | ]) 60 | image, _ = transform(init_image, None) # 3, h, w 61 | return 
init_image, image 62 | 63 | def image_transform_grounding_for_vis(init_image): 64 | transform = T.Compose([ 65 | T.RandomResize([800], max_size=1333), 66 | ]) 67 | image, _ = transform(init_image, None) # 3, h, w 68 | return image 69 | 70 | model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae) 71 | 72 | def run_grounding(input_image, grounding_caption, box_threshold, text_threshold): 73 | init_image = input_image.convert("RGB") 74 | original_size = init_image.size 75 | 76 | _, image_tensor = image_transform_grounding(init_image) 77 | image_pil: Image = image_transform_grounding_for_vis(init_image) 78 | 79 | # run grounidng 80 | boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu') 81 | annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases) 82 | image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)) 83 | 84 | 85 | return image_with_box 86 | 87 | if __name__ == "__main__": 88 | 89 | parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True) 90 | parser.add_argument("--debug", action="store_true", help="using debug mode") 91 | parser.add_argument("--share", action="store_true", help="share the app") 92 | args = parser.parse_args() 93 | 94 | block = gr.Blocks().queue() 95 | with block: 96 | gr.Markdown("# [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)") 97 | gr.Markdown("### Open-World Detection with Grounding DINO") 98 | 99 | with gr.Row(): 100 | with gr.Column(): 101 | input_image = gr.Image(source='upload', type="pil") 102 | grounding_caption = gr.Textbox(label="Detection Prompt") 103 | run_button = gr.Button(label="Run") 104 | with gr.Accordion("Advanced options", open=False): 105 | box_threshold = gr.Slider( 106 | label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001 107 | ) 108 | text_threshold = gr.Slider( 109 | label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001 110 | ) 111 | 112 | with gr.Column(): 113 | gallery = gr.outputs.Image( 114 | type="pil", 115 | # label="grounding results" 116 | ).style(full_width=True, full_height=True) 117 | # gallery = gr.Gallery(label="Generated images", show_label=False).style( 118 | # grid=[1], height="auto", container=True, full_width=True, full_height=True) 119 | 120 | run_button.click(fn=run_grounding, inputs=[ 121 | input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery]) 122 | 123 | 124 | block.launch(server_name='0.0.0.0', server_port=7579, debug=args.debug, share=args.share) 125 | 126 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/groundingdino/__init__.py -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_B_384_22k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | 
dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py: -------------------------------------------------------------------------------- 1 | batch_size = 1 2 | modelname = "groundingdino" 3 | backbone = "swin_T_224_1k" 4 | position_embedding = "sine" 5 | pe_temperatureH = 20 6 | pe_temperatureW = 20 7 | return_interm_indices = [1, 2, 3] 8 | backbone_freeze_keywords = None 9 | enc_layers = 6 10 | dec_layers = 6 11 | pre_norm = False 12 | dim_feedforward = 2048 13 | hidden_dim = 256 14 | dropout = 0.0 15 | nheads = 8 16 | num_queries = 900 17 | query_dim = 4 18 | num_patterns = 0 19 | num_feature_levels = 4 20 | enc_n_points = 4 21 | dec_n_points = 4 22 | two_stage_type = "standard" 23 | two_stage_bbox_embed_share = False 24 | two_stage_class_embed_share = False 25 | transformer_activation = "relu" 26 | dec_pred_bbox_embed_share = True 27 | dn_box_noise_scale = 1.0 28 | dn_label_noise_ratio = 0.5 29 | dn_label_coef = 1.0 30 | dn_bbox_coef = 1.0 31 | embed_init_tgt = True 32 | dn_labelbook_size = 2000 33 | max_text_len = 256 34 | text_encoder_type = "bert-base-uncased" 35 | use_text_enhancer = True 36 | use_fusion_layer = True 37 | use_checkpoint = True 38 | use_transformer_ckpt = True 39 | use_text_cross_attention = True 40 | text_dropout = 0.0 41 | fusion_dropout = 0.0 42 | fusion_droppath = 0.1 43 | sub_sentence_present = True 44 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/seginw/GroundingDINO/groundingdino/datasets/__init__.py -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Conditional DETR 8 | # Copyright (c) 2021 Microsoft. All Rights Reserved. 
9 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 10 | # ------------------------------------------------------------------------ 11 | # Copied from DETR (https://github.com/facebookresearch/detr) 12 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 13 | # ------------------------------------------------------------------------ 14 | 15 | from .groundingdino import build_groundingdino 16 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone 2 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "ms_deform_attn_cpu.h" 14 | 15 | #ifdef WITH_CUDA 16 | #include "ms_deform_attn_cuda.h" 17 | #endif 18 | 19 | namespace groundingdino { 20 | 21 | at::Tensor 22 | ms_deform_attn_forward( 23 | const at::Tensor &value, 24 | const at::Tensor &spatial_shapes, 25 | const at::Tensor &level_start_index, 26 | const at::Tensor &sampling_loc, 27 | const at::Tensor &attn_weight, 28 | const int im2col_step) 29 | { 30 | if (value.type().is_cuda()) 31 | { 32 | #ifdef WITH_CUDA 33 | return ms_deform_attn_cuda_forward( 34 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 35 | #else 36 | AT_ERROR("Not compiled with GPU support"); 37 | #endif 38 | } 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | 42 | std::vector 43 | ms_deform_attn_backward( 44 | const at::Tensor &value, 45 | const at::Tensor &spatial_shapes, 46 | const at::Tensor &level_start_index, 47 | const at::Tensor &sampling_loc, 48 | const at::Tensor &attn_weight, 49 | const at::Tensor &grad_output, 50 | const int im2col_step) 51 | { 52 | if (value.type().is_cuda()) 53 | { 54 | #ifdef WITH_CUDA 55 | return ms_deform_attn_cuda_backward( 56 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 57 | #else 58 | AT_ERROR("Not compiled with GPU support"); 59 | #endif 60 | } 61 | AT_ERROR("Not implemented on the CPU"); 62 | } 63 | 64 | } // namespace groundingdino -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | namespace groundingdino { 17 | 18 | at::Tensor 19 | ms_deform_attn_cpu_forward( 20 | const at::Tensor &value, 21 | const at::Tensor &spatial_shapes, 22 | const at::Tensor &level_start_index, 23 | const at::Tensor &sampling_loc, 24 | const at::Tensor &attn_weight, 25 | const int im2col_step) 26 | { 27 | AT_ERROR("Not implement on cpu"); 28 | } 29 | 30 | std::vector 31 | ms_deform_attn_cpu_backward( 32 | const at::Tensor &value, 33 | const at::Tensor &spatial_shapes, 34 | const at::Tensor &level_start_index, 35 | const at::Tensor &sampling_loc, 36 | const at::Tensor &attn_weight, 37 | const at::Tensor &grad_output, 38 | const int im2col_step) 39 | { 40 | AT_ERROR("Not implement on cpu"); 41 | } 42 | 43 | } // namespace groundingdino 44 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor 17 | ms_deform_attn_cpu_forward( 18 | const at::Tensor &value, 19 | const at::Tensor &spatial_shapes, 20 | const at::Tensor &level_start_index, 21 | const at::Tensor &sampling_loc, 22 | const at::Tensor &attn_weight, 23 | const int im2col_step); 24 | 25 | std::vector 26 | ms_deform_attn_cpu_backward( 27 | const at::Tensor &value, 28 | const at::Tensor &spatial_shapes, 29 | const at::Tensor &level_start_index, 30 | const at::Tensor &sampling_loc, 31 | const at::Tensor &attn_weight, 32 | const at::Tensor &grad_output, 33 | const int im2col_step); 34 | 35 | } // namespace groundingdino 36 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 
5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include 13 | 14 | namespace groundingdino { 15 | 16 | at::Tensor ms_deform_attn_cuda_forward( 17 | const at::Tensor &value, 18 | const at::Tensor &spatial_shapes, 19 | const at::Tensor &level_start_index, 20 | const at::Tensor &sampling_loc, 21 | const at::Tensor &attn_weight, 22 | const int im2col_step); 23 | 24 | std::vector ms_deform_attn_cuda_backward( 25 | const at::Tensor &value, 26 | const at::Tensor &spatial_shapes, 27 | const at::Tensor &level_start_index, 28 | const at::Tensor &sampling_loc, 29 | const at::Tensor &attn_weight, 30 | const at::Tensor &grad_output, 31 | const int im2col_step); 32 | 33 | } // namespace groundingdino -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace groundingdino { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace groundingdino 8 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "MsDeformAttn/ms_deform_attn.h" 4 | 5 | namespace groundingdino { 6 | 7 | #ifdef WITH_CUDA 8 | extern int get_cudart_version(); 9 | #endif 10 | 11 | std::string get_cuda_version() { 12 | #ifdef WITH_CUDA 13 | std::ostringstream oss; 14 | 15 | // copied from 16 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 17 | auto printCudaStyleVersion = [&](int v) { 18 | oss << (v / 1000) << "." << (v / 10 % 100); 19 | if (v % 10 != 0) { 20 | oss << "." << (v % 10); 21 | } 22 | }; 23 | printCudaStyleVersion(get_cudart_version()); 24 | return oss.str(); 25 | #else 26 | return std::string("not available"); 27 | #endif 28 | } 29 | 30 | // similar to 31 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 32 | std::string get_compiler_version() { 33 | std::ostringstream ss; 34 | #if defined(__GNUC__) 35 | #ifndef __clang__ 36 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 37 | #endif 38 | #endif 39 | 40 | #if defined(__clang_major__) 41 | { 42 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
43 | << __clang_patchlevel__; 44 | } 45 | #endif 46 | 47 | #if defined(_MSC_VER) 48 | { ss << "MSVC " << _MSC_FULL_VER; } 49 | #endif 50 | return ss.str(); 51 | } 52 | 53 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 54 | m.def("ms_deform_attn_forward", &ms_deform_attn_forward, "ms_deform_attn_forward"); 55 | m.def("ms_deform_attn_backward", &ms_deform_attn_backward, "ms_deform_attn_backward"); 56 | } 57 | 58 | } // namespace groundingdino -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/GroundingDINO/transformer_vanilla.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved 8 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 9 | """ 10 | DETR Transformer class. 11 | 12 | Copy-paste from torch.nn.Transformer with modifications: 13 | * positional encodings are passed in MHattention 14 | * extra LN at the end of encoder is removed 15 | * decoder returns a stack of activations from all decoding layers 16 | """ 17 | from typing import Optional 18 | 19 | import torch 20 | import torch.nn.functional as F 21 | from torch import Tensor, nn 22 | 23 | from .utils import ( 24 | MLP, 25 | _get_activation_fn, 26 | _get_clones, 27 | gen_encoder_output_proposals, 28 | gen_sineembed_for_position, 29 | sigmoid_focal_loss, 30 | ) 31 | 32 | 33 | class TextTransformer(nn.Module): 34 | def __init__(self, num_layers, d_model=256, nheads=8, dim_feedforward=2048, dropout=0.1): 35 | super().__init__() 36 | self.num_layers = num_layers 37 | self.d_model = d_model 38 | self.nheads = nheads 39 | self.dim_feedforward = dim_feedforward 40 | self.norm = None 41 | 42 | single_encoder_layer = TransformerEncoderLayer( 43 | d_model=d_model, nhead=nheads, dim_feedforward=dim_feedforward, dropout=dropout 44 | ) 45 | self.layers = _get_clones(single_encoder_layer, num_layers) 46 | 47 | def forward(self, memory_text: torch.Tensor, text_attention_mask: torch.Tensor): 48 | """ 49 | 50 | Args: 51 | text_attention_mask: bs, num_token 52 | memory_text: bs, num_token, d_model 53 | 54 | Raises: 55 | RuntimeError: _description_ 56 | 57 | Returns: 58 | output: bs, num_token, d_model 59 | """ 60 | 61 | output = memory_text.transpose(0, 1) 62 | 63 | for layer in self.layers: 64 | output = layer(output, src_key_padding_mask=text_attention_mask) 65 | 66 | if self.norm is not None: 67 | output = self.norm(output) 68 | 69 | return output.transpose(0, 1) 70 | 71 | 72 | class TransformerEncoderLayer(nn.Module): 73 | def __init__( 74 | self, 75 | d_model, 76 | nhead, 77 | dim_feedforward=2048, 78 | dropout=0.1, 79 | activation="relu", 80 | normalize_before=False, 81 | ): 82 | super().__init__() 83 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 84 | # Implementation of Feedforward model 85 | self.linear1 = nn.Linear(d_model, dim_feedforward) 86 | self.dropout = nn.Dropout(dropout) 87 | self.linear2 = nn.Linear(dim_feedforward, d_model) 88 | 89 | self.norm1 = nn.LayerNorm(d_model) 90 | self.norm2 = nn.LayerNorm(d_model) 91 
| self.dropout1 = nn.Dropout(dropout) 92 | self.dropout2 = nn.Dropout(dropout) 93 | 94 | self.activation = _get_activation_fn(activation) 95 | self.normalize_before = normalize_before 96 | self.nhead = nhead 97 | 98 | def with_pos_embed(self, tensor, pos: Optional[Tensor]): 99 | return tensor if pos is None else tensor + pos 100 | 101 | def forward( 102 | self, 103 | src, 104 | src_mask: Optional[Tensor] = None, 105 | src_key_padding_mask: Optional[Tensor] = None, 106 | pos: Optional[Tensor] = None, 107 | ): 108 | # repeat attn mask 109 | if src_mask.dim() == 3 and src_mask.shape[0] == src.shape[1]: 110 | # bs, num_q, num_k 111 | src_mask = src_mask.repeat(self.nhead, 1, 1) 112 | 113 | q = k = self.with_pos_embed(src, pos) 114 | 115 | src2 = self.self_attn(q, k, value=src, attn_mask=src_mask)[0] 116 | 117 | # src2 = self.self_attn(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0] 118 | src = src + self.dropout1(src2) 119 | src = self.norm1(src) 120 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 121 | src = src + self.dropout2(src2) 122 | src = self.norm2(src) 123 | return src 124 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 8 | from .GroundingDINO import build_groundingdino 9 | 10 | 11 | def build_model(args): 12 | # we use register to maintain models from catdet6 on. 13 | from .registry import MODULE_BUILD_FUNCS 14 | 15 | assert args.modelname in MODULE_BUILD_FUNCS._module_dict 16 | build_func = MODULE_BUILD_FUNCS.get(args.modelname) 17 | model = build_func(args) 18 | return model 19 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/models/registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Grounding DINO 3 | # url: https://github.com/IDEA-Research/GroundingDINO 4 | # Copyright (c) 2023 IDEA. All Rights Reserved. 
5 | # Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | # ------------------------------------------------------------------------ 7 | # -*- coding: utf-8 -*- 8 | # @Author: Yihao Chen 9 | # @Date: 2021-08-16 16:03:17 10 | # @Last Modified by: Shilong Liu 11 | # @Last Modified time: 2022-01-23 15:26 12 | # modified from mmcv 13 | 14 | import inspect 15 | from functools import partial 16 | 17 | 18 | class Registry(object): 19 | def __init__(self, name): 20 | self._name = name 21 | self._module_dict = dict() 22 | 23 | def __repr__(self): 24 | format_str = self.__class__.__name__ + "(name={}, items={})".format( 25 | self._name, list(self._module_dict.keys()) 26 | ) 27 | return format_str 28 | 29 | def __len__(self): 30 | return len(self._module_dict) 31 | 32 | @property 33 | def name(self): 34 | return self._name 35 | 36 | @property 37 | def module_dict(self): 38 | return self._module_dict 39 | 40 | def get(self, key): 41 | return self._module_dict.get(key, None) 42 | 43 | def registe_with_name(self, module_name=None, force=False): 44 | return partial(self.register, module_name=module_name, force=force) 45 | 46 | def register(self, module_build_function, module_name=None, force=False): 47 | """Register a module build function. 48 | Args: 49 | module (:obj:`nn.Module`): Module to be registered. 50 | """ 51 | if not inspect.isfunction(module_build_function): 52 | raise TypeError( 53 | "module_build_function must be a function, but got {}".format( 54 | type(module_build_function) 55 | ) 56 | ) 57 | if module_name is None: 58 | module_name = module_build_function.__name__ 59 | if not force and module_name in self._module_dict: 60 | raise KeyError("{} is already registered in {}".format(module_name, self.name)) 61 | self._module_dict[module_name] = module_build_function 62 | 63 | return module_build_function 64 | 65 | 66 | MODULE_BUILD_FUNCS = Registry("model build functions") 67 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 
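Boxes are handled both in (cx, cy, w, h) and in (x0, y0, x1, y1) format; conversion helpers,
all-pairs and pairwise IoU/GIoU, and a mask-to-box utility are defined below.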
4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)] 12 | return torch.stack(b, dim=-1) 13 | 14 | 15 | def box_xyxy_to_cxcywh(x): 16 | x0, y0, x1, y1 = x.unbind(-1) 17 | b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)] 18 | return torch.stack(b, dim=-1) 19 | 20 | 21 | # modified from torchvision to also return the union 22 | def box_iou(boxes1, boxes2): 23 | area1 = box_area(boxes1) 24 | area2 = box_area(boxes2) 25 | 26 | # import ipdb; ipdb.set_trace() 27 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 28 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 29 | 30 | wh = (rb - lt).clamp(min=0) # [N,M,2] 31 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 32 | 33 | union = area1[:, None] + area2 - inter 34 | 35 | iou = inter / (union + 1e-6) 36 | return iou, union 37 | 38 | 39 | def generalized_box_iou(boxes1, boxes2): 40 | """ 41 | Generalized IoU from https://giou.stanford.edu/ 42 | 43 | The boxes should be in [x0, y0, x1, y1] format 44 | 45 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 46 | and M = len(boxes2) 47 | """ 48 | # degenerate boxes gives inf / nan results 49 | # so do an early check 50 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 51 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 52 | # except: 53 | # import ipdb; ipdb.set_trace() 54 | iou, union = box_iou(boxes1, boxes2) 55 | 56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 58 | 59 | wh = (rb - lt).clamp(min=0) # [N,M,2] 60 | area = wh[:, :, 0] * wh[:, :, 1] 61 | 62 | return iou - (area - union) / (area + 1e-6) 63 | 64 | 65 | # modified from torchvision to also return the union 66 | def box_iou_pairwise(boxes1, boxes2): 67 | area1 = box_area(boxes1) 68 | area2 = box_area(boxes2) 69 | 70 | lt = torch.max(boxes1[:, :2], boxes2[:, :2]) # [N,2] 71 | rb = torch.min(boxes1[:, 2:], boxes2[:, 2:]) # [N,2] 72 | 73 | wh = (rb - lt).clamp(min=0) # [N,2] 74 | inter = wh[:, 0] * wh[:, 1] # [N] 75 | 76 | union = area1 + area2 - inter 77 | 78 | iou = inter / union 79 | return iou, union 80 | 81 | 82 | def generalized_box_iou_pairwise(boxes1, boxes2): 83 | """ 84 | Generalized IoU from https://giou.stanford.edu/ 85 | 86 | Input: 87 | - boxes1, boxes2: N,4 88 | Output: 89 | - giou: N, 4 90 | """ 91 | # degenerate boxes gives inf / nan results 92 | # so do an early check 93 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 94 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 95 | assert boxes1.shape == boxes2.shape 96 | iou, union = box_iou_pairwise(boxes1, boxes2) # N, 4 97 | 98 | lt = torch.min(boxes1[:, :2], boxes2[:, :2]) 99 | rb = torch.max(boxes1[:, 2:], boxes2[:, 2:]) 100 | 101 | wh = (rb - lt).clamp(min=0) # [N,2] 102 | area = wh[:, 0] * wh[:, 1] 103 | 104 | return iou - (area - union) / area 105 | 106 | 107 | def masks_to_boxes(masks): 108 | """Compute the bounding boxes around the provided masks 109 | 110 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
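    Masks are expected to be binary; an all-zero mask yields a degenerate box, since no
    foreground pixel constrains the min/max reductions below.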
111 | 112 | Returns a [N, 4] tensors, with the boxes in xyxy format 113 | """ 114 | if masks.numel() == 0: 115 | return torch.zeros((0, 4), device=masks.device) 116 | 117 | h, w = masks.shape[-2:] 118 | 119 | y = torch.arange(0, h, dtype=torch.float) 120 | x = torch.arange(0, w, dtype=torch.float) 121 | y, x = torch.meshgrid(y, x) 122 | 123 | x_mask = masks * x.unsqueeze(0) 124 | x_max = x_mask.flatten(1).max(-1)[0] 125 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 126 | 127 | y_mask = masks * y.unsqueeze(0) 128 | y_max = y_mask.flatten(1).max(-1)[0] 129 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 130 | 131 | return torch.stack([x_min, y_min, x_max, y_max], 1) 132 | 133 | 134 | if __name__ == "__main__": 135 | x = torch.rand(5, 4) 136 | y = torch.rand(3, 4) 137 | iou, union = box_iou(x, y) 138 | import ipdb 139 | 140 | ipdb.set_trace() 141 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/get_tokenlizer.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizerFast 2 | 3 | 4 | def get_tokenlizer(text_encoder_type): 5 | if not isinstance(text_encoder_type, str): 6 | # print("text_encoder_type is not a str") 7 | if hasattr(text_encoder_type, "text_encoder_type"): 8 | text_encoder_type = text_encoder_type.text_encoder_type 9 | elif text_encoder_type.get("text_encoder_type", False): 10 | text_encoder_type = text_encoder_type.get("text_encoder_type") 11 | else: 12 | raise ValueError( 13 | "Unknown type of text_encoder_type: {}".format(type(text_encoder_type)) 14 | ) 15 | print("final text_encoder_type: {}".format(text_encoder_type)) 16 | 17 | tokenizer = AutoTokenizer.from_pretrained(text_encoder_type) 18 | return tokenizer 19 | 20 | 21 | def get_pretrained_language_model(text_encoder_type): 22 | if text_encoder_type == "bert-base-uncased": 23 | return BertModel.from_pretrained(text_encoder_type) 24 | if text_encoder_type == "roberta-base": 25 | return RobertaModel.from_pretrained(text_encoder_type) 26 | raise ValueError("Unknown text_encoder_type {}".format(text_encoder_type)) 27 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import functools 3 | import logging 4 | import os 5 | import sys 6 | 7 | from termcolor import colored 8 | 9 | 10 | class _ColorfulFormatter(logging.Formatter): 11 | def __init__(self, *args, **kwargs): 12 | self._root_name = kwargs.pop("root_name") + "." 13 | self._abbrev_name = kwargs.pop("abbrev_name", "") 14 | if len(self._abbrev_name): 15 | self._abbrev_name = self._abbrev_name + "." 
16 | super(_ColorfulFormatter, self).__init__(*args, **kwargs) 17 | 18 | def formatMessage(self, record): 19 | record.name = record.name.replace(self._root_name, self._abbrev_name) 20 | log = super(_ColorfulFormatter, self).formatMessage(record) 21 | if record.levelno == logging.WARNING: 22 | prefix = colored("WARNING", "red", attrs=["blink"]) 23 | elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: 24 | prefix = colored("ERROR", "red", attrs=["blink", "underline"]) 25 | else: 26 | return log 27 | return prefix + " " + log 28 | 29 | 30 | # so that calling setup_logger multiple times won't add many handlers 31 | @functools.lru_cache() 32 | def setup_logger(output=None, distributed_rank=0, *, color=True, name="imagenet", abbrev_name=None): 33 | """ 34 | Initialize the detectron2 logger and set its verbosity level to "INFO". 35 | 36 | Args: 37 | output (str): a file name or a directory to save log. If None, will not save log file. 38 | If ends with ".txt" or ".log", assumed to be a file name. 39 | Otherwise, logs will be saved to `output/log.txt`. 40 | name (str): the root module name of this logger 41 | 42 | Returns: 43 | logging.Logger: a logger 44 | """ 45 | logger = logging.getLogger(name) 46 | logger.setLevel(logging.DEBUG) 47 | logger.propagate = False 48 | 49 | if abbrev_name is None: 50 | abbrev_name = name 51 | 52 | plain_formatter = logging.Formatter( 53 | "[%(asctime)s.%(msecs)03d]: %(message)s", datefmt="%m/%d %H:%M:%S" 54 | ) 55 | # stdout logging: master only 56 | if distributed_rank == 0: 57 | ch = logging.StreamHandler(stream=sys.stdout) 58 | ch.setLevel(logging.DEBUG) 59 | if color: 60 | formatter = _ColorfulFormatter( 61 | colored("[%(asctime)s.%(msecs)03d]: ", "green") + "%(message)s", 62 | datefmt="%m/%d %H:%M:%S", 63 | root_name=name, 64 | abbrev_name=str(abbrev_name), 65 | ) 66 | else: 67 | formatter = plain_formatter 68 | ch.setFormatter(formatter) 69 | logger.addHandler(ch) 70 | 71 | # file logging: all workers 72 | if output is not None: 73 | if output.endswith(".txt") or output.endswith(".log"): 74 | filename = output 75 | else: 76 | filename = os.path.join(output, "log.txt") 77 | if distributed_rank > 0: 78 | filename = filename + f".rank{distributed_rank}" 79 | os.makedirs(os.path.dirname(filename), exist_ok=True) 80 | 81 | fh = logging.StreamHandler(_cached_log_stream(filename)) 82 | fh.setLevel(logging.DEBUG) 83 | fh.setFormatter(plain_formatter) 84 | logger.addHandler(fh) 85 | 86 | return logger 87 | 88 | 89 | # cache the opened file object, so that different calls to `setup_logger` 90 | # with the same file name can safely write to the same file. 
91 | @functools.lru_cache(maxsize=None) 92 | def _cached_log_stream(filename): 93 | return open(filename, "a") 94 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/time_counter.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | 4 | 5 | class TimeCounter: 6 | def __init__(self) -> None: 7 | pass 8 | 9 | def clear(self): 10 | self.timedict = {} 11 | self.basetime = time.perf_counter() 12 | 13 | def timeit(self, name): 14 | nowtime = time.perf_counter() - self.basetime 15 | self.timedict[name] = nowtime 16 | self.basetime = time.perf_counter() 17 | 18 | 19 | class TimeHolder: 20 | def __init__(self) -> None: 21 | self.timedict = {} 22 | 23 | def update(self, _timedict: dict): 24 | for k, v in _timedict.items(): 25 | if k not in self.timedict: 26 | self.timedict[k] = AverageMeter(name=k, val_only=True) 27 | self.timedict[k].update(val=v) 28 | 29 | def final_res(self): 30 | return {k: v.avg for k, v in self.timedict.items()} 31 | 32 | def __str__(self): 33 | return json.dumps(self.final_res(), indent=2) 34 | 35 | 36 | class AverageMeter(object): 37 | """Computes and stores the average and current value""" 38 | 39 | def __init__(self, name, fmt=":f", val_only=False): 40 | self.name = name 41 | self.fmt = fmt 42 | self.val_only = val_only 43 | self.reset() 44 | 45 | def reset(self): 46 | self.val = 0 47 | self.avg = 0 48 | self.sum = 0 49 | self.count = 0 50 | 51 | def update(self, val, n=1): 52 | self.val = val 53 | self.sum += val * n 54 | self.count += n 55 | self.avg = self.sum / self.count 56 | 57 | def __str__(self): 58 | if self.val_only: 59 | fmtstr = "{name} {val" + self.fmt + "}" 60 | else: 61 | fmtstr = "{name} {val" + self.fmt + "} ({avg" + self.fmt + "})" 62 | return fmtstr.format(**self.__dict__) 63 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/util/vl_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from typing import List 4 | 5 | import torch 6 | 7 | 8 | def create_positive_map_from_span(tokenized, token_span, max_text_len=256): 9 | """construct a map such that positive_map[i,j] = True iff box i is associated to token j 10 | Input: 11 | - tokenized: 12 | - input_ids: Tensor[1, ntokens] 13 | - attention_mask: Tensor[1, ntokens] 14 | - token_span: list with length num_boxes. 
15 | - each item: [start_idx, end_idx] 16 | """ 17 | positive_map = torch.zeros((len(token_span), max_text_len), dtype=torch.float) 18 | for j, tok_list in enumerate(token_span): 19 | for (beg, end) in tok_list: 20 | beg_pos = tokenized.char_to_token(beg) 21 | end_pos = tokenized.char_to_token(end - 1) 22 | if beg_pos is None: 23 | try: 24 | beg_pos = tokenized.char_to_token(beg + 1) 25 | if beg_pos is None: 26 | beg_pos = tokenized.char_to_token(beg + 2) 27 | except: 28 | beg_pos = None 29 | if end_pos is None: 30 | try: 31 | end_pos = tokenized.char_to_token(end - 2) 32 | if end_pos is None: 33 | end_pos = tokenized.char_to_token(end - 3) 34 | except: 35 | end_pos = None 36 | if beg_pos is None or end_pos is None: 37 | continue 38 | 39 | assert beg_pos is not None and end_pos is not None 40 | if os.environ.get("SHILONG_DEBUG_ONLY_ONE_POS", None) == "TRUE": 41 | positive_map[j, beg_pos] = 1 42 | break 43 | else: 44 | positive_map[j, beg_pos : end_pos + 1].fill_(1) 45 | 46 | return positive_map / (positive_map.sum(-1)[:, None] + 1e-6) 47 | 48 | 49 | def build_captions_and_token_span(cat_list, force_lowercase): 50 | """ 51 | Return: 52 | captions: str 53 | cat2tokenspan: dict 54 | { 55 | 'dog': [[0, 2]], 56 | ... 57 | } 58 | """ 59 | 60 | cat2tokenspan = {} 61 | captions = "" 62 | for catname in cat_list: 63 | class_name = catname 64 | if force_lowercase: 65 | class_name = class_name.lower() 66 | if "/" in class_name: 67 | class_name_list: List = class_name.strip().split("/") 68 | class_name_list.append(class_name) 69 | class_name: str = random.choice(class_name_list) 70 | 71 | tokens_positive_i = [] 72 | subnamelist = [i.strip() for i in class_name.strip().split(" ")] 73 | for subname in subnamelist: 74 | if len(subname) == 0: 75 | continue 76 | if len(captions) > 0: 77 | captions = captions + " " 78 | strat_idx = len(captions) 79 | end_idx = strat_idx + len(subname) 80 | tokens_positive_i.append([strat_idx, end_idx]) 81 | captions = captions + subname 82 | 83 | if len(tokens_positive_i) > 0: 84 | captions = captions + " ." 
85 | cat2tokenspan[class_name] = tokens_positive_i 86 | 87 | return captions, cat2tokenspan 88 | 89 | 90 | def build_id2posspan_and_caption(category_dict: dict): 91 | """Build id2pos_span and caption from category_dict 92 | 93 | Args: 94 | category_dict (dict): category_dict 95 | """ 96 | cat_list = [item["name"].lower() for item in category_dict] 97 | id2catname = {item["id"]: item["name"].lower() for item in category_dict} 98 | caption, cat2posspan = build_captions_and_token_span(cat_list, force_lowercase=True) 99 | id2posspan = {catid: cat2posspan[catname] for catid, catname in id2catname.items()} 100 | return id2posspan, caption 101 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/groundingdino/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | -------------------------------------------------------------------------------- /seginw/GroundingDINO/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | transformers 4 | addict 5 | yapf 6 | timm 7 | numpy 8 | opencv-python 9 | supervision 10 | pycocotools -------------------------------------------------------------------------------- /seginw/sam2: -------------------------------------------------------------------------------- 1 | ../sam-hq2/sam2/ -------------------------------------------------------------------------------- /seginw/segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .build_sam_hq import ( 15 | build_sam_hq, 16 | build_sam_hq_vit_h, 17 | build_sam_hq_vit_l, 18 | build_sam_hq_vit_b, 19 | sam_hq_model_registry, 20 | ) 21 | from .predictor import SamPredictor 22 | from .automatic_mask_generator import SamAutomaticMaskGenerator 23 | -------------------------------------------------------------------------------- /seginw/segment_anything/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /seginw/segment_anything/build_sam_hq.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoderHQ, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_hq_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam_hq = build_sam_hq_vit_h 25 | 26 | 27 | def build_sam_hq_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_hq_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_hq_model_registry = { 48 | "default": build_sam_hq_vit_h, 49 | "vit_h": build_sam_hq_vit_h, 50 | "vit_l": build_sam_hq_vit_l, 51 | "vit_b": build_sam_hq_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoderHQ( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | vit_dim=encoder_embed_dim, 99 | ), 100 | pixel_mean=[123.675, 116.28, 103.53], 101 | pixel_std=[58.395, 57.12, 57.375], 102 | ) 103 | # sam.eval() 104 | if checkpoint is not None: 105 | with open(checkpoint, "rb") as f: 106 | state_dict = torch.load(f) 107 | info = sam.load_state_dict(state_dict, strict=False) 108 | print(info) 109 | for n, p in sam.named_parameters(): 110 | if 'hf_token' not in n and 'hf_mlp' not in n and 'compress_vit_feat' not in n and 'embedding_encoder' not in n and 'embedding_maskfeature' not in n: 111 | p.requires_grad = False 112 | 113 | return sam -------------------------------------------------------------------------------- /seginw/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder_hq import MaskDecoderHQ 10 | from .mask_decoder import MaskDecoder 11 | from .prompt_encoder import PromptEncoder 12 | from .transformer import TwoWayTransformer 13 | -------------------------------------------------------------------------------- /seginw/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /seginw/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /seginw/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 
21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /seginw/test_seginw.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! 
; 6 | 7 | python test_ap_on_seginw.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ 8 | done -------------------------------------------------------------------------------- /seginw/test_seginw_hq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! ; 6 | 7 | python test_ap_on_seginw.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ --use_sam_hq 8 | done -------------------------------------------------------------------------------- /seginw/test_seginw_sam2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! ; 6 | 7 | python test_ap_on_seginw_sam2.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ 8 | done -------------------------------------------------------------------------------- /seginw/test_seginw_sam_hq2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for file in ./data/seginw/*; 4 | do 5 | echo $file is data path \! ; 6 | 7 | python test_ap_on_seginw_sam2.py -c GroundingDINO/groundingdino/config/GroundingDINO_SwinB.py -p pretrained_checkpoint/groundingdino_swinb_cogcoor.pth --anno_path $file/valid/_annotations_min1cat.coco.json --image_dir $file/valid/ --use_sam_hq 8 | done -------------------------------------------------------------------------------- /segment_anything/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | from .build_sam_baseline import sam_model_registry_baseline 15 | from .predictor import SamPredictor 16 | from .automatic_mask_generator import SamAutomaticMaskGenerator 17 | -------------------------------------------------------------------------------- /segment_anything/build_sam_baseline.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer, TinyViT 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | def build_sam_vit_t(checkpoint=None): 48 | prompt_embed_dim = 256 49 | image_size = 1024 50 | vit_patch_size = 16 51 | image_embedding_size = image_size // vit_patch_size 52 | mobile_sam = Sam( 53 | image_encoder=TinyViT(img_size=1024, in_chans=3, num_classes=1000, 54 | embed_dims=[64, 128, 160, 320], 55 | depths=[2, 2, 6, 2], 56 | num_heads=[2, 4, 5, 10], 57 | window_sizes=[7, 7, 14, 7], 58 | mlp_ratio=4., 59 | drop_rate=0., 60 | drop_path_rate=0.0, 61 | use_checkpoint=False, 62 | mbconv_expand_ratio=4.0, 63 | local_conv_size=3, 64 | layer_lr_decay=0.8 65 | ), 66 | prompt_encoder=PromptEncoder( 67 | embed_dim=prompt_embed_dim, 68 | image_embedding_size=(image_embedding_size, image_embedding_size), 69 | input_image_size=(image_size, image_size), 70 | mask_in_chans=16, 71 | ), 72 | mask_decoder=MaskDecoder( 73 | num_multimask_outputs=3, 74 | transformer=TwoWayTransformer( 75 | depth=2, 76 | embedding_dim=prompt_embed_dim, 77 | mlp_dim=2048, 78 | num_heads=8, 79 | ), 80 | transformer_dim=prompt_embed_dim, 81 | iou_head_depth=3, 82 | iou_head_hidden_dim=256, 83 | ), 84 | pixel_mean=[123.675, 116.28, 103.53], 85 | pixel_std=[58.395, 57.12, 57.375], 86 | ) 87 | 88 | mobile_sam.eval() 89 | if checkpoint is not None: 90 | with open(checkpoint, "rb") as f: 91 | state_dict = torch.load(f) 92 | mobile_sam.load_state_dict(state_dict) 93 | return mobile_sam 94 | 95 | sam_model_registry_baseline = { 96 | "default": build_sam_vit_h, 97 | "vit_h": build_sam_vit_h, 98 | "vit_l": build_sam_vit_l, 99 | "vit_b": build_sam_vit_b, 100 | "vit_tiny": build_sam_vit_t 101 | } 102 | 103 | 104 | def _build_sam( 105 | encoder_embed_dim, 106 | encoder_depth, 107 | encoder_num_heads, 108 | encoder_global_attn_indexes, 109 | checkpoint=None, 110 | ): 111 | prompt_embed_dim = 256 112 | image_size = 1024 113 | vit_patch_size = 16 114 | image_embedding_size = image_size // vit_patch_size 115 | sam = Sam( 116 | image_encoder=ImageEncoderViT( 117 | depth=encoder_depth, 118 | embed_dim=encoder_embed_dim, 119 | img_size=image_size, 120 | mlp_ratio=4, 121 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 122 | num_heads=encoder_num_heads, 123 | patch_size=vit_patch_size, 124 | qkv_bias=True, 125 | use_rel_pos=True, 126 | global_attn_indexes=encoder_global_attn_indexes, 127 | window_size=14, 128 | out_chans=prompt_embed_dim, 129 | ), 130 | prompt_encoder=PromptEncoder( 131 | embed_dim=prompt_embed_dim, 132 | image_embedding_size=(image_embedding_size, image_embedding_size), 133 | input_image_size=(image_size, image_size), 134 | mask_in_chans=16, 135 | ), 136 | mask_decoder=MaskDecoder( 137 | 
num_multimask_outputs=3, 138 | transformer=TwoWayTransformer( 139 | depth=2, 140 | embedding_dim=prompt_embed_dim, 141 | mlp_dim=2048, 142 | num_heads=8, 143 | ), 144 | transformer_dim=prompt_embed_dim, 145 | iou_head_depth=3, 146 | iou_head_hidden_dim=256, 147 | ), 148 | pixel_mean=[123.675, 116.28, 103.53], 149 | pixel_std=[58.395, 57.12, 57.375], 150 | ) 151 | sam.eval() 152 | if checkpoint is not None: 153 | with open(checkpoint, "rb") as f: 154 | state_dict = torch.load(f) 155 | sam.load_state_dict(state_dict) 156 | return sam -------------------------------------------------------------------------------- /segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder_hq import MaskDecoderHQ 10 | from .mask_decoder import MaskDecoder 11 | from .prompt_encoder import PromptEncoder 12 | from .transformer import TwoWayTransformer 13 | from .tiny_vit_sam import TinyViT 14 | -------------------------------------------------------------------------------- /segment_anything/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | -------------------------------------------------------------------------------- /segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to the longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. 
Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=3 4 | include_trailing_comma=True 5 | known_standard_library=numpy,setuptools 6 | skip_glob=*/__init__.py 7 | known_myself=segment_anything 8 | known_third_party=matplotlib,cv2,torch,torchvision,pycocotools,onnx,black,isort 9 | no_lines_before=STDLIB,THIRDPARTY 10 | sections=FUTURE,STDLIB,THIRDPARTY,MYSELF,FIRSTPARTY,LOCALFOLDER 11 | default_section=FIRSTPARTY 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from setuptools import find_packages, setup 8 | 9 | setup( 10 | name="segment_anything", 11 | version="1.0", 12 | install_requires=[], 13 | packages=find_packages(exclude="notebooks"), 14 | extras_require={ 15 | "all": ["matplotlib", "pycocotools", "opencv-python", "onnx", "onnxruntime", "timm"], 16 | "dev": ["flake8", "isort", "black", "mypy"], 17 | }, 18 | ) 19 | -------------------------------------------------------------------------------- /train/README.md: -------------------------------------------------------------------------------- 1 | # Training instruction for HQ-SAM 2 | 3 | > [**Segment Anything in High Quality**](https://arxiv.org/abs/2306.01567) 4 | > Lei Ke, Mingqiao Ye, Martin Danelljan, Yifan Liu, Yu-Wing Tai, Chi-Keung Tang, Fisher Yu \ 5 | > ETH Zurich & HKUST 6 | 7 | We organize the training folder as follows. 8 | ``` 9 | train 10 | |____data 11 | |____pretrained_checkpoint 12 | |____train.py 13 | |____utils 14 | | |____dataloader.py 15 | | |____misc.py 16 | | |____loss_mask.py 17 | |____segment_anything_training 18 | |____work_dirs 19 | ``` 20 | 21 | ## 1. Data Preparation 22 | 23 | HQSeg-44K can be downloaded from [hugging face link](https://huggingface.co/sam-hq-team/sam-hq-training/tree/main/data) 24 | 25 | ### Expected dataset structure for HQSeg-44K 26 | 27 | ``` 28 | data 29 | |____DIS5K 30 | |____cascade_psp 31 | | |____DUTS-TE 32 | | |____DUTS-TR 33 | | |____ecssd 34 | | |____fss_all 35 | | |____MSRA_10K 36 | |____thin_object_detection 37 | | |____COIFT 38 | | |____HRSOD 39 | | |____ThinObject5K 40 | 41 | ``` 42 | 43 | ## 2. 
Init Checkpoint 44 | Init checkpoint can be downloaded from [hugging face link](https://huggingface.co/sam-hq-team/sam-hq-training/tree/main/pretrained_checkpoint) 45 | 46 | ### Expected checkpoint 47 | 48 | ``` 49 | pretrained_checkpoint 50 | |____sam_vit_b_maskdecoder.pth 51 | |____sam_vit_b_01ec64.pth 52 | |____sam_vit_l_maskdecoder.pth 53 | |____sam_vit_l_0b3195.pth 54 | |____sam_vit_h_maskdecoder.pth 55 | |____sam_vit_h_4b8939.pth 56 | 57 | ``` 58 | 59 | ## 3. Training 60 | To train HQ-SAM on HQSeg-44K dataset 61 | 62 | ``` 63 | python -m torch.distributed.launch --nproc_per_node= train.py --checkpoint --model-type --output 64 | ``` 65 | 66 | ### Example HQ-SAM-L training script 67 | ``` 68 | python -m torch.distributed.launch --nproc_per_node=8 train.py --checkpoint ./pretrained_checkpoint/sam_vit_l_0b3195.pth --model-type vit_l --output work_dirs/hq_sam_l 69 | ``` 70 | 71 | ### Example HQ-SAM-B training script 72 | ``` 73 | python -m torch.distributed.launch --nproc_per_node=8 train.py --checkpoint ./pretrained_checkpoint/sam_vit_b_01ec64.pth --model-type vit_b --output work_dirs/hq_sam_b 74 | ``` 75 | 76 | ### Example HQ-SAM-H training script 77 | ``` 78 | python -m torch.distributed.launch --nproc_per_node=8 train.py --checkpoint ./pretrained_checkpoint/sam_vit_h_4b8939.pth --model-type vit_h --output work_dirs/hq_sam_h 79 | ``` 80 | 81 | ## 4. Evaluation 82 | To evaluate on 4 HQ-datasets 83 | 84 | ``` 85 | python -m torch.distributed.launch --nproc_per_node= train.py --checkpoint --model-type --output --eval --restore-model 86 | ``` 87 | 88 | ### Example HQ-SAM-L evaluation script 89 | ``` 90 | python -m torch.distributed.launch --nproc_per_node=1 train.py --checkpoint ./pretrained_checkpoint/sam_vit_l_0b3195.pth --model-type vit_l --output work_dirs/hq_sam_l --eval --restore-model work_dirs/hq_sam_l/epoch_11.pth 91 | ``` 92 | 93 | ### Example HQ-SAM-L visualization script 94 | ``` 95 | python -m torch.distributed.launch --nproc_per_node=1 train.py --checkpoint ./pretrained_checkpoint/sam_vit_l_0b3195.pth --model-type vit_l --output work_dirs/hq_sam_l --eval --restore-model work_dirs/hq_sam_l/epoch_11.pth --visualize 96 | ``` -------------------------------------------------------------------------------- /train/segment_anything_training/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .build_sam import ( 8 | build_sam, 9 | build_sam_vit_h, 10 | build_sam_vit_l, 11 | build_sam_vit_b, 12 | sam_model_registry, 13 | ) 14 | -------------------------------------------------------------------------------- /train/segment_anything_training/build_sam.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | 9 | from functools import partial 10 | 11 | from .modeling import ImageEncoderViT, MaskDecoder, PromptEncoder, Sam, TwoWayTransformer 12 | 13 | 14 | def build_sam_vit_h(checkpoint=None): 15 | return _build_sam( 16 | encoder_embed_dim=1280, 17 | encoder_depth=32, 18 | encoder_num_heads=16, 19 | encoder_global_attn_indexes=[7, 15, 23, 31], 20 | checkpoint=checkpoint, 21 | ) 22 | 23 | 24 | build_sam = build_sam_vit_h 25 | 26 | 27 | def build_sam_vit_l(checkpoint=None): 28 | return _build_sam( 29 | encoder_embed_dim=1024, 30 | encoder_depth=24, 31 | encoder_num_heads=16, 32 | encoder_global_attn_indexes=[5, 11, 17, 23], 33 | checkpoint=checkpoint, 34 | ) 35 | 36 | 37 | def build_sam_vit_b(checkpoint=None): 38 | return _build_sam( 39 | encoder_embed_dim=768, 40 | encoder_depth=12, 41 | encoder_num_heads=12, 42 | encoder_global_attn_indexes=[2, 5, 8, 11], 43 | checkpoint=checkpoint, 44 | ) 45 | 46 | 47 | sam_model_registry = { 48 | "default": build_sam, 49 | "vit_h": build_sam, 50 | "vit_l": build_sam_vit_l, 51 | "vit_b": build_sam_vit_b, 52 | } 53 | 54 | 55 | def _build_sam( 56 | encoder_embed_dim, 57 | encoder_depth, 58 | encoder_num_heads, 59 | encoder_global_attn_indexes, 60 | checkpoint=None, 61 | ): 62 | prompt_embed_dim = 256 63 | image_size = 1024 64 | vit_patch_size = 16 65 | image_embedding_size = image_size // vit_patch_size 66 | sam = Sam( 67 | image_encoder=ImageEncoderViT( 68 | depth=encoder_depth, 69 | embed_dim=encoder_embed_dim, 70 | img_size=image_size, 71 | mlp_ratio=4, 72 | norm_layer=partial(torch.nn.LayerNorm, eps=1e-6), 73 | num_heads=encoder_num_heads, 74 | patch_size=vit_patch_size, 75 | qkv_bias=True, 76 | use_rel_pos=True, 77 | global_attn_indexes=encoder_global_attn_indexes, 78 | window_size=14, 79 | out_chans=prompt_embed_dim, 80 | ), 81 | prompt_encoder=PromptEncoder( 82 | embed_dim=prompt_embed_dim, 83 | image_embedding_size=(image_embedding_size, image_embedding_size), 84 | input_image_size=(image_size, image_size), 85 | mask_in_chans=16, 86 | ), 87 | mask_decoder=MaskDecoder( 88 | num_multimask_outputs=3, 89 | transformer=TwoWayTransformer( 90 | depth=2, 91 | embedding_dim=prompt_embed_dim, 92 | mlp_dim=2048, 93 | num_heads=8, 94 | ), 95 | transformer_dim=prompt_embed_dim, 96 | iou_head_depth=3, 97 | iou_head_hidden_dim=256, 98 | ), 99 | pixel_mean=[123.675, 116.28, 103.53], 100 | pixel_std=[58.395, 57.12, 57.375], 101 | ) 102 | sam.eval() 103 | if checkpoint is not None: 104 | with open(checkpoint, "rb") as f: 105 | state_dict = torch.load(f) 106 | sam.load_state_dict(state_dict) 107 | return sam 108 | -------------------------------------------------------------------------------- /train/segment_anything_training/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .sam import Sam 8 | from .image_encoder import ImageEncoderViT 9 | from .mask_decoder import MaskDecoder 10 | from .prompt_encoder import PromptEncoder 11 | from .transformer import TwoWayTransformer 12 | -------------------------------------------------------------------------------- /train/segment_anything_training/modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x 44 | -------------------------------------------------------------------------------- /train/segment_anything_training/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /train/segment_anything_training/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from torch.nn import functional as F 10 | from torchvision.transforms.functional import resize, to_pil_image # type: ignore 11 | 12 | from copy import deepcopy 13 | from typing import Tuple 14 | 15 | 16 | class ResizeLongestSide: 17 | """ 18 | Resizes images to longest side 'target_length', as well as provides 19 | methods for resizing coordinates and boxes. Provides methods for 20 | transforming both numpy array and batched torch tensors. 21 | """ 22 | 23 | def __init__(self, target_length: int) -> None: 24 | self.target_length = target_length 25 | 26 | def apply_image(self, image: np.ndarray) -> np.ndarray: 27 | """ 28 | Expects a numpy array with shape HxWxC in uint8 format. 29 | """ 30 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 31 | return np.array(resize(to_pil_image(image), target_size)) 32 | 33 | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 34 | """ 35 | Expects a numpy array of length 2 in the final dimension. Requires the 36 | original image size in (H, W) format. 
37 | """ 38 | old_h, old_w = original_size 39 | new_h, new_w = self.get_preprocess_shape( 40 | original_size[0], original_size[1], self.target_length 41 | ) 42 | coords = deepcopy(coords).astype(float) 43 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 44 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 45 | return coords 46 | 47 | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray: 48 | """ 49 | Expects a numpy array shape Bx4. Requires the original image size 50 | in (H, W) format. 51 | """ 52 | boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) 53 | return boxes.reshape(-1, 4) 54 | 55 | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: 56 | """ 57 | Expects batched images with shape BxCxHxW and float format. This 58 | transformation may not exactly match apply_image. apply_image is 59 | the transformation expected by the model. 60 | """ 61 | # Expects an image in BCHW format. May not exactly match apply_image. 62 | target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) 63 | return F.interpolate( 64 | image, target_size, mode="bilinear", align_corners=False, antialias=True 65 | ) 66 | 67 | def apply_coords_torch( 68 | self, coords: torch.Tensor, original_size: Tuple[int, ...] 69 | ) -> torch.Tensor: 70 | """ 71 | Expects a torch tensor with length 2 in the last dimension. Requires the 72 | original image size in (H, W) format. 73 | """ 74 | old_h, old_w = original_size 75 | new_h, new_w = self.get_preprocess_shape( 76 | original_size[0], original_size[1], self.target_length 77 | ) 78 | coords = deepcopy(coords).to(torch.float) 79 | coords[..., 0] = coords[..., 0] * (new_w / old_w) 80 | coords[..., 1] = coords[..., 1] * (new_h / old_h) 81 | return coords 82 | 83 | def apply_boxes_torch( 84 | self, boxes: torch.Tensor, original_size: Tuple[int, ...] 85 | ) -> torch.Tensor: 86 | """ 87 | Expects a torch tensor with shape Bx4. Requires the original image 88 | size in (H, W) format. 89 | """ 90 | boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) 91 | return boxes.reshape(-1, 4) 92 | 93 | @staticmethod 94 | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: 95 | """ 96 | Compute the output size given input size and target long side length. 
97 | """ 98 | scale = long_side_length * 1.0 / max(oldh, oldw) 99 | newh, neww = oldh * scale, oldw * scale 100 | neww = int(neww + 0.5) 101 | newh = int(newh + 0.5) 102 | return (newh, neww) 103 | -------------------------------------------------------------------------------- /visual_demo/1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/1.gif -------------------------------------------------------------------------------- /visual_demo/2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/2.gif -------------------------------------------------------------------------------- /visual_demo/3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/3.gif -------------------------------------------------------------------------------- /visual_demo/4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/4.gif -------------------------------------------------------------------------------- /visual_demo/5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/5.gif -------------------------------------------------------------------------------- /visual_demo/6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SysCV/sam-hq/afb90d71602048d0e74d4a0bbf6b8b40e3e27f26/visual_demo/6.gif --------------------------------------------------------------------------------