├── .gitignore ├── LICENSE ├── README.md ├── TF_logger.py ├── __init__.py ├── ava ├── __init__.py ├── ava_action_list_v2.0.csv ├── ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt ├── label_map_util.py ├── metrics.py ├── np_box_list.py ├── np_box_list_ops.py ├── np_box_mask_list.py ├── np_box_mask_list_ops.py ├── np_box_ops.py ├── np_mask_ops.py ├── object_detection_evaluation.py ├── per_image_evaluation.py ├── standard_fields.py └── teat.py ├── backbone ├── __init__.py ├── base.py ├── hidden_for_roi.py ├── hidden_for_roi2.py ├── hidden_for_roi_maxpool.py ├── resnet101.py ├── resnet18.py ├── resnet50.py ├── slowfast_res101.py ├── slowfast_res50.py └── slowfastnet.py ├── bbox.py ├── bbox1.py ├── cfg ├── tiny-yolo-voc.cfg ├── yolo-voc.cfg ├── yolo.cfg └── yolov3.cfg ├── config ├── config.py ├── eval_config.py └── train_config.py ├── darknet.py ├── data └── pam.gif ├── dataset ├── AVA.py ├── AVA_video_OLD.py ├── AVA_video_v1.py ├── AVA_video_v2.py ├── base.py ├── coco2017.py ├── coco2017_animal.py ├── coco2017_car.py ├── coco2017_person.py ├── voc2007.py └── voc2007_cat_dog.py ├── deep ├── __init__.py ├── checkpoint │ └── original_ckpt.t7 ├── evaluate.py ├── feature_extractor.py ├── model.py ├── original_model.py ├── test.py ├── train.jpg └── train.py ├── deep_sort.py ├── det ├── det_dog.jpg ├── det_eagle.jpg ├── det_giraffe.jpg ├── det_herd_of_horses.jpg ├── det_img1.jpg ├── det_img2.jpg ├── det_img3.jpg ├── det_img4.jpg ├── det_messi.jpg ├── det_person.jpg └── det_scream.jpg ├── detect.py ├── eval.py ├── evaluator.py ├── extention ├── functional.py └── lr_scheduler.py ├── f.py ├── functional.py ├── get_ava_performance.py ├── img_to_video.py ├── imgs ├── dog.jpg ├── eagle.jpg ├── giraffe.jpg ├── herd_of_horses.jpg ├── img1.jpg ├── img2.jpg ├── img3.jpg ├── img4.jpg ├── messi.jpg ├── person.jpg └── scream.jpg ├── imshow_result.py ├── imshow_result_OLD.py ├── infer.py ├── infer_stream.py ├── infer_websocket.py ├── logger.py ├── logs ├── events.out.tfevents.1555900792.aiuser-Z390-GAMING-X └── events.out.tfevents.1555900949.aiuser-Z390-GAMING-X ├── model.py ├── outputs └── frames │ └── blank.TXT ├── preprocess.py ├── requirements.txt ├── roi ├── pooler.py └── pooler_.py ├── rpn ├── mkf.py └── region_proposal_network.py ├── runs ├── Apr15_19-42-07_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555328527.aiuser-Z390-GAMING-X ├── Apr15_19-42-31_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555328551.aiuser-Z390-GAMING-X ├── Apr15_19-42-47_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555328567.aiuser-Z390-GAMING-X ├── Apr15_19-44-13_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555328653.aiuser-Z390-GAMING-X ├── Apr15_19-47-03_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555328823.aiuser-Z390-GAMING-X ├── Apr15_19-53-21_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555329201.aiuser-Z390-GAMING-X ├── Apr15_19-56-51_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555329411.aiuser-Z390-GAMING-X ├── Apr15_20-00-31_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555329631.aiuser-Z390-GAMING-X └── Apr15_20-12-31_aiuser-Z390-GAMING-XNet1 │ └── events.out.tfevents.1555330351.aiuser-Z390-GAMING-X ├── scripts ├── coco2017 │ ├── eval.sh │ ├── infer.sh │ ├── train-bs1.sh │ ├── train-bs16.sh │ ├── train-bs2.sh │ ├── train-bs4.sh │ └── train-bs8.sh └── voc2007 │ ├── eval.sh │ ├── infer.sh │ ├── train-bs1.sh │ ├── train-bs16.sh │ ├── train-bs2.sh │ ├── train-bs4.sh │ └── train-bs8.sh ├── slow_fast.ipynb ├── sort ├── __init__.py ├── detection.py ├── iou_matching.py 
├── kalman_filter.py ├── linear_assignment.py ├── nn_matching.py ├── preprocessing.py ├── track.py └── tracker.py ├── support ├── layer │ ├── nms.py │ └── roi_align.py ├── setup.py └── src │ ├── ROIAlign.h │ ├── cpu │ ├── ROIAlign_cpu.cpp │ ├── nms_cpu.cpp │ └── vision.h │ ├── cuda │ ├── ROIAlign_cuda.cu │ ├── nms.cu │ └── vision.h │ ├── nms.h │ └── vision.cpp ├── test.py ├── test └── nms │ ├── nms-large-input.npy │ ├── nms-large-output.npy │ └── test_nms.py ├── test_con.py ├── test_daptice.py ├── test_nms.py ├── tiny-yolo-voc.cfg ├── train.py ├── trainvideo.py ├── util.py ├── video_demo.py ├── voc_eval.py ├── yolo-voc.cfg ├── yolo.cfg └── yolov3.cfg /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Slow-Fast-pytorch-implementation with Colab notebook 2 | ![](data/pam.gif) 3 | 4 | # Run the demo on your own data 5 | 6 | 1.Clone the repository: git clone https://github.com/vaib-saxena/Slow-Fast-pytorch-implementation.git 7 | 8 | 2.Download Yolo v3 weights: https://drive.google.com/file/d/1SSpVueL6W_4BE3sFDkzAgdMd35Mtl2N5/view?usp=sharing and paste in the directory 9 | 10 | 3.Download DeepSort re-id weights: https://drive.google.com/file/d/1bwLHXS5TocUfDL2-iLNJLs8WfUOZtg9B/view?usp=sharing and paste in deep\checkpoint directory 11 | 12 | 4.Download Pre-trained SlowFast Network weights: https://drive.google.com/file/d/1ooE-qh7LBL7kWceZRHPyIIBslWCBwdwy/view?usp=sharing and paste in the directory 13 | 14 | 5.Modify the weights path and your video path in video_demo.py. 15 | 16 | 6.Run video_demo.py. 17 | 18 | # Colab notebook 19 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vaib-saxena/Slow-Fast-pytorch-implementation/blob/master/slow_fast.ipynb) 20 | 21 | 22 | # Dependencies 23 | - python 3 (python2 not sure) 24 | - numpy 25 | - scipy 26 | - opencv-python 27 | - torch >= 1.0.0 28 | - torchvision = 0.2.1 29 | - youtube-dl 30 | - ffmpeg 31 | 32 | 33 | # Reference 34 | - paper: [Slow Fast Networks](https://arxiv.org/pdf/1812.03982.pdf) 35 | 36 | - code: [facebookresearch/SlowFast](https://github.com/facebookresearch/SlowFast) 37 | 38 | - https://github.com/MagicChuyi/SlowFast-Network-pytorch 39 | 40 | - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402) 41 | 42 | - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort) 43 | 44 | - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) 45 | 46 | - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/) 47 | -------------------------------------------------------------------------------- /TF_logger.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import scipy.misc 4 | try: 5 | from StringIO import StringIO # Python 2.7 6 | except ImportError: 7 | from io import BytesIO # Python 3.x 8 | 9 | 10 | class Logger(object): 11 | 12 | def __init__(self, log_dir): 13 | """Create a summary writer logging to log_dir.""" 14 | # 创建一个指向log文件夹的summary writer 15 | self.writer = tf.summary.FileWriter(log_dir) 16 | 17 | def scalar_summary(self, tag, value, step): 18 | """Log a scalar variable.""" 19 | # 标量信息 日志 20 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 21 | self.writer.add_summary(summary, step) 22 | 23 | def image_summary(self, tag, images, step): 24 | """Log a list of images.""" 25 | # 图像信息 日志 26 | img_summaries = 
[] 27 | for i, img in enumerate(images): 28 | # Write the image to a string 29 | try: 30 | s = StringIO() 31 | except: 32 | s = BytesIO() 33 | scipy.misc.toimage(img).save(s, format="png") 34 | 35 | # Create an Image object 36 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 37 | height=img.shape[0], 38 | width=img.shape[1]) 39 | # Create a Summary value 40 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 41 | 42 | # Create and write Summary 43 | summary = tf.Summary(value=img_summaries) 44 | self.writer.add_summary(summary, step) 45 | 46 | def histo_summary(self, tag, values, step, bins=1000): 47 | """Log a histogram of the tensor of values.""" 48 | # 直方图信息 日志 49 | # Create a histogram using numpy 50 | counts, bin_edges = np.histogram(values, bins=bins) 51 | 52 | # Fill the fields of the histogram proto 53 | hist = tf.HistogramProto() 54 | hist.min = float(np.min(values)) 55 | hist.max = float(np.max(values)) 56 | hist.num = int(np.prod(values.shape)) 57 | hist.sum = float(np.sum(values)) 58 | hist.sum_squares = float(np.sum(values ** 2)) 59 | 60 | # Drop the start of the first bin 61 | bin_edges = bin_edges[1:] 62 | 63 | # Add bin edges and counts 64 | for edge in bin_edges: 65 | hist.bucket_limit.append(edge) 66 | for c in counts: 67 | hist.bucket.append(c) 68 | 69 | # Create and write Summary 70 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 71 | self.writer.add_summary(summary, step) 72 | self.writer.flush() -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/__init__.py -------------------------------------------------------------------------------- /ava/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/ava/__init__.py -------------------------------------------------------------------------------- /ava/ava_action_list_v2.0.csv: -------------------------------------------------------------------------------- 1 | label_id,label_name,label_type 2 | 1,bend/bow (at the waist),PERSON_MOVEMENT 3 | 2,crawl,PERSON_MOVEMENT 4 | 3,crouch/kneel,PERSON_MOVEMENT 5 | 4,dance,PERSON_MOVEMENT 6 | 5,fall down,PERSON_MOVEMENT 7 | 6,get up,PERSON_MOVEMENT 8 | 7,jump/leap,PERSON_MOVEMENT 9 | 8,lie/sleep,PERSON_MOVEMENT 10 | 9,martial art,PERSON_MOVEMENT 11 | 10,run/jog,PERSON_MOVEMENT 12 | 11,sit,PERSON_MOVEMENT 13 | 12,stand,PERSON_MOVEMENT 14 | 13,swim,PERSON_MOVEMENT 15 | 14,walk,PERSON_MOVEMENT 16 | 15,answer phone,OBJECT_MANIPULATION 17 | 16,brush teeth,OBJECT_MANIPULATION 18 | 17,carry/hold (an object),OBJECT_MANIPULATION 19 | 18,catch (an object),OBJECT_MANIPULATION 20 | 19,chop,OBJECT_MANIPULATION 21 | 20,"climb (e.g., a mountain)",OBJECT_MANIPULATION 22 | 21,clink glass,OBJECT_MANIPULATION 23 | 22,"close (e.g., a door, a box)",OBJECT_MANIPULATION 24 | 23,cook,OBJECT_MANIPULATION 25 | 24,cut,OBJECT_MANIPULATION 26 | 25,dig,OBJECT_MANIPULATION 27 | 26,dress/put on clothing,OBJECT_MANIPULATION 28 | 27,drink,OBJECT_MANIPULATION 29 | 28,"drive (e.g., a car, a truck)",OBJECT_MANIPULATION 30 | 29,eat,OBJECT_MANIPULATION 31 | 30,enter,OBJECT_MANIPULATION 32 | 31,exit,OBJECT_MANIPULATION 33 | 32,extract,OBJECT_MANIPULATION 34 | 
33,fishing,OBJECT_MANIPULATION 35 | 34,hit (an object),OBJECT_MANIPULATION 36 | 35,kick (an object),OBJECT_MANIPULATION 37 | 36,lift/pick up,OBJECT_MANIPULATION 38 | 37,"listen (e.g., to music)",OBJECT_MANIPULATION 39 | 38,"open (e.g., a window, a car door)",OBJECT_MANIPULATION 40 | 39,paint,OBJECT_MANIPULATION 41 | 40,play board game,OBJECT_MANIPULATION 42 | 41,play musical instrument,OBJECT_MANIPULATION 43 | 42,play with pets,OBJECT_MANIPULATION 44 | 43,point to (an object),OBJECT_MANIPULATION 45 | 44,press,OBJECT_MANIPULATION 46 | 45,pull (an object),OBJECT_MANIPULATION 47 | 46,push (an object),OBJECT_MANIPULATION 48 | 47,put down,OBJECT_MANIPULATION 49 | 48,read,OBJECT_MANIPULATION 50 | 49,"ride (e.g., a bike, a car, a horse)",OBJECT_MANIPULATION 51 | 50,row boat,OBJECT_MANIPULATION 52 | 51,sail boat,OBJECT_MANIPULATION 53 | 52,shoot,OBJECT_MANIPULATION 54 | 53,shovel,OBJECT_MANIPULATION 55 | 54,smoke,OBJECT_MANIPULATION 56 | 55,stir,OBJECT_MANIPULATION 57 | 56,take a photo,OBJECT_MANIPULATION 58 | 57,text on/look at a cellphone,OBJECT_MANIPULATION 59 | 58,throw,OBJECT_MANIPULATION 60 | 59,touch (an object),OBJECT_MANIPULATION 61 | 60,"turn (e.g., a screwdriver)",OBJECT_MANIPULATION 62 | 61,"watch (e.g., TV)",OBJECT_MANIPULATION 63 | 62,work on a computer,OBJECT_MANIPULATION 64 | 63,write,OBJECT_MANIPULATION 65 | 64,fight/hit (a person),PERSON_INTERACTION 66 | 65,give/serve (an object) to (a person),PERSON_INTERACTION 67 | 66,grab (a person),PERSON_INTERACTION 68 | 67,hand clap,PERSON_INTERACTION 69 | 68,hand shake,PERSON_INTERACTION 70 | 69,hand wave,PERSON_INTERACTION 71 | 70,hug (a person),PERSON_INTERACTION 72 | 71,kick (a person),PERSON_INTERACTION 73 | 72,kiss (a person),PERSON_INTERACTION 74 | 73,lift (a person),PERSON_INTERACTION 75 | 74,listen to (a person),PERSON_INTERACTION 76 | 75,play with kids,PERSON_INTERACTION 77 | 76,push (another person),PERSON_INTERACTION 78 | 77,"sing to (e.g., self, a person, a group)",PERSON_INTERACTION 79 | 78,take (an object) from (a person),PERSON_INTERACTION 80 | 79,"talk to (e.g., self, a person, a group)",PERSON_INTERACTION 81 | 80,watch (a person),PERSON_INTERACTION 82 | -------------------------------------------------------------------------------- /ava/ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt: -------------------------------------------------------------------------------- 1 | item { 2 | name: "bend/bow (at the waist)" 3 | id: 1 4 | } 5 | item { 6 | name: "crouch/kneel" 7 | id: 3 8 | } 9 | item { 10 | name: "dance" 11 | id: 4 12 | } 13 | item { 14 | name: "fall down" 15 | id: 5 16 | } 17 | item { 18 | name: "get up" 19 | id: 6 20 | } 21 | item { 22 | name: "jump/leap" 23 | id: 7 24 | } 25 | item { 26 | name: "lie/sleep" 27 | id: 8 28 | } 29 | item { 30 | name: "martial art" 31 | id: 9 32 | } 33 | item { 34 | name: "run/jog" 35 | id: 10 36 | } 37 | item { 38 | name: "sit" 39 | id: 11 40 | } 41 | item { 42 | name: "stand" 43 | id: 12 44 | } 45 | item { 46 | name: "swim" 47 | id: 13 48 | } 49 | item { 50 | name: "walk" 51 | id: 14 52 | } 53 | item { 54 | name: "answer phone" 55 | id: 15 56 | } 57 | item { 58 | name: "carry/hold (an object)" 59 | id: 17 60 | } 61 | item { 62 | name: "climb (e.g., a mountain)" 63 | id: 20 64 | } 65 | item { 66 | name: "close (e.g., a door, a box)" 67 | id: 22 68 | } 69 | item { 70 | name: "cut" 71 | id: 24 72 | } 73 | item { 74 | name: "dress/put on clothing" 75 | id: 26 76 | } 77 | item { 78 | name: "drink" 79 | id: 27 80 | } 81 | item { 82 | name: "drive (e.g., a car, a truck)" 83 | id: 28 84 | } 
85 | item { 86 | name: "eat" 87 | id: 29 88 | } 89 | item { 90 | name: "enter" 91 | id: 30 92 | } 93 | item { 94 | name: "hit (an object)" 95 | id: 34 96 | } 97 | item { 98 | name: "lift/pick up" 99 | id: 36 100 | } 101 | item { 102 | name: "listen (e.g., to music)" 103 | id: 37 104 | } 105 | item { 106 | name: "open (e.g., a window, a car door)" 107 | id: 38 108 | } 109 | item { 110 | name: "play musical instrument" 111 | id: 41 112 | } 113 | item { 114 | name: "point to (an object)" 115 | id: 43 116 | } 117 | item { 118 | name: "pull (an object)" 119 | id: 45 120 | } 121 | item { 122 | name: "push (an object)" 123 | id: 46 124 | } 125 | item { 126 | name: "put down" 127 | id: 47 128 | } 129 | item { 130 | name: "read" 131 | id: 48 132 | } 133 | item { 134 | name: "ride (e.g., a bike, a car, a horse)" 135 | id: 49 136 | } 137 | item { 138 | name: "sail boat" 139 | id: 51 140 | } 141 | item { 142 | name: "shoot" 143 | id: 52 144 | } 145 | item { 146 | name: "smoke" 147 | id: 54 148 | } 149 | item { 150 | name: "take a photo" 151 | id: 56 152 | } 153 | item { 154 | name: "text on/look at a cellphone" 155 | id: 57 156 | } 157 | item { 158 | name: "throw" 159 | id: 58 160 | } 161 | item { 162 | name: "touch (an object)" 163 | id: 59 164 | } 165 | item { 166 | name: "turn (e.g., a screwdriver)" 167 | id: 60 168 | } 169 | item { 170 | name: "watch (e.g., TV)" 171 | id: 61 172 | } 173 | item { 174 | name: "work on a computer" 175 | id: 62 176 | } 177 | item { 178 | name: "write" 179 | id: 63 180 | } 181 | item { 182 | name: "fight/hit (a person)" 183 | id: 64 184 | } 185 | item { 186 | name: "give/serve (an object) to (a person)" 187 | id: 65 188 | } 189 | item { 190 | name: "grab (a person)" 191 | id: 66 192 | } 193 | item { 194 | name: "hand clap" 195 | id: 67 196 | } 197 | item { 198 | name: "hand shake" 199 | id: 68 200 | } 201 | item { 202 | name: "hand wave" 203 | id: 69 204 | } 205 | item { 206 | name: "hug (a person)" 207 | id: 70 208 | } 209 | item { 210 | name: "kiss (a person)" 211 | id: 72 212 | } 213 | item { 214 | name: "lift (a person)" 215 | id: 73 216 | } 217 | item { 218 | name: "listen to (a person)" 219 | id: 74 220 | } 221 | item { 222 | name: "push (another person)" 223 | id: 76 224 | } 225 | item { 226 | name: "sing to (e.g., self, a person, a group)" 227 | id: 77 228 | } 229 | item { 230 | name: "take (an object) from (a person)" 231 | id: 78 232 | } 233 | item { 234 | name: "talk to (e.g., self, a person, a group)" 235 | id: 79 236 | } 237 | item { 238 | name: "watch (a person)" 239 | id: 80 240 | } 241 | -------------------------------------------------------------------------------- /ava/label_map_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Label map utility functions.""" 16 | 17 | import logging 18 | 19 | # from google.protobuf import text_format 20 | # from google3.third_party.tensorflow_models.object_detection.protos import string_int_label_map_pb2 21 | 22 | 23 | def _validate_label_map(label_map): 24 | """Checks if a label map is valid. 25 | 26 | Args: 27 | label_map: StringIntLabelMap to validate. 28 | 29 | Raises: 30 | ValueError: if label map is invalid. 31 | """ 32 | for item in label_map.item: 33 | if item.id < 1: 34 | raise ValueError('Label map ids should be >= 1.') 35 | 36 | 37 | def create_category_index(categories): 38 | """Creates dictionary of COCO compatible categories keyed by category id. 39 | 40 | Args: 41 | categories: a list of dicts, each of which has the following keys: 42 | 'id': (required) an integer id uniquely identifying this category. 43 | 'name': (required) string representing category name 44 | e.g., 'cat', 'dog', 'pizza'. 45 | 46 | Returns: 47 | category_index: a dict containing the same entries as categories, but keyed 48 | by the 'id' field of each category. 49 | """ 50 | category_index = {} 51 | for cat in categories: 52 | category_index[cat['id']] = cat 53 | return category_index 54 | 55 | 56 | def get_max_label_map_index(label_map): 57 | """Get maximum index in label map. 58 | 59 | Args: 60 | label_map: a StringIntLabelMapProto 61 | 62 | Returns: 63 | an integer 64 | """ 65 | return max([item.id for item in label_map.item]) 66 | 67 | 68 | def convert_label_map_to_categories(label_map, 69 | max_num_classes, 70 | use_display_name=True): 71 | """Loads label map proto and returns categories list compatible with eval. 72 | 73 | This function loads a label map and returns a list of dicts, each of which 74 | has the following keys: 75 | 'id': (required) an integer id uniquely identifying this category. 76 | 'name': (required) string representing category name 77 | e.g., 'cat', 'dog', 'pizza'. 78 | We only allow class into the list if its id-label_id_offset is 79 | between 0 (inclusive) and max_num_classes (exclusive). 80 | If there are several items mapping to the same id in the label map, 81 | we will only keep the first one in the categories list. 82 | 83 | Args: 84 | label_map: a StringIntLabelMapProto or None. If None, a default categories 85 | list is created with max_num_classes categories. 86 | max_num_classes: maximum number of (consecutive) label indices to include. 87 | use_display_name: (boolean) choose whether to load 'display_name' field 88 | as category name. If False or if the display_name field does not exist, 89 | uses 'name' field as category names instead. 90 | Returns: 91 | categories: a list of dictionaries representing all possible categories. 
92 | """ 93 | categories = [] 94 | list_of_ids_already_added = [] 95 | if not label_map: 96 | label_id_offset = 1 97 | for class_id in range(max_num_classes): 98 | categories.append({ 99 | 'id': class_id + label_id_offset, 100 | 'name': 'category_{}'.format(class_id + label_id_offset) 101 | }) 102 | return categories 103 | for item in label_map.item: 104 | if not 0 < item.id <= max_num_classes: 105 | logging.info('Ignore item %d since it falls outside of requested ' 106 | 'label range.', item.id) 107 | continue 108 | if use_display_name and item.HasField('display_name'): 109 | name = item.display_name 110 | else: 111 | name = item.name 112 | if item.id not in list_of_ids_already_added: 113 | list_of_ids_already_added.append(item.id) 114 | categories.append({'id': item.id, 'name': name}) 115 | return categories 116 | 117 | 118 | def load_labelmap(path): 119 | """Loads label map proto. 120 | 121 | Args: 122 | path: path to StringIntLabelMap proto text file. 123 | Returns: 124 | a StringIntLabelMapProto 125 | """ 126 | with open(path, 'r') as fid: 127 | label_map_string = fid.read() 128 | label_map = string_int_label_map_pb2.StringIntLabelMap() 129 | try: 130 | text_format.Merge(label_map_string, label_map) 131 | except text_format.ParseError: 132 | label_map.ParseFromString(label_map_string) 133 | _validate_label_map(label_map) 134 | return label_map 135 | 136 | 137 | def get_label_map_dict(label_map_path, use_display_name=False): 138 | """Reads a label map and returns a dictionary of label names to id. 139 | 140 | Args: 141 | label_map_path: path to label_map. 142 | use_display_name: whether to use the label map items' display names as keys. 143 | 144 | Returns: 145 | A dictionary mapping label names to id. 146 | """ 147 | label_map = load_labelmap(label_map_path) 148 | label_map_dict = {} 149 | for item in label_map.item: 150 | if use_display_name: 151 | label_map_dict[item.display_name] = item.id 152 | else: 153 | label_map_dict[item.name] = item.id 154 | return label_map_dict 155 | 156 | 157 | def create_category_index_from_labelmap(label_map_path): 158 | """Reads a label map and returns a category index. 159 | 160 | Args: 161 | label_map_path: Path to `StringIntLabelMap` proto text file. 162 | 163 | Returns: 164 | A category index, which is a dictionary that maps integer ids to dicts 165 | containing categories, e.g. 166 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 167 | """ 168 | label_map = load_labelmap(label_map_path) 169 | max_num_classes = max(item.id for item in label_map.item) 170 | categories = convert_label_map_to_categories(label_map, max_num_classes) 171 | return create_category_index(categories) 172 | 173 | 174 | def create_class_agnostic_category_index(): 175 | """Creates a category index with a single `object` class.""" 176 | return {1: {'id': 1, 'name': 'object'}} 177 | -------------------------------------------------------------------------------- /ava/metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Functions for computing metrics like precision, recall, CorLoc and etc.""" 17 | from __future__ import division 18 | 19 | import numpy as np 20 | 21 | 22 | def compute_precision_recall(scores, labels, num_gt): 23 | """Compute precision and recall. 24 | 25 | Args: 26 | scores: A float numpy array representing detection score 27 | labels: A boolean numpy array representing true/false positive labels 28 | num_gt: Number of ground truth instances 29 | 30 | Raises: 31 | ValueError: if the input is not of the correct format 32 | 33 | Returns: 34 | precision: Fraction of positive instances over detected ones. This value is 35 | None if no ground truth labels are present. 36 | recall: Fraction of detected positive instance over all positive instances. 37 | This value is None if no ground truth labels are present. 38 | 39 | """ 40 | if not isinstance( 41 | labels, np.ndarray) or labels.dtype != np.bool or len(labels.shape) != 1: 42 | raise ValueError("labels must be single dimension bool numpy array") 43 | 44 | if not isinstance( 45 | scores, np.ndarray) or len(scores.shape) != 1: 46 | raise ValueError("scores must be single dimension numpy array") 47 | 48 | if num_gt < np.sum(labels): 49 | raise ValueError("Number of true positives must be smaller than num_gt.") 50 | 51 | if len(scores) != len(labels): 52 | raise ValueError("scores and labels must be of the same size.") 53 | 54 | if num_gt == 0: 55 | return None, None 56 | 57 | sorted_indices = np.argsort(scores) 58 | sorted_indices = sorted_indices[::-1] 59 | labels = labels.astype(int) 60 | true_positive_labels = labels[sorted_indices] 61 | false_positive_labels = 1 - true_positive_labels 62 | cum_true_positives = np.cumsum(true_positive_labels) 63 | cum_false_positives = np.cumsum(false_positive_labels) 64 | precision = cum_true_positives.astype(float) / ( 65 | cum_true_positives + cum_false_positives) 66 | recall = cum_true_positives.astype(float) / num_gt 67 | return precision, recall 68 | 69 | 70 | def compute_average_precision(precision, recall): 71 | """Compute Average Precision according to the definition in VOCdevkit. 72 | 73 | Precision is modified to ensure that it does not decrease as recall 74 | decrease. 75 | 76 | Args: 77 | precision: A float [N, 1] numpy array of precisions 78 | recall: A float [N, 1] numpy array of recalls 79 | 80 | Raises: 81 | ValueError: if the input is not of the correct format 82 | 83 | Returns: 84 | average_precison: The area under the precision recall curve. NaN if 85 | precision and recall are None. 
86 | 87 | """ 88 | if precision is None: 89 | if recall is not None: 90 | raise ValueError("If precision is None, recall must also be None") 91 | return np.NAN 92 | 93 | if not isinstance(precision, np.ndarray) or not isinstance(recall, 94 | np.ndarray): 95 | raise ValueError("precision and recall must be numpy array") 96 | if precision.dtype != np.float or recall.dtype != np.float: 97 | raise ValueError("input must be float numpy array.") 98 | if len(precision) != len(recall): 99 | raise ValueError("precision and recall must be of the same size.") 100 | if not precision.size: 101 | return 0.0 102 | if np.amin(precision) < 0 or np.amax(precision) > 1: 103 | raise ValueError("Precision must be in the range of [0, 1].") 104 | if np.amin(recall) < 0 or np.amax(recall) > 1: 105 | raise ValueError("recall must be in the range of [0, 1].") 106 | if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): 107 | raise ValueError("recall must be a non-decreasing array") 108 | 109 | recall = np.concatenate([[0], recall, [1]]) 110 | precision = np.concatenate([[0], precision, [0]]) 111 | 112 | # Preprocess precision to be a non-decreasing array 113 | for i in range(len(precision) - 2, -1, -1): 114 | precision[i] = np.maximum(precision[i], precision[i + 1]) 115 | 116 | indices = np.where(recall[1:] != recall[:-1])[0] + 1 117 | average_precision = np.sum( 118 | (recall[indices] - recall[indices - 1]) * precision[indices]) 119 | return average_precision 120 | 121 | 122 | def compute_cor_loc(num_gt_imgs_per_class, 123 | num_images_correctly_detected_per_class): 124 | """Compute CorLoc according to the definition in the following paper. 125 | 126 | https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf 127 | 128 | Returns nans if there are no ground truth images for a class. 129 | 130 | Args: 131 | num_gt_imgs_per_class: 1D array, representing number of images containing 132 | at least one object instance of a particular class 133 | num_images_correctly_detected_per_class: 1D array, representing number of 134 | images that are correctly detected at least one object instance of a 135 | particular class 136 | 137 | Returns: 138 | corloc_per_class: A float numpy array represents the corloc score of each 139 | class 140 | """ 141 | # Divide by zero expected for classes with no gt examples. 142 | with np.errstate(divide="ignore", invalid="ignore"): 143 | return np.where( 144 | num_gt_imgs_per_class == 0, np.nan, 145 | num_images_correctly_detected_per_class / num_gt_imgs_per_class) 146 | -------------------------------------------------------------------------------- /ava/np_box_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Numpy BoxList classes and functions.""" 17 | 18 | import numpy as np 19 | 20 | 21 | class BoxList(object): 22 | """Box collection. 23 | 24 | BoxList represents a list of bounding boxes as numpy array, where each 25 | bounding box is represented as a row of 4 numbers, 26 | [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a 27 | given list correspond to a single image. 28 | 29 | Optionally, users can add additional related fields (such as 30 | objectness/classification scores). 31 | """ 32 | 33 | def __init__(self, data): 34 | """Constructs box collection. 35 | 36 | Args: 37 | data: a numpy array of shape [N, 4] representing box coordinates 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | """ 43 | if not isinstance(data, np.ndarray): 44 | raise ValueError('data must be a numpy array.') 45 | if len(data.shape) != 2 or data.shape[1] != 4: 46 | raise ValueError('Invalid dimensions for box data.') 47 | if data.dtype != np.float32 and data.dtype != np.float64: 48 | raise ValueError('Invalid data type for box data: float is required.') 49 | if not self._is_valid_boxes(data): 50 | raise ValueError('Invalid box data. data must be a numpy array of ' 51 | 'N*[y_min, x_min, y_max, x_max]') 52 | self.data = {'boxes': data} 53 | 54 | def num_boxes(self): 55 | """Return number of boxes held in collections.""" 56 | return self.data['boxes'].shape[0] 57 | 58 | def get_extra_fields(self): 59 | """Return all non-box fields.""" 60 | return [k for k in self.data.keys() if k != 'boxes'] 61 | 62 | def has_field(self, field): 63 | return field in self.data 64 | 65 | def add_field(self, field, field_data): 66 | """Add data to a specified field. 67 | 68 | Args: 69 | field: a string parameter used to speficy a related field to be accessed. 70 | field_data: a numpy array of [N, ...] representing the data associated 71 | with the field. 72 | Raises: 73 | ValueError: if the field is already exist or the dimension of the field 74 | data does not matches the number of boxes. 75 | """ 76 | if self.has_field(field): 77 | raise ValueError('Field ' + field + 'already exists') 78 | if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes(): 79 | raise ValueError('Invalid dimensions for field data') 80 | self.data[field] = field_data 81 | 82 | def get(self): 83 | """Convenience function for accesssing box coordinates. 84 | 85 | Returns: 86 | a numpy array of shape [N, 4] representing box corners 87 | """ 88 | return self.get_field('boxes') 89 | 90 | def get_field(self, field): 91 | """Accesses data associated with the specified field in the box collection. 92 | 93 | Args: 94 | field: a string parameter used to speficy a related field to be accessed. 95 | 96 | Returns: 97 | a numpy 1-d array representing data of an associated field 98 | 99 | Raises: 100 | ValueError: if invalid field 101 | """ 102 | if not self.has_field(field): 103 | raise ValueError('field {} does not exist'.format(field)) 104 | return self.data[field] 105 | 106 | def get_coordinates(self): 107 | """Get corner coordinates of boxes. 
108 | 109 | Returns: 110 | a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max] 111 | """ 112 | box_coordinates = self.get() 113 | y_min = box_coordinates[:, 0] 114 | x_min = box_coordinates[:, 1] 115 | y_max = box_coordinates[:, 2] 116 | x_max = box_coordinates[:, 3] 117 | return [y_min, x_min, y_max, x_max] 118 | 119 | def _is_valid_boxes(self, data): 120 | """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin]. 121 | 122 | Args: 123 | data: a numpy array of shape [N, 4] representing box coordinates 124 | 125 | Returns: 126 | a boolean indicating whether all ymax of boxes are equal or greater than 127 | ymin, and all xmax of boxes are equal or greater than xmin. 128 | """ 129 | if data.shape[0] > 0: 130 | for i in range(data.shape[0]): 131 | if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]: 132 | return False 133 | return True 134 | -------------------------------------------------------------------------------- /ava/np_box_mask_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Numpy BoxMaskList classes and functions.""" 17 | 18 | import numpy as np 19 | import ava.np_box_list as np_box_list 20 | 21 | 22 | class BoxMaskList(np_box_list.BoxList): 23 | """Convenience wrapper for BoxList with masks. 24 | 25 | BoxMaskList extends the np_box_list.BoxList to contain masks as well. 26 | In particular, its constructor receives both boxes and masks. Note that the 27 | masks correspond to the full image. 28 | """ 29 | 30 | def __init__(self, box_data, mask_data): 31 | """Constructs box collection. 32 | 33 | Args: 34 | box_data: a numpy array of shape [N, 4] representing box coordinates 35 | mask_data: a numpy array of shape [N, height, width] representing masks 36 | with values are in {0,1}. The masks correspond to the full 37 | image. The height and the width will be equal to image height and width. 38 | 39 | Raises: 40 | ValueError: if bbox data is not a numpy array 41 | ValueError: if invalid dimensions for bbox data 42 | ValueError: if mask data is not a numpy array 43 | ValueError: if invalid dimension for mask data 44 | """ 45 | super(BoxMaskList, self).__init__(box_data) 46 | if not isinstance(mask_data, np.ndarray): 47 | raise ValueError('Mask data must be a numpy array.') 48 | if len(mask_data.shape) != 3: 49 | raise ValueError('Invalid dimensions for mask data.') 50 | if mask_data.dtype != np.uint8: 51 | raise ValueError('Invalid data type for mask data: uint8 is required.') 52 | if mask_data.shape[0] != box_data.shape[0]: 53 | raise ValueError('There should be the same number of boxes and masks.') 54 | self.data['masks'] = mask_data 55 | 56 | def get_masks(self): 57 | """Convenience function for accessing masks. 
58 | 59 | Returns: 60 | a numpy array of shape [N, height, width] representing masks 61 | """ 62 | return self.get_field('masks') 63 | 64 | -------------------------------------------------------------------------------- /ava/np_box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, 4] numpy arrays representing bounding boxes. 17 | 18 | Example box operations that are supported: 19 | * Areas: compute bounding box areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | 25 | def area(boxes): 26 | """Computes area of boxes. 27 | 28 | Args: 29 | boxes: Numpy array with shape [N, 4] holding N boxes 30 | 31 | Returns: 32 | a numpy array with shape [N*1] representing box areas 33 | """ 34 | return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) 35 | 36 | 37 | def intersection(boxes1, boxes2): 38 | """Compute pairwise intersection areas between boxes. 39 | 40 | Args: 41 | boxes1: a numpy array with shape [N, 4] holding N boxes 42 | boxes2: a numpy array with shape [M, 4] holding M boxes 43 | 44 | Returns: 45 | a numpy array with shape [N*M] representing pairwise intersection area 46 | """ 47 | [y_min1, x_min1, y_max1, x_max1] = np.split(boxes1, 4, axis=1) 48 | [y_min2, x_min2, y_max2, x_max2] = np.split(boxes2, 4, axis=1) 49 | 50 | all_pairs_min_ymax = np.minimum(y_max1, np.transpose(y_max2)) 51 | all_pairs_max_ymin = np.maximum(y_min1, np.transpose(y_min2)) 52 | intersect_heights = np.maximum( 53 | np.zeros(all_pairs_max_ymin.shape), 54 | all_pairs_min_ymax - all_pairs_max_ymin) 55 | all_pairs_min_xmax = np.minimum(x_max1, np.transpose(x_max2)) 56 | all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2)) 57 | intersect_widths = np.maximum( 58 | np.zeros(all_pairs_max_xmin.shape), 59 | all_pairs_min_xmax - all_pairs_max_xmin) 60 | return intersect_heights * intersect_widths 61 | 62 | 63 | def iou(boxes1, boxes2): 64 | """Computes pairwise intersection-over-union between box collections. 65 | 66 | Args: 67 | boxes1: a numpy array with shape [N, 4] holding N boxes. 68 | boxes2: a numpy array with shape [M, 4] holding N boxes. 69 | 70 | Returns: 71 | a numpy array with shape [N, M] representing pairwise iou scores. 72 | """ 73 | intersect = intersection(boxes1, boxes2) 74 | area1 = area(boxes1) 75 | area2 = area(boxes2) 76 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 77 | area2, axis=0) - intersect 78 | return intersect / union 79 | 80 | 81 | def ioa(boxes1, boxes2): 82 | """Computes pairwise intersection-over-area between box collections. 83 | 84 | Intersection-over-area (ioa) between two boxes box1 and box2 is defined as 85 | their intersection area over box2's area. 
Note that ioa is not symmetric, 86 | that is, IOA(box1, box2) != IOA(box2, box1). 87 | 88 | Args: 89 | boxes1: a numpy array with shape [N, 4] holding N boxes. 90 | boxes2: a numpy array with shape [M, 4] holding N boxes. 91 | 92 | Returns: 93 | a numpy array with shape [N, M] representing pairwise ioa scores. 94 | """ 95 | intersect = intersection(boxes1, boxes2) 96 | areas = np.expand_dims(area(boxes2), axis=0) 97 | return intersect / areas 98 | -------------------------------------------------------------------------------- /ava/np_mask_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Operations for [N, height, width] numpy arrays representing masks. 17 | 18 | Example mask operations that are supported: 19 | * Areas: compute mask areas 20 | * IOU: pairwise intersection-over-union scores 21 | """ 22 | import numpy as np 23 | 24 | EPSILON = 1e-7 25 | 26 | 27 | def area(masks): 28 | """Computes area of masks. 29 | 30 | Args: 31 | masks: Numpy array with shape [N, height, width] holding N masks. Masks 32 | values are of type np.uint8 and values are in {0,1}. 33 | 34 | Returns: 35 | a numpy array with shape [N*1] representing mask areas. 36 | 37 | Raises: 38 | ValueError: If masks.dtype is not np.uint8 39 | """ 40 | if masks.dtype != np.uint8: 41 | raise ValueError('Masks type should be np.uint8') 42 | return np.sum(masks, axis=(1, 2), dtype=np.float32) 43 | 44 | 45 | def intersection(masks1, masks2): 46 | """Compute pairwise intersection areas between masks. 47 | 48 | Args: 49 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 50 | values are of type np.uint8 and values are in {0,1}. 51 | masks2: a numpy array with shape [M, height, width] holding M masks. Masks 52 | values are of type np.uint8 and values are in {0,1}. 53 | 54 | Returns: 55 | a numpy array with shape [N*M] representing pairwise intersection area. 56 | 57 | Raises: 58 | ValueError: If masks1 and masks2 are not of type np.uint8. 59 | """ 60 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 61 | raise ValueError('masks1 and masks2 should be of type np.uint8') 62 | n = masks1.shape[0] 63 | m = masks2.shape[0] 64 | answer = np.zeros([n, m], dtype=np.float32) 65 | for i in np.arange(n): 66 | for j in np.arange(m): 67 | answer[i, j] = np.sum(np.minimum(masks1[i], masks2[j]), dtype=np.float32) 68 | return answer 69 | 70 | 71 | def iou(masks1, masks2): 72 | """Computes pairwise intersection-over-union between mask collections. 73 | 74 | Args: 75 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 76 | values are of type np.uint8 and values are in {0,1}. 77 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 78 | values are of type np.uint8 and values are in {0,1}. 
79 | 80 | Returns: 81 | a numpy array with shape [N, M] representing pairwise iou scores. 82 | 83 | Raises: 84 | ValueError: If masks1 and masks2 are not of type np.uint8. 85 | """ 86 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 87 | raise ValueError('masks1 and masks2 should be of type np.uint8') 88 | intersect = intersection(masks1, masks2) 89 | area1 = area(masks1) 90 | area2 = area(masks2) 91 | union = np.expand_dims(area1, axis=1) + np.expand_dims( 92 | area2, axis=0) - intersect 93 | return intersect / np.maximum(union, EPSILON) 94 | 95 | 96 | def ioa(masks1, masks2): 97 | """Computes pairwise intersection-over-area between box collections. 98 | 99 | Intersection-over-area (ioa) between two masks, mask1 and mask2 is defined as 100 | their intersection area over mask2's area. Note that ioa is not symmetric, 101 | that is, IOA(mask1, mask2) != IOA(mask2, mask1). 102 | 103 | Args: 104 | masks1: a numpy array with shape [N, height, width] holding N masks. Masks 105 | values are of type np.uint8 and values are in {0,1}. 106 | masks2: a numpy array with shape [M, height, width] holding N masks. Masks 107 | values are of type np.uint8 and values are in {0,1}. 108 | 109 | Returns: 110 | a numpy array with shape [N, M] representing pairwise ioa scores. 111 | 112 | Raises: 113 | ValueError: If masks1 and masks2 are not of type np.uint8. 114 | """ 115 | if masks1.dtype != np.uint8 or masks2.dtype != np.uint8: 116 | raise ValueError('masks1 and masks2 should be of type np.uint8') 117 | intersect = intersection(masks1, masks2) 118 | areas = np.expand_dims(area(masks2), axis=0) 119 | return intersect / (areas + EPSILON) 120 | -------------------------------------------------------------------------------- /ava/teat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def t(): 4 | list=[1,2,3,4,5] 5 | list=np.array(list) 6 | list2=[2,3] 7 | print(list[list2]) 8 | if __name__ == '__main__': 9 | t() -------------------------------------------------------------------------------- /backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/backbone/__init__.py -------------------------------------------------------------------------------- /backbone/base.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Type 2 | 3 | from torch import nn 4 | 5 | 6 | class Base(object): 7 | OPTIONS = ['resnet18', 'resnet50', 'resnet101','slowfastnet'] 8 | @staticmethod 9 | def from_name(name: str) -> Type['Base']: 10 | if name == 'resnet18': 11 | from backbone.resnet18 import ResNet18 12 | return ResNet18 13 | elif name == 'resnet50': 14 | from backbone.resnet50 import ResNet50 15 | return ResNet50 16 | elif name == 'resnet101': 17 | from backbone.resnet101 import ResNet101 18 | return ResNet101 19 | elif name == 'slowfastnet101': 20 | from backbone.slowfast_res101 import slowfast_res101 21 | return slowfast_res101 22 | elif name == 'slowfastnet50': 23 | from backbone.slowfast_res50 import slowfast_res50 24 | return slowfast_res50 25 | else: 26 | raise ValueError 27 | 28 | def __init__(self, pretrained: bool): 29 | super().__init__() 30 | self._pretrained = pretrained 31 | 32 | 33 | 34 | def features(self) -> Tuple[nn.Module, nn.Module, int, int]: 35 | raise NotImplementedError 
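
`backbone/base.py` above only declares the factory interface (`from_name`, the `pretrained` flag, and `features()`); the concrete call site is elsewhere in the repository. For reference, a minimal usage sketch follows — the caller shown here is an assumption for illustration, not repository code; only the names defined in the file above are relied on.

```python
# Illustrative sketch only: how the Base backbone factory above is typically consumed.
# The call site (e.g. somewhere like model.py) is assumed; from_name(), pretrained,
# and features() come from backbone/base.py and its subclasses.
from backbone.base import Base

backbone_class = Base.from_name('resnet50')   # returns backbone.resnet50.ResNet50
backbone = backbone_class(pretrained=True)    # subclasses take a `pretrained` bool

# features() is implemented by each subclass and returns:
#   features        - nn.Module producing the shared feature map
#   hidden          - nn.Module used as the per-RoI head
#   num_features_out, num_hidden_out - their output channel counts
features, hidden, num_features_out, num_hidden_out = backbone.features()
```
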
-------------------------------------------------------------------------------- /backbone/hidden_for_roi.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | 7 | 8 | class Hidden(nn.Module): 9 | 10 | def __init__(self, inplanes, planes, stride=1): 11 | super(Hidden, self).__init__() 12 | self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size=3,padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(planes) 14 | self.conv2 = nn.Conv2d(inplanes, inplanes, kernel_size=3, stride=stride, 15 | padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(planes) 17 | self.conv3 = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1,bias=False) 18 | self.bn3 = nn.BatchNorm2d(planes) 19 | self.relu = nn.ReLU(inplace=True) 20 | 21 | def forward(self, x): 22 | out = self.conv1(x) 23 | out = self.bn1(out) 24 | out = self.relu(out) 25 | 26 | out = self.conv2(out) 27 | out = self.bn2(out) 28 | out = self.relu(out) 29 | 30 | out = self.conv3(out) 31 | out = self.bn3(out) 32 | out = self.relu(out) 33 | out = nn.AdaptiveAvgPool2d(1)(out) 34 | out = out.view(-1, out.size(1)) 35 | return out 36 | 37 | def weight_init(m): 38 | # if isinstance(m, nn.Linear): 39 | # nn.init.xavier_normal_(m.weight) 40 | # nn.init.constant_(m.bias, 0) 41 | # 也可以判断是否为conv2d,使用相应的初始化方式 42 | if isinstance(m, nn.Conv3d): 43 | print("using kaiming") 44 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 45 | # 是否为批归一化层 46 | # elif isinstance(m, nn.BatchNorm3d): 47 | # nn.init.constant_(m.weight, 1) 48 | # nn.init.constant_(m.bias, 0) 49 | def hidden50(**kwargs): 50 | """Constructs a ResNet-50 model. 51 | """ 52 | model = Hidden(2304,2304,2) 53 | # model.apply(weight_init) 54 | print('model', model) 55 | return model -------------------------------------------------------------------------------- /backbone/hidden_for_roi2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | 7 | class Bottleneck(nn.Module): 8 | expansion = 4 9 | 10 | def __init__(self, inplanes, planes, stride=1, downsample=None, head_conv=1): 11 | super(Bottleneck, self).__init__() 12 | # 2d 1*1 13 | if head_conv == 1: 14 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False,dilation=2) 15 | self.bn1 = nn.BatchNorm3d(planes) 16 | 17 | #3d 1*1 18 | elif head_conv == 3: 19 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=(3, 1, 1), bias=False, padding=(2, 0, 0),dilation=2) 20 | self.bn1 = nn.BatchNorm3d(planes) 21 | else: 22 | raise ValueError("Unsupported head_conv!") 23 | self.conv2 = nn.Conv3d( 24 | planes, planes, kernel_size=(1, 3, 3), stride=(1,stride,stride), padding=(0, 2, 2), bias=False,dilation=2) 25 | self.bn2 = nn.BatchNorm3d(planes) 26 | self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False,dilation=2) 27 | self.bn3 = nn.BatchNorm3d(planes * 4) 28 | self.relu = nn.ReLU(inplace=True) 29 | self.downsample = downsample 30 | self.stride = stride 31 | 32 | def forward(self, x): 33 | residual = x 34 | 35 | out = self.conv1(x) 36 | out = self.bn1(out) 37 | out = self.relu(out) 38 | 39 | out = self.conv2(out) 40 | out = self.bn2(out) 41 | out = self.relu(out) 42 | 43 | out = self.conv3(out) 44 | out = self.bn3(out) 45 | 46 | if self.downsample is not None: 47 | 
residual = self.downsample(x) 48 | out += residual 49 | out = self.relu(out) 50 | 51 | return out 52 | 53 | 54 | class Hidden(nn.Module): 55 | def __init__(self, block=Bottleneck, layers=[3, 4, 6, 3], class_num=10, dropout=0.5): 56 | super(Hidden, self).__init__() 57 | self.slow_inplanes = 1280 58 | self.fast_inplanes = 128 59 | self.fast_res5 = self._make_layer_fast( 60 | block, 64, layers[3], stride=1, head_conv=3) 61 | self.slow_res5 = self._make_layer_slow( 62 | block, 512, layers[3], stride=1, head_conv=3) 63 | 64 | 65 | def _make_layer_fast(self, block, planes, blocks, stride=1, head_conv=1): 66 | downsample = None 67 | if stride != 1 or self.fast_inplanes != planes * block.expansion: 68 | downsample = nn.Sequential( 69 | nn.Conv3d( 70 | self.fast_inplanes, 71 | planes * block.expansion, 72 | kernel_size=1, 73 | stride=(1,stride,stride), 74 | bias=False,dilation=2), nn.BatchNorm3d(planes * block.expansion)) 75 | 76 | layers = [] 77 | layers.append(block(self.fast_inplanes, planes, stride, downsample, head_conv=head_conv)) 78 | self.fast_inplanes = planes * block.expansion 79 | for i in range(1, blocks): 80 | layers.append(block(self.fast_inplanes, planes, head_conv=head_conv)) 81 | return nn.Sequential(*layers) 82 | 83 | def _make_layer_slow(self, block, planes, blocks, stride=1, head_conv=1): 84 | #print('_make_layer_slow',planes) 85 | downsample = None 86 | if stride != 1 or self.slow_inplanes != planes * block.expansion: 87 | #print('self.slow_inplanes',self.slow_inplanes) 88 | downsample = nn.Sequential( 89 | nn.Conv3d( 90 | self.slow_inplanes, 91 | planes * block.expansion, 92 | kernel_size=1, 93 | stride=(1,stride,stride), 94 | bias=False,dilation=2), nn.BatchNorm3d(planes * block.expansion)) 95 | layers = [] 96 | layers.append(block(self.slow_inplanes, planes, stride, downsample, head_conv=head_conv)) 97 | self.slow_inplanes = planes * block.expansion 98 | for i in range(1, blocks): 99 | layers.append(block(self.slow_inplanes, planes, head_conv=head_conv)) 100 | #self.slow_inplanes = planes * block.expansion + planes * block.expansion // 8 * 2 101 | self.slow_inplanes = planes * block.expansion 102 | return nn.Sequential(*layers) 103 | 104 | def forward(self,fast_input,slow_input): 105 | fast_output=self.fast_res5(fast_input) 106 | slow_output=self.slow_res5(slow_input) 107 | x1 = nn.AdaptiveAvgPool3d(1)(fast_output) 108 | x2 = nn.AdaptiveAvgPool3d(1)(slow_output) 109 | x1 = x1.view(-1, x1.size(1)) 110 | x2 = x2.view(-1, x2.size(1)) 111 | x = torch.cat([x1, x2], dim=1) 112 | return x 113 | 114 | 115 | def hidden50(**kwargs): 116 | """Constructs a ResNet-50 model. 
117 | """ 118 | model = Hidden(Bottleneck, [3, 4, 6, 3], **kwargs) 119 | print('model', model) 120 | return model -------------------------------------------------------------------------------- /backbone/hidden_for_roi_maxpool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | 7 | 8 | class Hidden(nn.Module): 9 | 10 | def __init__(self, inplanes, planes, stride=1): 11 | super(Hidden, self).__init__() 12 | # self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size=3,padding=1, bias=False) 13 | # self.bn1 = nn.BatchNorm2d(planes) 14 | # self.conv2 = nn.Conv2d(inplanes, inplanes, kernel_size=3, stride=stride, 15 | # padding=1, bias=False) 16 | # self.bn2 = nn.BatchNorm2d(planes) 17 | # self.conv3 = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1,bias=False) 18 | # self.bn3 = nn.BatchNorm2d(planes) 19 | # self.relu = nn.ReLU(inplace=True) 20 | #self.fc=nn.Linear(in_features=2304*3*3,out_features=4096) 21 | 22 | def forward(self, x): 23 | # out = self.conv1(x) 24 | # out = self.bn1(out) 25 | # out = self.relu(out) 26 | # 27 | # out = self.conv2(out) 28 | # out = self.bn2(out) 29 | # out = self.relu(out) 30 | # 31 | # out = self.conv3(out) 32 | # out = self.bn3(out) 33 | # out = self.relu(out) 34 | #x = nn.MaxPool2d(2,2)(x) 35 | out=x.view(x.shape[0],-1) 36 | #print(x.shape) 37 | #out=self.fc(x) 38 | out = out.view(-1, out.size(1)) 39 | return out 40 | 41 | def weight_init(m): 42 | # if isinstance(m, nn.Linear): 43 | # nn.init.xavier_normal_(m.weight) 44 | # nn.init.constant_(m.bias, 0) 45 | # 也可以判断是否为conv2d,使用相应的初始化方式 46 | if isinstance(m, nn.Conv3d): 47 | print("using kaiming") 48 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 49 | # 是否为批归一化层 50 | # elif isinstance(m, nn.BatchNorm3d): 51 | # nn.init.constant_(m.weight, 1) 52 | # nn.init.constant_(m.bias, 0) 53 | def hidden50(**kwargs): 54 | """Constructs a ResNet-50 model. 
55 | """ 56 | model = Hidden(2304,2304,2) 57 | # model.apply(weight_init) 58 | #print('model', model) 59 | return model -------------------------------------------------------------------------------- /backbone/resnet101.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torchvision 4 | from torch import nn 5 | 6 | import backbone.base 7 | 8 | 9 | class ResNet101(backbone.base.Base): 10 | 11 | def __init__(self, pretrained: bool): 12 | super().__init__(pretrained) 13 | 14 | def features(self) -> Tuple[nn.Module, nn.Module, int, int]: 15 | resnet101 = torchvision.models.resnet101(pretrained=self._pretrained) 16 | 17 | # list(resnet101.children()) consists of following modules 18 | # [0] = Conv2d, [1] = BatchNorm2d, [2] = ReLU, 19 | # [3] = MaxPool2d, [4] = Sequential(Bottleneck...), 20 | # [5] = Sequential(Bottleneck...), 21 | # [6] = Sequential(Bottleneck...), 22 | # [7] = Sequential(Bottleneck...), 23 | # [8] = AvgPool2d, [9] = Linear 24 | children = list(resnet101.children()) 25 | features = children[:-3] 26 | num_features_out = 1024 27 | print('children',type(children)) 28 | hidden = children[-3] 29 | num_hidden_out = 2048 30 | 31 | for parameters in [feature.parameters() for i, feature in enumerate(features) if i <= 4]: 32 | for parameter in parameters: 33 | parameter.requires_grad = False 34 | 35 | features = nn.Sequential(*features) 36 | print('features',type(features)) 37 | print('hidden',type(hidden)) 38 | return features, hidden, num_features_out, num_hidden_out 39 | -------------------------------------------------------------------------------- /backbone/resnet18.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torchvision 4 | from torch import nn 5 | 6 | import backbone.base 7 | 8 | 9 | class ResNet18(backbone.base.Base): 10 | 11 | def __init__(self, pretrained: bool): 12 | super().__init__(pretrained) 13 | 14 | def features(self) -> Tuple[nn.Module, nn.Module, int, int]: 15 | resnet18 = torchvision.models.resnet18(pretrained=self._pretrained) 16 | 17 | # list(resnet18.children()) consists of following modules 18 | # [0] = Conv2d, [1] = BatchNorm2d, [2] = ReLU, 19 | # [3] = MaxPool2d, [4] = Sequential(Bottleneck...), 20 | # [5] = Sequential(Bottleneck...), 21 | # [6] = Sequential(Bottleneck...), 22 | # [7] = Sequential(Bottleneck...), 23 | # [8] = AvgPool2d, [9] = Linear 24 | children = list(resnet18.children()) 25 | features = children[:-3] 26 | num_features_out = 256 27 | 28 | hidden = children[-3] 29 | num_hidden_out = 512 30 | 31 | for parameters in [feature.parameters() for i, feature in enumerate(features) if i <= 4]: 32 | for parameter in parameters: 33 | parameter.requires_grad = False 34 | 35 | features = nn.Sequential(*features) 36 | 37 | return features, hidden, num_features_out, num_hidden_out 38 | -------------------------------------------------------------------------------- /backbone/resnet50.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torchvision 4 | from torch import nn 5 | 6 | import backbone.base 7 | 8 | 9 | class ResNet50(backbone.base.Base): 10 | 11 | def __init__(self, pretrained: bool): 12 | super().__init__(pretrained) 13 | 14 | def features(self) -> Tuple[nn.Module, nn.Module, int, int]: 15 | resnet50 = torchvision.models.resnet50(pretrained=self._pretrained) 16 | print("resnet50") 17 | # list(resnet50.children()) 
consists of following modules 18 | # [0] = Conv2d, [1] = BatchNorm2d, [2] = ReLU, 19 | # [3] = MaxPool2d, [4] = Sequential(Bottleneck...), 20 | # [5] = Sequential(Bottleneck...), 21 | # [6] = Sequential(Bottleneck...), 22 | # [7] = Sequential(Bottleneck...), 23 | # [8] = AvgPool2d, [9] = Linear 24 | children = list(resnet50.children()) 25 | features = children[:-3] 26 | num_features_out = 1024 27 | 28 | hidden = children[-3] 29 | num_hidden_out = 2048 30 | 31 | for parameters in [feature.parameters() for i, feature in enumerate(features) if i <= 4]: 32 | for parameter in parameters: 33 | parameter.requires_grad = False 34 | 35 | features = nn.Sequential(*features) 36 | 37 | return features, hidden, num_features_out, num_hidden_out 38 | -------------------------------------------------------------------------------- /backbone/slowfast_res101.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torchvision 4 | from torch import nn 5 | 6 | import backbone.base 7 | from backbone.slowfastnet import resnet101 as rs101 8 | from backbone.slowfastnet import resnet50 as rs50 9 | from backbone.hidden_for_roi import hidden50 10 | class slowfast_res101(backbone.base.Base): 11 | 12 | def __init__(self): 13 | super().__init__(False) 14 | 15 | def features(self): 16 | resnet101 = rs101() 17 | num_features_out = 1280 18 | hidden = hidden50() 19 | num_hidden_out = 2048 + 256 20 | return resnet101, hidden, num_features_out, num_hidden_out 21 | 22 | if __name__ == '__main__': 23 | s=slowfast_res101() 24 | s.features() 25 | -------------------------------------------------------------------------------- /backbone/slowfast_res50.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import torchvision 4 | from torch import nn 5 | 6 | import backbone.base 7 | from backbone.slowfastnet import resnet101 as rs101 8 | from backbone.slowfastnet import resnet50 as rs50 9 | from backbone.hidden_for_roi_maxpool import hidden50 10 | class slowfast_res50(backbone.base.Base): 11 | 12 | def __init__(self): 13 | super().__init__(False) 14 | 15 | def features(self): 16 | print("slowfast_res50") 17 | resnet50 = rs50() 18 | hidden = hidden50() 19 | num_features_out = 2304 20 | num_hidden_out = 2304*3*3 21 | 22 | return resnet50, hidden, num_features_out, num_hidden_out 23 | 24 | if __name__ == '__main__': 25 | s=slowfast_res50() 26 | s.features() 27 | -------------------------------------------------------------------------------- /bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import random 5 | 6 | import numpy as np 7 | import cv2 8 | 9 | def confidence_filter(result, confidence): 10 | conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2) 11 | result = result*conf_mask 12 | 13 | return result 14 | 15 | def confidence_filter_cls(result, confidence): 16 | max_scores = torch.max(result[:,:,5:25], 2)[0] 17 | res = torch.cat((result, max_scores),2) 18 | print(res.shape) 19 | 20 | 21 | cond_1 = (res[:,:,4] > confidence).float() 22 | cond_2 = (res[:,:,25] > 0.995).float() 23 | 24 | conf = cond_1 + cond_2 25 | conf = torch.clamp(conf, 0.0, 1.0) 26 | conf = conf.unsqueeze(2) 27 | result = result*conf 28 | return result 29 | 30 | 31 | 32 | def get_abs_coord(box): 33 | box[2], box[3] = abs(box[2]), abs(box[3]) 34 | x1 = (box[0] - box[2]/2) - 1 35 | y1 = (box[1] - box[3]/2) - 1 36 | x2 = 
(box[0] + box[2]/2) - 1 37 | y2 = (box[1] + box[3]/2) - 1 38 | return x1, y1, x2, y2 39 | 40 | 41 | 42 | def sanity_fix(box): 43 | if (box[0] > box[2]): 44 | box[0], box[2] = box[2], box[0] 45 | 46 | if (box[1] > box[3]): 47 | box[1], box[3] = box[3], box[1] 48 | 49 | return box 50 | 51 | def bbox_iou(box1, box2): 52 | """ 53 | Returns the IoU of two bounding boxes 54 | 55 | 56 | """ 57 | #Get the coordinates of bounding boxes 58 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3] 59 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3] 60 | 61 | #get the corrdinates of the intersection rectangle 62 | inter_rect_x1 = torch.max(b1_x1, b2_x1) 63 | inter_rect_y1 = torch.max(b1_y1, b2_y1) 64 | inter_rect_x2 = torch.min(b1_x2, b2_x2) 65 | inter_rect_y2 = torch.min(b1_y2, b2_y2) 66 | 67 | #Intersection area 68 | if torch.cuda.is_available(): 69 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape).cuda())*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape).cuda()) 70 | else: 71 | inter_area = torch.max(inter_rect_x2 - inter_rect_x1 + 1,torch.zeros(inter_rect_x2.shape))*torch.max(inter_rect_y2 - inter_rect_y1 + 1, torch.zeros(inter_rect_x2.shape)) 72 | 73 | #Union Area 74 | b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1) 75 | b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1) 76 | 77 | iou = inter_area / (b1_area + b2_area - inter_area) 78 | 79 | return iou 80 | 81 | 82 | def pred_corner_coord(prediction): 83 | #Get indices of non-zero confidence bboxes 84 | ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous() 85 | 86 | box = prediction[ind_nz[0], ind_nz[1]] 87 | 88 | 89 | box_a = box.new(box.shape) 90 | box_a[:,0] = (box[:,0] - box[:,2]/2) 91 | box_a[:,1] = (box[:,1] - box[:,3]/2) 92 | box_a[:,2] = (box[:,0] + box[:,2]/2) 93 | box_a[:,3] = (box[:,1] + box[:,3]/2) 94 | box[:,:4] = box_a[:,:4] 95 | 96 | prediction[ind_nz[0], ind_nz[1]] = box 97 | 98 | return prediction 99 | 100 | 101 | 102 | 103 | def write(x, batches, results, colors, classes): 104 | c1 = tuple(x[1:3].int()) 105 | c2 = tuple(x[3:5].int()) 106 | img = results[int(x[0])] 107 | cls = int(x[-1]) 108 | label = "{0}".format(classes[cls]) 109 | color = random.choice(colors) 110 | cv2.rectangle(img, c1, c2,color, 1) 111 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 112 | c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4 113 | cv2.rectangle(img, c1, c2,color, -1) 114 | cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1); 115 | return img 116 | -------------------------------------------------------------------------------- /bbox1.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch import Tensor 5 | 6 | 7 | class BBox(object): 8 | 9 | def __init__(self, left: float, top: float, right: float, bottom: float): 10 | super().__init__() 11 | self.left = left 12 | self.top = top 13 | self.right = right 14 | self.bottom = bottom 15 | 16 | def __repr__(self) -> str: 17 | return 'BBox[l={:.1f}, t={:.1f}, r={:.1f}, b={:.1f}]'.format( 18 | self.left, self.top, self.right, self.bottom) 19 | 20 | def tolist(self) -> List[float]: 21 | return [self.left, self.top, self.right, self.bottom] 22 | 23 | @staticmethod 24 | def to_center_base(bboxes: Tensor) -> Tensor: 25 | return torch.stack([ 26 | (bboxes[..., 0] + bboxes[..., 2]) / 2, 27 | (bboxes[..., 1] + bboxes[..., 3]) 
/ 2, 28 | bboxes[..., 2] - bboxes[..., 0], 29 | bboxes[..., 3] - bboxes[..., 1] 30 | ], dim=-1) 31 | 32 | @staticmethod 33 | def from_center_base(center_based_bboxes: Tensor) -> Tensor: 34 | return torch.stack([ 35 | center_based_bboxes[..., 0] - center_based_bboxes[..., 2] / 2, 36 | center_based_bboxes[..., 1] - center_based_bboxes[..., 3] / 2, 37 | center_based_bboxes[..., 0] + center_based_bboxes[..., 2] / 2, 38 | center_based_bboxes[..., 1] + center_based_bboxes[..., 3] / 2 39 | ], dim=-1) 40 | 41 | @staticmethod 42 | def calc_transformer(src_bboxes: Tensor, dst_bboxes: Tensor) -> Tensor: 43 | center_based_src_bboxes = BBox.to_center_base(src_bboxes) 44 | center_based_dst_bboxes = BBox.to_center_base(dst_bboxes) 45 | transformers = torch.stack([ 46 | (center_based_dst_bboxes[..., 0] - center_based_src_bboxes[..., 0]) / center_based_dst_bboxes[..., 2], 47 | (center_based_dst_bboxes[..., 1] - center_based_src_bboxes[..., 1]) / center_based_dst_bboxes[..., 3], 48 | torch.log(center_based_dst_bboxes[..., 2] / center_based_src_bboxes[..., 2]), 49 | torch.log(center_based_dst_bboxes[..., 3] / center_based_src_bboxes[..., 3]) 50 | ], dim=-1) 51 | return transformers 52 | 53 | @staticmethod 54 | def apply_transformer(src_bboxes: Tensor, transformers: Tensor) -> Tensor: 55 | center_based_src_bboxes = BBox.to_center_base(src_bboxes) 56 | center_based_dst_bboxes = torch.stack([ 57 | transformers[..., 0] * center_based_src_bboxes[..., 2] + center_based_src_bboxes[..., 0], 58 | transformers[..., 1] * center_based_src_bboxes[..., 3] + center_based_src_bboxes[..., 1], 59 | torch.exp(transformers[..., 2]) * center_based_src_bboxes[..., 2], 60 | torch.exp(transformers[..., 3]) * center_based_src_bboxes[..., 3] 61 | ], dim=-1) 62 | dst_bboxes = BBox.from_center_base(center_based_dst_bboxes) 63 | return dst_bboxes 64 | 65 | @staticmethod 66 | def iou(source: Tensor, other: Tensor) -> Tensor: 67 | source, other = source.unsqueeze(dim=-2).repeat(1, 1, other.shape[-2], 1), \ 68 | other.unsqueeze(dim=-3).repeat(1, source.shape[-2], 1, 1) 69 | 70 | source_area = (source[..., 2] - source[..., 0]) * (source[..., 3] - source[..., 1]) 71 | other_area = (other[..., 2] - other[..., 0]) * (other[..., 3] - other[..., 1]) 72 | 73 | intersection_left = torch.max(source[..., 0], other[..., 0]) 74 | intersection_top = torch.max(source[..., 1], other[..., 1]) 75 | intersection_right = torch.min(source[..., 2], other[..., 2]) 76 | intersection_bottom = torch.min(source[..., 3], other[..., 3]) 77 | intersection_width = torch.clamp(intersection_right - intersection_left, min=0) 78 | intersection_height = torch.clamp(intersection_bottom - intersection_top, min=0) 79 | intersection_area = intersection_width * intersection_height 80 | 81 | return intersection_area / (source_area + other_area - intersection_area) 82 | 83 | @staticmethod 84 | def inside(bboxes: Tensor, left: float, top: float, right: float, bottom: float) -> Tensor: 85 | return ((bboxes[..., 0] >= left) * (bboxes[..., 1] >= top) * 86 | (bboxes[..., 2] <= right) * (bboxes[..., 3] <= bottom)) 87 | 88 | @staticmethod 89 | def clip(bboxes: Tensor, left: float, top: float, right: float, bottom: float) -> Tensor: 90 | bboxes[..., [0, 2]] = bboxes[..., [0, 2]].clamp(min=left, max=right) 91 | bboxes[..., [1, 3]] = bboxes[..., [1, 3]].clamp(min=top, max=bottom) 92 | return bboxes 93 | -------------------------------------------------------------------------------- /cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | 
[net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 
85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | 
[maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 
256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /config/config.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import Tuple, List 3 | 4 | from roi.pooler_ import Pooler 5 | 6 | 7 | class Config(object): 8 | ANCHOR_RATIOS = [(1, 2), (1, 1), (2, 1)] 9 | #ANCHOR_SIZES = [128, 256, 512] 10 | ANCHOR_SIZES = [64, 128] 11 | POOLER_MODE = Pooler.Mode.POOLING 12 | BACKBONE_NAME='slowfastnet50' 13 | #DETECTOR_RESULT_PATH='detection_train_result.txt' 14 | DETECTOR_RESULT_PATH = 'detection_train_result.txt' 15 | @classmethod 16 | def describe(cls): 17 | text = '\nConfig:\n' 18 | attrs = [attr for attr in dir(cls) if not callable(getattr(cls, attr)) and not attr.startswith('__')] 19 | text += '\n'.join(['\t{:s} = {:s}'.format(attr, str(getattr(cls, attr))) for attr in attrs]) + '\n' 20 | return text 21 | @classmethod 22 | def setup(cls, image_min_side: float = None, image_max_side: float = None, 23 | anchor_ratios: List[Tuple[int, int]] = None, anchor_sizes: List[int] = None, pooler_mode: str = None): 24 | if image_min_side is not None: 25 | cls.IMAGE_MIN_SIDE = image_min_side 26 | if image_max_side is not None: 27 | cls.IMAGE_MAX_SIDE = image_max_side 28 | 29 | if anchor_ratios is not None: 30 | cls.ANCHOR_RATIOS = ast.literal_eval(anchor_ratios) 31 | if anchor_sizes is not None: 32 | cls.ANCHOR_SIZES = ast.literal_eval(anchor_sizes) 33 | if pooler_mode is not None: 34 | cls.POOLER_MODE = Pooler.Mode(pooler_mode) 35 | -------------------------------------------------------------------------------- /config/eval_config.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | from config.config import Config 4 | 5 | 6 | class EvalConfig(Config): 7 | 8 | RPN_PRE_NMS_TOP_N = 6000 9 | RPN_POST_NMS_TOP_N = 300 10 | VAL_DATA='ava_train_v2.2_sub_5.txt' 11 | PATH_TO_CHECKPOINT='/home/aiuser/Downloads/NEW-FRCNN-rewrite_with_yolo/temp_3/model-20700-v100.pth' 12 | PATH_TO_RESULTS='result.txt' 13 | #PATH_TO_ACTION_LIST='ava_action_list_v2.2.pbtxt' 14 | PATH_TO_ACTION_LIST='ava_action_list_v2.2_for_activitynet_2019.pbtxt' 15 | PATH_TO_LABLE='ava_train_v2.2_sub_5.txt' 16 | KEEP=0.05 17 | 18 | -------------------------------------------------------------------------------- /config/train_config.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import List, Tuple 3 | 4 | from config.config import Config 5 | 6 | 7 | class TrainConfig(Config): 8 | 9 | RPN_PRE_NMS_TOP_N= 12000 10 | RPN_POST_NMS_TOP_N = 2000 11 | 12 | ANCHOR_SMOOTH_L1_LOSS_BETA = 1.0 13 | PROPOSAL_SMOOTH_L1_LOSS_BETA = 1.0 14 | 15 | BATCH_SIZE=4 16 | LEARNING_RATE = 0.0001 17 | MOMENTUM = 0.9 18 | WEIGHT_DECAY = 0.0005 19 | STEP_LR_SIZES = [90000,180000] 20 | STEP_LR_GAMMA = 0.1 21 | WARM_UP_FACTOR = 0.3333 22 | WARM_UP_NUM_ITERS = 500 23 | NUM_STEPS_TO_DISPLAY = 20 24 | NUM_STEPS_TO_SNAPSHOT = 20000 25 | NUM_STEPS_TO_FINISH = 222670 26 | TRAIN_DATA='ava_train_v2.2_remove_badlist.csv' 27 | 28 | #PATH_TO_RESUMEING_CHECKPOINT='/home/aiuser/Downloads/NEW-FRCNN-rewrite/temp_3/model-19800.pth' 29 | PATH_TO_RESUMEING_CHECKPOINT =None 30 | PATH_TO_OUTPUTS_DIR = '/home/aiuser/Downloads/NEW-FRCNN-rewrite/outputs/' 31 | 32 | 33 | -------------------------------------------------------------------------------- /data/pam.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/data/pam.gif -------------------------------------------------------------------------------- /deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/deep/__init__.py -------------------------------------------------------------------------------- /deep/checkpoint/original_ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/deep/checkpoint/original_ckpt.t7 -------------------------------------------------------------------------------- /deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:,0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) 14 | 15 | 16 | -------------------------------------------------------------------------------- /deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .model import Net 7 | 8 | class Extractor(object): 9 | def __init__(self, model_path, use_cuda=True): 10 | self.net = Net(reid=True) 11 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 12 | state_dict = torch.load(model_path)['net_dict'] 13 | self.net.load_state_dict(state_dict) 14 | print("Loading weights from {}... Done!".format(model_path)) 15 | self.net.to(self.device) 16 | self.norm = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 17 | 18 | def __call__(self, img): 19 | assert isinstance(img, np.ndarray), "type error" 20 | img = img.astype(np.float)#/255. 
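# note: transforms.Normalize below uses ImageNet mean/std, which assume inputs scaled to
# [0, 1]; with '/255.' commented out above, the values passed on remain in the [0, 255] range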
21 | img = cv2.resize(img, (64,128)) 22 | img = torch.from_numpy(img).float().permute(2,0,1) 23 | img = self.norm(img).unsqueeze(0) 24 | with torch.no_grad(): 25 | img = img.to(self.device) 26 | feature = self.net(img) 27 | return feature.cpu().numpy() 28 | 29 | 30 | if __name__ == '__main__': 31 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 32 | extr = Extractor("checkpoint/ckpt.t7") 33 | feature = extr(img) 34 | print(feature.shape) 35 | 36 | -------------------------------------------------------------------------------- /deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=751 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,64,3,stride=1,padding=1), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | # nn.Conv2d(32,32,3,stride=1,padding=1), 57 | # nn.BatchNorm2d(32), 58 | # nn.ReLU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(64,64,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(64,128,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(128,256,2,True) 67 | # 128 16 8 68 | self.layer4 = make_layers(256,512,2,True) 69 | # 256 8 4 70 | self.avgpool = nn.AvgPool2d((8,4),1) 71 | # 256 1 1 72 | self.reid = reid 73 | self.classifier = nn.Sequential( 74 | nn.Linear(512, 256), 75 | nn.BatchNorm1d(256), 76 | nn.ReLU(inplace=True), 77 | nn.Dropout(), 78 | nn.Linear(256, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | x = self.layer4(x) 87 | x = self.avgpool(x) 88 | x = x.view(x.size(0),-1) 89 | # B x 128 90 | if self.reid: 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | # classifier 94 | x = self.classifier(x) 95 | return x 96 | 97 | 98 | if __name__ == '__main__': 99 | net = Net() 100 | x = torch.randn(4,3,128,64) 101 | y = net(x) 102 | import ipdb; ipdb.set_trace() 103 | 104 | 105 | 
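A minimal usage sketch for the re-ID network defined in deep/model.py above: with reid=True the classifier head is bypassed and forward() returns one L2-normalised 512-dimensional embedding per 128x64 crop. The random crops and the dot-product comparison below are illustrative assumptions, not values from the repository, and the snippet assumes the repository root is on PYTHONPATH.

import torch
from deep.model import Net

net = Net(reid=True).eval()                  # reid=True: return embeddings instead of class logits
crops = torch.randn(2, 3, 128, 64)           # two hypothetical person crops, HxW = 128x64
with torch.no_grad():
    emb = net(crops)                         # shape (2, 512); each row has unit L2 norm
similarity = float((emb[0] * emb[1]).sum())  # cosine similarity reduces to a dot product
print(emb.shape, similarity)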
-------------------------------------------------------------------------------- /deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(64,128,2,True) 67 | # 128 16 8 68 | self.dense = nn.Sequential( 69 | nn.Dropout(p=0.6), 70 | nn.Linear(128*16*8, 128), 71 | nn.BatchNorm1d(128), 72 | nn.ELU(inplace=True) 73 | ) 74 | # 256 1 1 75 | self.reid = reid 76 | self.batch_norm = nn.BatchNorm1d(128) 77 | self.classifier = nn.Sequential( 78 | nn.Linear(128, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | 87 | x = x.view(x.size(0),-1) 88 | if self.reid: 89 | x = self.dense[0](x) 90 | x = self.dense[1](x) 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | x = self.dense(x) 94 | # B x 128 95 | # classifier 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | if __name__ == '__main__': 101 | net = Net(reid=True) 102 | x = torch.randn(4,3,128,64) 103 | y = net(x) 104 | import ipdb; ipdb.set_trace() 105 | 106 | 107 | -------------------------------------------------------------------------------- /deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | 
parser.add_argument("--data-dir",default='data',type=str) 12 | parser.add_argument("--no-cuda",action="store_true") 13 | parser.add_argument("--gpu-id",default=0,type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 18 | if torch.cuda.is_available() and not args.no_cuda: 19 | cudnn.benchmark = True 20 | 21 | # data loader 22 | root = args.data_dir 23 | query_dir = os.path.join(root,"query") 24 | gallery_dir = os.path.join(root,"gallery") 25 | transform = torchvision.transforms.Compose([ 26 | torchvision.transforms.Resize((128,64)), 27 | torchvision.transforms.ToTensor(), 28 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 29 | ]) 30 | queryloader = torch.utils.data.DataLoader( 31 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 32 | batch_size=64, shuffle=False 33 | ) 34 | galleryloader = torch.utils.data.DataLoader( 35 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 36 | batch_size=64, shuffle=False 37 | ) 38 | 39 | # net definition 40 | net = Net(reid=True) 41 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 42 | print('Loading from checkpoint/ckpt.t7') 43 | checkpoint = torch.load("./checkpoint/ckpt.t7") 44 | net_dict = checkpoint['net_dict'] 45 | net.load_state_dict(net_dict) 46 | net.eval() 47 | net.to(device) 48 | 49 | # compute features 50 | query_features = torch.tensor([]).float() 51 | query_labels = torch.tensor([]).long() 52 | gallery_features = torch.tensor([]).float() 53 | gallery_labels = torch.tensor([]).long() 54 | 55 | with torch.no_grad(): 56 | for idx,(inputs,labels) in enumerate(queryloader): 57 | inputs = inputs.to(device) 58 | features = net(inputs).cpu() 59 | query_features = torch.cat((query_features, features), dim=0) 60 | query_labels = torch.cat((query_labels, labels)) 61 | 62 | for idx,(inputs,labels) in enumerate(galleryloader): 63 | inputs = inputs.to(device) 64 | features = net(inputs).cpu() 65 | gallery_features = torch.cat((gallery_features, features), dim=0) 66 | gallery_labels = torch.cat((gallery_labels, labels)) 67 | 68 | gallery_labels -= 2 69 | 70 | # save features 71 | features = { 72 | "qf": query_features, 73 | "ql": query_labels, 74 | "gf": gallery_features, 75 | "gl": gallery_labels 76 | } 77 | torch.save(features,"features.pth") -------------------------------------------------------------------------------- /deep/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/deep/train.jpg -------------------------------------------------------------------------------- /deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model import Net 12 | 13 | parser = argparse.ArgumentParser(description="Train on market1501") 14 | parser.add_argument("--data-dir",default='data',type=str) 15 | parser.add_argument("--no-cuda",action="store_true") 16 | parser.add_argument("--gpu-id",default=0,type=int) 17 | parser.add_argument("--lr",default=0.1, type=float) 18 | parser.add_argument("--interval",'-i',default=20,type=int) 19 | 
parser.add_argument('--resume', '-r',action='store_true') 20 | args = parser.parse_args() 21 | 22 | # device 23 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 24 | if torch.cuda.is_available() and not args.no_cuda: 25 | cudnn.benchmark = True 26 | 27 | # data loading 28 | root = args.data_dir 29 | train_dir = os.path.join(root,"train") 30 | test_dir = os.path.join(root,"test") 31 | transform_train = torchvision.transforms.Compose([ 32 | torchvision.transforms.RandomCrop((128,64),padding=4), 33 | torchvision.transforms.RandomHorizontalFlip(), 34 | torchvision.transforms.ToTensor(), 35 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 36 | ]) 37 | transform_test = torchvision.transforms.Compose([ 38 | torchvision.transforms.Resize((128,64)), 39 | torchvision.transforms.ToTensor(), 40 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 41 | ]) 42 | trainloader = torch.utils.data.DataLoader( 43 | torchvision.datasets.ImageFolder(train_dir, transform=transform_train), 44 | batch_size=64,shuffle=True 45 | ) 46 | testloader = torch.utils.data.DataLoader( 47 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 48 | batch_size=64,shuffle=True 49 | ) 50 | num_classes = len(trainloader.dataset.classes) 51 | 52 | # net definition 53 | start_epoch = 0 54 | net = Net(num_classes=num_classes) 55 | if args.resume: 56 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 57 | print('Loading from checkpoint/ckpt.t7') 58 | checkpoint = torch.load("./checkpoint/ckpt.t7") 59 | # import ipdb; ipdb.set_trace() 60 | net_dict = checkpoint['net_dict'] 61 | net.load_state_dict(net_dict) 62 | best_acc = checkpoint['acc'] 63 | start_epoch = checkpoint['epoch'] 64 | net.to(device) 65 | 66 | # loss and optimizer 67 | criterion = torch.nn.CrossEntropyLoss() 68 | optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 69 | best_acc = 0. 70 | 71 | # train function for each epoch 72 | def train(epoch): 73 | print("\nEpoch : %d"%(epoch+1)) 74 | net.train() 75 | training_loss = 0. 76 | train_loss = 0. 77 | correct = 0 78 | total = 0 79 | interval = args.interval 80 | start = time.time() 81 | for idx, (inputs, labels) in enumerate(trainloader): 82 | # forward 83 | inputs,labels = inputs.to(device),labels.to(device) 84 | outputs = net(inputs) 85 | loss = criterion(outputs, labels) 86 | 87 | # backward 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | # accumurating 93 | training_loss += loss.item() 94 | train_loss += loss.item() 95 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 96 | total += labels.size(0) 97 | 98 | # print 99 | if (idx+1)%interval == 0: 100 | end = time.time() 101 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 102 | 100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total 103 | )) 104 | training_loss = 0. 105 | start = time.time() 106 | 107 | return train_loss/len(trainloader), 1.- correct/total 108 | 109 | def test(epoch): 110 | global best_acc 111 | net.eval() 112 | test_loss = 0. 
113 | correct = 0 114 | total = 0 115 | start = time.time() 116 | with torch.no_grad(): 117 | for idx, (inputs, labels) in enumerate(testloader): 118 | inputs, labels = inputs.to(device), labels.to(device) 119 | outputs = net(inputs) 120 | loss = criterion(outputs, labels) 121 | 122 | test_loss += loss.item() 123 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 124 | total += labels.size(0) 125 | 126 | print("Testing ...") 127 | end = time.time() 128 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 129 | 100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total 130 | )) 131 | 132 | # saving checkpoint 133 | acc = 100.*correct/total 134 | if acc > best_acc: 135 | best_acc = acc 136 | print("Saving parameters to checkpoint/ckpt.t7") 137 | checkpoint = { 138 | 'net_dict':net.state_dict(), 139 | 'acc':acc, 140 | 'epoch':epoch, 141 | } 142 | if not os.path.isdir('checkpoint'): 143 | os.mkdir('checkpoint') 144 | torch.save(checkpoint, './checkpoint/ckpt.t7') 145 | 146 | return test_loss/len(testloader), 1.- correct/total 147 | 148 | # plot figure 149 | x_epoch = [] 150 | record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]} 151 | fig = plt.figure() 152 | ax0 = fig.add_subplot(121, title="loss") 153 | ax1 = fig.add_subplot(122, title="top1err") 154 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 155 | global record 156 | record['train_loss'].append(train_loss) 157 | record['train_err'].append(train_err) 158 | record['test_loss'].append(test_loss) 159 | record['test_err'].append(test_err) 160 | 161 | x_epoch.append(epoch) 162 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') 163 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 164 | ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') 165 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') 166 | if epoch == 0: 167 | ax0.legend() 168 | ax1.legend() 169 | fig.savefig("train.jpg") 170 | 171 | # lr decay 172 | def lr_decay(): 173 | global optimizer 174 | for params in optimizer.param_groups: 175 | params['lr'] *= 0.1 176 | lr = params['lr'] 177 | print("Learning rate adjusted to {}".format(lr)) 178 | 179 | def main(): 180 | for epoch in range(start_epoch, start_epoch+40): 181 | train_loss, train_err = train(epoch) 182 | test_loss, test_err = test(epoch) 183 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 184 | if (epoch+1)%20==0: 185 | lr_decay() 186 | 187 | 188 | if __name__ == '__main__': 189 | main() -------------------------------------------------------------------------------- /deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | from deep.feature_extractor import Extractor 5 | from sort.nn_matching import NearestNeighborDistanceMetric 6 | from sort.preprocessing import non_max_suppression 7 | from sort.detection import Detection 8 | from sort.tracker import Tracker 9 | 10 | 11 | class DeepSort(object): 12 | def __init__(self, model_path): 13 | self.min_confidence = 0.3 14 | self.nms_max_overlap = 1.0 15 | 16 | self.extractor = Extractor(model_path, use_cuda=True) 17 | 18 | max_cosine_distance = 0.2 19 | nn_budget = 100 20 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 21 | self.tracker = Tracker(metric) 22 | 23 | def update(self, bbox_xywh, confidences, ori_img): 24 | self.height, self.width = ori_img.shape[:2] 25 | # generate 
detections 26 | # features: the appearance feature vector for each box 27 | features = self._get_features(bbox_xywh, ori_img) 28 | # each detection holds self.tlwh (top-left x, y, width, height), self.confidence and self.feature 29 | # detections are stored as ndarrays 30 | # the confidence filtering and NMS below could be considered for removal 31 | detections = [Detection(bbox_xywh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence] 32 | 33 | # run non-maximum suppression 34 | boxes = np.array([d.tlwh for d in detections]) 35 | scores = np.array([d.confidence for d in detections]) 36 | indices = non_max_suppression( boxes, self.nms_max_overlap, scores) 37 | detections = [detections[i] for i in indices] 38 | 39 | # update tracker 40 | self.tracker.predict() 41 | self.tracker.update(detections) 42 | 43 | # output bbox identities 44 | outputs = [] 45 | for track in self.tracker.tracks: 46 | if not track.is_confirmed() or track.time_since_update > 1: 47 | continue 48 | box = track.to_tlwh() 49 | x1,y1,x2,y2 = self._xywh_to_xyxy(box) 50 | track_id = track.track_id 51 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int)) 52 | if len(outputs) > 0: 53 | outputs = np.stack(outputs,axis=0) 54 | return outputs 55 | 56 | def _xywh_to_xyxy(self, bbox_xywh): 57 | x,y,w,h = bbox_xywh 58 | x1 = max(int(x-w/2),0) 59 | x2 = min(int(x+w/2),self.width-1) 60 | y1 = max(int(y-h/2),0) 61 | y2 = min(int(y+h/2),self.height-1) 62 | return x1,y1,x2,y2 63 | 64 | def _get_features(self, bbox_xywh, ori_img): 65 | features = [] 66 | for box in bbox_xywh: 67 | x1,y1,x2,y2 = self._xywh_to_xyxy(box) 68 | # print(y1,y2,x1,x2) 69 | im = ori_img[y1:y2,x1:x2] 70 | #cv2.imshow("d",im) 71 | #cv2.waitKey(0) 72 | feature = self.extractor(im)[0] 73 | features.append(feature) 74 | if len(features): 75 | features = np.stack(features, axis=0) 76 | else: 77 | features = np.array([]) 78 | return features 79 | 80 | 81 | 82 | if __name__ == '__main__': 83 | pass 84 | -------------------------------------------------------------------------------- /det/det_dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_dog.jpg -------------------------------------------------------------------------------- /det/det_eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_eagle.jpg -------------------------------------------------------------------------------- /det/det_giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_giraffe.jpg -------------------------------------------------------------------------------- /det/det_herd_of_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_herd_of_horses.jpg -------------------------------------------------------------------------------- /det/det_img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_img1.jpg
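A minimal usage sketch for the DeepSort wrapper defined in deep_sort.py above: update() takes boxes as centre-x, centre-y, width, height in pixels, one confidence per box, and the original BGR frame, and returns [x1, y1, x2, y2, track_id] rows for confirmed tracks. The checkpoint path, input image and detection box below are illustrative assumptions rather than values shipped with the repository.

import cv2
import numpy as np
from deep_sort import DeepSort

tracker = DeepSort("deep/checkpoint/ckpt.t7")          # hypothetical path to a trained re-ID checkpoint
frame = cv2.imread("imgs/person.jpg")                  # any BGR frame
bbox_xywh = np.array([[200.0, 150.0, 80.0, 160.0]])    # centre-x, centre-y, width, height
confidences = [0.9]

outputs = tracker.update(bbox_xywh, confidences, frame)  # empty until a track is confirmed
for x1, y1, x2, y2, track_id in outputs:
    print("track", track_id, "box", (x1, y1, x2, y2))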
-------------------------------------------------------------------------------- /det/det_img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_img2.jpg -------------------------------------------------------------------------------- /det/det_img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_img3.jpg -------------------------------------------------------------------------------- /det/det_img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_img4.jpg -------------------------------------------------------------------------------- /det/det_messi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_messi.jpg -------------------------------------------------------------------------------- /det/det_person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_person.jpg -------------------------------------------------------------------------------- /det/det_scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/det/det_scream.jpg -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import uuid 6 | 7 | from backbone.base import Base as BackboneBase 8 | from config.train_config import TrainConfig 9 | from config.eval_config import EvalConfig 10 | from config.config import Config 11 | from dataset.base import Base as DatasetBase 12 | from evaluator import Evaluator 13 | from logger import Logger as Log 14 | from model import Model 15 | from roi.pooler_ import Pooler 16 | from dataset.AVA_video_v2 import AVA_video 17 | def _eval(path_to_checkpoint, backbone_name, path_to_results_dir): 18 | dataset = AVA_video(EvalConfig.VAL_DATA) 19 | evaluator = Evaluator(dataset, path_to_results_dir) 20 | 21 | Log.i('Found {:d} samples'.format(len(dataset))) 22 | 23 | backbone = BackboneBase.from_name(backbone_name)() 24 | model = Model(backbone, dataset.num_classes(), pooler_mode=Config.POOLER_MODE, 25 | anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES, 26 | rpn_pre_nms_top_n=TrainConfig.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=TrainConfig.RPN_POST_NMS_TOP_N).cuda() 27 | model.load(path_to_checkpoint) 28 | print("load from:",path_to_checkpoint) 29 | Log.i('Start evaluating with 1 GPU (1 batch per GPU)') 30 | mean_ap, detail = evaluator.evaluate(model) 31 | Log.i('Done') 32 | Log.i('mean AP = {:.4f}'.format(mean_ap)) 33 | Log.i('\n' + detail) 34 | 35 | 36 | if __name__ == '__main__': 37 | def main(): 38 | path_to_checkpoint = EvalConfig.PATH_TO_CHECKPOINT 39 | 
backbone_name = Config.BACKBONE_NAME 40 | path_to_results_dir='/home/aiuser/ava/ava/'+EvalConfig.PATH_TO_RESULTS 41 | Log.initialize(os.path.join('/home/aiuser/ava_v2.2', 'eval.log')) 42 | _eval(path_to_checkpoint, backbone_name, path_to_results_dir) 43 | 44 | main() 45 | -------------------------------------------------------------------------------- /extention/functional.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import Tensor 4 | 5 | 6 | def beta_smooth_l1_loss(input: Tensor, target: Tensor, beta: float) -> Tensor: 7 | diff = torch.abs(input - target) 8 | loss = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta) 9 | loss = loss.sum() / (input.numel() + 1e-8) 10 | return loss 11 | -------------------------------------------------------------------------------- /extention/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from torch.optim import Optimizer 3 | from torch.optim.lr_scheduler import MultiStepLR 4 | 5 | 6 | class WarmUpMultiStepLR(MultiStepLR): 7 | def __init__(self, optimizer: Optimizer, milestones: List[int], gamma: float = 0.1, 8 | factor: float = 0.3333, num_iters: int = 500, last_epoch: int = -1): 9 | self.factor = factor 10 | self.num_iters = num_iters 11 | super().__init__(optimizer, milestones, gamma, last_epoch) 12 | 13 | def get_lr(self) -> List[float]: 14 | if self.last_epoch < self.num_iters: 15 | alpha = self.last_epoch / self.num_iters 16 | factor = (1 - self.factor) * alpha + self.factor 17 | else: 18 | factor = 1 19 | 20 | return [lr * factor for lr in super().get_lr()] 21 | -------------------------------------------------------------------------------- /f.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import division 3 | 4 | import warnings 5 | import math 6 | import types 7 | from torch.nn import functional as F 8 | import torch 9 | from torch._C import _infer_size, _add_docstr 10 | torch.nn.MultiLabelSoftMarginLoss 11 | @torch._jit_internal.weak_script 12 | def multilabel_soft_margin_loss(input, target, weight=None, size_average=None, 13 | reduce=None, reduction='mean'): 14 | # type: (Tensor, Tensor, Optional[Tensor], Optional[bool], Optional[bool], str) -> Tensor 15 | r"""multilabel_soft_margin_loss(input, target, weight=None, size_average=None) -> Tensor 16 | 17 | See :class:`~torch.nn.MultiLabelSoftMarginLoss` for details. 
18 | """ 19 | loss = -(target * torch.log(input) + (1 - target) * torch.log(-input)) 20 | 21 | if weight is not None: 22 | loss = loss * torch.jit._unwrap_optional(weight) 23 | loss = loss.sum(dim=1) / input.size(1) # only return N loss values 24 | #loss = loss.sum(dim=1) 25 | if reduction == 'none': 26 | ret = loss 27 | elif reduction == 'mean': 28 | ret = loss.mean() 29 | elif reduction == 'sum': 30 | ret = loss.sum() 31 | else: 32 | ret = input 33 | raise ValueError(reduction + " is not valid") 34 | return ret 35 | 36 | def focal_cross_entropy(input, target, weight=None, ignore_index=-100,reduction='mean'): 37 | input=torch.mul(torch.mul((1-F.softmax(input, 1)),(1-F.softmax(input, 1))),(F.log_softmax(input, 1))) 38 | return F.nll_loss(input, target, weight, None, ignore_index, None, reduction) 39 | 40 | if __name__ == '__main__': 41 | input=[[0.4,0.9]] 42 | input=torch.tensor(input,dtype=torch.float) 43 | target=[0] 44 | target=torch.tensor(target,dtype=torch.long) 45 | print(F.softmax(input, 1)) 46 | print((1-F.softmax(input, 1))) 47 | print(torch.mul((1-F.softmax(input, 1)),(1-F.softmax(input, 1)))) 48 | print(F.log_softmax(input, 1)) 49 | print(F.cross_entropy(input,target)) 50 | print(focal_cross_entropy(input,target)) -------------------------------------------------------------------------------- /img_to_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | img_root = '/home/aiuser/frames/'#这里写你的文件夹路径,比如:/home/youname/data/img/,注意最后一个文件夹要有斜杠 4 | fps = 15 #保存视频的FPS,可以适当调整 5 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 6 | videoWriter = cv2.VideoWriter('/home/aiuser/frames/saveVideo.avi',fourcc,fps,(656,480)) 7 | for i in range(121): 8 | if i>=10: 9 | frame = cv2.imread(img_root + str(i) + '.jpg') 10 | videoWriter.write(frame) 11 | videoWriter.release() -------------------------------------------------------------------------------- /imgs/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/dog.jpg -------------------------------------------------------------------------------- /imgs/eagle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/eagle.jpg -------------------------------------------------------------------------------- /imgs/giraffe.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/giraffe.jpg -------------------------------------------------------------------------------- /imgs/herd_of_horses.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/herd_of_horses.jpg -------------------------------------------------------------------------------- /imgs/img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/img1.jpg -------------------------------------------------------------------------------- /imgs/img2.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/img2.jpg -------------------------------------------------------------------------------- /imgs/img3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/img3.jpg -------------------------------------------------------------------------------- /imgs/img4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/img4.jpg -------------------------------------------------------------------------------- /imgs/messi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/messi.jpg -------------------------------------------------------------------------------- /imgs/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/person.jpg -------------------------------------------------------------------------------- /imgs/scream.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/imgs/scream.jpg -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import torch 5 | 6 | from PIL import ImageDraw 7 | from torchvision.transforms import transforms 8 | from dataset.base import Base as DatasetBase 9 | from backbone.base import Base as BackboneBase 10 | from bbox import BBox 11 | from model import Model 12 | from roi.pooler_ import Pooler 13 | from config.eval_config import EvalConfig as Config 14 | 15 | 16 | def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float): 17 | #dataset_class = DatasetBase.from_name(dataset_name) 18 | dataset_class=80 19 | backbone = BackboneBase.from_name(backbone_name)() 20 | # model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE, 21 | # anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES, 22 | # rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda() 23 | model = Model(backbone, dataset_class, pooler_mode=Config.POOLER_MODE, 24 | anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES, 25 | rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda() 26 | model.load(path_to_checkpoint) 27 | 28 | with torch.no_grad(): 29 | image = transforms.Image.open(path_to_input_image) 30 | image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE) 31 | 32 | detection_bboxes, detection_classes, detection_probs, _ = \ 33 | model.eval().forward(image_tensor.unsqueeze(dim=0).cuda()) 34 | 
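# `scale` is the resize factor returned by preprocess(), so the detections produced
# above are expressed in resized-image coordinates; dividing by `scale` on the next
# line maps them back onto the original image. Illustrative numbers (hypothetical,
# not taken from the config): if a 600-pixel shorter side were resized to 800,
# scale = 800 / 600 ~ 1.33 and a predicted x of 400 corresponds to 400 / 1.33 ~ 300
# in the original frame.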
detection_bboxes /= scale 35 | 36 | kept_indices = detection_probs > prob_thresh 37 | detection_bboxes = detection_bboxes[kept_indices] 38 | detection_classes = detection_classes[kept_indices] 39 | detection_probs = detection_probs[kept_indices] 40 | 41 | draw = ImageDraw.Draw(image) 42 | 43 | for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()): 44 | color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white']) 45 | bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3]) 46 | category = dataset_class.LABEL_TO_CATEGORY_DICT[cls] 47 | 48 | draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color) 49 | draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color) 50 | 51 | image.save(path_to_output_image) 52 | print(f'Output image is saved to {path_to_output_image}') 53 | 54 | 55 | if __name__ == '__main__': 56 | def main(): 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('-s', '--dataset', type=str, choices=DatasetBase.OPTIONS, required=True, help='name of dataset') 59 | parser.add_argument('-b', '--backbone', type=str, choices=BackboneBase.OPTIONS, required=True, help='name of backbone model') 60 | parser.add_argument('-c', '--checkpoint', type=str, required=True, help='path to checkpoint') 61 | parser.add_argument('-p', '--probability_threshold', type=float, default=0.6, help='threshold of detection probability') 62 | parser.add_argument('--image_min_side', type=float, help='default: {:g}'.format(Config.IMAGE_MIN_SIDE)) 63 | parser.add_argument('--image_max_side', type=float, help='default: {:g}'.format(Config.IMAGE_MAX_SIDE)) 64 | parser.add_argument('--anchor_ratios', type=str, help='default: "{!s}"'.format(Config.ANCHOR_RATIOS)) 65 | parser.add_argument('--anchor_sizes', type=str, help='default: "{!s}"'.format(Config.ANCHOR_SIZES)) 66 | parser.add_argument('--pooler_mode', type=str, choices=Pooler.OPTIONS, help='default: {.value:s}'.format(Config.POOLER_MODE)) 67 | parser.add_argument('--rpn_pre_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_PRE_NMS_TOP_N)) 68 | parser.add_argument('--rpn_post_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_POST_NMS_TOP_N)) 69 | parser.add_argument('input', type=str, help='path to input image') 70 | parser.add_argument('output', type=str, help='path to output result image') 71 | args = parser.parse_args() 72 | 73 | path_to_input_image = args.input 74 | path_to_output_image = args.output 75 | dataset_name = args.dataset 76 | backbone_name = args.backbone 77 | path_to_checkpoint = args.checkpoint 78 | prob_thresh = args.probability_threshold 79 | 80 | os.makedirs(os.path.join(os.path.curdir, os.path.dirname(path_to_output_image)), exist_ok=True) 81 | 82 | Config.setup(image_min_side=args.image_min_side, image_max_side=args.image_max_side, 83 | anchor_ratios=args.anchor_ratios, anchor_sizes=args.anchor_sizes, pooler_mode=args.pooler_mode, 84 | rpn_pre_nms_top_n=args.rpn_pre_nms_top_n, rpn_post_nms_top_n=args.rpn_post_nms_top_n) 85 | 86 | print('Arguments:') 87 | for k, v in vars(args).items(): 88 | print(f'\t{k} = {v}') 89 | print(Config.describe()) 90 | 91 | _infer(path_to_input_image, path_to_output_image, path_to_checkpoint, dataset_name, backbone_name, prob_thresh) 92 | 93 | main() -------------------------------------------------------------------------------- /infer_stream.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import
itertools 3 | import random 4 | import time 5 | import torch 6 | 7 | import cv2 8 | import numpy as np 9 | from PIL import ImageDraw, Image 10 | 11 | from backbone.base import Base as BackboneBase 12 | from config.eval_config import EvalConfig as Config 13 | from dataset.base import Base as DatasetBase 14 | from bbox import BBox 15 | from model import Model 16 | from roi.pooler_ import Pooler 17 | 18 | 19 | def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float): 20 | dataset_class = DatasetBase.from_name(dataset_name) 21 | backbone = BackboneBase.from_name(backbone_name)(pretrained=False) 22 | model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE, 23 | anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES, 24 | rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda() 25 | model.load(path_to_checkpoint) 26 | 27 | if path_to_input_stream_endpoint.isdigit(): 28 | path_to_input_stream_endpoint = int(path_to_input_stream_endpoint) 29 | video_capture = cv2.VideoCapture(path_to_input_stream_endpoint) 30 | 31 | with torch.no_grad(): 32 | for sn in itertools.count(start=1): 33 | _, frame = video_capture.read() 34 | 35 | if sn % period_of_inference != 0: 36 | continue 37 | 38 | timestamp = time.time() 39 | 40 | image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 41 | image = Image.fromarray(image) 42 | image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE) 43 | 44 | detection_bboxes, detection_classes, detection_probs, _ = \ 45 | model.eval().forward(image_tensor.unsqueeze(dim=0).cuda()) 46 | detection_bboxes /= scale 47 | 48 | kept_indices = detection_probs > prob_thresh 49 | detection_bboxes = detection_bboxes[kept_indices] 50 | detection_classes = detection_classes[kept_indices] 51 | detection_probs = detection_probs[kept_indices] 52 | 53 | draw = ImageDraw.Draw(image) 54 | 55 | for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()): 56 | color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white']) 57 | bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3]) 58 | category = dataset_class.LABEL_TO_CATEGORY_DICT[cls] 59 | 60 | draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color) 61 | draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color) 62 | 63 | image = np.array(image) 64 | frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 65 | 66 | elapse = time.time() - timestamp 67 | fps = 1 / elapse 68 | cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA) 69 | 70 | cv2.imshow('easy-faster-rcnn.pytorch', frame) 71 | if cv2.waitKey(10) == 27: 72 | break 73 | 74 | video_capture.release() 75 | cv2.destroyAllWindows() 76 | 77 | 78 | if __name__ == '__main__': 79 | def main(): 80 | parser = argparse.ArgumentParser() 81 | parser.add_argument('-s', '--dataset', type=str, choices=DatasetBase.OPTIONS, required=True, help='name of dataset') 82 | parser.add_argument('-b', '--backbone', type=str, choices=BackboneBase.OPTIONS, required=True, help='name of backbone model') 83 | parser.add_argument('-c', '--checkpoint', type=str, required=True, help='path to checkpoint') 84 | parser.add_argument('-p', '--probability_threshold', type=float, default=0.6, help='threshold of detection probability') 85 | 
parser.add_argument('--image_min_side', type=float, help='default: {:g}'.format(Config.IMAGE_MIN_SIDE)) 86 | parser.add_argument('--image_max_side', type=float, help='default: {:g}'.format(Config.IMAGE_MAX_SIDE)) 87 | parser.add_argument('--anchor_ratios', type=str, help='default: "{!s}"'.format(Config.ANCHOR_RATIOS)) 88 | parser.add_argument('--anchor_sizes', type=str, help='default: "{!s}"'.format(Config.ANCHOR_SIZES)) 89 | parser.add_argument('--pooler_mode', type=str, choices=Pooler.OPTIONS, help='default: {.value:s}'.format(Config.POOLER_MODE)) 90 | parser.add_argument('--rpn_pre_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_PRE_NMS_TOP_N)) 91 | parser.add_argument('--rpn_post_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_POST_NMS_TOP_N)) 92 | parser.add_argument('input', type=str, help='path to input stream endpoint') 93 | parser.add_argument('period', type=int, help='period of inference') 94 | args = parser.parse_args() 95 | 96 | path_to_input_stream_endpoint = args.input 97 | period_of_inference = args.period 98 | dataset_name = args.dataset 99 | backbone_name = args.backbone 100 | path_to_checkpoint = args.checkpoint 101 | prob_thresh = args.probability_threshold 102 | 103 | Config.setup(image_min_side=args.image_min_side, image_max_side=args.image_max_side, 104 | anchor_ratios=args.anchor_ratios, anchor_sizes=args.anchor_sizes, pooler_mode=args.pooler_mode, 105 | rpn_pre_nms_top_n=args.rpn_pre_nms_top_n, rpn_post_nms_top_n=args.rpn_post_nms_top_n) 106 | 107 | print('Arguments:') 108 | for k, v in vars(args).items(): 109 | print(f'\t{k} = {v}') 110 | print(Config.describe()) 111 | 112 | _infer_stream(path_to_input_stream_endpoint, period_of_inference, path_to_checkpoint, dataset_name, backbone_name, prob_thresh) 113 | 114 | main() 115 | -------------------------------------------------------------------------------- /infer_websocket.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import json 4 | 5 | import numpy as np 6 | import torch 7 | import websockets 8 | from PIL import Image 9 | 10 | from backbone.base import Base as BackboneBase 11 | from bbox import BBox 12 | from config.eval_config import EvalConfig as Config 13 | from dataset.base import Base as DatasetBase 14 | from model import Model 15 | from roi.pooler_ import Pooler 16 | 17 | 18 | def _infer_websocket(path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float): 19 | dataset_class = DatasetBase.from_name(dataset_name) 20 | backbone = BackboneBase.from_name(backbone_name)(pretrained=False) 21 | model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE, 22 | anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES, 23 | rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda() 24 | model.load(path_to_checkpoint) 25 | 26 | async def handler(websocket, path): 27 | print('Connection established:', path) 28 | 29 | with torch.no_grad(): 30 | while True: 31 | frame = await websocket.recv() 32 | frame = np.frombuffer(frame, dtype=np.uint8).reshape(480, 640, 3) 33 | 34 | image = Image.fromarray(frame) 35 | image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE) 36 | 37 | detection_bboxes, detection_classes, detection_probs, _ = \ 38 | model.eval().forward(image_tensor.unsqueeze(dim=0).cuda()) 39 | detection_bboxes /= scale 40 | 41 | kept_indices = detection_probs > 
prob_thresh 42 | detection_bboxes = detection_bboxes[kept_indices] 43 | detection_classes = detection_classes[kept_indices] 44 | detection_probs = detection_probs[kept_indices] 45 | 46 | message = [] 47 | 48 | for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()): 49 | bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3]) 50 | category = dataset_class.LABEL_TO_CATEGORY_DICT[cls] 51 | 52 | message.append({ 53 | 'left': int(bbox.left), 54 | 'top': int(bbox.top), 55 | 'right': int(bbox.right), 56 | 'bottom': int(bbox.bottom), 57 | 'category': category 58 | }) 59 | 60 | message = json.dumps(message) 61 | await websocket.send(message) 62 | 63 | server = websockets.serve(handler, host='*', port=8765, max_size=2 ** 32, compression=None) 64 | asyncio.get_event_loop().run_until_complete(server) 65 | print('Service is ready. Please navigate to http://127.0.0.1:8000/') 66 | asyncio.get_event_loop().run_forever() 67 | 68 | 69 | if __name__ == '__main__': 70 | def main(): 71 | parser = argparse.ArgumentParser() 72 | parser.add_argument('-s', '--dataset', type=str, choices=DatasetBase.OPTIONS, required=True, help='name of dataset') 73 | parser.add_argument('-b', '--backbone', type=str, choices=BackboneBase.OPTIONS, required=True, help='name of backbone model') 74 | parser.add_argument('-c', '--checkpoint', type=str, required=True, help='path to checkpoint') 75 | parser.add_argument('-p', '--probability_threshold', type=float, default=0.6, help='threshold of detection probability') 76 | parser.add_argument('--image_min_side', type=float, help='default: {:g}'.format(Config.IMAGE_MIN_SIDE)) 77 | parser.add_argument('--image_max_side', type=float, help='default: {:g}'.format(Config.IMAGE_MAX_SIDE)) 78 | parser.add_argument('--anchor_ratios', type=str, help='default: "{!s}"'.format(Config.ANCHOR_RATIOS)) 79 | parser.add_argument('--anchor_sizes', type=str, help='default: "{!s}"'.format(Config.ANCHOR_SIZES)) 80 | parser.add_argument('--pooler_mode', type=str, choices=Pooler.OPTIONS, help='default: {.value:s}'.format(Config.POOLER_MODE)) 81 | parser.add_argument('--rpn_pre_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_PRE_NMS_TOP_N)) 82 | parser.add_argument('--rpn_post_nms_top_n', type=int, help='default: {:d}'.format(Config.RPN_POST_NMS_TOP_N)) 83 | args = parser.parse_args() 84 | 85 | dataset_name = args.dataset 86 | backbone_name = args.backbone 87 | path_to_checkpoint = args.checkpoint 88 | prob_thresh = args.probability_threshold 89 | 90 | Config.setup(image_min_side=args.image_min_side, image_max_side=args.image_max_side, 91 | anchor_ratios=args.anchor_ratios, anchor_sizes=args.anchor_sizes, pooler_mode=args.pooler_mode, 92 | rpn_pre_nms_top_n=args.rpn_pre_nms_top_n, rpn_post_nms_top_n=args.rpn_post_nms_top_n) 93 | 94 | print('Arguments:') 95 | for k, v in vars(args).items(): 96 | print(f'\t{k} = {v}') 97 | print(Config.describe()) 98 | 99 | _infer_websocket(path_to_checkpoint, dataset_name, backbone_name, prob_thresh) 100 | 101 | main() 102 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | class Logger(object): 5 | Initialized = False 6 | 7 | @staticmethod 8 | def initialize(path_to_log_file): 9 | logging.basicConfig(level=logging.INFO, 10 | format='%(asctime)s %(levelname)-8s %(message)s', 11 | datefmt='%Y-%m-%d %H:%M:%S', 12 | 
handlers=[logging.FileHandler(path_to_log_file), 13 | logging.StreamHandler()]) 14 | Logger.Initialized = True 15 | 16 | @staticmethod 17 | def log(level, message): 18 | assert Logger.Initialized, 'Logger has not been initialized' 19 | logging.log(level, message) 20 | 21 | @staticmethod 22 | def d(message): 23 | Logger.log(logging.DEBUG, message) 24 | 25 | @staticmethod 26 | def i(message): 27 | Logger.log(logging.INFO, message) 28 | 29 | @staticmethod 30 | def w(message): 31 | Logger.log(logging.WARNING, message) 32 | 33 | @staticmethod 34 | def e(message): 35 | Logger.log(logging.ERROR, message) 36 | -------------------------------------------------------------------------------- /logs/events.out.tfevents.1555900792.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/logs/events.out.tfevents.1555900792.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /logs/events.out.tfevents.1555900949.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/logs/events.out.tfevents.1555900949.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /outputs/frames/blank.TXT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/outputs/frames/blank.TXT -------------------------------------------------------------------------------- /preprocess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import numpy as np 8 | import cv2 9 | import matplotlib.pyplot as plt 10 | from util import count_parameters as count 11 | from util import convert2cpu as cpu 12 | from PIL import Image, ImageDraw 13 | 14 | 15 | def letterbox_image(img, inp_dim): 16 | '''resize image with unchanged aspect ratio using padding''' 17 | img_w, img_h = img.shape[1], img.shape[0] 18 | w, h = inp_dim 19 | new_w = int(img_w * min(w/img_w, h/img_h)) 20 | new_h = int(img_h * min(w/img_w, h/img_h)) 21 | resized_image = cv2.resize(img, (new_w,new_h), interpolation = cv2.INTER_CUBIC) 22 | 23 | canvas = np.full((inp_dim[1], inp_dim[0], 3), 128) 24 | 25 | canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w, :] = resized_image 26 | 27 | return canvas 28 | 29 | 30 | 31 | def prep_image(img, inp_dim): 32 | """ 33 | Prepare image for inputting to the neural network. 
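The frame is first letterboxed by letterbox_image() above: it is scaled to fit
inside inp_dim with its aspect ratio preserved, and the rest of the square canvas
is padded with gray (value 128). Illustrative example (hypothetical sizes): a
1280x720 frame with inp_dim = 416 is resized to 416x234 and centered on a
416x416 canvas.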
34 | 35 | Returns a Variable 36 | """ 37 | 38 | orig_im = cv2.imread(img) 39 | dim = orig_im.shape[1], orig_im.shape[0] 40 | img = (letterbox_image(orig_im, (inp_dim, inp_dim))) 41 | img_ = img[:,:,::-1].transpose((2,0,1)).copy() 42 | img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0) 43 | return img_, orig_im, dim 44 | 45 | def prep_image_pil(img, network_dim): 46 | orig_im = Image.open(img) 47 | img = orig_im.convert('RGB') 48 | dim = img.size 49 | img = img.resize(network_dim) 50 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 51 | img = img.view(*network_dim, 3).transpose(0,1).transpose(0,2).contiguous() 52 | img = img.view(1, 3,*network_dim) 53 | img = img.float().div(255.0) 54 | return (img, orig_im, dim) 55 | 56 | def inp_to_image(inp): 57 | inp = inp.cpu().squeeze() 58 | inp = inp*255 59 | try: 60 | inp = inp.data.numpy() 61 | except RuntimeError: 62 | inp = inp.numpy() 63 | inp = inp.transpose(1,2,0) 64 | 65 | inp = inp[:,:,::-1] 66 | return inp 67 | 68 | 69 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python==4.2.0.32 2 | torch==1.0.0 3 | torchvision==0.2.1 4 | msgpack==0.6.1 5 | -------------------------------------------------------------------------------- /roi/pooler.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | import math 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import functional as F 6 | 7 | from support.layer.roi_align import ROIAlign 8 | 9 | 10 | class Pooler(object): 11 | 12 | class Mode(Enum): 13 | POOLING = 'pooling' 14 | ALIGN = 'align' 15 | 16 | OPTIONS = ['pooling', 'align'] 17 | 18 | @staticmethod 19 | def apply(features: Tensor, proposal_bboxes: Tensor, proposal_batch_indices: Tensor, mode: Mode) -> Tensor: 20 | _, _, feature_map_height, feature_map_width = features.shape 21 | scale = 1 / 16 22 | output_size = (7, 7) 23 | # sure 2 24 | #print("proposal_batch_indices:",proposal_batch_indices) 25 | if mode == Pooler.Mode.POOLING: 26 | pool = [] 27 | #print("debug_pooling:",proposal_batch_indices.shape) 28 | for (proposal_bbox, proposal_batch_index) in zip(proposal_bboxes, proposal_batch_indices): 29 | start_x = max(min(round(proposal_bbox[0].item() * scale), feature_map_width - 1), 0) # [0, feature_map_width) 30 | start_y = max(min(round(proposal_bbox[1].item() * scale), feature_map_height - 1), 0) # (0, feature_map_height] 31 | end_x = max(min(round(proposal_bbox[2].item() * scale) + 1, feature_map_width), 1) # [0, feature_map_width) 32 | end_y = max(min(round(proposal_bbox[3].item() * scale) + 1, feature_map_height), 1) # (0, feature_map_height] 33 | # sure 3 34 | #print("position:",start_x,start_y,end_x,end_y) 35 | h=end_y-start_y 36 | w=end_x-start_x 37 | if h<7: 38 | change_h=math.ceil((7-h)/2) 39 | start_y=max(start_y-change_h,0) 40 | end_y=min(end_y+change_h,feature_map_height) 41 | if w<7: 42 | change_w=math.ceil((7-w)/2) 43 | start_x =max(start_x-change_w,0) 44 | end_x = min(end_x+change_w,feature_map_width) 45 | # sure 4 46 | #print("changed_position:", start_x, start_y, end_x, end_y) 47 | roi_feature_map = features[proposal_batch_index, :, start_y:end_y, start_x:end_x] 48 | pool.append(F.adaptive_max_pool2d(input=roi_feature_map, output_size=output_size)) 49 | shape=pool[-1].shape 50 | pool = torch.stack(pool, dim=0) 51 | elif mode == Pooler.Mode.ALIGN: 52 | pool = 
ROIAlign(output_size, spatial_scale=scale, sampling_ratio=0)( 53 | features, 54 | torch.cat([proposal_batch_indices.view(-1, 1).float(), proposal_bboxes], dim=1) 55 | ) 56 | else: 57 | raise ValueError 58 | 59 | pool = F.max_pool2d(input=pool, kernel_size=2, stride=2) 60 | return pool 61 | 62 | -------------------------------------------------------------------------------- /roi/pooler_.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | import torch 4 | from torch import Tensor 5 | from torch.nn import functional as F 6 | 7 | from support.layer.roi_align import ROIAlign 8 | 9 | 10 | class Pooler(object): 11 | 12 | class Mode(Enum): 13 | POOLING = 'pooling' 14 | ALIGN = 'align' 15 | 16 | OPTIONS = ['pooling', 'align'] 17 | 18 | # @staticmethod 19 | # def apply(features: Tensor, proposal_bboxes: Tensor, proposal_batch_indices: Tensor, mode: Mode) -> Tensor: 20 | # _, _,feature_map_t, feature_map_height, feature_map_width = features.shape 21 | # scale = 1 / 16 22 | # output_size = (feature_map_t,3 * 2, 3 * 2) 23 | # 24 | # if mode == Pooler.Mode.POOLING: 25 | # pool = [] 26 | # for (proposal_bbox, proposal_batch_index) in zip(proposal_bboxes, proposal_batch_indices): 27 | # start_x = max(min(round(proposal_bbox[0].item() * scale), feature_map_width - 1), 0) # [0, feature_map_width) 28 | # start_y = max(min(round(proposal_bbox[1].item() * scale), feature_map_height - 1), 0) # (0, feature_map_height] 29 | # end_x = max(min(round(proposal_bbox[2].item() * scale) + 1, feature_map_width), 1) # [0, feature_map_width) 30 | # end_y = max(min(round(proposal_bbox[3].item() * scale) + 1, feature_map_height), 1) # (0, feature_map_height] 31 | # roi_feature_map = features[proposal_batch_index, :,:, start_y:end_y, start_x:end_x] 32 | # pool.append(F.adaptive_max_pool3d(input=roi_feature_map,output_size=output_size)) 33 | # pool = torch.stack(pool, dim=0) 34 | # elif mode == Pooler.Mode.ALIGN: 35 | # pool = ROIAlign(output_size, spatial_scale=scale, sampling_ratio=0)( 36 | # features, 37 | # torch.cat([proposal_batch_indices.view(-1, 1).float(), proposal_bboxes], dim=1) 38 | # ) 39 | # else: 40 | # raise ValueError 41 | # 42 | # pool = F.max_pool3d(input=pool, kernel_size=(1,2,2), stride=(1,2,2)) 43 | # return pool 44 | 45 | @staticmethod 46 | def apply(features: Tensor, proposal_bboxes: Tensor, proposal_batch_indices: Tensor, mode: Mode) -> Tensor: 47 | _, _, feature_map_t, feature_map_height, feature_map_width = features.shape 48 | scale = 1 / 16 49 | output_size = (feature_map_t, 7, 7) 50 | 51 | if mode == Pooler.Mode.POOLING: 52 | pool = [] 53 | for (proposal_bbox, proposal_batch_index) in zip(proposal_bboxes, proposal_batch_indices): 54 | start_x = max(min(round(proposal_bbox[0].item() * scale), feature_map_width - 1), 55 | 0) # [0, feature_map_width) 56 | start_y = max(min(round(proposal_bbox[1].item() * scale), feature_map_height - 1), 57 | 0) # (0, feature_map_height] 58 | end_x = max(min(round(proposal_bbox[2].item() * scale) + 1, feature_map_width), 59 | 1) # [0, feature_map_width) 60 | end_y = max(min(round(proposal_bbox[3].item() * scale) + 1, feature_map_height), 61 | 1) # (0, feature_map_height] 62 | 63 | roi_feature_map = features[proposal_batch_index, :, :, start_y:end_y, start_x:end_x] 64 | pool.append(F.adaptive_max_pool3d(input=roi_feature_map, output_size=output_size)) 65 | pool = torch.stack(pool, dim=0) 66 | else: 67 | raise ValueError 68 | 69 | #pool = F.max_pool3d(input=pool, kernel_size=(1, 2, 2), stride=(1, 2, 2)) 
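# Worked example of the coordinate mapping above (numbers chosen only for
# illustration): with scale = 1/16, a proposal [32., 48., 128., 160.] gives
# start_x = 2, start_y = 3, end_x = round(128 / 16) + 1 = 9 and end_y = 11, so a
# 7x8 window of the feature map is cropped for every frame and adaptive-max-pooled
# to the fixed (feature_map_t, 7, 7) output regardless of the proposal's size.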
70 | return pool 71 | 72 | -------------------------------------------------------------------------------- /rpn/mkf.py: -------------------------------------------------------------------------------- 1 | def make_image_key(video_id, timestamp): 2 | """Returns a unique identifier for a video id & timestamp.""" 3 | return "%s,%04d" % (video_id, int(timestamp)) 4 | 5 | if __name__ == '__main__': 6 | video_id="aaaa" 7 | timestamp="930" 8 | print(make_image_key(video_id,timestamp)) -------------------------------------------------------------------------------- /runs/Apr15_19-42-07_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328527.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-42-07_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328527.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_19-42-31_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328551.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-42-31_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328551.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_19-42-47_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328567.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-42-47_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328567.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_19-44-13_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328653.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-44-13_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328653.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_19-47-03_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328823.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-47-03_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555328823.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_19-53-21_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555329201.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-53-21_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555329201.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_19-56-51_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555329411.aiuser-Z390-GAMING-X: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_19-56-51_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555329411.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_20-00-31_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555329631.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_20-00-31_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555329631.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /runs/Apr15_20-12-31_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555330351.aiuser-Z390-GAMING-X: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/runs/Apr15_20-12-31_aiuser-Z390-GAMING-XNet1/events.out.tfevents.1555330351.aiuser-Z390-GAMING-X -------------------------------------------------------------------------------- /scripts/coco2017/eval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | CHECKPOINT=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${CHECKPOINT}" ]]); then 5 | echo "Argument BACKBONE or CHECKPOINT is missing" 6 | exit 7 | fi 8 | 9 | python eval.py -s=coco2017 -b=${BACKBONE} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --rpn_post_nms_top_n=1000 ${CHECKPOINT} -------------------------------------------------------------------------------- /scripts/coco2017/infer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | CHECKPOINT=$2 4 | INPUT_IMAGE=$3 5 | OUTPUT_IMAGE=$4 6 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${CHECKPOINT}" ]] && [[ -n "${INPUT_IMAGE}" ]] && [[ -n "${OUTPUT_IMAGE}" ]]); then 7 | echo "Argument BACKBONE or CHECKPOINT or INPUT_IMAGE or OUTPUT_IMAGE is missing" 8 | exit 9 | fi 10 | 11 | python infer.py -s=coco2017 -b=${BACKBONE} -c=${CHECKPOINT} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --rpn_post_nms_top_n=1000 ${INPUT_IMAGE} ${OUTPUT_IMAGE} -------------------------------------------------------------------------------- /scripts/coco2017/train-bs1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=coco2017 -b=${BACKBONE} -o=${OUTPUTS_DIR} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --anchor_smooth_l1_loss_beta=0.1111 --batch_size=1 --learning_rate=0.00125 --weight_decay=0.0001 --step_lr_sizes="[960000, 1280000]" --num_steps_to_snapshot=320000 --num_steps_to_finish=1440000 -------------------------------------------------------------------------------- /scripts/coco2017/train-bs16.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! 
([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=coco2017 -b=${BACKBONE} -o=${OUTPUTS_DIR} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --anchor_smooth_l1_loss_beta=0.1111 --batch_size=16 --learning_rate=0.02 --weight_decay=0.0001 --step_lr_sizes="[60000, 80000]" --num_steps_to_snapshot=20000 --num_steps_to_finish=90000 -------------------------------------------------------------------------------- /scripts/coco2017/train-bs2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=coco2017 -b=${BACKBONE} -o=${OUTPUTS_DIR} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --anchor_smooth_l1_loss_beta=0.1111 --batch_size=2 --learning_rate=0.0025 --weight_decay=0.0001 --step_lr_sizes="[480000, 640000]" --num_steps_to_snapshot=160000 --num_steps_to_finish=720000 -------------------------------------------------------------------------------- /scripts/coco2017/train-bs4.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=coco2017 -b=${BACKBONE} -o=${OUTPUTS_DIR} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --anchor_smooth_l1_loss_beta=0.1111 --batch_size=4 --learning_rate=0.005 --weight_decay=0.0001 --step_lr_sizes="[240000, 320000]" --num_steps_to_snapshot=80000 --num_steps_to_finish=360000 -------------------------------------------------------------------------------- /scripts/coco2017/train-bs8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=coco2017 -b=${BACKBONE} -o=${OUTPUTS_DIR} --image_min_side=800 --image_max_side=1333 --anchor_sizes="[64, 128, 256, 512]" --anchor_smooth_l1_loss_beta=0.1111 --batch_size=8 --learning_rate=0.01 --weight_decay=0.0001 --step_lr_sizes="[120000, 160000]" --num_steps_to_snapshot=40000 --num_steps_to_finish=180000 -------------------------------------------------------------------------------- /scripts/voc2007/eval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | CHECKPOINT=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${CHECKPOINT}" ]]); then 5 | echo "Argument BACKBONE or CHECKPOINT is missing" 6 | exit 7 | fi 8 | 9 | python eval.py -s=voc2007 -b=${BACKBONE} ${CHECKPOINT} -------------------------------------------------------------------------------- /scripts/voc2007/infer.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | CHECKPOINT=$2 4 | INPUT_IMAGE=$3 5 | OUTPUT_IMAGE=$4 6 | if ! 
([[ -n "${BACKBONE}" ]] && [[ -n "${CHECKPOINT}" ]] && [[ -n "${INPUT_IMAGE}" ]] && [[ -n "${OUTPUT_IMAGE}" ]]); then 7 | echo "Argument BACKBONE or CHECKPOINT or INPUT_IMAGE or OUTPUT_IMAGE is missing" 8 | exit 9 | fi 10 | 11 | python infer.py -s=voc2007 -b=${BACKBONE} -c=${CHECKPOINT} ${INPUT_IMAGE} ${OUTPUT_IMAGE} -------------------------------------------------------------------------------- /scripts/voc2007/train-bs1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=voc2007 -b=${BACKBONE} -o=${OUTPUTS_DIR} --batch_size=1 --learning_rate=0.001 --step_lr_sizes="[50000, 70000]" --num_steps_to_snapshot=10000 --num_steps_to_finish=90000 -------------------------------------------------------------------------------- /scripts/voc2007/train-bs16.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=voc2007 -b=${BACKBONE} -o=${OUTPUTS_DIR} --batch_size=16 --learning_rate=0.016 --step_lr_sizes="[3125, 4375]" --num_steps_to_snapshot=625 --num_steps_to_finish=5625 -------------------------------------------------------------------------------- /scripts/voc2007/train-bs2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=voc2007 -b=${BACKBONE} -o=${OUTPUTS_DIR} --batch_size=2 --learning_rate=0.002 --step_lr_sizes="[25000, 35000]" --num_steps_to_snapshot=5000 --num_steps_to_finish=45000 -------------------------------------------------------------------------------- /scripts/voc2007/train-bs4.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! ([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=voc2007 -b=${BACKBONE} -o=${OUTPUTS_DIR} --batch_size=4 --learning_rate=0.004 --step_lr_sizes="[12500, 17500]" --num_steps_to_snapshot=2500 --num_steps_to_finish=22500 -------------------------------------------------------------------------------- /scripts/voc2007/train-bs8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | BACKBONE=$1 3 | OUTPUTS_DIR=$2 4 | if ! 
([[ -n "${BACKBONE}" ]] && [[ -n "${OUTPUTS_DIR}" ]]); then 5 | echo "Argument BACKBONE or OUTPUTS_DIR is missing" 6 | exit 7 | fi 8 | 9 | python train.py -s=voc2007 -b=${BACKBONE} -o=${OUTPUTS_DIR} --batch_size=8 --learning_rate=0.008 --step_lr_sizes="[6250, 8750]" --num_steps_to_snapshot=1250 --num_steps_to_finish=11250 -------------------------------------------------------------------------------- /sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaib-saxena/Slow-Fast-pytorch-implementation/cd08ea735d501f0eedc2f2d4ed5e5c8b01a9f4a8/sort/__init__.py -------------------------------------------------------------------------------- /sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 
24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 
40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 
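If a budget was given to the constructor, only the most recent `budget` feature
vectors are kept per target, and samples of targets that are no longer active are
discarded.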
148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. 
Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurrence. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | 83 | def to_tlwh(self): 84 | """Get current position in bounding box format `(top left x, top left y, 85 | width, height)`. 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | The bounding box. 91 | 92 | """ 93 | ret = self.mean[:4].copy() 94 | ret[2] *= ret[3] 95 | ret[:2] -= ret[2:] / 2 96 | return ret 97 | 98 | def to_tlbr(self): 99 | """Get current position in bounding box format `(min x, min y, max x, 100 | max y)`. 101 | 102 | Returns 103 | ------- 104 | ndarray 105 | The bounding box. 106 | 107 | """ 108 | ret = self.to_tlwh() 109 | ret[2:] = ret[:2] + ret[2:] 110 | return ret 111 | 112 | def predict(self, kf): 113 | """Propagate the state distribution to the current time step using a 114 | Kalman filter prediction step. 115 | 116 | Parameters 117 | ---------- 118 | kf : kalman_filter.KalmanFilter 119 | The Kalman filter. 120 | 121 | """ 122 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 123 | self.age += 1 124 | self.time_since_update += 1 125 | 126 | def update(self, kf, detection): 127 | """Perform Kalman filter measurement update step and update the feature 128 | cache.
129 | 130 | Parameters 131 | ---------- 132 | kf : kalman_filter.KalmanFilter 133 | The Kalman filter. 134 | detection : Detection 135 | The associated detection. 136 | 137 | """ 138 | self.mean, self.covariance = kf.update( 139 | self.mean, self.covariance, detection.to_xyah()) 140 | self.features.append(detection.feature) 141 | 142 | self.hits += 1 143 | self.time_since_update = 0 144 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 145 | self.state = TrackState.Confirmed 146 | 147 | def mark_missed(self): 148 | """Mark this track as missed (no association at the current time step). 149 | """ 150 | if self.state == TrackState.Tentative: 151 | self.state = TrackState.Deleted 152 | elif self.time_since_update > self._max_age: 153 | self.state = TrackState.Deleted 154 | 155 | def is_tentative(self): 156 | """Returns True if this track is tentative (unconfirmed). 157 | """ 158 | return self.state == TrackState.Tentative 159 | 160 | def is_confirmed(self): 161 | """Returns True if this track is confirmed.""" 162 | return self.state == TrackState.Confirmed 163 | 164 | def is_deleted(self): 165 | """Returns True if this track is dead and should be deleted.""" 166 | return self.state == TrackState.Deleted 167 | -------------------------------------------------------------------------------- /sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of consecutive misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of consecutive misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade.
68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | ############################################## 71 | #print('match = {}'.format(matches)) 72 | ############################################### 73 | # Update track set. 74 | for track_idx, detection_idx in matches: 75 | self.tracks[track_idx].update( 76 | self.kf, detections[detection_idx]) 77 | for track_idx in unmatched_tracks: 78 | self.tracks[track_idx].mark_missed() 79 | for detection_idx in unmatched_detections: 80 | self._initiate_track(detections[detection_idx]) 81 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 82 | 83 | # Update distance metric. 84 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 85 | features, targets = [], [] 86 | for track in self.tracks: 87 | if not track.is_confirmed(): 88 | continue 89 | features += track.features 90 | targets += [track.track_id for _ in track.features] 91 | track.features = [] 92 | self.metric.partial_fit( 93 | np.asarray(features), np.asarray(targets), active_targets) 94 | 95 | def _match(self, detections): 96 | 97 | def gated_metric(tracks, dets, track_indices, detection_indices): 98 | features = np.array([dets[i].feature for i in detection_indices]) 99 | targets = np.array([tracks[i].track_id for i in track_indices]) 100 | cost_matrix = self.metric.distance(features, targets) 101 | cost_matrix = linear_assignment.gate_cost_matrix( 102 | self.kf, cost_matrix, tracks, dets, track_indices, 103 | detection_indices) 104 | 105 | return cost_matrix 106 | 107 | # Split track set into confirmed and unconfirmed tracks. 108 | confirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 110 | unconfirmed_tracks = [ 111 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 112 | 113 | # Associate confirmed tracks using appearance features. 114 | matches_a, unmatched_tracks_a, unmatched_detections = \ 115 | linear_assignment.matching_cascade( 116 | gated_metric, self.metric.matching_threshold, self.max_age, 117 | self.tracks, detections, confirmed_tracks) 118 | 119 | # Associate remaining tracks together with unconfirmed tracks using IOU. 120 | iou_track_candidates = unconfirmed_tracks + [ 121 | k for k in unmatched_tracks_a if 122 | self.tracks[k].time_since_update == 1] 123 | unmatched_tracks_a = [ 124 | k for k in unmatched_tracks_a if 125 | self.tracks[k].time_since_update != 1] 126 | matches_b, unmatched_tracks_b, unmatched_detections = \ 127 | linear_assignment.min_cost_matching( 128 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 129 | detections, iou_track_candidates, unmatched_detections) 130 | 131 | matches = matches_a + matches_b 132 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 133 | return matches, unmatched_tracks, unmatched_detections 134 | 135 | def _initiate_track(self, detection): 136 | mean, covariance = self.kf.initiate(detection.to_xyah()) 137 | self.tracks.append(Track( 138 | mean, covariance, self._next_id, self.n_init, self.max_age, 139 | detection.feature)) 140 | self._next_id += 1 141 | -------------------------------------------------------------------------------- /support/layer/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | from support import _C 4 | 5 | nms = _C.nms 6 | -------------------------------------------------------------------------------- /support/layer/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from support import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /support/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | import glob 4 | import os 5 | 6 | import torch 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | requirements = ["torch", "torchvision"] 13 | 14 | 15 | def get_extensions(): 16 | this_dir = os.path.dirname(os.path.abspath(__file__)) 17 | extensions_dir = os.path.join(this_dir, "src") 18 | 19 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 22 | 23 | sources = main_file + source_cpu 24 | extension = CppExtension 25 | 26 | extra_compile_args = {"cxx": []} 27 | define_macros = [] 28 | 29 | if torch.cuda.is_available() and CUDA_HOME is not None: 30 | extension = CUDAExtension 31 | sources += source_cuda 32 | define_macros += [("WITH_CUDA", None)] 33 | extra_compile_args["nvcc"] = [ 34 | "-DCUDA_HAS_FP16=1", 35 | "-D__CUDA_NO_HALF_OPERATORS__", 36 | "-D__CUDA_NO_HALF_CONVERSIONS__", 37 | "-D__CUDA_NO_HALF2_OPERATORS__", 38 | ] 39 | 40 | sources = [os.path.join(extensions_dir, s) for s in sources] 41 | 42 | include_dirs = [extensions_dir] 43 | 44 | ext_modules = [ 45 | extension( 46 | "support._C", 47 | sources, 48 | include_dirs=include_dirs, 49 | define_macros=define_macros, 50 | extra_compile_args=extra_compile_args, 51 | ) 52 | ] 53 | 54 | return ext_modules 55 | 56 | 57 | setup( 58 | name="support", 59 | version="0.1", 60 | ext_modules=get_extensions(), 61 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 62 | ) 63 | -------------------------------------------------------------------------------- /support/src/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /support/src/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template <typename scalar_t> 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data<uint8_t>(); 30 | auto order = order_t.data<int64_t>(); 31 | auto x1 = x1_t.data<scalar_t>(); 32 | auto y1 = y1_t.data<scalar_t>(); 33 | auto x2 = x2_t.data<scalar_t>(); 34 | auto y2 = y2_t.data<scalar_t>(); 35 | auto areas = areas_t.data<scalar_t>(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /support/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /support/src/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <ATen/ATen.h> 3 | #include <ATen/cuda/CUDAContext.h> 4 | 5 | #include <THC/THC.h> 6 | #include <THC/THCDeviceUtils.cuh> 7 | 8 | #include <vector> 9 | #include <iostream> 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data<scalar_t>(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<<blocks, threads>>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector<unsigned long long>
mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector<unsigned long long> remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data<int64_t>(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /support/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 26 | 27 | 28 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 29 | const int height, 30 | const int width); 31 | -------------------------------------------------------------------------------- /support/src/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /support/src/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 7 | m.def("nms", &nms, "non-maximum suppression"); 8 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 9 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 10 | } 11 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | from torch.utils.data import DataLoader, Dataset 5 | import numpy as np 6 | from scipy import interp 7 | import matplotlib.pyplot as plt 8 | import torch 9 | from config import params 10 | import torch.backends.cudnn as cudnn 11 | from lib import slowfastnet 12 | from Config import Config 13 | 14 | class Test_video(Dataset): 15 | def __init__(self,short_side): 16 | self.short_side=short_side 17 | def normalize(self, buffer): 18 | # Normalize the buffer 19 | # buffer = (buffer - 128)/128.0 20 | for i, frame in enumerate(buffer): 21 | frame = (frame - np.array([[[128.0, 128.0, 128.0]]]))/128.0 22 | buffer[i] = frame 23 | return buffer 24 | 25 | def to_tensor(self, buffer): 26 | # convert from [D, H, W, C] format to [C, D, H, W] (what PyTorch uses) 27 | # D = Depth (in this case, time), H = Height, W = Width, C = Channels 28 | return buffer.transpose((3, 0, 1, 2)) 29 | 30 | def crop(self, buffer, crop_size): 31 | # randomly select time index for temporal jittering 32 | # time_index = np.random.randint(buffer.shape[0] - clip_len) 33 | # Randomly select start indices in order to crop the video 34 | height_index = np.random.randint(buffer.shape[1] - crop_size) 35 | width_index = np.random.randint(buffer.shape[2] - crop_size) 36 | 37 | # crop and jitter the video using indexing. The spatial crop is performed on 38 | # the entire array, so each frame is cropped in the same location. 
The temporal 39 | # jitter takes place via the selection of consecutive frames 40 | buffer = buffer[:, 41 | height_index:height_index + crop_size, 42 | width_index:width_index + crop_size, :] 43 | 44 | return buffer 45 | 46 | def generate_video_clip(self,split_span,keep_num,fname="/home/aiuser/Desktop/_7oWZq_s_Sk.mkv"): 47 | capture = cv2.VideoCapture(fname) 48 | # Get the basic information of the video 49 | frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) 50 | frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) 51 | frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 52 | fps = int(capture.get(cv2.CAP_PROP_FPS)) 53 | # Work out how many segments to cut and how many frames to keep per segment 54 | print(frame_count,frame_width) 55 | split_len=fps*split_span 56 | split_time=frame_count/split_len 57 | if frame_height < frame_width: 58 | resize_height = np.random.randint(self.short_side[0], self.short_side[1] + 1) 59 | resize_width = int(float(resize_height) / frame_height * frame_width) 60 | else: 61 | resize_width = np.random.randint(self.short_side[0], self.short_side[1] + 1) 62 | resize_height = int(float(resize_width) / frame_width * frame_height) 63 | start_idx = 0 64 | end_idx = start_idx + split_len 65 | skip_span = split_len // keep_num if end_idx // keep_num > 0 else 1 66 | rem = split_len - skip_span * keep_num if split_len - skip_span * keep_num >= 0 else 0 67 | while split_time>0: # number of segments to cut 68 | split_time=split_time-1 69 | start_idx = start_idx + rem // 2 70 | buffer = [] 71 | sample_count=0 72 | # Process each video segment 73 | while (start_idx<end_idx): 74 | retaining, frame = capture.read() 75 | start_idx=start_idx+1 76 | if(sample_count>=keep_num): 77 | continue 78 | if start_idx % skip_span != 0 and start_idx!=0: 79 | continue 80 | if retaining is False: 81 | break 82 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 83 | if (frame_height != resize_height) or (frame_width != resize_width): 84 | frame = cv2.resize(frame, (resize_width, resize_height)) 85 | buffer.append(frame) 86 | # if len(pa.isna(frame).nonzero()[1]) != 0 or np.max(frame) > 255: 87 | # print("discard:", buffer) 88 | sample_count=sample_count+1 89 | print(np.shape(buffer)) 90 | if len(buffer) %d' % (self.id(), time.time() - start, len(bboxes), len(kept_indices))) 16 | return kept_indices 17 | 18 | def test_nms_empty(self): 19 | bboxes = torch.tensor([], dtype=torch.float).cuda() 20 | scores = torch.tensor([], dtype=torch.float).cuda() 21 | kept_indices = self._run_nms(bboxes, scores) 22 | self.assertEqual(len(kept_indices), 0) 23 | 24 | def test_nms_single(self): 25 | bboxes = torch.tensor([[5, 5, 10, 10]], dtype=torch.float).cuda() 26 | scores = torch.tensor([0.8], dtype=torch.float).cuda() 27 | kept_indices = self._run_nms(bboxes, scores) 28 | self.assertEqual(len(kept_indices), 1) 29 | self.assertListEqual(kept_indices.tolist(), [0]) 30 | 31 | def test_nms_small(self): 32 | bboxes = torch.tensor([[5, 5, 10, 10], [5, 5, 10, 10], [5, 5, 30, 30]], dtype=torch.float).cuda() 33 | scores = torch.tensor([0.6, 0.9, 0.4], dtype=torch.float).cuda() 34 | kept_indices = self._run_nms(bboxes, scores) 35 | self.assertEqual(len(kept_indices), 2) 36 | self.assertListEqual(kept_indices.tolist(), [1, 2]) 37 | 38 | def test_nms_large(self): 39 | # detections format: [[left, top, right, bottom, score], ...], which (right, bottom) is included in area 40 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 41 | detections = np.load(os.path.join(cur_dir, 'nms-large-input.npy')) 42 | detections = torch.tensor(detections, dtype=torch.float).cuda() 43 | bboxes = detections[:, 0:4] 44 | scores = detections[:, 4] 45 | 46 | kept_indices = self._run_nms(bboxes, scores) 47 | self.assertEqual(len(kept_indices), 1934) 48 |
49 | expect = np.load(os.path.join(cur_dir, 'nms-large-output.npy')) 50 | self.assertListEqual(sorted(kept_indices.tolist()), 51 | sorted(expect.tolist())) 52 | 53 | 54 | if __name__ == '__main__': 55 | assert torch.cuda.is_available(), 'NMS module requires CUDA support' 56 | torch.tensor([]).cuda() # dummy for initializing GPU 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /test_con.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.tensor as tensor 3 | import torch 4 | import f 5 | # import numpy as np 6 | # loss=nn.CrossEntropyLoss() 7 | # a=tensor(([2,3],[4,5]),dtype=torch.float) 8 | # w=tensor(torch.ones(2,1),dtype=torch.float,requires_grad=True) 9 | # out=torch.mm(a,w) 10 | # print(out) 11 | # a=5 12 | # print(tensor(5,dtype=torch.float)) 13 | # out=torch.mul(out.float(),tensor(5).float()) 14 | # # print(out) 15 | # # print(tensor([1]).float) 16 | # sm=nn.Softmax(dim=0) 17 | # print(out.view(-1)) 18 | # smo=sm(out.view(-1)) 19 | # print(smo) 20 | # smo=torch.log(smo) 21 | # loss=nn.NLLLoss() 22 | # target=tensor([1]) 23 | # loss=loss(smo.unsqueeze(0),target) 24 | # print(loss) 25 | # loss.backward() 26 | # print(w.grad.data) 27 | 28 | def test_grad(): 29 | input=tensor(([1,2,3],[4,5,6],[7,8,9]),dtype=torch.float) 30 | #weight=tensor(([0.1,0.2,0.3,0.4],[0.1,0.2,0.3,0.4],[0.1,0.2,0.3,0.4]),requires_grad=True) 31 | weight=tensor(torch.rand(3, 4),requires_grad=True) 32 | #input=input.unsqueeze(0) 33 | print(input,weight) 34 | pre=torch.mm(input,weight) 35 | #loss1=f.multilabel_soft_margin_loss() 36 | loss2=nn.MultiLabelMarginLoss() 37 | lable1=tensor(([0, 1, 1,0],),dtype=torch.float) 38 | lable2 = tensor(([0, 1, 1,0], [1, 0, 0,0], [1, 0,1 ,1]), dtype=torch.long) 39 | print(pre,lable1) 40 | loss1=f.multilabel_soft_margin_loss(pre,lable1,reduction='sum') 41 | loss1.backward() 42 | print('weight.grad.data1:',weight.grad.data) 43 | 44 | # loss2 = loss2(pre, lable2) 45 | # loss2.backward() 46 | # print('weight.grad.data2:', weight.grad.data) 47 | if __name__ == '__main__': 48 | test_grad() -------------------------------------------------------------------------------- /test_daptice.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | import math 3 | import numpy as np 4 | 5 | alist = t.randn(2, 3, 9) 6 | 7 | inputsz = np.array(alist.shape[2:]) 8 | outputsz = np.array([9]) 9 | 10 | stridesz = np.floor(inputsz / outputsz).astype(np.int32) 11 | print("stridesz",stridesz) 12 | kernelsz = inputsz - (outputsz - 1) * stridesz 13 | print("kernelsz",kernelsz) 14 | 15 | adp = t.nn.AdaptiveMaxPool1d([10]) 16 | avg = t.nn.MaxPool1d(kernel_size=list(kernelsz), stride=list(stridesz)) 17 | adplist = adp(alist) 18 | avglist = avg(alist) 19 | 20 | print(alist) 21 | print(adplist) 22 | print(avglist) 23 | -------------------------------------------------------------------------------- /test_nms.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import unittest 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from support.layer.nms import nms 9 | 10 | class TestNMS(unittest.TestCase): 11 | def _run_nms(self, bboxes, scores): 12 | start = time.time() 13 | threshold = 0.7 14 | kept_indices = nms(bboxes, scores, threshold) 15 | print('%s in %.3fs, %d -> %d' % (self.id(), time.time() - start, len(bboxes), len(kept_indices))) 16 | return kept_indices 17 
| 18 | def test_nms_empty(self): 19 | bboxes = torch.tensor([], dtype=torch.float).cuda() 20 | scores = torch.tensor([], dtype=torch.float).cuda() 21 | kept_indices = self._run_nms(bboxes, scores) 22 | self.assertEqual(len(kept_indices), 0) 23 | 24 | def test_nms_single(self): 25 | bboxes = torch.tensor([[5, 5, 10, 10]], dtype=torch.float).cuda() 26 | scores = torch.tensor([0.8], dtype=torch.float).cuda() 27 | kept_indices = self._run_nms(bboxes, scores) 28 | self.assertEqual(len(kept_indices), 1) 29 | self.assertListEqual(kept_indices.tolist(), [0]) 30 | 31 | def test_nms_small(self): 32 | bboxes = torch.tensor([[5, 5, 10, 10], [5, 5, 10, 10], [5, 5, 30, 30]], dtype=torch.float).cuda() 33 | scores = torch.tensor([0.6, 0.9, 0.4], dtype=torch.float).cuda() 34 | kept_indices = self._run_nms(bboxes, scores) 35 | self.assertEqual(len(kept_indices), 2) 36 | self.assertListEqual(kept_indices.tolist(), [1, 2]) 37 | 38 | def test_nms_large(self): 39 | # detections format: [[left, top, right, bottom, score], ...], which (right, bottom) is included in area 40 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 41 | detections = np.load(os.path.join(cur_dir, 'nms-large-input.npy')) 42 | detections = torch.tensor(detections, dtype=torch.float).cuda() 43 | bboxes = detections[:, 0:4] 44 | scores = detections[:, 4] 45 | 46 | kept_indices = self._run_nms(bboxes, scores) 47 | self.assertEqual(len(kept_indices), 1934) 48 | 49 | expect = np.load(os.path.join(cur_dir, 'nms-large-output.npy')) 50 | self.assertListEqual(sorted(kept_indices.tolist()), 51 | sorted(expect.tolist())) 52 | 53 | 54 | if __name__ == '__main__': 55 | assert torch.cuda.is_available(), 'NMS module requires CUDA support' 56 | torch.tensor([]).cuda() # dummy for initializing GPU 57 | unittest.main() 58 | -------------------------------------------------------------------------------- /tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | 
filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | 
batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 
136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | --------------------------------------------------------------------------------
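
Usage note: the sort/ modules listed above are designed to be driven frame by frame. The following is a minimal sketch of how they fit together, assuming the standard deep_sort constructor Detection(tlwh, confidence, feature) for sort/detection.py, which appears in the file tree but is not reproduced in this listing; everything else uses only the Tracker and NearestNeighborDistanceMetric APIs shown above.

# Minimal per-frame tracking loop (sketch). The Detection signature is an
# assumption taken from the usual deep_sort layout of sort/detection.py.
import numpy as np

from sort.detection import Detection
from sort.nn_matching import NearestNeighborDistanceMetric
from sort.tracker import Tracker

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
tracker = Tracker(metric)

def step(raw_detections):
    # raw_detections: list of (tlwh, confidence, feature) tuples for one frame.
    detections = [Detection(np.asarray(tlwh, dtype=float), conf, feat)
                  for tlwh, conf, feat in raw_detections]
    tracker.predict()           # Kalman prediction for every active track
    tracker.update(detections)  # cascade matching, track creation and deletion
    return [(t.track_id, t.to_tlwh())
            for t in tracker.tracks if t.is_confirmed()]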