├── LICENSE ├── Mantra_Net.py ├── NC2016_Test0613 ├── 1-ReadData.py ├── 2-resizeData.py ├── 3-SplitDataset.py └── README.md ├── README.md ├── TestModel.py ├── TestModelAUC.py ├── Train.py ├── images ├── Bayar.png ├── Overview.png ├── ROC_100Epoch.png ├── SRMFilters.png └── result-100EPOCH.png └── imports ├── CombindConv2D.py ├── ParametersManager.py ├── ZPool2D.py └── convlstm.py /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Mantra_Net.py: -------------------------------------------------------------------------------- 1 | import os 2 | from matplotlib.pyplot import imshow 3 | import torch 4 | from torch.utils.data import Dataset, DataLoader, TensorDataset 5 | from torch import conv2d, dropout, nn, sigmoid, tensor 6 | import numpy as np 7 | from torch import nn 8 | from matplotlib import pyplot as plt 9 | import torchvision.transforms as transforms 10 | # Layer files 11 | from imports.ParametersManager import * # import training-help tools 12 | from imports.CombindConv2D import * # import defination of special layers 13 | from imports.ZPool2D import * # inport Z-Pooling layers 14 | from imports.convlstm import * # Copied from https://github.com/ndrplz/ConvLSTM_pytorch 15 | 16 | # Hyperparameter 17 | ZPoolingWindows = [7, 15, 31] 18 | 19 | # L2Norm Layer 20 | class L2Norm(nn.Module): 21 | def __init__(self) -> None: 22 | super().__init__() 23 | def forward(self, x): 24 | a = torch.norm(x,2, keepdim=True) # 对于整个通道层求L2 Norm,并利用其进行标准化 25 | x = x / a 26 | return x 27 | 28 | class ManTraNet(nn.Module): 29 | def __init__(self) -> None: 30 | super(ManTraNet, self).__init__() 31 | self.combind = CombindConv2D(32) # 此处填入数值n - 9(SRM) - 3(Bayer) 后是实际存在的卷积层个数 32 | self.vgg = nn.Sequential( # 全连接Conv2D,没有pooling 33 | nn.Conv2d(32, 64, 3, 1, 1), 34 | nn.ReLU(inplace=True), 35 | 36 | nn.Conv2d(64, 64, 3, 1, 1), 37 | nn.ReLU(inplace=True), 38 | nn.Conv2d(64, 64, 3, 1, 1), 39 | nn.ReLU(inplace=True), 40 | 41 | nn.Conv2d(64, 128, 3, 1, 1), 42 | nn.ReLU(inplace=True), 43 | nn.Conv2d(128, 128, 3, 1, 1), 44 | nn.ReLU(inplace=True), 45 | nn.Conv2d(128, 128, 3, 1, 1), 46 | nn.ReLU(inplace=True), 47 | 48 | nn.Conv2d(128, 256, 3, 1, 1), 49 | nn.ReLU(inplace=True), 50 | nn.Conv2d(256, 256, 3, 1, 1), 51 | nn.ReLU(inplace=True), 52 | nn.Conv2d(256, 256, 3, 1, 1), 53 | nn.ReLU(inplace=True), 54 | 55 | nn.Conv2d(256, 256, 3, 1, 1), 56 | nn.ReLU(inplace=True), 57 | nn.Conv2d(256, 256, 3, 1, 1), 58 | nn.ReLU(inplace=True), 59 | nn.Conv2d(256, 256, 3, 1, 1), 60 | L2Norm() 61 | ) 62 | # # To ensure that the model can converge, you need to use xavier initialization 63 | for m in self.vgg.modules(): 64 | if isinstance(m, nn.Linear): 65 | pass 66 | # you can also init your conv2d layer here 67 | elif isinstance(m, nn.Conv2d): 68 | nn.init.xavier_normal_(m.weight) 69 | 70 | self.Adaption = nn.Conv2d(256, 64, 1, 1, 0) 71 | self.BN = nn.BatchNorm2d(64, momentum=0.99, eps=0.001) 72 | self.ZPool = Zpool2D_Window(64, ZPoolingWindows) 73 | self.ConvLstm2D = ConvLSTM(input_dim = 64, hidden_dim=8, kernel_size=(7,7), num_layers=1, batch_first=True) 74 | self.decision = nn.Conv2d(8, 1, 7, 1, 3) 75 | self.sig = nn.Sigmoid() 76 | def forward(self, x): 77 | # Image H x W x 3 78 | x = self.combind(x) 79 | x = self.vgg(x) 80 | # Feat H x W x 256 81 | x = self.Adaption(x) 82 | x = self.BN(x) 83 | x = self.ZPool(x) 84 | _, last_states = self.ConvLstm2D(x) 85 | x = last_states[0][0] 86 | x = self.decision(x) 87 | x = self.sig(x) 88 | return x 89 | 90 | if __name__ == "__main__": 91 | a = torch.tensor(np.arange(0, 6 * 64 *64, 1).reshape(2,3,64,64), dtype=torch.float32) 92 | net = ManTraNet() 93 | print(net(a).shape) 94 | # a = torch.tensor(np.arange(0,60,1).reshape((5,3,2,2)), dtype=torch.float32 ) 95 | # layer = L2Norm() 96 | # print(a.shape) 97 | # a = layer(a) 98 | # print(a) 99 | 100 | -------------------------------------------------------------------------------- 
/NC2016_Test0613/1-ReadData.py: -------------------------------------------------------------------------------- 1 | import pandas,random 2 | from matplotlib import pyplot as plt 3 | from PIL import Image 4 | import numpy as np 5 | paths = {'mani' : './reference/manipulation/NC2016-manipulation-ref.csv', 6 | 'removal' : './reference/removal/NC2016-removal-ref.csv', 7 | 'splice' : './reference/splice/NC2016-splice-ref.csv' 8 | } 9 | table = pandas.read_csv(paths['splice']) 10 | print(table) 11 | manupilated = table[table['IsTarget']== 'Y'] 12 | manupilated = manupilated[['ProbeFileName','ProbeMaskFileName']] 13 | print(manupilated) 14 | print(manupilated[563:564]) 15 | print(len(manupilated)) 16 | 17 | idx = random.randint(0,len(manupilated) - 1 - 10) 18 | for i in range(5): 19 | data = manupilated.iloc[idx + i] 20 | print(data) 21 | image_dir = data['ProbeFileName'] 22 | mask_dir = data['ProbeMaskFileName'] 23 | image = Image.open(image_dir) 24 | image = np.array(image) 25 | plt.subplot(2,5,i + 1) 26 | plt.imshow(image) 27 | 28 | mask = Image.open(mask_dir) 29 | mask = np.array(mask) 30 | plt.subplot(2,5 ,i + 5 + 1) 31 | plt.imshow(mask,cmap='gray') 32 | 33 | plt.show() -------------------------------------------------------------------------------- /NC2016_Test0613/2-resizeData.py: -------------------------------------------------------------------------------- 1 | import string 2 | import pandas 3 | from PIL import Image 4 | import numpy as np 5 | import csv 6 | paths = {'mani' : './reference/manipulation/NC2016-manipulation-ref.csv', 7 | 'removal' : './reference/remove/NC2016-removal-ref.csv', 8 | 'splice' : './reference/splice/NC2016-splice-ref.csv' 9 | } 10 | globalIdx = 0 11 | SavePath = '../NIST2016_500/' 12 | TargetSize = 512 13 | 14 | TableOfContent = [] 15 | selected = [] 16 | # mani removal splice 17 | def decodeByTask(taskID : string, idx): 18 | print('--Start task {}---'.format(taskID)) 19 | table = pandas.read_csv(paths[taskID]) 20 | 21 | # =====pictures with masks===== 22 | manupilated = table[table['IsTarget']== 'Y'] 23 | manupilated = manupilated[['ProbeFileName','ProbeMaskFileName']] 24 | length = len(manupilated) 25 | 26 | for i in range(length): 27 | if i % 50 == 0: 28 | print('{:.4f}% Has done'.format(i/len(manupilated) * 100)) 29 | 30 | image_dir = manupilated.iloc[i]['ProbeFileName'] 31 | if image_dir in selected: 32 | continue 33 | else: 34 | mask_dir = manupilated.iloc[i]['ProbeMaskFileName'] 35 | selected.append(image_dir) 36 | 37 | image = Image.open(image_dir) 38 | (x, y) = image.size 39 | y = int(y/x*512) 40 | image = image.resize((TargetSize, y), Image.ANTIALIAS) 41 | image_name = '{}.jpg'.format(idx) 42 | image.save('{}{}.jpg'.format(SavePath, idx)) 43 | 44 | mask = Image.open(mask_dir) 45 | (x, y) = image.size 46 | y = int(y/x*512) 47 | mask = mask.resize((TargetSize,y),Image.ANTIALIAS) 48 | mask_name = '{}_mask.jpg'.format(idx) 49 | mask.save('{}{}_mask.jpg'.format(SavePath, idx)) 50 | 51 | 52 | TableOfContent.append([image_name, mask_name, taskID]) 53 | idx +=1 54 | 55 | # ======pictures without masks====== 56 | manupilated = table[table['IsTarget']== 'N'] 57 | manupilated = manupilated[['ProbeFileName']] 58 | length = len(manupilated) 59 | 60 | for i in range(length): 61 | if i % 50 == 0: 62 | print('{:.4f}% Has done'.format(i/len(manupilated) * 100)) 63 | 64 | image_dir = manupilated.iloc[i]['ProbeFileName'] 65 | if image_dir in selected: 66 | continue 67 | else: 68 | image = Image.open(image_dir) 69 | (x, y) = image.size 70 | y = int(y/x*512) 71 | image 
= image.resize((TargetSize, y), Image.ANTIALIAS)
 72 |             image_name = '{}.jpg'.format(idx)
 73 |             image.save('{}{}.jpg'.format(SavePath, idx))
 74 | 
 75 |             TableOfContent.append([image_name, 'N', taskID])
 76 |             selected.append(image_dir)
 77 |             idx += 1
 78 |     return idx
 79 | 
 80 | def decodeNist():
 81 |     globalIdx = 0
 82 |     globalIdx = decodeByTask("mani", globalIdx)
 83 |     globalIdx = decodeByTask("removal", globalIdx)
 84 |     globalIdx = decodeByTask("splice", globalIdx)
 85 |     with open("{}index.csv".format(SavePath), 'w', newline="") as f:
 86 |         writer = csv.writer(f)
 87 |         writer.writerows(TableOfContent)
 88 | 
 89 | if __name__ == '__main__':
 90 |     decodeNist()
--------------------------------------------------------------------------------
/NC2016_Test0613/3-SplitDataset.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | from random import shuffle
 3 | DIR = '../NIST2016/'
 4 | IndexDir = DIR + 'index.csv'
 5 | TrainDir = DIR + 'Train.csv'
 6 | TestDir = DIR + 'Test.csv'
 7 | SplitRate = 0.8  # ratio of the training set to the whole dataset
 8 | 
 9 | # Read data
10 | data = []
11 | with open(IndexDir, 'r') as f:
12 |     reader = csv.reader(f)
13 |     for i in reader:
14 |         data.append(i)
15 | 
16 | def countN(array):
17 |     cnt = 0
18 |     for i in array:
19 |         if i[1] == 'N':
20 |             cnt += 1
21 |     return cnt
22 | 
23 | cnt = countN(data)
24 | length = len(data)
25 | print('---Raw Dataset---')
26 | print("There are {} pictures in total.".format(length))
27 | print("There are {:.2f}% non-manipulated pictures and {:.2f}% manipulated.".format(cnt / length * 100, 100 - cnt / length * 100))
28 | 
29 | print('---Split dataset---')
30 | shuffle(data)
31 | splitPoint = int(length * SplitRate) + 1
32 | print("len(Train.csv): {}".format(splitPoint))
33 | print("len(Test.csv): {}".format(length - splitPoint))
34 | 
35 | Train = data[:splitPoint]
36 | Test = data[splitPoint:]
37 | print('---Training set proportion---')
38 | cnt = countN(Train)
39 | length = len(Train)
40 | print("There are {:.2f}% non-manipulated pictures and {:.2f}% manipulated.".format(cnt / length * 100, 100 - cnt / length * 100))
41 | print('---Testing set proportion---')
42 | cnt = countN(Test)
43 | length = len(Test)
44 | print("There are {:.2f}% non-manipulated pictures and {:.2f}% manipulated.".format(cnt / length * 100, 100 - cnt / length * 100))
45 | 
46 | 
47 | def saveCSV(filePath, array):
48 |     with open(filePath, 'w', newline="") as f:
49 |         writer = csv.writer(f)
50 |         writer.writerows(array)
51 | 
52 | saveCSV(TrainDir, Train)
53 | saveCSV(TestDir, Test)
54 | 
--------------------------------------------------------------------------------
/NC2016_Test0613/README.md:
--------------------------------------------------------------------------------
1 | # README for NIST16 DATASET
2 | This directory is used to place the NIST16 dataset. To reduce the size of the git repo, all *.jpg files were added to the `.gitignore`.
3 | 
4 | If you want to run ManTra-Net on NIST16 data, you can download it from the official [NIST](https://www.nist.gov/itl/iad/mig/nimble-challenge-2017-evaluation) website, then view the data or resize it to (256, 256) with the Python scripts given in this directory:
5 | - [1-ReadData.py](1-ReadData.py) shows and illustrates part of the data in the NIST16 dataset.
6 | - [2-resizeData.py](2-resizeData.py) resizes the raw pictures to a smaller scale in order to accelerate training. However, in our tests, resizing eliminates many of the features used to identify modifications, which reduces the performance of the net. You can set the parameters and scale you like to reproduce your own sub-dataset. This script also creates an `index.csv` file that indexes the output images in the directory `./NIST2016`.
7 | - [3-SplitDataset.py](3-SplitDataset.py) **randomly** creates two split index files, `Train.csv` and `Test.csv`, for the training and testing datasets, respectively. It reads the `./NIST2016/index.csv` file, so you must run `2-resizeData.py` in advance for `Train.csv` and `Test.csv` to be created correctly.
8 | > The `Train.py` script in the root directory needs to read `Train.csv` and `Test.csv` to run the training process; a minimal sketch of reading these index files is shown below.
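A minimal sketch (not part of the original scripts) of how the generated index files can be read. Each row written by the scripts above is `[image_name, mask_name or 'N', taskID]`; the path below matches what `Train.py` expects, but adjust it to wherever you placed the resized dataset:

```python
import csv

# Hypothetical quick check of a split index file produced by 3-SplitDataset.py.
with open('./NIST2016/Train.csv', 'r') as f:
    for image_name, mask_name, task in csv.reader(f):
        if mask_name == 'N':
            print(task, image_name, '(pristine, no mask)')
        else:
            print(task, image_name, '->', mask_name)
```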
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ManTra-Net reproduction with PyTorch
2 | ![Powered by](https://img.shields.io/badge/Based_on-Pytorch-blue?logo=pytorch) ![GitHub repo size](https://img.shields.io/github/repo-size/SunnyHaze/ManTra-Net_Pytorch?logo=hack%20the%20box) ![GitHub](https://img.shields.io/github/license/Sunnyhaze/ManTra-Net_Pytorch?logo=license) [![Ask Me Anything !](https://img.shields.io/badge/Official%20-No-1abc9c.svg)](https://GitHub.com/Sunnyhaze) ![visitors](https://visitor-badge.glitch.me/badge?page_id=Sunnyhaze.ManTraNet-pytorch)
3 | 
4 | 
5 | Unofficial implementation of ManTra-Net, which was proposed at CVPR 2019 by Yue Wu et al.; **training code is included**!
6 | 
7 | Note that the model in this repo is written in **PyTorch**, while the original authors provide a [Keras-TensorFlow implementation on GitHub](https://github.com/ISICV/ManTraNet).
8 | 
9 | ## Introduction
10 | - Paper title: [ManTra-Net: Manipulation Tracing Network for Detection and Localization of Image Forgeries with Anomalous Features.](https://openaccess.thecvf.com/content_CVPR_2019/html/Wu_ManTra-Net_Manipulation_Tracing_Network_for_Detection_and_Localization_of_Image_CVPR_2019_paper.html)
11 | 
12 | - From the abstract: ManTra-Net is an end-to-end network that performs both detection and localization without extra preprocessing and postprocessing. The model is designed to fight real-life image forgery, which commonly involves different and combined manipulation types.
13 | 
14 | - Overview of the architecture:
15 | 
16 | ![](images/Overview.png)
17 | 
18 | ## Files in the repo
19 | - [Mantra_Net.py](Mantra_Net.py) defines the ManTra-Net class, which inherits from `torch.nn.Module`. This script imports several layers from the `./imports` directory, including:
20 |   - [CombindConv2D.py](imports/CombindConv2D.py): the combination of three kinds of layers:
21 |     - BayarConv2D, first proposed by [Bayar in 2018](https://ieeexplore.ieee.org/ielaam/10206/8361165/8335799-aam.pdf). Its filters are illustrated in the figure below, and they give the network a stronger ability to detect manipulations (a simplified sketch of the Bayar constraint is given after this list).
22 | 
23 |       ![](images/Bayar.png)
24 | 
25 |     - SRMConv2D, first proposed in [this paper](http://www.isihome.ir/freearticle/ISIHome.ir-26078.pdf) and applied to manipulation detection in [this paper](https://openaccess.thecvf.com/content_cvpr_2018/papers/Zhou_Learning_Rich_Features_CVPR_2018_paper.pdf). Three fixed filters are used to extract **noise features**; applied to an RGB picture, they produce 9 output channels from the 3 input channels.
26 | 
27 |       ![](images/SRMFilters.png)
28 | 
29 |     - The remaining filters are ordinary Conv2D filters. In this reproduction I use (32 - 3 - 9) filters of this kind, which is more than the number shown in the paper but the same as the original code on GitHub. (Really puzzling.)
30 |   - [ZPool2D.py](imports/ZPool2D.py): a pooling operation proposed in the ManTra-Net paper that quantifies the difference between a local feature and a reference dominant feature (see the simplified sketch after this list).
31 |   - [convlstm.py](imports/convlstm.py): an implementation of the ConvLSTM layer, taken from [ndrplz's GitHub repo](https://github.com/ndrplz/ConvLSTM_pytorch).
32 |   - In addition, there is a [ParametersManager.py](imports/ParametersManager.py) file in the `./imports` directory. It saves the model parameters and training statistics periodically during training and loads them back later.
33 | - [Train.py](Train.py) is the main script for training the model on the **NIST16 dataset**. If you want to run this file, you need to prepare the NIST16 data in advance; please read the [NIST16 Dataset README](NC2016_Test0613/README.md) for instructions.
34 | - [TestModel.py](TestModel.py) illustrates the performance of a trained model (`*.pt`). It shows 4 sub-figures in one plot, from left to right: raw picture, ground-truth mask, predicted feature map, predicted mask.
35 | 
36 |   ![](images/result-100EPOCH.png)
37 | - [TestModelAUC.py](TestModelAUC.py) calculates the ROC curve and AUC of trained models.
38 | 
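The constraint used by BayarConv2D can be summarised as follows: the centre tap of every learned kernel is fixed to -1 and the remaining taps are renormalized so that they sum to 1, turning each kernel into a prediction-error (residual) filter. The snippet below is a simplified, self-contained sketch of that projection; the actual implementation lives in `BayarConv2D._get_new_weight` inside [CombindConv2D.py](imports/CombindConv2D.py) and uses a mask tensor instead.

```python
import torch

def apply_bayar_constraint(weight: torch.Tensor) -> torch.Tensor:
    """Project a (out_channels, in_channels, k, k) kernel onto the Bayar
    constraint: centre tap = -1, remaining taps sum to 1 (simplified sketch)."""
    k = weight.shape[-1]
    w = weight.clone()
    w[:, :, k // 2, k // 2] = 0.0                     # zero the centre tap
    w = w / (w.sum(dim=(2, 3), keepdim=True) + 1e-7)  # make the rest sum to 1
    w[:, :, k // 2, k // 2] = -1.0                    # force the centre to -1
    return w
```

In this repository the projection is re-applied before every forward pass, so the constraint keeps holding while the kernels are trained.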
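ZPool2D can be read as a local z-score: each feature value is compared with the mean of a sliding window (plus one global window), and the deviation is scaled by a standard deviation that is lower-bounded by a small learnable constant. Below is a simplified single-window sketch of the idea; the real layer in [ZPool2D.py](imports/ZPool2D.py) computes the window means with cumulative sums and stacks several window sizes into an extra dimension, which Mantra_Net.py later feeds to the ConvLSTM as the sequence dimension.

```python
import torch
import torch.nn.functional as F

def zpool2d_sketch(x: torch.Tensor, win: int = 7, eps: float = 1e-5) -> torch.Tensor:
    """Single-window Z-pooling sketch for a (B, C, H, W) feature map:
    deviation from the local mean, scaled by a lower-bounded deviation scale."""
    mu = F.avg_pool2d(x, kernel_size=win, stride=1, padding=win // 2)  # local mean
    dev = x - mu                                                       # local deviation
    sigma = dev.std(dim=(2, 3), keepdim=True).clamp_min(eps)           # per-map scale
    return dev / sigma
```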
39 | ## Some Comments
40 | It should be noted that this model gives poor results when it is run without the official pre-trained parameters. The ROC curve below demonstrates that **simply training and validating on the NIST16 dataset does not give good results**.
41 | 
42 | ![](images/ROC_100Epoch.png)
43 | 
44 | However, we need to point out that, in order to accelerate the training process, we resized the images to (256x256), which removes many features, especially noise features. This problem is also discussed in the ManTra-Net paper (resizing and JPEG compression severely harm the performance of the model).
45 | 
46 | > We speculate that it is the strong pre-training dataset, with 385 types of fine-grained manipulated images, that makes the original model perform well. Sadly, this dataset is not open to the public :(
47 | 
48 | ## Links
49 | If you want to train this model on the CASIAv2 dataset, we provide a revised version of CASIAv2 that corrects several mistakes in the original dataset provided by its authors. Details can be found via the [link](https://github.com/SunnyHaze/CASIA2.0-Corrected-Groundtruth) shown below:
50 | 
51 | [![Readme Card](https://github-readme-stats.vercel.app/api/pin/?username=Sunnyhaze&repo=CASIA2.0-Corrected-Groundtruth)](https://github.com/SunnyHaze/CASIA2.0-Corrected-Groundtruth)
52 | 
53 | ## Cite
54 | [1] Wu, Y., AbdAlmageed, W., & Natarajan, P. (2019). ManTra-Net: Manipulation tracing network for detection and localization of image forgeries with anomalous features. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (pp. 9543-9552).
55 | ```
56 | @InProceedings{Wu_2019_CVPR,
57 | author = {Wu, Yue and AbdAlmageed, Wael and Natarajan, Premkumar},
58 | title = {ManTra-Net: Manipulation Tracing Network for Detection and Localization of Image Forgeries With Anomalous Features},
59 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
60 | month = {June},
61 | year = {2019}
62 | }
63 | ```
64 | 
65 | [2] Bayar, B., & Stamm, M. C. (2018).
Constrained convolutional neural networks: A new approach towards general purpose image manipulation detection. IEEE Transactions on Information Forensics and Security, 13(11), 2691-2706. 66 | 67 | [3] Fridrich, J., & Kodovsky, J. (2012). Rich models for steganalysis of digital images. IEEE Transactions on information Forensics and Security, 7(3), 868-882. 68 | -------------------------------------------------------------------------------- /TestModel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from Train import MyDataset 3 | from imports.ParametersManager import * 4 | from Mantra_Net import * 5 | from matplotlib import pyplot as plt 6 | import matplotlib 7 | import torchvision.transforms as transforms 8 | 9 | # Enter the *.pt model file name here to load parameters 10 | DIR = './Pre_TrainedModel/' 11 | 12 | # ===You need to change the name of the model here ===== 13 | ModelName = DIR + 'MantraNet on NIST16_model.pt' 14 | # ==================================================== 15 | 16 | parManager = ParametersManager('cuda') 17 | parManager.loadFromFile(ModelName) 18 | print("This model has done : {} Epochs.".format(parManager.EpochDone)) 19 | model = ManTraNet() 20 | model.cuda() 21 | parManager.setModelParameters(model) 22 | 23 | TrainSetDIR = './NIST2016/Train.csv' 24 | TestSetDIR = './NIST2016/Test.csv' 25 | 26 | ''' 27 | You can set the TrainSetDIR or TestSetDIR to validate on different dataset. 28 | ''' 29 | data = MyDataset(TrainSetDIR) 30 | 31 | with torch.no_grad(): 32 | model.eval() 33 | Loader = DataLoader(data, pin_memory=True, batch_size=1, sampler= torch.utils.data.sampler.SubsetRandomSampler(range(len(data)))) 34 | trans = transforms.ToPILImage() 35 | for (x,label) in Loader: 36 | out = model(x.cuda()) 37 | x = trans(torch.squeeze(x,0)) 38 | label[0,0,0] = 1 39 | y = trans(torch.squeeze(label,0)) 40 | z = trans(torch.squeeze(out.cpu(),0)) 41 | q = trans(torch.squeeze((out > 0.5).float().cpu(), 0 )) 42 | plt.subplot(1,4,1) 43 | plt.imshow(x, cmap='gray') 44 | plt.subplot(1,4,2) 45 | ''' 46 | NORM parameter here is to solve the problem of when you use plt.imshow(...) to show a Tensor is filled with '1' in every position, it will show a total black image. Because the function thought your input is all in int type, and will transform to the feild of [0-255], as the result, a '1' here is a nearly black image. 47 | 48 | So, here we need to do the norm manually. 49 | ''' 50 | plt.imshow(y, cmap='gray', norm=matplotlib.colors.Normalize(0,255)) 51 | plt.subplot(1,4,3) 52 | plt.imshow(z, cmap='gray', norm=matplotlib.colors.Normalize(0,255)) 53 | plt.subplot(1,4,4) 54 | plt.imshow(q, cmap='gray', norm=matplotlib.colors.Normalize(0,255)) 55 | plt.show() 56 | plt.close() 57 | -------------------------------------------------------------------------------- /TestModelAUC.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from Train import MyDataset 3 | from imports.ParametersManager import * 4 | from Mantra_Net import * 5 | from matplotlib import pyplot as plt 6 | import torchvision.transforms as transforms 7 | ''' 8 | This python file is used to calculate the model' s ROC and AUC value of your trained model. 
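The ROC curve is obtained by sweeping a binarization threshold over the predicted masks: cal_ROC_rate below computes the pixel-wise TPR and FPR for one threshold, and the AUC is then estimated with np.trapz over the sorted rate lists.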
9 | ''' 10 | 11 | # Enter the *.pt model file name here to load parameters 12 | DIR = './Pre_TrainedModel/' 13 | 14 | # ===You need to change the name of the model here ===== 15 | ModelName = DIR + 'MantraNet on NIST16_model (8).pt' 16 | # ==================================================== 17 | 18 | 19 | parManager = ParametersManager('cuda') 20 | 21 | 22 | parManager.loadFromFile(ModelName) 23 | print("This model has done : {} Epochs.".format(parManager.EpochDone)) 24 | model = ManTraNet() 25 | model.cuda() 26 | parManager.setModelParameters(model) 27 | # ===========hyper parameters============= 28 | dataSetChoosen = 'Train' 29 | resolution = 100 30 | 31 | # ======================================== 32 | 33 | dataDIR ={ 34 | 'Whole' : './NIST2016/index.csv', 35 | 'Train' : './NIST2016/Train.csv', 36 | 'Test' : './NIST2016/Test.csv' 37 | } 38 | 39 | data = MyDataset(dataDIR[dataSetChoosen]) 40 | 41 | with torch.no_grad(): 42 | model.eval() 43 | Loader = DataLoader(data, pin_memory=True, batch_size=1) 44 | step = int(len(Loader) / 100) 45 | print(step) 46 | trans = transforms.ToPILImage() 47 | labels = [] 48 | prediction = [] 49 | for id, (x,label) in enumerate(Loader): 50 | labels.append(torch.squeeze(torch.squeeze(label , dim=0), dim=0) ) 51 | out = model(x.cuda()) 52 | prediction.append(torch.squeeze(torch.squeeze(out.cpu(), dim=0), dim=0) ) 53 | if id % step == 0: 54 | print('{:.2f}%'.format(id/len(Loader) * 100)) 55 | 56 | labels = torch.stack(labels, dim = 0) 57 | prediction = torch.stack(prediction, dim=0) 58 | 59 | print(labels.shape) 60 | print(prediction.shape) 61 | 62 | def cal_ROC_rate(labels, predict:torch.Tensor, threshold:float): 63 | mask = (predict > threshold).float() 64 | TP, TN, FP, FN = 0, 0, 0, 0 65 | TP += torch.sum((mask == 1) & (labels == 1)) 66 | TN += torch.sum((mask == 0) & (labels == 0)) 67 | FP += torch.sum((mask == 1) & (labels == 0)) 68 | FN += torch.sum((mask == 0) & (labels == 1)) 69 | TPR = TP / (TP + FN) # True positive rate 70 | FPR = FP / (TN + FP) # False Positive Rate 71 | return TPR, FPR 72 | 73 | TPR = [] 74 | FPR = [] 75 | for threshold in range(resolution): 76 | threshold /= resolution 77 | # print(threshold) 78 | t_TPR, t_FPR = cal_ROC_rate(labels, prediction, threshold) 79 | TPR.append(t_TPR.cpu()) 80 | FPR.append(t_FPR.cpu()) 81 | 82 | TPR_array = sorted(TPR) 83 | FPR_array = sorted(FPR) 84 | 85 | AUC = np.trapz(TPR_array, FPR_array) 86 | 87 | plt.xlabel('FPR') 88 | plt.ylabel('TPR') 89 | plt.xlim((0,1)) 90 | plt.ylim((0,1)) 91 | plt.title('ROC of {} Epoch ManTra-Net trainning on NIST16 {} dataset'.format(parManager.EpochDone, dataSetChoosen)) 92 | plt.text(0.7, 0.3,r'$AUC$:{:.6F}'.format(AUC)) 93 | 94 | plt.plot(FPR, TPR) # front parameter is for x, back parameter is for y 95 | plt.show() 96 | 97 | 98 | -------------------------------------------------------------------------------- /Train.py: -------------------------------------------------------------------------------- 1 | import os 2 | from matplotlib.pyplot import imshow 3 | 4 | import torch 5 | from torch.utils.data import Dataset, DataLoader, TensorDataset 6 | from torch import conv2d, dropout, nn, sigmoid, tensor 7 | from torch.utils.data import random_split 8 | import numpy as np 9 | from imports.ParametersManager import * 10 | from Mantra_Net import * 11 | from matplotlib import pyplot as plt 12 | import torchvision.transforms as transforms 13 | import csv 14 | 15 | ''' 16 | Notice: This Trainning script is used to train on NIST16 manipulation detect dataset. 
17 | 18 | Spliting rate of Trainning set and Test set is shown below: (You can create your own split by run codes in ./NC2016_Test0613/) 19 | 20 | ---Raw Dataset--- 21 | There are 1124 pictures in total. 22 | There are 49.82% of No-manipulated pictures while 50.18% of manipulated. 23 | ---Splited dataset--- 24 | len(Train.csv): 900 25 | len(test.csv): 224 26 | ---Trainning set proportion--- 27 | There are 49.89% of No-manipulated pictures while 50.11% of manipulated. 28 | ---Testing set proportion--- 29 | There are 49.55% of No-manipulated pictures while 50.45% of manipulated. 30 | ''' 31 | 32 | # 超参数 33 | # Super parameters 34 | MODELNAME='MantraNet on NIST16' # Name of the model 35 | MODELFILEDIR = './' # 模型参数存储路径 The saving dir for model parameters 36 | BatchSize = 4 37 | LEARNINGRATE = 1e-5 38 | epochNums = 5 39 | SaveModelEveryNEpoch = 2 # 每执行多少次保存一个模型 Save model when runing every n epoch 40 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 41 | 42 | # 构建模型参数文件存取路径 43 | # Constuct the path for saving and load the model parameters. 44 | if not os.path.exists(MODELFILEDIR): 45 | os.mkdir(MODELFILEDIR) 46 | MODELFILEPATH = os.path.join(MODELFILEDIR, MODELNAME+'_model.pt') 47 | 48 | # 图片素材和索引路径: 49 | # Image file path (Dataset) and the path of indexing csv file 50 | ImagePath = './NIST2016/' 51 | TrainDatasetIndex = './NIST2016/Train.csv' 52 | TestDatasetIndex = './NIST2016/Test.csv' 53 | 54 | # 可以将数据线包装为Dataset,然后传入DataLoader中取样 55 | # Build a Dataset for local datas 56 | class MyDataset(Dataset): 57 | def __init__(self, Path) -> None: 58 | with open(Path, 'r') as f: 59 | reader = csv.reader(f) 60 | self.index = [] 61 | for i in reader: 62 | self.index.append(i) 63 | self.trans = transforms.ToTensor() 64 | 65 | def __getitem__(self, i): 66 | image = Image.open("{}{}".format(ImagePath, self.index[i][0])) 67 | image = self.trans(image) 68 | if self.index[i][1] != 'N': 69 | ''' 70 | this part is to generate a mask for manipulated images 71 | ''' 72 | mask = Image.open("{}{}".format(ImagePath, self.index[i][1])) 73 | mask = mask.convert("1") # convert to 0-1 image with PIL api 74 | mask = self.trans(mask) 75 | else: 76 | ''' 77 | torch.ones(...) generates a totally white image which represent to a mask of NO manipulation 78 | ''' 79 | mask = torch.ones((1, image.shape[1], image.shape[2])) 80 | return image, mask 81 | 82 | def __len__(self): 83 | return len(self.index) 84 | 85 | # 定义准确率函数 86 | # defination of accracy function 87 | def accuracy(output:torch.Tensor , mask): 88 | output = (output > 0.5).float() 89 | error = torch.sum(torch.abs(output - mask)) 90 | acc = 1 - error / (BatchSize * mask.shape[2] * mask.shape[3]) 91 | return acc 92 | 93 | if __name__ == "__main__": 94 | # 模型实例化 95 | # Instantiation of the model 96 | model = ManTraNet() 97 | model.cuda() 98 | # print(model) 99 | 100 | # 如果有“半成品”则导入参数 101 | # If there is a pre-trained model, load it. 
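# (ParametersManager bundles the epoch counter, the learning-rate/accuracy/loss history
#  and the model state_dict into a single .pt file; see imports/ParametersManager.py.)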
102 | parManager = ParametersManager(device) 103 | if os.path.exists(MODELFILEPATH): 104 | parManager.loadFromFile(MODELFILEPATH) 105 | parManager.setModelParameters(model) 106 | else: 107 | print('===No pre-trained model found!===') 108 | 109 | criterion = nn.BCELoss() 110 | optimizer = torch.optim.Adam(model.parameters(), LEARNINGRATE) 111 | 112 | # 构建数据集 113 | # Constrct the dataset 114 | TrainDataset = MyDataset(TrainDatasetIndex) 115 | TestDataset = MyDataset(TestDatasetIndex) 116 | 117 | print('Trainset size: {}'.format(len(TrainDataset))) 118 | print('Testset size: {}'.format(len(TestDataset))) 119 | 120 | # # 分割数据集 121 | # # Split dataset in TrainingSet and TestSet 122 | # TrainDataset, TestDataset = random_split(dataset = Datas, lengths = [train_size,test_size],generator=torch.Generator().manual_seed(0)) 123 | 124 | # 构建训练集读取器 125 | # Consruct the Dataloader for both datasets 126 | TrainLoader = DataLoader(TrainDataset,num_workers=8, pin_memory=True, batch_size=BatchSize, sampler= torch.utils.data.sampler.SubsetRandomSampler(range(len(TrainDataset)))) 127 | # 构建测试集读取器: 128 | TestLoader = DataLoader(TestDataset,num_workers=8, pin_memory=True, batch_size=BatchSize, sampler= torch.utils.data.sampler.SubsetRandomSampler(range(len(TestDataset)))) 129 | 130 | # 输出训练集长度 print the length of training set 131 | print('len(TrainLoader):{}'.format(len(TrainLoader))) 132 | 133 | TrainACC = [] 134 | TestACC = [] 135 | GlobalLoss = [] 136 | for epoch in range(epochNums): 137 | print("===开始本轮的Epoch {} == 总计是Epoch {}===".format(epoch, parManager.EpochDone)) 138 | 139 | # 收集训练参数 140 | # Collect the tranning statistics 141 | epochAccuracy = [] 142 | epochLoss = [] 143 | model.train() 144 | #=============实际训练流程================= 145 | #=============Trainning step start================= 146 | for batch_id, (inputs,label) in enumerate(TrainLoader): 147 | # torch.train() 148 | optimizer.zero_grad() 149 | output = model(inputs.cuda()) 150 | loss = criterion(output,label.cuda()) 151 | loss.backward() 152 | optimizer.step() 153 | epochAccuracy.append(accuracy(output,label.cuda()).cpu()) 154 | epochLoss.append(loss.item()) 155 | # print status 156 | if batch_id % (int(len(TrainLoader) / 20)) == 0: 157 | print(" Now processing step[{}/{}], Current Epoch accuracy:{:.2f}%,Loss:{:.8f}".format(batch_id,len(TrainLoader), np.mean(epochAccuracy) * 100, loss)) 158 | #==============本轮训练结束============== 159 | #=============Trainning step finish================= 160 | # 收集训练集准确率 161 | TrainACC.append(np.mean(epochAccuracy)) 162 | GlobalLoss.append(np.mean(epochLoss)) 163 | # ==========进行一次验证集测试============ 164 | # ==========Start a test set test============ 165 | localTestACC = [] 166 | model.eval() # 进入评估模式,节约开销 167 | for inputs, label in TestLoader: 168 | torch.no_grad() # 上下文管理器,此部分内不会追踪梯度/ 169 | output = model(inputs.cuda()) 170 | localTestACC.append(accuracy(output,label.cuda()).cpu()) 171 | # ==========验证集测试结束================ 172 | # ==========test set test done============ 173 | TestACC.append(np.mean(localTestACC)) 174 | print("Current Epoch Done, Train accuracy: {:3f}%, Test accuracy: {:3f}%".format(TrainACC[-1] * 100, TestACC[-1] * 100)) 175 | # 暂存结果到参数管理器 176 | # Save results to parameters-manager 177 | parManager.oneEpochDone(LEARNINGRATE,TrainACC[-1],TestACC[-1],GlobalLoss[-1]) 178 | # 周期性保存结果到文件 179 | # Save model to file periodically 180 | if epoch == epochNums - 1 or epoch % SaveModelEveryNEpoch == 0: 181 | parManager.loadModelParameters(model) 182 | parManager.saveToFile(MODELFILEPATH) 183 | 184 | # 
===========view the results============= 185 | parManager.show() 186 | plt.figure(figsize=(10,7)) 187 | plt.subplots_adjust(left=0.1,bottom=0.1,top=0.9,right=0.9,wspace=0.1,hspace=0.3) 188 | plt.subplot(2,1,1) 189 | plt.plot(range(parManager.EpochDone),parManager.TrainACC,marker='*' ,color='r',label='Train') 190 | plt.plot(range(parManager.EpochDone),parManager.TestACC,marker='*' ,color='b',label='Test') 191 | 192 | plt.xlabel('Epochs') 193 | plt.ylabel('ACC') 194 | plt.legend() 195 | plt.title("{} on Nist".format(MODELNAME)) 196 | plt.text(int(parManager.EpochDone *0.8),0.5,'Train ACC: {:.6f}\nTest ACC: {:.6f}\nEpoch:{}'.format(parManager.TrainACC[-1],parManager.TestACC[-1], parManager.EpochDone)) 197 | plt.subplot(2,1,2) 198 | plt.title('Learning Rates') 199 | plt.xlabel('Epoch') 200 | plt.ylabel('$log_{10}$(Learning Rates)') 201 | plt.ylim(0,-5) 202 | plt.plot([x for x in range(parManager.EpochDone)], np.log(parManager.LearningRate) / np.log(10)) 203 | plt.savefig('Train-{}-{}Epoch.jpg'.format(MODELNAME,parManager.EpochDone)) 204 | plt.show() -------------------------------------------------------------------------------- /images/Bayar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/ManTraNet-Pytorch/008b38329d9291e6fbcfb5695a7cade524dac575/images/Bayar.png -------------------------------------------------------------------------------- /images/Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/ManTraNet-Pytorch/008b38329d9291e6fbcfb5695a7cade524dac575/images/Overview.png -------------------------------------------------------------------------------- /images/ROC_100Epoch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/ManTraNet-Pytorch/008b38329d9291e6fbcfb5695a7cade524dac575/images/ROC_100Epoch.png -------------------------------------------------------------------------------- /images/SRMFilters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/ManTraNet-Pytorch/008b38329d9291e6fbcfb5695a7cade524dac575/images/SRMFilters.png -------------------------------------------------------------------------------- /images/result-100EPOCH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SunnyHaze/ManTraNet-Pytorch/008b38329d9291e6fbcfb5695a7cade524dac575/images/result-100EPOCH.png -------------------------------------------------------------------------------- /imports/CombindConv2D.py: -------------------------------------------------------------------------------- 1 | import os 2 | from turtle import forward 3 | import torch 4 | from torch import nn, tensor 5 | import torch.nn.functional 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | import torchvision.transforms as transforms 9 | from PIL import Image 10 | ''' 11 | In my Own under standing of the SRM layer, with input chanel 3 and output chanle3, but different with the Mantra-Net source code. 
12 | ''' 13 | # class SRMConv2D(nn.Module): 14 | # def __init__(self): 15 | # super(SRMConv2D,self).__init__() 16 | # q = [4, 12, 2] # coefficient of the kernels 17 | # self.kernel1 = np.array([ 18 | # [0, 0, 0, 0, 0], 19 | # [0,-1, 2,-1, 0], 20 | # [0, 2,-4, 2, 0], 21 | # [0,-1, 2,-1, 0], 22 | # [0, 0, 0, 0, 0] 23 | # ],dtype=np.float32) 24 | # self.kernel2 = np.array([ 25 | # [-1, 2,-2, 2,-1], 26 | # [2, -6, 8,-6, 2], 27 | # [-2, 8,-12,8,-2], 28 | # [2, -6, 8,-6, 2], 29 | # [-1, 2,-2, 2,-1] 30 | # ],dtype=np.float32) 31 | # self.kernel3 = np.array([ 32 | # [0, 0, 0, 0, 0], 33 | # [0, 0, 0, 0, 0], 34 | # [0, 1,-2, 1, 0], 35 | # [0, 0, 0, 0, 0], 36 | # [0, 0, 0, 0, 0], 37 | # ],dtype=np.float32) 38 | # # shape (3,3,5,5) 39 | # weight = torch.tensor( np.array([ 40 | # [self.kernel1 / q[0] for i in range(3)], 41 | # [self.kernel2 / q[1] for i in range(3)], 42 | # [self.kernel3 / q[2] for i in range(3)], 43 | # ]),dtype=torch.float32) 44 | # print(weight) 45 | # # weight = torch.transpose(weight) 46 | # self.weight = torch.nn.Parameter(weight, requires_grad=False) 47 | 48 | # def forward(self, x): 49 | # with torch.no_grad(): 50 | # return torch.nn.functional.conv2d(x, weight=self.weight, padding = 2) 51 | ''' 52 | BayarConv2D, refering from 'Constrained Convolutional Neural Networks: A New Approach Towards General Purpose Image Manipulation Detection' 53 | ''' 54 | class BayarConv2D(nn.Module): 55 | def __init__(self ,inputchanel, outputchanel, kernelsize) : 56 | super(BayarConv2D,self).__init__() 57 | self.mask = None 58 | weight = torch.Tensor(inputchanel, outputchanel, kernelsize, kernelsize) 59 | self.weight = torch.nn.Parameter(weight) 60 | nn.init.xavier_normal_(self.weight) 61 | # print(self.weight) 62 | 63 | def _initialize_mask(self) : 64 | chanelin = self.weight.shape[0] 65 | chanelout = self.weight.shape[1] 66 | ksize = self.weight.shape[2] 67 | m = np.zeros([chanelin, chanelout, ksize, ksize]).astype('float32') 68 | m[:,:,ksize//2,ksize//2] = 1. 69 | self.mask = torch.tensor(m).cuda() 70 | 71 | def _get_new_weight(self) : 72 | with torch.no_grad(): 73 | if self.mask is None : 74 | self._initialize_mask() 75 | self.weight.data *= (1-self.mask) 76 | # print(self.weight) 77 | rest_sum = torch.sum(self.weight, dim=(2,3), keepdims=True) 78 | # print('sum') 79 | # print(rest_sum) 80 | # print(rest_sum.shape) 81 | self.weight.data /= rest_sum + 1e-7 82 | self.weight.data -= self.mask 83 | # print(self.weight) 84 | # print(self.weight.grad) 85 | 86 | def forward(self, x): 87 | self._get_new_weight() 88 | return torch.nn.functional.conv2d(x, weight=self.weight, padding = 2) 89 | 90 | ''' 91 | Kernel coefficient copy from the Mantra-Net source code, with 3 input chanels and 9 output chanels, which is unexpected different from the papar. 92 | ''' 93 | class SRMConv2D(nn.Module): 94 | def _get_srm_list(self) : 95 | # srm kernel 1 96 | srm1 = np.zeros([5,5]).astype('float32') 97 | srm1[1:-1,1:-1] = np.array([[-1, 2, -1], 98 | [2, -4, 2], 99 | [-1, 2, -1]] ) 100 | srm1 /= 4. 101 | # srm kernel 2 102 | srm2 = np.array([[-1, 2, -2, 2, -1], 103 | [2, -6, 8, -6, 2], 104 | [-2, 8, -12, 8, -2], 105 | [2, -6, 8, -6, 2], 106 | [-1, 2, -2, 2, -1]]).astype('float32') 107 | srm2 /= 12. 108 | # srm kernel 3 109 | srm3 = np.zeros([5,5]).astype('float32') 110 | srm3[2,1:-1] = np.array([1,-2,1]) 111 | srm3 /= 2. 
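        # The three fixed SRM residual kernels above (normalized by 4, 12 and 2) act as
        # high-pass noise extractors; _build_SRM_kernel below applies each of them to
        # every RGB channel, giving 9 output channels for a 3-channel input.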
112 | return [ srm1, srm2, srm3 ] 113 | 114 | def _build_SRM_kernel(self) : 115 | kernel = [] 116 | srm_list = self._get_srm_list() 117 | for idx, srm in enumerate( srm_list ): 118 | for ch in range(3) : 119 | this_ch_kernel = np.zeros([5,5,3]).astype('float32') 120 | this_ch_kernel[:,:,ch] = srm 121 | kernel.append( this_ch_kernel ) 122 | kernel = np.stack( kernel, axis=-1 ) 123 | # srm_kernel = K.variable( kernel, dtype='float32', name='srm' ) 124 | ''' 125 | Keras kernel form (kernel_width, kernel_height, inputChanels, outputChanels) 126 | pytorch Kernal form (inputChanels, outputChanel, kernel_size, kernel_size) 127 | 128 | There is a need to switch the dim to fit in pytorch with the Mantra-Net source code writting in keras. 129 | ''' 130 | kernel = np.swapaxes(kernel,1,2) 131 | # kernel = np.swapaxes(kernel,1,2) 132 | kernel = np.swapaxes(kernel,0,3) 133 | return kernel 134 | 135 | def __init__(self): 136 | super(SRMConv2D,self).__init__() 137 | self.weight = torch.tensor(self._build_SRM_kernel()).cuda() 138 | def forward(self, x): 139 | with torch.no_grad(): 140 | return torch.nn.functional.conv2d(x, weight=self.weight, padding = 2) 141 | 142 | class CombindConv2D(nn.Module): 143 | def __init__(self, outputChanels) -> None: 144 | super(CombindConv2D, self).__init__() 145 | self.subLayer1 = BayarConv2D(3,3,5) # outchanel 3 146 | self.relu1 = nn.ReLU(inplace=True) 147 | self.subLayer2 = SRMConv2D() # outchanel 9 148 | self.relu2 = nn.ReLU(inplace=True) 149 | self.subLayer3 = nn.Conv2d(3,outputChanels - 3 - 9, kernel_size=5, padding=2) # 总数-12个普通卷积层 150 | self.relu3 = nn.ReLU(inplace=True) 151 | def forward(self,x): 152 | x1 = self.subLayer1(x) 153 | x1 = self.relu1(x1) 154 | x2 = self.subLayer2(x) 155 | x2 = self.relu2(x2) 156 | x3 = self.subLayer3(x) 157 | x3 = self.relu3(x3) 158 | # print(x1.shape) 159 | # print(x2.shape) 160 | # print(x3.shape) 161 | x = torch.cat([x1,x2,x3], dim=1) 162 | # print(x.shape) 163 | return x 164 | 165 | if __name__ =='__main__': 166 | # 测试完整Combind 167 | # net = CombindConv2D(16) 168 | # # image_dir = 'NC2016_Test0613/world/NC2016_2198.jpg' 169 | # image_dir = 'NC2016_Test0613/probe/NC2016_8411.jpg' 170 | # # image_dir = '1.jpg' 171 | # image = Image.open(image_dir) 172 | # image = np.array(image) 173 | # print(image.shape) 174 | # trans = transforms.ToTensor() 175 | # t = trans(image).unsqueeze(0) 176 | # t = net(t) 177 | # print(net) 178 | # print(t.shape) 179 | ########## 180 | # 测试 181 | # a = torch.Tensor() 182 | # b = BayarConv2D(3,3,5) 183 | # b._initialize_mask() 184 | # b._get_new_weight() 185 | net = SRMConv2D() 186 | # image_dir = 'NC2016_Test0613/world/NC2016_2198.jpg' 187 | image_dir = 'NC2016_Test0613/probe/NC2016_8411.jpg' 188 | # image_dir = 'NC2016_Test0613/probe/NC2016_6003.jpg' 189 | # image_dir = '1.jpg' 190 | image = Image.open(image_dir).resize((1200,1200)) 191 | image = np.array(image) 192 | # image = np.concatenate([image,image,image],axis=-1) 193 | print(image.shape) 194 | trans = transforms.ToTensor() 195 | t = trans(image).unsqueeze(0) 196 | t = net(t) 197 | print(t[0].shape) 198 | # 3层直接输出 199 | trans = transforms.ToPILImage() 200 | # img = trans(t[0][3:9]) 201 | # ########## 超厚 202 | # print(t[0] * 255) 203 | # number = np.array(img) 204 | # print(np.max(number)) 205 | # print(np.min(number)) 206 | final = torch.zeros(3,image.shape[0],image.shape[1]) 207 | final[0] += t[0][0] 208 | final[0] += t[0][3] 209 | final[0] += t[0][6] 210 | final[1] += t[0][1] 211 | final[1] += t[0][4] 212 | final[1] += t[0][7] 213 | final[2] += 
t[0][2] 214 | final[2] += t[0][5] 215 | final[2] += t[0][8] 216 | print(final) 217 | img=trans(final) 218 | # 单层输出 219 | t = t[0][0] 220 | M = torch.max(final) 221 | Mi = torch.min(final) 222 | print(M,' ',Mi) 223 | # img = np.array((final- Mi)/(M - Mi) * 255, dtype=np.float32) 224 | img = trans(final) 225 | print(np.max(img)) 226 | print(np.min(img)) 227 | plt.imshow(img, cmap='jet') 228 | plt.show() -------------------------------------------------------------------------------- /imports/ParametersManager.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | # 定义一个管理模型训练时参数的类 4 | class ParametersManager(): 5 | def __init__(self,device) -> None: 6 | self.device = device 7 | # 具体数据 8 | self.EpochDone = 0 # 已经完成的Epoch个数 9 | self.LearningRate = [] # 各个Epoch的学习率 10 | self.TrainACC = [] # 训练集准确率 11 | self.TestACC = [] # 测试集准确率 12 | self.loss = [] # loss 13 | self.state_dict = 0 # 模型的具体权重 14 | self.datas = {} 15 | # 打包 16 | def pack(self): 17 | self.datas = { 18 | 'EpochDone' : self.EpochDone, # 已经完成的Epoch个数 19 | 'LearningRate' : self.LearningRate, # 各个Epoch的学习率 20 | 'TrainACC' : self.TrainACC, # 训练集准确率 21 | 'TestACC' : self.TestACC, # 测试集准确率 22 | 'loss' : self.loss, # loss 23 | 'state_dict' : self.state_dict, # 模型的具体权重 24 | } 25 | # 解包 26 | def unpack(self): 27 | self.EpochDone = self.datas['EpochDone'] 28 | self.LearningRate = self.datas['LearningRate'] 29 | self.TestACC = self.datas['TestACC'] 30 | self.TrainACC = self.datas['TrainACC'] 31 | self.loss = self.datas['loss'] 32 | self.state_dict = self.datas['state_dict'] 33 | # 从脚本中获取模型的参数 34 | def loadModelParameters(self, model:nn.Module): 35 | self.state_dict = model.state_dict() 36 | 37 | # 从脚本中将参数输出给模型 38 | def setModelParameters(self, model:nn.Module): 39 | model.load_state_dict(self.state_dict) 40 | 41 | # 从脚本中获取一个Epoch的 42 | def oneEpochDone(self, LastLearningRate, LastTrainACC, lastTestACC, lastLoss): 43 | self.EpochDone += 1 44 | self.LearningRate.append(LastLearningRate) 45 | self.TrainACC.append(LastTrainACC) 46 | self.TestACC.append(lastTestACC) 47 | self.loss.append(lastLoss) 48 | 49 | # 保存数据到文件 50 | def saveToFile(self, path): 51 | self.pack() 52 | torch.save(self.datas, path) 53 | print('===succesfully saved model!===') 54 | 55 | # 从文件中读取数据 56 | def loadFromFile(self, path): 57 | self.datas = torch.load(path,map_location=torch.device(self.device)) 58 | self.unpack() 59 | print('===Load model succesfully!===') 60 | # 展示当前存储的模型的数据 61 | def show(self): 62 | print('===' * 10 + 63 | '''\n此模型已经训练了{}个Epoch \n 64 | 目前的训练集准确率为 {:.3f}% \n 65 | 目前的测试集准确率为 {:.3f}% \n'''.format(self.EpochDone, self.TrainACC[-1] * 100, self.TestACC[-1] * 100),'===' * 10) -------------------------------------------------------------------------------- /imports/ZPool2D.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, tensor 3 | import torch.nn.functional 4 | import numpy as np 5 | import torchvision.transforms as transforms 6 | from PIL import Image 7 | 8 | class Zpool2D_Window(nn.Module): 9 | def __init__(self, inputChannels, window_size_list, min_value=1e-5) -> None: 10 | super().__init__() 11 | self.min_value = min_value 12 | self.window_size_list = window_size_list 13 | self.maxWinSize = np.max(window_size_list) 14 | 15 | self.TinyWeight = nn.Parameter(torch.full([1,1,inputChannels,1,1], min_value, dtype=torch.float32) ,requires_grad= True) 16 | self.TinyWeight.data.clamp(min=0) 17 | 18 | def 
_init_padding_buff(self, x): # include Cumulate sum 19 | paddingLayer = nn.ZeroPad2d(self.maxWinSize//2 + 1) 20 | x_pad = paddingLayer(x) 21 | x_cum = torch.cumsum(x_pad, 2) 22 | x_cum = torch.cumsum(x_cum, 3) 23 | return x_cum 24 | 25 | def _compute_a_window_avg(self, x, winSize): 26 | # --left top Big square block-- coordinate 27 | top = self.maxWinSize // 2 - winSize // 2 28 | bottom = top + winSize 29 | left = self.maxWinSize // 2 - winSize // 2 30 | right = left + winSize 31 | 32 | Ax, Ay = (left, top) 33 | Bx, By = (right, top) 34 | Cx, Cy = (right, bottom) 35 | Dx, Dy = (left, bottom) 36 | 37 | # negative number , but can be parse to a positve when using fomula like this -> [:-1] 38 | 39 | # --right bottom Big square block-- coordinate 40 | top0 = -self.maxWinSize // 2 - winSize // 2 - 1 41 | bottom0 = top0 + winSize 42 | left0 = -self.maxWinSize // 2 - winSize // 2 - 1 43 | right0 = left0 + winSize 44 | 45 | Ax0, Ay0 = (left0, top0) 46 | Bx0, By0 = (right0, top0) 47 | Cx0, Cy0 = (right0, bottom0) 48 | Dx0, Dy0 = (left0, bottom0) 49 | 50 | counts = torch.ones_like(x) 51 | # print(counts) 52 | counts_pading = self._init_padding_buff(counts) 53 | # print(counts_pading) 54 | x_padding = self._init_padding_buff(x) 55 | 56 | counts_2d = counts_pading[:,:,Ay:Ay0, Ax:Ax0] \ 57 | + counts_pading[:,:,Cy:Cy0, Cx:Cx0] \ 58 | - counts_pading[:,:,By:By0, Bx:Bx0] \ 59 | - counts_pading[:,:,Dy:Dy0, Dx:Dx0] 60 | 61 | sum_x_2d = x_padding[:,:,Ay:Ay0, Ax:Ax0] \ 62 | + x_padding[:,:,Cy:Cy0, Cx:Cx0] \ 63 | - x_padding[:,:,By:By0, Bx:Bx0] \ 64 | - x_padding[:,:,Dy:Dy0, Dx:Dx0] 65 | avg_x_2d = sum_x_2d / counts_2d 66 | return avg_x_2d 67 | 68 | def forward(self, x): 69 | outputFeature = [] 70 | # 1. window 71 | for win in self.window_size_list: 72 | avg_x_2d = self._compute_a_window_avg(x, win) 73 | D_x = x - avg_x_2d 74 | outputFeature.append(D_x) 75 | # 2. global 76 | mu_f = torch.mean(x, dim=(2,3), keepdim=True) 77 | D_f = x - mu_f 78 | outputFeature.append(D_f) 79 | # 5 Dim Tensor arrange : (Batch, Diff_Windows, channel, width, height ) 80 | outputFeature = torch.stack(outputFeature,1) 81 | 82 | std_x = torch.std(outputFeature, dim=(3,4),keepdim=True) 83 | std_x = torch.maximum(std_x, self.TinyWeight + self.min_value / 10.) 84 | 85 | x = torch.stack([x for i in range(len(self.window_size_list)+ 1) ], dim=1) 86 | Z_f = x / std_x 87 | 88 | return Z_f 89 | # a = np.ones(60) * 1.1 90 | a = np.arange(0,36 * 3,1) 91 | # a = np.zeros(60) 92 | a = np.resize(a, (1,3,6,6)) 93 | a = torch.tensor(a, dtype=torch.float32) 94 | # print(a) 95 | net = Zpool2D_Window(3, [3,5,7]) 96 | # a = net._init_padding_buff(a) 97 | # print(a) 98 | a = net(a) 99 | # print(net._init_padding_buff(a)) -------------------------------------------------------------------------------- /imports/convlstm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class ConvLSTMCell(nn.Module): 6 | 7 | def __init__(self, input_dim, hidden_dim, kernel_size, bias): 8 | """ 9 | Initialize ConvLSTM cell. 10 | 11 | Parameters 12 | ---------- 13 | input_dim: int 14 | Number of channels of input tensor. 15 | hidden_dim: int 16 | Number of channels of hidden state. 17 | kernel_size: (int, int) 18 | Size of the convolutional kernel. 19 | bias: bool 20 | Whether or not to add the bias. 
21 |         """
22 | 
23 |         super(ConvLSTMCell, self).__init__()
24 | 
25 |         self.input_dim = input_dim
26 |         self.hidden_dim = hidden_dim
27 | 
28 |         self.kernel_size = kernel_size
29 |         self.padding = kernel_size[0] // 2, kernel_size[1] // 2
30 |         self.bias = bias
31 | 
32 |         self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
33 |                               out_channels=4 * self.hidden_dim,
34 |                               kernel_size=self.kernel_size,
35 |                               padding=self.padding,
36 |                               bias=self.bias)
37 | 
38 |     def forward(self, input_tensor, cur_state):
39 |         h_cur, c_cur = cur_state
40 | 
41 |         combined = torch.cat([input_tensor, h_cur], dim=1)  # concatenate along channel axis
42 | 
43 |         combined_conv = self.conv(combined)
44 |         cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
45 |         i = torch.sigmoid(cc_i)
46 |         f = torch.sigmoid(cc_f)
47 |         o = torch.sigmoid(cc_o)
48 |         g = torch.tanh(cc_g)
49 | 
50 |         c_next = f * c_cur + i * g
51 |         h_next = o * torch.tanh(c_next)
52 | 
53 |         return h_next, c_next
54 | 
55 |     def init_hidden(self, batch_size, image_size):
56 |         height, width = image_size
57 |         return (torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device),
58 |                 torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device))
59 | 
60 | 
61 | class ConvLSTM(nn.Module):
62 | 
63 |     """
64 | 
65 |     Parameters:
66 |         input_dim: Number of channels in input
67 |         hidden_dim: Number of hidden channels
68 |         kernel_size: Size of kernel in convolutions
69 |         num_layers: Number of LSTM layers stacked on each other
70 |         batch_first: Whether or not dimension 0 is the batch dimension
71 |         bias: Bias or no bias in Convolution
72 |         return_all_layers: Return the list of computations for all layers
73 |         Note: Will do same padding.
74 | 
75 |     Input:
76 |         A tensor of size B, T, C, H, W or T, B, C, H, W
77 |     Output:
78 |         A tuple of two lists of length num_layers (or length 1 if return_all_layers is False).
79 |             0 - layer_output_list is the list of lists of length T of each output
80 |             1 - last_state_list is the list of last states
81 |                 each element of the list is a tuple (h, c) for hidden state and memory
82 |     Example:
83 |         >> x = torch.rand((32, 10, 64, 128, 128))
84 |         >> convlstm = ConvLSTM(64, 16, (3, 3), 1, True, True, False)
85 |         >> _, last_states = convlstm(x)
86 |         >> h = last_states[0][0]  # 0 for layer index, 0 for h index
87 |     """
88 | 
89 |     def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
90 |                  batch_first=False, bias=True, return_all_layers=False):
91 |         super(ConvLSTM, self).__init__()
92 | 
93 |         self._check_kernel_size_consistency(kernel_size)
94 | 
95 |         # Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers
96 |         kernel_size = self._extend_for_multilayer(kernel_size, num_layers)
97 |         hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
98 |         if not len(kernel_size) == len(hidden_dim) == num_layers:
99 |             raise ValueError('Inconsistent list length.')
100 | 
101 |         self.input_dim = input_dim
102 |         self.hidden_dim = hidden_dim
103 |         self.kernel_size = kernel_size
104 |         self.num_layers = num_layers
105 |         self.batch_first = batch_first
106 |         self.bias = bias
107 |         self.return_all_layers = return_all_layers
108 | 
109 |         cell_list = []
110 |         for i in range(0, self.num_layers):
111 |             cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
112 | 
113 |             cell_list.append(ConvLSTMCell(input_dim=cur_input_dim,
114 |                                           hidden_dim=self.hidden_dim[i],
115 |                                           kernel_size=self.kernel_size[i],
116 |                                           bias=self.bias))
117 | 
118 |         self.cell_list = nn.ModuleList(cell_list)
119 | 
120 |     def forward(self, input_tensor, hidden_state=None):
121 |         """
122 | 
123 |         Parameters
124 |         ----------
125 |         input_tensor: torch.Tensor
126 |             5-D Tensor either of shape (t, b, c, h, w) or (b, t, c, h, w)
127 |         hidden_state: None
128 |             Stateful mode is not implemented yet, so this must be None.
129 | 
130 |         Returns
131 |         -------
132 |         layer_output_list, last_state_list
133 |         """
134 |         if not self.batch_first:
135 |             # (t, b, c, h, w) -> (b, t, c, h, w)
136 |             input_tensor = input_tensor.permute(1, 0, 2, 3, 4)
137 | 
138 |         b, _, _, h, w = input_tensor.size()
139 | 
140 |         # Implement stateful ConvLSTM
141 |         if hidden_state is not None:
142 |             raise NotImplementedError()
143 |         else:
144 |             # Since the init is done in forward, the image size can be passed here.
145 |             hidden_state = self._init_hidden(batch_size=b,
146 |                                              image_size=(h, w))
147 | 
148 |         layer_output_list = []
149 |         last_state_list = []
150 | 
151 |         seq_len = input_tensor.size(1)
152 |         cur_layer_input = input_tensor
153 | 
154 |         for layer_idx in range(self.num_layers):
155 | 
156 |             h, c = hidden_state[layer_idx]
157 |             output_inner = []
158 |             for t in range(seq_len):
159 |                 h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :, :, :],
160 |                                                  cur_state=[h, c])
161 |                 output_inner.append(h)
162 | 
163 |             layer_output = torch.stack(output_inner, dim=1)
164 |             cur_layer_input = layer_output
165 | 
166 |             layer_output_list.append(layer_output)
167 |             last_state_list.append([h, c])
168 | 
169 |         if not self.return_all_layers:
170 |             layer_output_list = layer_output_list[-1:]
171 |             last_state_list = last_state_list[-1:]
172 | 
173 |         return layer_output_list, last_state_list
174 | 
175 |     def _init_hidden(self, batch_size, image_size):
176 |         init_states = []
177 |         for i in range(self.num_layers):
178 |             init_states.append(self.cell_list[i].init_hidden(batch_size, image_size))
179 |         return init_states
180 | 
181 |     @staticmethod
182 |     def _check_kernel_size_consistency(kernel_size):
183 |         if not (isinstance(kernel_size, tuple) or
184 |                 (isinstance(kernel_size, list) and all([isinstance(elem, tuple) for elem in kernel_size]))):
185 |             raise ValueError('`kernel_size` must be tuple or list of tuples')
186 | 
187 |     @staticmethod
188 |     def _extend_for_multilayer(param, num_layers):
189 |         if not isinstance(param, list):
190 |             param = [param] * num_layers
191 |         return param
192 | 
--------------------------------------------------------------------------------
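
Usage sketch (not a file from this repository): the snippet below shows one plausible way to combine the ConvLSTM layer and the ParametersManager above. It assumes the repository root is on PYTHONPATH so the modules import as imports.convlstm and imports.ParametersManager; the toy tensor sizes, the metric values and the checkpoint name 'demo_checkpoint.pth' are illustrative placeholders only.

import torch
from imports.convlstm import ConvLSTM
from imports.ParametersManager import ParametersManager

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# One ConvLSTM layer, 3x3 kernels, batch-first input, only the last layer returned.
convlstm = ConvLSTM(input_dim=64, hidden_dim=16, kernel_size=(3, 3), num_layers=1,
                    batch_first=True, bias=True, return_all_layers=False).to(device)

x = torch.rand(2, 5, 64, 32, 32, device=device)    # (B, T, C, H, W)
layer_outputs, last_states = convlstm(x)
h_last, c_last = last_states[0]                     # hidden / cell state of the last layer
print(layer_outputs[0].shape, h_last.shape)         # torch.Size([2, 5, 16, 32, 32]) torch.Size([2, 16, 32, 32])

# Track metrics and checkpoint the weights with ParametersManager.
pm = ParametersManager(device)
pm.loadModelParameters(convlstm)                    # store the current state_dict
pm.oneEpochDone(1e-3, 0.90, 0.88, 0.35)             # learning rate, train acc, test acc, loss for one epoch
pm.saveToFile('demo_checkpoint.pth')                # placeholder file name

# Later, restore everything and print a summary.
pm2 = ParametersManager(device)
pm2.loadFromFile('demo_checkpoint.pth')
pm2.setModelParameters(convlstm)
pm2.show()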