├── .gitignore
├── LICENSE
├── README.md
├── docs
├── HBaseCommands.txt
└── HbaseQueryExamples.txt
├── scripts
├── auth
│ ├── LOCATION.html
│ ├── PHPMailerAutoload.php
│ ├── UA.html
│ ├── class.phpmailer.php
│ ├── class.pop3.php
│ ├── class.smtp.php
│ ├── mail.awk
│ └── sendmail.php
├── bsfl
├── conf
│ ├── auth.conf
│ └── sflow.conf
├── createReputationList.sh
├── create_conf.sh
├── hz-utils
│ ├── genCnCList.sh
│ ├── getReposList.sh
│ ├── start-all.sh
│ ├── start-dbupdates.sh
│ ├── start-hogzilla.sh
│ ├── start-pigtail.sh
│ ├── start-sflow2hz.sh
│ ├── stop-all.sh
│ ├── stop-dbupdates.sh
│ ├── stop-hogzilla.sh
│ ├── stop-pigtail.sh
│ ├── stop-sflow2hz.sh
│ └── updateReputationList.php
├── install_hogzilla.sh
├── myFuncs
└── run.sh
└── src
├── Hogzilla.scala
├── HogzillaContinuous.scala
├── HogzillaStream.scala
└── org
└── hogzilla
├── auth
├── HogAuth.scala
└── package.scala
├── cluster
├── HogClusterMember.scala
└── package.scala
├── dns
├── HogDNS.scala
└── package.scala
├── event
├── HogEvent.scala
├── HogSignature.scala
└── package.scala
├── hbase
├── HogHBaseCluster.scala
├── HogHBaseHistogram.scala
├── HogHBaseInventory.scala
├── HogHBaseRDD.scala
├── HogHBaseReputation.scala
└── package.scala
├── histogram
├── Histograms.scala
├── HogHistogram.scala
└── package.scala
├── http
├── HogHTTP.scala
└── package.scala
├── initiate
├── HogInitiate.scala
└── package.scala
├── prepare
├── HogPrepare.scala
└── package.scala
├── sflow
├── HogSFlow.scala
├── HogSFlowHistograms.scala
└── package.scala
├── snort
├── HogSnort.scala
└── package.scala
└── util
├── HogConfig.scala
├── HogFeature.scala
├── HogFlow.scala
├── HogGeograph.scala
├── HogStringUtils.scala
└── package.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .project
3 | .settings/
4 | .cache-main
5 | bin/
6 | lib/
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### 0. SUMMARY
2 |
3 | ### 1. COPYRIGHT
4 |
5 | ### 2. CONTACT
6 |
7 |
--------------------------------------------------------------------------------
/docs/HBaseCommands.txt:
--------------------------------------------------------------------------------
1 |
2 | create 'hogzilla_flows','flow','event'
3 | create 'hogzilla_sflows','flow'
4 | create 'hogzilla_events','event'
5 | create 'hogzilla_sensor','sensor'
6 | create 'hogzilla_signatures','signature'
7 | create 'hogzilla_mynets','net'
8 | create 'hogzilla_reputation','rep'
9 | create 'hogzilla_histograms','info','values','labels'
10 | create 'hogzilla_clusters','info'
11 | create 'hogzilla_cluster_members','info','member','cluster'
12 | create 'hogzilla_inventory','info'
13 | create 'hogzilla_authrecords','auth'
14 |
15 | put 'hogzilla_mynets', '10.', 'net:description', 'Intranet 1'
16 | put 'hogzilla_mynets', '10.', 'net:prefix', '10.'
17 |
18 | put 'hogzilla_reputation', '10.1.1.1', 'rep:description', 'SMTP Server'
19 | put 'hogzilla_reputation', '10.1.1.1', 'rep:ip', '10.1.1.1'
20 | put 'hogzilla_reputation', '10.1.1.1', 'rep:list', 'MX'
21 | put 'hogzilla_reputation', '10.1.1.1', 'rep:list_type', 'whitelist'
22 |
23 |
24 | put 'hogzilla_reputation', '10.1.1.2', 'rep:description', 'Big Talker 1'
25 | put 'hogzilla_reputation', '10.1.1.2', 'rep:ip', '10.1.1.2'
26 | put 'hogzilla_reputation', '10.1.1.2', 'rep:list', 'TTalker'
27 | put 'hogzilla_reputation', '10.1.1.2', 'rep:list_type', 'whitelist'
28 |
29 | put 'hogzilla_reputation', '10.1.111.', 'rep:description', 'DMZ'
30 | put 'hogzilla_reputation', '10.1.111.', 'rep:ip', '10.1.111.'
31 | put 'hogzilla_reputation', '10.1.111.', 'rep:list', 'TTalker'
32 | put 'hogzilla_reputation', '10.1.111.', 'rep:list_type', 'whitelist'
33 |
34 |
--------------------------------------------------------------------------------
/docs/HbaseQueryExamples.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:bytes','flow:host_server_name','flow:packets'],
7 | FILTER => "(SingleColumnValueFilter('flow','detected_protocol',=,'regexstring:.*5/DNS.*',true,true)) AND (SingleColumnValueFilter('flow','bytes',=,'regexstring:......*',true,true))",
8 | LIMIT => 10}
9 |
10 |
11 |
12 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:bytes','flow:host_server_name','flow:packets','event:signature_id'],
13 | FILTER => "(SingleColumnValueFilter('flow','detected_protocol',=,'regexstring:.*5.178/DNS.Amazon.*',true,true)) AND (SingleColumnValueFilter('flow','bytes',=,'regexstring:......*',true,true))",
14 | LIMIT => 10}
15 |
16 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:lower_port',
17 | 'flow:bytes','flow:host_server_name','flow:packets','event:signature_id',
18 | 'flow:dns_num_ret_code'],
19 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true)) AND (SingleColumnValueFilter('flow','dns_num_ret_code',!=,'regexstring:xyz',true,true))",
20 | LIMIT => 10}
21 |
22 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:lower_port',
23 | 'flow:bytes','flow:host_server_name','flow:packets','event:signature_id',
24 | 'flow:dns_ret_code','flow:packets_without_payload'],
25 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true))",
26 | LIMIT => 10}
27 |
28 | COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:lower_port',
29 | 'flow:bytes','flow:host_server_name','flow:packets','event:signature_id',
30 | 'flow:dns_ret_code','flow:packets_without_payload','flow:host_server_name'],
31 |
32 | scan 'hogzilla_flows', {
33 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true)) AND (SingleColumnValueFilter('flow','host_server_name',=,'regexstring:msnsolution.nicaze.net',true,true))",
34 | LIMIT => 1}
35 |
36 | scan 'hogzilla_flows', {
37 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true)) AND (SingleColumnValueFilter('flow','detected_protocol',=,'regexstring:.*5/DNS.*',true,true))",
38 | LIMIT => 1}
39 |
40 | scan 'hogzilla_flows',{LIMIT => 10, FILTER => "(SingleColumnValueFilter('flow','upper_port',=,'regexstring:80',true,true) OR SingleColumnValueFilter('flow','lower_port',=,'regexstring:80',true,true)) AND (SingleColumnValueFilter('flow','upper_name',=,'regexstring:189.9.39.12',true,true) OR SingleColumnValueFilter('flow','lower_name',=,'regexstring:189.9.39.12',true,true)) AND SingleColumnValueFilter('event','priority',=,'regexstring:1',true,true)" }
41 |
42 | scan 'hogzilla_sflows',{LIMIT => 10, FILTER => "(SingleColumnValueFilter('flow','tcpFlags',=,'regexstring:0x12',true,true) AND SingleColumnValueFilter('flow','srcIP',=,'regexstring:10.1.1',true,true)) " }
43 |
--------------------------------------------------------------------------------
/scripts/auth/LOCATION.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | New login from [LOCATION]
8 |
9 | Dear user,
10 |
11 | Your account [USERNAME] ....
12 |
13 | UA: [UA]
14 | Location: [LOCATION]
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/scripts/auth/PHPMailerAutoload.php:
--------------------------------------------------------------------------------
1 |
8 | * @author Jim Jagielski (jimjag)
9 | * @author Andy Prevost (codeworxtech)
10 | * @author Brent R. Matzelle (original founder)
11 | * @copyright 2013 Marcus Bointon
12 | * @copyright 2010 - 2012 Jim Jagielski
13 | * @copyright 2004 - 2009 Andy Prevost
14 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License
15 | * @note This program is distributed in the hope that it will be useful - WITHOUT
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 | * FITNESS FOR A PARTICULAR PURPOSE.
18 | */
19 |
20 | /**
21 | * PHPMailer SPL autoloader.
22 | * @param string $classname The name of the class to load
23 | */
24 | function PHPMailerAutoload($classname) // maps class name Foo to class.foo.php in this script's directory
25 | {
26 | //Can't use __DIR__ as it's only in PHP 5.3+
27 | $filename = dirname(__FILE__).DIRECTORY_SEPARATOR.'class.'.strtolower($classname).'.php';
28 | if (is_readable($filename)) { // unknown/unreadable class files are silently skipped so other registered autoloaders can try
29 | require $filename;
30 | }
31 | }
32 |
33 | if (version_compare(PHP_VERSION, '5.1.2', '>=')) {
34 | //SPL autoloading was introduced in PHP 5.1.2
35 | if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
36 | spl_autoload_register('PHPMailerAutoload', true, true); // 3rd arg prepend=true (5.3+): run before other autoloaders; 2nd arg throws if registration fails
37 | } else {
38 | spl_autoload_register('PHPMailerAutoload'); // prepend parameter not available before PHP 5.3
39 | }
40 | } else {
41 | //Fall back to traditional autoload for old PHP versions
42 | function __autoload($classname) // NOTE(review): defining __autoload prevents any other library from using it — pre-5.1.2 only
43 | {
44 | PHPMailerAutoload($classname);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/scripts/auth/UA.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | New login from [LOCATION]
8 |
9 | Dear user,
10 |
11 | Your account [USERNAME] ....
12 |
13 | UA: [UA]
14 | Location: [LOCATION]
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/scripts/auth/class.pop3.php:
--------------------------------------------------------------------------------
1 |
9 | * @author Jim Jagielski (jimjag)
10 | * @author Andy Prevost (codeworxtech)
11 | * @author Brent R. Matzelle (original founder)
12 | * @copyright 2013 Marcus Bointon
13 | * @copyright 2010 - 2012 Jim Jagielski
14 | * @copyright 2004 - 2009 Andy Prevost
15 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License
16 | * @note This program is distributed in the hope that it will be useful - WITHOUT
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 | * FITNESS FOR A PARTICULAR PURPOSE.
19 | */
20 |
21 | /**
22 | * PHPMailer POP-Before-SMTP Authentication Class.
23 | * Specifically for PHPMailer to use for RFC1939 POP-before-SMTP authentication.
24 | * Does not support APOP.
25 | * @package PHPMailer
26 | * @author Richard Davey (original author)
27 | * @author Marcus Bointon (coolbru)
28 | * @author Jim Jagielski (jimjag)
29 | * @author Andy Prevost (codeworxtech)
30 | */
31 |
class POP3
{
    /**
     * The POP3 PHPMailer Version number.
     * @type string
     * @access public
     */
    public $Version = '5.2.7';

    /**
     * Default POP3 port number.
     * @type int
     * @access public
     */
    public $POP3_PORT = 110;

    /**
     * Default timeout in seconds.
     * @type int
     * @access public
     */
    public $POP3_TIMEOUT = 30;

    /**
     * POP3 Carriage Return + Line Feed.
     * @type string
     * @access public
     * @deprecated Use the constant instead
     */
    public $CRLF = "\r\n";

    /**
     * Debug display level.
     * Options: 0 = no, 1+ = yes
     * @type int
     * @access public
     */
    public $do_debug = 0;

    /**
     * POP3 mail server hostname.
     * @type string
     * @access public
     */
    public $host;

    /**
     * POP3 port number.
     * @type int
     * @access public
     */
    public $port;

    /**
     * POP3 Timeout Value in seconds.
     * @type int
     * @access public
     */
    public $tval;

    /**
     * POP3 username
     * @type string
     * @access public
     */
    public $username;

    /**
     * POP3 password.
     * @type string
     * @access public
     */
    public $password;

    /**
     * Resource handle for the POP3 connection socket.
     * @type resource
     * @access private
     */
    private $pop_conn;

    /**
     * Are we connected?
     * @type bool
     * @access private
     */
    private $connected;

    /**
     * Error container.
     * Holds either an associative array of error details, or a list of such
     * arrays appended by catchWarning(). Always array|null so that
     * displayErrors() can iterate it safely.
     * @type array
     * @access private
     */
    private $error;

    /**
     * Line break constant
     */
    const CRLF = "\r\n";

    /**
     * Constructor.
     * @access public
     */
    public function __construct()
    {
        $this->pop_conn = 0;
        $this->connected = false;
        $this->error = null;
    }

    /**
     * Simple static wrapper for all-in-one POP before SMTP
     * @param $host
     * @param bool $port
     * @param bool $tval
     * @param string $username
     * @param string $password
     * @param int $debug_level
     * @return bool
     */
    public static function popBeforeSmtp(
        $host,
        $port = false,
        $tval = false,
        $username = '',
        $password = '',
        $debug_level = 0
    ) {
        $pop = new POP3;
        return $pop->authorise($host, $port, $tval, $username, $password, $debug_level);
    }

    /**
     * Authenticate with a POP3 server.
     * A connect, login, disconnect sequence
     * appropriate for POP-before SMTP authorisation.
     * @access public
     * @param string $host
     * @param bool|int $port
     * @param bool|int $tval
     * @param string $username
     * @param string $password
     * @param int $debug_level
     * @return bool
     */
    public function authorise($host, $port = false, $tval = false, $username = '', $password = '', $debug_level = 0)
    {
        $this->host = $host;
        // If no port value provided, use default
        if ($port === false) {
            $this->port = $this->POP3_PORT;
        } else {
            $this->port = $port;
        }
        // If no timeout value provided, use default
        if ($tval === false) {
            $this->tval = $this->POP3_TIMEOUT;
        } else {
            $this->tval = $tval;
        }
        $this->do_debug = $debug_level;
        $this->username = $username;
        $this->password = $password;
        // Refresh the error log
        $this->error = null;
        // connect
        $result = $this->connect($this->host, $this->port, $this->tval);
        if ($result) {
            $login_result = $this->login($this->username, $this->password);
            if ($login_result) {
                $this->disconnect();
                return true;
            }
        }
        // We need to disconnect regardless of whether the login succeeded
        $this->disconnect();
        return false;
    }

    /**
     * Connect to a POP3 server.
     * @access public
     * @param string $host
     * @param bool|int $port
     * @param integer $tval
     * @return boolean
     */
    public function connect($host, $port = false, $tval = 30)
    {
        // Are we already connected?
        if ($this->connected) {
            return true;
        }

        //On Windows this will raise a PHP Warning error if the hostname doesn't exist.
        //Rather than suppress it with @fsockopen, capture it cleanly instead
        set_error_handler(array($this, 'catchWarning'));

        // connect to the POP3 server
        $this->pop_conn = fsockopen(
            $host, // POP3 Host
            $port, // Port #
            $errno, // Error Number
            $errstr, // Error Message
            $tval
        ); // Timeout (seconds)
        // Restore the error handler
        restore_error_handler();
        // Does the Error Log now contain anything (set by catchWarning)?
        if ($this->error && $this->do_debug >= 1) {
            $this->displayErrors();
        }
        // Did we connect?
        if ($this->pop_conn == false) {
            // It would appear not...
            $this->error = array(
                'error' => "Failed to connect to server $host on port $port",
                'errno' => $errno,
                'errstr' => $errstr
            );
            if ($this->do_debug >= 1) {
                $this->displayErrors();
            }
            return false;
        }

        // Increase the stream time-out
        // Check for PHP 5.0.0 or later (the comparison below is against 5.0.0,
        // not 4.3.0 as an earlier comment claimed)
        if (version_compare(phpversion(), '5.0.0', 'ge')) {
            stream_set_timeout($this->pop_conn, $tval, 0);
        } else {
            // Does not work on Windows
            if (substr(PHP_OS, 0, 3) !== 'WIN') {
                socket_set_timeout($this->pop_conn, $tval, 0);
            }
        }

        // Get the POP3 server response
        $pop3_response = $this->getResponse();
        // Check for the +OK
        if ($this->checkResponse($pop3_response)) {
            // The connection is established and the POP3 server is talking
            $this->connected = true;
            return true;
        }
        return false;
    }

    /**
     * Log in to the POP3 server.
     * Does not support APOP (RFC 2828, 4949).
     * @access public
     * @param string $username
     * @param string $password
     * @return boolean
     */
    public function login($username = '', $password = '')
    {
        if ($this->connected == false) {
            // Store the error as an array so displayErrors() can iterate it
            // (previously it was a bare string, which foreach cannot handle).
            $this->error = array(
                'error' => 'Not connected to POP3 server',
                'errno' => 0,
                'errstr' => ''
            );

            if ($this->do_debug >= 1) {
                $this->displayErrors();
            }
            // Bug fix: fail fast. The original fell through and attempted to
            // send USER/PASS over a non-existent socket connection.
            return false;
        }
        if (empty($username)) {
            $username = $this->username;
        }
        if (empty($password)) {
            $password = $this->password;
        }

        // Send the Username
        $this->sendString("USER $username" . self::CRLF);
        $pop3_response = $this->getResponse();
        if ($this->checkResponse($pop3_response)) {
            // Send the Password
            $this->sendString("PASS $password" . self::CRLF);
            $pop3_response = $this->getResponse();
            if ($this->checkResponse($pop3_response)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Disconnect from the POP3 server.
     * @access public
     */
    public function disconnect()
    {
        $this->sendString('QUIT');
        //The QUIT command may cause the daemon to exit, which will kill our connection
        //So ignore errors here
        @fclose($this->pop_conn);
    }

    /**
     * Get a response from the POP3 server.
     * $size is the maximum number of bytes to retrieve
     * @param integer $size
     * @return string
     * @access private
     */
    private function getResponse($size = 128)
    {
        $r = fgets($this->pop_conn, $size);
        if ($this->do_debug >= 1) {
            echo "Server -> Client: $r";
        }
        return $r;
    }

    /**
     * Send raw data to the POP3 server.
     * @param string $string
     * @return integer number of bytes written, 0 when not connected
     * @access private
     */
    private function sendString($string)
    {
        if ($this->pop_conn) {
            if ($this->do_debug >= 2) { //Show client messages when debug >= 2
                echo "Client -> Server: $string";
            }
            return fwrite($this->pop_conn, $string, strlen($string));
        }
        return 0;
    }

    /**
     * Checks the POP3 server response.
     * Looks for +OK or -ERR.
     * @param string $string
     * @return boolean
     * @access private
     */
    private function checkResponse($string)
    {
        if (substr($string, 0, 3) !== '+OK') {
            $this->error = array(
                'error' => "Server reported an error: $string",
                'errno' => 0,
                'errstr' => ''
            );
            if ($this->do_debug >= 1) {
                $this->displayErrors();
            }
            return false;
        } else {
            return true;
        }
    }

    /**
     * Display errors if debug is enabled.
     * $this->error is always an array here: either one associative array of
     * error details, or a list of them appended by catchWarning().
     * @access private
     */
    private function displayErrors()
    {
        foreach ($this->error as $single_error) {
            print_r($single_error);
        }
        echo "\n";
    }

    /**
     * POP3 connection error handler.
     * Appends the captured PHP warning to the error list.
     * @param integer $errno
     * @param string $errstr
     * @param string $errfile
     * @param integer $errline
     * @access private
     */
    private function catchWarning($errno, $errstr, $errfile, $errline)
    {
        $this->error[] = array(
            'error' => "Connecting to the POP3 server raised a PHP warning: ",
            'errno' => $errno,
            'errstr' => $errstr,
            'errfile' => $errfile,
            'errline' => $errline
        );
    }
}
420 |
--------------------------------------------------------------------------------
/scripts/auth/mail.awk:
--------------------------------------------------------------------------------
# mail.awk — parse Hogzilla auth-event dump records into pipe-separated
# lines of the form:  TYPE|username|date|ip|location|useragent
# Requires gawk (uses gensub).

function ltrim(s) { sub(/^[ \t\r\n]+/, "", s); return s }
function rtrim(s) { sub(/[ \t\r\n]+$/, "", s); return s }
function trim(s) { return rtrim(ltrim(s)); }

# A record starts at a line whose first field begins with "Username".
$1 ~ /^Username/ {

    username = $2;
    getline;                       # advance to the tag line ("Cities"/"UserAgents")
    tag=$2
    content = gensub(/(.*):(.*)/,"\\2","g",$0);   # NOTE(review): 'content' is never used below — confirm before removing
    getline; getline; getline;     # skip to the detail line carrying Location/UA/TIME
    location = gensub(/.*Location: (.*), UA:.*/,"\\1","g",$0);
    UA = gensub(/.*UA: (.*), AuthMethod:.*/,"\\1","g",$0);
    DATE = gensub(/.*TIME: (.*), .*/,"\\1","g",$0);
    IP = gensub(/^(.*) =.*/,"\\1","g",$0);
    IP = gensub(/(.*)\(.*\)/,"\\1","g",IP);       # strip a trailing "(...)" annotation from the IP


    if(tag ~ /Cities/){
        type="LOCATION"
    } else
    if(tag ~ /UserAgents/){
        type="UA"
    }
    # NOTE(review): if the tag matches neither pattern, 'type' keeps its value
    # from the previous record — presumably intended; verify against the input.
    print type"|"username"|"trim(DATE)"|"trim(IP)"|"trim(location)"|"trim(UA);


}
29 |
--------------------------------------------------------------------------------
/scripts/auth/sendmail.php:
--------------------------------------------------------------------------------
1 | IsSMTP();
35 | $mail->Host = "$SMTP";
36 | $mail->Port = "$PORTASMTP";
37 |
38 | $mail->SetFrom($EMAILREMETENTE, $NOMEREMETENTE);
39 | //$mail->AddReplyTo($REPLYTO,$REPLYTONOME);
40 | $mail->Subject = $ASSUNTO;
41 |
42 | $mail->AltBody = "use a HTML client"; // optional, comment out and test
43 | $mail->CharSet = "UTF-8";
44 | $mail->MsgHTML($body);
45 | //$mail->AddEmbeddedImage('cab.jpg', 'cab');
46 | //$mail->AddAttachment("imagem/phpmailer_mini.gif"); // attachment
47 |
48 | $mail->AddAddress($to);
49 | if(!$mail->Send()) {
50 | //echo "Mailer Error: " . $mail->ErrorInfo;
51 | //print_r($mail);
52 | } else {
53 | //echo "Message sent! to $to\n";
54 | }
55 |
56 | }
57 | ?>
58 |
--------------------------------------------------------------------------------
/scripts/bsfl:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # ###########################
3 | # Bash Shell Function Library
4 | # ###########################
5 | #
6 | # Author: Louwrentius
7 | # Contributions by: Jani Hurskainen
8 | #
9 | # Copyright © 2010
10 | #
11 | # Released under the curren GPL version.
12 | #
13 | # Description:
14 | #
15 | # This is a shell script library. It contains functions that can be called by
16 | # programs that include (source) this library.
17 | #
18 | # By simply sourcing this library, you can use all available functions as
19 | # documented on the projects page.
20 | #
21 | #
22 |
23 | BSFL_VERSION="2.00-beta-2"
24 |
25 | #
26 | # Do not edit this file. Just source it into your script
27 | # and override the variables to change their value.
28 | #
29 |
#
# init — set the library's default configuration and colour variables.
# Sourcing scripts may override any of these values after loading the file.
#
init () {

    #
    # Debug mode shows more verbose output to screen and log files.
    # Value: yes or no (y / n)
    #
    DEBUG=no

    #
    # Syslog style log messages
    #
    if ! defined LOGDATEFORMAT
    then
        LOGDATEFORMAT="%b %e %H:%M:%S"
    fi
    if ! defined LOG_FILE
    then
        # Default: a log file named after the sourcing script.
        LOG_FILE=$0.log
    fi

    #
    # Enable / disable logging to a file
    # Value: yes or no (y / n)
    #
    if ! defined LOG_ENABLED
    then
        LOG_ENABLED=no
    fi
    if ! defined SYSLOG_ENABLED
    then
        SYSLOG_ENABLED=no
    fi
    if ! defined SYSLOG_TAG
    then
        SYSLOG_TAG=$0
    fi

    #
    # Use colours in output. Each variable holds a tput *command string*
    # which callers execute (see msg and raw_status), not an escape sequence.
    #
    RED="tput setaf 1"
    GREEN="tput setaf 2"
    YELLOW="tput setaf 3"
    BLUE="tput setaf 4"
    MAGENTA="tput setaf 5"
    CYAN="tput setaf 6"
    LIGHT_BLUE="$CYAN"
    BOLD="tput bold"
    DEFAULT="tput sgr0"

    RED_BG="tput setab 1"
    GREEN_BG="tput setab 2"
    YELLOW_BG="tput setab 3"
    BLUE_BG="tput setab 4"
    MAGENTA_BG="tput setab 5"
    CYAN_BG="tput setab 6"

    #
    # Bug fix for Bash, parsing exclamation mark.
    #
    set +o histexpand

}
97 |
#
# defined VARNAME
# Returns 0 when the variable named by $1 is set (even if empty), 1 when unset.
# Trick: ${!1-X} indirect-expands the variable with default X. For an unset
# variable the two expansions produce the distinct defaults X and Y, so the
# comparison fails; for a set variable both yield its value and match.
#
function defined {
    [[ ${!1-X} == ${!1-Y} ]]
}
101 |
#
# has_value VARNAME
# Succeeds (returns 0) when the variable named by $1 is set AND non-empty;
# returns 1 otherwise.
#
function has_value {
    defined "$1" && [[ -n ${!1} ]]
}
114 |
#
# directory_exists PATH
# Succeeds (returns 0) when PATH exists and is a directory, 1 otherwise.
#
function directory_exists {
    [[ -d "$1" ]]
}
125 |
#
# file_exists PATH
# Succeeds (returns 0) when PATH exists and is a regular file, 1 otherwise.
#
function file_exists {
    [[ -f "$1" ]]
}
136 |
#
# tolower STRING — print STRING converted to lower case.
#
function tolower {
    tr '[:upper:]' '[:lower:]' <<< "$1"
}
143 |
#
# toupper STRING — print STRING converted to upper case.
#
function toupper {
    tr '[:lower:]' '[:upper:]' <<< "$1"
}
150 |
#
# trim STRING
# Relies on unquoted expansion: leading/trailing whitespace is stripped and
# internal runs of whitespace collapse to single spaces. Contrary to the
# original header comment, it prints ALL words of STRING, not just the first.
# Beware: the unquoted expansion is also subject to pathname globbing.
#
function trim {
    echo $1
}
157 |
#
# show_usage MESSAGE
# Print MESSAGE and terminate the script with exit status 1.
# Dummy/default implementation — override this function if required.
# Note: MESSAGE is assigned as a global before printing.
#
show_usage () {

    MESSAGE="$1"
    echo "$MESSAGE"
    exit 1
}
168 |
#
# option_enabled VARNAME
# Succeeds when the variable named VARNAME is set to "y" or "yes".
# Useful for checking whether a configurable option is switched on.
# Note: VAR and VAR_VALUE are globals, kept for backward compatibility.
#
option_enabled () {

    VAR="$1"
    VAR_VALUE=$(eval echo \$$VAR)

    [[ "$VAR_VALUE" == "y" || "$VAR_VALUE" == "yes" ]]
}
184 |
#
# log2syslog MESSAGE
# Send MESSAGE to syslog via logger(1), tagged with SYSLOG_TAG, when
# SYSLOG_ENABLED is switched on. (Used by log() below.)
#

log2syslog () {

    if option_enabled SYSLOG_ENABLED
    then
        MESSAGE="$1"
        logger -t "$SYSLOG_TAG" " $MESSAGE" #The space is not a typo!"
    fi
}
198 |
#
# log MESSAGE
# Write MESSAGE to a log file and/or syslog, depending on LOG_ENABLED and
# SYSLOG_ENABLED. File entries are prepended with a timestamp formatted per
# LOGDATEFORMAT; syslog entries are not (syslog adds its own timestamp).
#

log () {

    if option_enabled LOG_ENABLED || option_enabled SYSLOG_ENABLED
    then
        LOG_MESSAGE="$1"
        DATE=`date +"$LOGDATEFORMAT"`

        if has_value LOG_MESSAGE
        then
            LOG_STRING="$DATE $LOG_MESSAGE"
        else
            LOG_STRING="$DATE -- empty log message, no input received --"
        fi

        if option_enabled LOG_ENABLED
        then
            echo "$LOG_STRING" >> "$LOG_FILE"
        fi

        if option_enabled SYSLOG_ENABLED
        then
            #
            # Syslog already prepends a date/time stamp so only the message
            # is logged.
            #
            log2syslog "$LOG_MESSAGE"
        fi
    fi
}
233 |

#
# msg MESSAGE [COLOR]
# Drop-in replacement for 'echo' that also logs the message.
# COLOR is a tput command *string* (e.g. "tput setaf 1") that is executed to
# switch the terminal colour before printing; the colour is reset afterwards.
# Defaults to $DEFAULT (tput sgr0) when no colour is given.
#

msg () {

    MESSAGE="$1"
    COLOR="$2"

    if ! has_value COLOR
    then
        COLOR="$DEFAULT"
    fi

    if has_value "MESSAGE"
    then
        # Switch colour, print, reset colour, then log.
        $COLOR
        echo "$MESSAGE"
        $DEFAULT
        log "$MESSAGE"
    else
        echo "-- no message received --"
        log "$MESSAGE"
    fi
}
263 |
#
# msg_status MESSAGE STATUS
# Echo MESSAGE and display STATUS (any label accepted by display_status,
# e.g. OK or FAIL) right-aligned at the end of the line.
# Note: MESSAGE and STATUS are assigned as globals.
#
msg_status () {

    MESSAGE="$1"
    STATUS="$2"

    msg "$MESSAGE"
    display_status "$STATUS"
}
280 |
#
# Shorthand wrappers around msg_status: each prints/logs the given message
# with a fixed severity label, e.g.
#     msg_ok "this message is ok"   ==   msg_status "this message is ok" OK
# Behaviour is unchanged — msg_status itself assigns the MESSAGE and STATUS
# globals from its arguments.
#

msg_emergency () { msg_status "$1" "EMERGENCY" ; }

msg_alert ()     { msg_status "$1" "ALERT" ; }

msg_critical ()  { msg_status "$1" "CRITICAL" ; }

msg_error ()     { msg_status "$1" "ERROR" ; }

msg_warning ()   { msg_status "$1" "WARNING" ; }

msg_notice ()    { msg_status "$1" "NOTICE" ; }

msg_info ()      { msg_status "$1" "INFO" ; }

msg_debug ()     { msg_status "$1" "DEBUG" ; }

msg_ok ()        { msg_status "$1" "OK" ; }

msg_not_ok ()    { msg_status "$1" "NOT_OK" ; }

msg_fail ()      { msg_status "$1" "FAILED" ; }

msg_success ()   { msg_status "$1" "SUCCESS" ; }

msg_passed ()    { msg_status "$1" "PASSED" ; }
377 |
#
# check_status CMD STATUS
# Report CMD with an OK status line when STATUS is "0", FAILED otherwise.
#
check_status () {

    CMD="$1"
    STATUS="$2"

    case "$STATUS" in
        0)
            msg_ok "$CMD"
            ;;
        *)
            msg_fail "$CMD"
            ;;
    esac
}
390 |
#
# Private function
#
# raw_status STATUS COLOR
# Position the cursor near the right edge of the *previous* line (the one
# msg() just printed) and print "[STATUS]", with STATUS rendered by executing
# the tput command string held in COLOR.
#
# ARG1 = "status message (OK / FAIL)"
# ARG2 = The color in which the status is displayed.
#
raw_status () {

    STATUS="$1"
    COLOR="$2"

    # (Re)defined on every call: moves the cursor 12 columns from the right
    # edge and one row up, over the line just printed.
    function position_cursor () {

        let RES_COL=`tput cols`-12
        tput cuf $RES_COL
        tput cuu1
    }

    position_cursor
    echo -n "["
    $DEFAULT
    $BOLD
    $COLOR
    echo -n "$STATUS"
    $DEFAULT
    echo "]"
    log "Status = $STATUS"
}
426 |
#
# display_status STATUS
# Map a status keyword (OK, FAILED, INFO, ...) to a padded label and a colour,
# then delegate to raw_status for right-aligned printing. Unknown keywords are
# shown as UNDEFINED in yellow. Note: STATUS and COLOR are globals and are
# overwritten here.
#
display_status () {


    STATUS="$1"

    case $STATUS in

        EMERGENCY )
            STATUS="EMERGENCY"
            COLOR="$RED"
            ;;
        ALERT )
            STATUS=" ALERT "
            COLOR="$RED"
            ;;
        CRITICAL )
            STATUS="CRITICAL "
            COLOR="$RED"
            ;;
        ERROR )
            STATUS=" ERROR "
            COLOR="$RED"
            ;;

        WARNING )
            STATUS=" WARNING "
            COLOR="$YELLOW"
            ;;

        NOTICE )
            STATUS=" NOTICE "
            COLOR="$BLUE"
            ;;
        INFO )
            STATUS=" INFO "
            COLOR="$LIGHT_BLUE"
            ;;
        DEBUG )
            STATUS=" DEBUG "
            COLOR="$DEFAULT"
            ;;

        OK )
            STATUS=" OK "
            COLOR="$GREEN"
            ;;
        NOT_OK)
            STATUS=" NOT OK "
            COLOR="$RED"
            ;;

        PASSED )
            STATUS=" PASSED "
            COLOR="$GREEN"
            ;;

        SUCCESS )
            STATUS=" SUCCESS "
            COLOR="$GREEN"
            ;;

        FAILURE | FAILED )
            STATUS=" FAILED "
            COLOR="$RED"
            ;;

        *)
            STATUS="UNDEFINED"
            COLOR="$YELLOW"
    esac

    raw_status "$STATUS" "$COLOR"
}
503 |
#
# bail [MESSAGE]
# Exit with the status of the *previous* command when that status is non-zero,
# printing MESSAGE as a failure line first. Must be called immediately after
# the command it guards, since it reads $? on entry.
#
bail () {

    ERROR="$?"
    MSG="$1"
    if [ ! "$ERROR" = "0" ]
    then
        msg_fail "$MSG"
        exit "$ERROR"
    fi
}
517 |
#
# cmd COMMAND
# Execute COMMAND (via eval, stderr folded into stdout), print an OK/FAILED
# status line for it, log the output on failure (or when DEBUG=1), and return
# the command's exit status.
#
cmd () {

    COMMAND="$1"
    msg "Executing: $COMMAND"

    RESULT=`eval $COMMAND 2>&1`
    # Bug fix: capture the command's exit status IMMEDIATELY. The original
    # ran 'msg "$RESULT"' first, so $? held msg's (always-zero) status and
    # every command was reported as OK. cmd_su already does this correctly.
    ERROR="$?"
    msg "$RESULT"

    MSG="Command: ${COMMAND:0:29}..."

    tput cuu1

    if [ "$ERROR" == "0" ]
    then
        msg_ok "$MSG"
        if [ "$DEBUG" == "1" ]
        then
            msg "$RESULT"
        fi
    else
        msg_fail "$MSG"
        log "$RESULT"
    fi

    return "$ERROR"
}
549 |
#
# now — print the current Unix timestamp (seconds since the epoch).
# Pair with elapsed() to time how long commands take to execute.
#
now () {

    date +%s
}
558 |
#
# elapsed START STOP — print STOP minus START (seconds between two now()
# readings). Note: START and STOP are assigned as globals.
#
elapsed () {

    START="$1"
    STOP="$2"

    local delta=$(( STOP - START ))
    echo "$delta"
}
566 |
#
# die ERR_CODE ERR_MSG [CALLER]
# Print ERR_MSG (and the call site) to stderr — the whole function's output
# is redirected — log it via msg_fail, and exit with ERR_CODE.
# CALLER defaults to this call site, obtained from the bash builtin 'caller'.
#
function die {
    local -r err_code="$1"
    local -r err_msg="$2"
    local -r err_caller="${3:-$(caller 0)}"

    msg_fail "ERROR: $err_msg"
    msg_fail "ERROR: At line $err_caller"
    msg_fail "ERROR: Error code = $err_code"
    exit "$err_code"
} >&2 # function writes to stderr
581 |
#
# die_if_false ERR_CODE ERR_MSG
# When ERR_CODE is non-zero (i.e. the previous check failed), print ERR_MSG
# and the call site to stderr, log it, and exit with ERR_CODE.
#
function die_if_false {
    local -r err_code=$1
    local -r err_msg=$2
    local -r err_caller=$(caller 0)

    if [[ "$err_code" != "0" ]]
    then
        die $err_code "$err_msg" "$err_caller"
    fi
} >&2 # function writes to stderr
599 |
#
# die_if_true ERR_CODE ERR_MSG
# When ERR_CODE is zero (i.e. the previous check *succeeded*), print ERR_MSG
# and the call site to stderr, log it, and exit with ERR_CODE.
#
function die_if_true {
    local -r err_code=$1
    local -r err_msg=$2
    local -r err_caller=$(caller 0)

    if [[ "$err_code" == "0" ]]
    then
        die $err_code "$err_msg" "$err_caller"
    fi
} >&2 # function writes to stderr
613 |
#
# str_replace PATTERN REPLACEMENT STRING
# Print STRING with every occurrence of PATTERN replaced by REPLACEMENT.
# NOTE: arguments are passed to sed unescaped, so regex metacharacters and
# '/' inside them will change the result.
#
function str_replace () {
    local needle="$1"
    local subst="$2"
    local haystack="$3"

    sed "s/$needle/$subst/g" <<< "$haystack"
}
624 |
#
# str_replace_in_file ORIG DEST FILE
# Replace every occurrence of ORIG with DEST inside FILE, in place, using the
# ed editor. Dies (via die_if_false) when FILE is empty or does not exist.
# ORIG/DEST are not escaped: regex metacharacters and '/' affect the result.
#
# arg1 = string to be matched
# arg2 = new string that replaces matched string
# arg3 = file to operate on.
#
function str_replace_in_file () {
    local ORIG="$1"
    local DEST="$2"
    local FILE="$3"

    has_value FILE
    die_if_false $? "Empty argument 'file'"
    file_exists "$FILE"
    die_if_false $? "File does not exist"

    # ed script: ,s///g = substitute on all lines; w = write; Q = quit
    # unconditionally (no complaint about state).
    printf ",s/$ORIG/$DEST/g\nw\nQ" | ed -s "$FILE" > /dev/null 2>&1
    return "$?"
}
646 |
#
# cmd_su USER COMMAND
# Like cmd(): run COMMAND — but as USER via su — print an OK/FAILED status
# line, log output on failure (or when DEBUG=1), and return the command's
# exit status.
#
cmd_su () {

    USER="$1"
    COMMAND="$2"

    msg "Executing: $COMMAND"

    RESULT=`su $USER -c "$COMMAND" 2>&1`
    # Exit status captured immediately, before any other command runs.
    ERROR="$?"

    MSG="Command: ${COMMAND:0:29}..."

    tput cuu1

    if [ "$ERROR" == "0" ]
    then
        msg_ok "$MSG"
        if [ "$DEBUG" == "1" ]
        then
            msg "$RESULT"
        fi
    else
        msg_fail "$MSG"
        log "$RESULT"
    fi

    return "$ERROR"
}
675 |
676 | init
677 |
678 |
--------------------------------------------------------------------------------
/scripts/conf/auth.conf:
--------------------------------------------------------------------------------
1 | location {
2 | disabled = "0"
3 | allowedRadix = "300"
4 | excludedCities = "Set(Campinas)"
5 | reverseDomainsWhitelist = "Set(google.com,gmail.com)"
6 | }
7 | system {
8 | disabled = "0"
9 | excludedCities = "Set()"
10 | reverseDomainsWhitelist = "Set(google.com,gmail.com)"
11 | }
12 | useragent {
13 | disabled = "0"
14 | excludedCities = "Set()"
15 | reverseDomainsWhitelist = "Set(google.com,gmail.com)"
16 | }
17 |
--------------------------------------------------------------------------------
/scripts/conf/sflow.conf:
--------------------------------------------------------------------------------
1 | general {
2 | excludeIPs = "Set()"
3 | }
4 |
5 | abusedSMTP {
6 | disabled = "0"
7 | minBytes = "50000000"
8 | excludeIPs = "Set()"
9 | }
10 |
11 | alert {
12 | maxFlowList = "1000"
13 | }
14 |
15 | alien {
16 | disabled = "0"
17 | minPairs = "20"
18 | excludeIPs = "Set()"
19 | }
20 |
21 | atypicalAlienPorts {
22 | disabled = "0"
23 | minPacketsPerFlow = "2"
24 | excludeIPs = "Set()"
25 | }
26 |
27 | atypicalData {
28 | disabled = "0"
29 | minBytes = "5737418240"
30 | excludeIPs = "Set()"
31 | }
32 |
33 | atypicalPairs {
34 | disabled = "0"
35 | minPairs = "300"
36 | excludeIPs = "Set()"
37 | }
38 |
39 | atypicalPorts {
40 | disabled = "0"
41 | excludePorts = "Set(80,443,587,465,993,995)"
42 | minPacketsPerFlow = "2"
43 | excludeIPs = "Set()"
44 | }
45 |
46 | bigProviders {
47 | disabled = "0"
48 | minBytes = "1073741824"
49 | }
50 |
51 | BotNet {
52 | disabled = "0"
53 | minPktsPerFlow = "20"
54 | excludeIPs = "Set()"
55 | }
56 |
57 | DDoS {
58 | disabled = "0"
59 | minFlows = "50"
60 | minPairs = "20"
61 | excludeIPs = "Set()"
62 | }
63 |
64 | dnsTunnel {
65 | disabled = "0"
66 | minBytes = "50000000"
67 | excludeIPs = "Set()"
68 | }
69 |
70 | hPortScan {
71 | disabled = "0"
        # NOTE: this key was duplicated; HOCON keeps the last value, so the
        # effective setting was the one below.
        excludeAlienPorts = "Set(80,443,587,465,993,995)"
74 | excludeMyPorts = "Set(123)"
75 | minFlows = "100"
76 | excludeIPs = "Set()"
77 | }
78 |
79 | ICMPTunnel {
80 | disabled = "0"
81 | minBytes = "100000000"
82 | minPacket = "200"
83 | excludeIPs = "Set()"
84 | }
85 |
86 | mediaStreaming {
87 | disabled = "0"
88 | maxDuration = "7200"
89 | maxUploadBytes = "10000000"
90 | minDownloadBytes = "1000000"
91 | minDuration = "300"
92 | excludePorts = "Set(1194)"
93 | excludeIPs = "Set()"
94 | }
95 |
96 | p2p {
97 | disabled = "0"
98 | minBytes2nd = "10000000"
99 | minPairs2nd = "10"
100 | minPairs = "5"
101 | minPorts2nd = "10"
102 | minPorts = "4"
103 | excludeIPs = "Set()"
104 | }
105 |
106 | SMTPTalkers {
107 | disabled = "0"
108 | minBytes = "20971520"
109 | excludeIPs = "Set()"
110 | }
111 |
112 | topTalkers {
113 | disabled = "0"
114 | threshold = "21474836480"
115 | excludeIPs = "Set()"
116 | }
117 |
118 | vPortScan {
119 | disabled = "0"
120 | maxPortNumber = "1024"
121 | minPorts = "3"
122 | excludeIPs = "Set()"
123 | }
124 |
125 | UDPAmplifier {
126 | disabled = "0"
127 | excludeIPs = "Set()"
128 | minPktsPerFlow = "2000"
129 | }
130 |
--------------------------------------------------------------------------------
/scripts/createReputationList.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# createReputationList.sh — turn a pipe-separated reputation dump into HBase
# shell 'put' commands for the hogzilla_reputation table.
#
# Input file format, one entry per line:
#   list|listType|description|ip
#   e.g.  TTalker|whitelist|Big Talker|10.1.2.226
#
# Usage: ./createReputationList.sh <file>

FILE="$1"

# Fail early with usage instead of silently blocking on stdin (the old
# 'cat $FILE' with no argument read from the terminal).
if [ -z "$FILE" ] || [ ! -f "$FILE" ]; then
    echo "Usage: $0 <file>" >&2
    exit 1
fi

# IFS= and -r preserve leading whitespace and backslashes in the fields;
# reading from a redirection avoids the useless 'cat | while' subshell.
while IFS= read -r line ; do
    list=$(echo "$line" | cut -d'|' -f1)
    listType=$(echo "$line" | cut -d'|' -f2)
    description=$(echo "$line" | cut -d'|' -f3)
    ip=$(echo "$line" | cut -d'|' -f4)

    cat << EOF
put 'hogzilla_reputation', '$ip', 'rep:description', '$description'
put 'hogzilla_reputation', '$ip', 'rep:ip', '$ip'
put 'hogzilla_reputation', '$ip', 'rep:list', '$list'
put 'hogzilla_reputation', '$ip', 'rep:list_type', '$listType'

EOF
done < "$FILE"
--------------------------------------------------------------------------------
/scripts/create_conf.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Scan the Scala sources for HogConfig.get*(config, "section.key", default)
# calls and emit a skeleton configuration file with one block per section.
#
# Bug fix: the original sed bracket expressions used the range a-Z, which is
# invalid (or locale-dependent) in POSIX character ranges; a-zA-Z is the
# portable, intended form.
grep -r HogConfig.get ../src | sed 's/.*HogConfig.get.*(config[ ]*,[ ]*"\([a-zA-Z0-9.]*\)",\([ a-zA-Z0-9,"()]*\)).*/\1 = \2/' | sort | sed 's/\"//g' | sed 's/\(.*\)\.\(.*\) = \(.*\)/\1 {\n\t\t\2 = "\3"\n\t}/g'
--------------------------------------------------------------------------------
/scripts/hz-utils/genCnCList.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#
# Generate a C&C (botnet command-and-control) IP list from the Emerging
# Threats botcc block rules. Rules contain IP lists like [1.2.3.4,5.6.7.8];
# the addresses are extracted and printed one per line.
#

URL="https://rules.emergingthreats.net/blockrules/emerging-botcc.rules"


TMPRULES=`mktemp -t rules.XXXXXXX` || exit 1

# Use $URL (the original repeated the URL literally and never used the
# variable); quote the temp path and bail out cleanly if the download fails.
if ! wget -q -O "$TMPRULES" "$URL"; then
    rm -f "$TMPRULES"
    exit 1
fi

grep -v "^#" "$TMPRULES" | cut -d "[" -f2 | cut -d "]" -f1 | sed 's/,/\n/g'

rm -f "$TMPRULES"
--------------------------------------------------------------------------------
/scripts/hz-utils/getReposList.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ## Create repositories IPs list
4 | #
5 | # Usage: ./getReposList.sh
6 | # Ex: ./getReposList.sh windows
7 |
8 |
# Require the target system as the first argument.
# Fixes: quote $1 (unquoted, 'test -z' misbehaves on empty/多word values) and
# restore the <system> placeholder that was stripped from the usage string.
if test -z "$1"
then
    echo "Usage: ./getReposList.sh <system>"
    echo "System: windows, linux, freebsd, apple, or android"
    exit 1
fi
15 |
#### URLs queried by each supported system (update servers, NTP, stores)


# Windows
URLs_windows="windowsupdate.microsoft.com update.microsoft.com windowsupdate.com download.windowsupdate.com download.microsoft.com download.windowsupdate.com ntservicepack.microsoft.com time.windows.com javadl-esd.sun.com fpdownload.adobe.com cache.pack.google.com aus2.mozilla.org aus3.mozilla.org aus4.mozilla.org avast.com files.avast.com"

# Linux
URLs_linux="security.ubuntu.com security.debian.org mirrorlist.centos.org 0.rhel.pool.ntp.org 1.rhel.pool.ntp.org 2.rhel.pool.ntp.org ntp.ubuntu.com linux.dropbox.com"

# Android
URLs_android="play.google.com android.clients.google.com"

# Apple (consumed by GET_IP_IOS via the 'apple' option below)
URLs_ios="phobos.apple.com deimos3.apple.com albert.apple.com gs.apple.com itunes.apple.com ax.itunes.apple.com"

# FreeBSD
URLs_bsd="ftp.freebsd.org"
# IP6_to_long ADDRESS
# Echo the fully expanded form of an IPv6 address (8 colon-separated groups
# of 4 lower-case hex digits), e.g. ::1 -> 0000:...:0001. When the expansion
# does not come out to 39 characters, "invalid IPv6 Address" is printed first.
function IP6_to_long()
{
    # Lower-case the hex digits up front.
    INPUT="$(tr 'A-F' 'a-f' <<< "$@")"
    O=""
    # Repeat until a fixed point: left-pad every group to 4 hex digits.
    while [ "$O" != "$INPUT" ]; do
        O="$INPUT"
        # fill all words with zeroes
        INPUT="$( sed 's|:\([0-9a-f]\{3\}\):|:0\1:|g' <<< "$INPUT" )"
        INPUT="$( sed 's|:\([0-9a-f]\{3\}\)$|:0\1|g' <<< "$INPUT")"
        INPUT="$( sed 's|^\([0-9a-f]\{3\}\):|0\1:|g' <<< "$INPUT" )"
        INPUT="$( sed 's|:\([0-9a-f]\{2\}\):|:00\1:|g' <<< "$INPUT")"
        INPUT="$( sed 's|:\([0-9a-f]\{2\}\)$|:00\1|g' <<< "$INPUT")"
        INPUT="$( sed 's|^\([0-9a-f]\{2\}\):|00\1:|g' <<< "$INPUT")"
        INPUT="$( sed 's|:\([0-9a-f]\):|:000\1:|g' <<< "$INPUT")"
        INPUT="$( sed 's|:\([0-9a-f]\)$|:000\1|g' <<< "$INPUT")"
        INPUT="$( sed 's|^\([0-9a-f]\):|000\1:|g' <<< "$INPUT")"
    done
    # now expand the :: by inserting the missing all-zero groups
    ZEROES=""
    grep -qs "::" <<< "$INPUT"
    if [ "$?" -eq 0 ]; then
        # Count existing groups by counting the leftover ':' characters.
        GRPS="$(sed 's|[0-9a-f]||g' <<< "$INPUT" | wc -m)"
        ((GRPS--)) # carriage return
        ((MISSING=8-GRPS))
        for ((i=0;i<$MISSING;i++)); do
            ZEROES="$ZEROES:0000"
        done
        # be careful where to place the :
        INPUT="$( sed 's|\(.\)::\(.\)|\1'$ZEROES':\2|g' <<< "$INPUT")"
        INPUT="$( sed 's|\(.\)::$|\1'$ZEROES':0000|g' <<< "$INPUT")"
        INPUT="$( sed 's|^::\(.\)|'$ZEROES':0000:\1|g;s|^:||g' <<< "$INPUT")"

    fi

    # an expanded address has 39 chars + CR
    if [ $(echo $INPUT | wc -m) != 40 ]; then
        echo "invalid IPv6 Address"
    fi

    # echo the fully expanded version of the address
    echo $INPUT
}
76 |
# GET_IP_WINDOWS
# Resolve every host in $URLs_windows with host(1) and print the unique IPv4
# addresses, followed by the fully expanded form of any IPv6 addresses.
function GET_IP_WINDOWS()
{
    IPs_win=""
    for url in $(echo $URLs_windows)
    do
        # NOTE(review): the '&' backgrounds the echo of the accumulated list
        # while 'host' runs; the substitution captures both outputs, though
        # their order is not guaranteed. Looks accidental but works — confirm
        # before changing.
        IPs_win=`echo $IPs_win & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"`
    done
    echo $IPs_win | sed 's/ /\n/g' | sort -u | grep -v ":"
    IPv6=`echo $IPs_win | sed 's/ /\n/g' | sort -u | grep ":"`
    for ip6 in $(echo $IPv6)
    do
        IP6_to_long $ip6
    done
}
# GET_IP_LINUX
# Resolve every host in $URLs_linux and print unique IPv4 addresses, then the
# expanded form of any IPv6 addresses. Same '&' accumulation quirk as
# GET_IP_WINDOWS.
function GET_IP_LINUX()
{
    IPs_lnx=""
    for url in $(echo $URLs_linux)
    do
        IPs_lnx=`echo $IPs_lnx & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"`
    done
    echo $IPs_lnx | sed 's/ /\n/g' |sort -u | grep -v ":"
    IPv6=`echo $IPs_lnx | sed 's/ /\n/g' | sort -u | grep ":"`
    for ip6 in $(echo $IPv6)
    do
        IP6_to_long $ip6
    done

}
106 |
function GET_IP_ANDROID()
{
    # Resolve every URL in $URLs_android and print the unique IPv4 addresses;
    # IPv6 addresses are printed afterwards, expanded via IP6_to_long.
    IPs_andr=""
    for url in $URLs_android
    do
        # BUG FIX: the original used "echo $IPs_andr & host ...", backgrounding
        # the accumulator echo (stray jobs, nondeterministic capture order).
        IPs_andr="$IPs_andr $(host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]")"
    done
    # IPv4 addresses first (no colon), deduplicated.
    echo $IPs_andr | sed 's/ /\n/g' | sort -u | grep -v ":"
    # Then the IPv6 addresses, fully expanded.
    IPv6=$(echo $IPs_andr | sed 's/ /\n/g' | sort -u | grep ":")
    for ip6 in $IPv6
    do
        IP6_to_long $ip6
    done
}
function GET_IP_IOS()
{
    # Resolve every URL in $URLs_ios and print the unique IPv4 addresses;
    # IPv6 addresses are printed afterwards, expanded via IP6_to_long.
    IPs_ios=""
    for url in $URLs_ios
    do
        # BUG FIX: the original used "echo $IPs_ios & host ...", backgrounding
        # the accumulator echo (stray jobs, nondeterministic capture order).
        IPs_ios="$IPs_ios $(host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]")"
    done
    # IPv4 addresses first (no colon), deduplicated.
    echo $IPs_ios | sed 's/ /\n/g' | sort -u | grep -v ":"
    # Then the IPv6 addresses, fully expanded.
    IPv6=$(echo $IPs_ios | sed 's/ /\n/g' | sort -u | grep ":")
    for ip6 in $IPv6
    do
        IP6_to_long $ip6
    done
}
137 |
138 |
function GET_IP_FREEBSD()
{
    # Resolve every URL in $URLs_bsd and print the unique IPv4 addresses;
    # IPv6 addresses are printed afterwards, expanded via IP6_to_long.
    IPs_bsd=""
    for url in $URLs_bsd
    do
        # BUG FIX: the original used "echo $IPs_bsd & host ...", backgrounding
        # the accumulator echo (stray jobs, nondeterministic capture order).
        IPs_bsd="$IPs_bsd $(host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]")"
    done
    # IPv4 addresses first (no colon), deduplicated.
    echo $IPs_bsd | sed 's/ /\n/g' | sort -u | grep -v ":"
    # Then the IPv6 addresses, fully expanded.
    IPv6=$(echo $IPs_bsd | sed 's/ /\n/g' | sort -u | grep ":")
    for ip6 in $IPv6
    do
        IP6_to_long $ip6
    done
}
153 |
154 |
# Dispatch on the requested OS family.
case $1 in
    windows) GET_IP_WINDOWS;;
    linux)   GET_IP_LINUX;;
    freebsd) GET_IP_FREEBSD;;
    android) GET_IP_ANDROID;;
    apple)   GET_IP_IOS;;
    # BUG FIX: errors belong on stderr and must yield a nonzero exit status
    # (the original printed to stdout and exited 0).
    *) echo "Invalid Option" >&2
       exit 1
       ;;
esac
165 |
--------------------------------------------------------------------------------
/scripts/hz-utils/start-all.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Start the whole Hogzilla stack (Hadoop, HBase, Thrift, pigtail, the Spark
# job, sflow2hz and the daily DB updater), always as the "hogzilla" user.

HADOOP_HOME=/home/hogzilla/hadoop
HBASE_HOME=/home/hogzilla/hbase

# Run a command as the hogzilla user: wrap with su when invoked as root,
# run directly otherwise. (The original duplicated the full command list in
# both branches and detected root with "whoami | grep root", which also
# matched any username merely containing "root".)
run_as_hogzilla() {
    if [ "$(id -u)" -eq 0 ] ; then
        su - hogzilla -c "$1"
    else
        eval "$1"
    fi
}

run_as_hogzilla "$HADOOP_HOME/sbin/start-dfs.sh"
run_as_hogzilla "$HADOOP_HOME/sbin/start-yarn.sh"
run_as_hogzilla "$HBASE_HOME/bin/start-hbase.sh"
run_as_hogzilla "$HBASE_HOME/bin/hbase-daemon.sh start thrift"
run_as_hogzilla "/home/hogzilla/bin/start-pigtail.sh"
run_as_hogzilla "/home/hogzilla/hadoop/bin/hdfs dfsadmin -safemode leave"
run_as_hogzilla "/home/hogzilla/bin/start-hogzilla.sh"
run_as_hogzilla "/home/hogzilla/bin/start-sflow2hz.sh"
run_as_hogzilla "/home/hogzilla/bin/start-dbupdates.sh"
28 |
--------------------------------------------------------------------------------
/scripts/hz-utils/start-dbupdates.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Background loop that refreshes the Hogzilla reputation lists once a day:
# first the C&C blacklist, then one OS-repository list per platform.

BIN="/home/hogzilla/bin"

cd "$BIN"

(
  while true ; do
    # Rebuild the command-and-control blacklist.
    "$BIN/genCnCList.sh" > /tmp/cclist.temp
    php "$BIN/updateReputationList.php" -t blacklist -n CCBotNet -f /tmp/cclist.temp &>/dev/null
    rm -f /tmp/cclist.temp

    # Rebuild the per-OS repository lists.
    for os in windows linux freebsd android apple ; do
      "$BIN/getReposList.sh" $os > /tmp/$os.txt
      php "$BIN/updateReputationList.php" -t $os -n OSRepo -f /tmp/$os.txt &>/dev/null
      rm -f /tmp/$os.txt
    done

    sleep 86400 # daily
  done
)&
22 |
--------------------------------------------------------------------------------
/scripts/hz-utils/start-hogzilla.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Submits the Hogzilla Spark job to YARN in an endless background loop,
# resubmitting every 6 hours. Job output is written to /tmp/hogzilla.log.

# Check the variables below
HBASE_PATH=/home/hogzilla/hbase
HBASE_VERSION="1.2.6"

# Needed by the AuthModule
HOGDIR="/home/hogzilla/hogzilla"
# Extra jars appended to the --jars list (user-agent parser and snakeyaml).
EXTRAJAR=",$HOGDIR/jars/uap-scala_2.10-0.2.1-SNAPSHOT.jar,$HOGDIR/jars/snakeyaml-1.18.jar"
# Config files shipped alongside the job to the executors.
FILES="--files $HOGDIR/conf/sflow.conf,$HOGDIR/conf/auth.conf"



(while : ; do
#cd /home/hogzilla
# Submit with the full set of HBase client jars on the classpath.
# NOTE(review): "--driver-class-path ./$HBASE_PATH/conf/" prepends "./" to an
# absolute path — looks unintended; confirm against a working deployment.
/home/hogzilla/spark/bin/spark-submit \
--master yarn-cluster \
--num-executors 2 \
--driver-memory 1g \
--executor-memory 3g \
--executor-cores 4 \
--jars $HBASE_PATH/lib/hbase-annotations-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-annotations-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-client-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-common-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-common-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-examples-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-hadoop2-compat-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-hadoop-compat-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-it-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-it-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-prefix-tree-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-procedure-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-protocol-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-rest-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-server-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-server-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-shell-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-thrift-$HBASE_VERSION.jar,$HBASE_PATH/lib/htrace-core-3.1.0-incubating.jar,$HBASE_PATH/lib/guava-12.0.1.jar,$HBASE_PATH/lib/metrics-core-2.2.0.jar$EXTRAJAR --driver-class-path ./$HBASE_PATH/conf/ $FILES --class Hogzilla /home/hogzilla/Hogzilla.jar &> /tmp/hogzilla.log &

# NOTE(review): the job is resubmitted after 6 h even if the previous run is
# still active; presumably each run finishes well within that — confirm.
sleep 21600 # 6h

#rm -rf /tmp/hadoop-hogzilla*

done) &
29 |
30 |
--------------------------------------------------------------------------------
/scripts/hz-utils/start-pigtail.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Keep pigtail running in the background forever, relaunching it
# 10 seconds after each exit.
(
  cd /home/hogzilla/pigtail/
  while true ; do
    php /home/hogzilla/pigtail/pigtail.php
    sleep 10
  done
)&
10 |
--------------------------------------------------------------------------------
/scripts/hz-utils/start-sflow2hz.sh:
--------------------------------------------------------------------------------
#!/bin/bash

BINPATH="/home/hogzilla/bin"

# Pipe sFlow datagrams (UDP port 6343) through sflow2hz into the local
# HBase Thrift server; restart the pipeline 5 minutes after it dies.
(
  while true ; do
    sflowtool -p 6343 -l | "$BINPATH/sflow2hz" -h 127.0.0.1 -p 9090 &> /dev/null
    sleep 300
  done
) &>/dev/null &
10 |
--------------------------------------------------------------------------------
/scripts/hz-utils/stop-all.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Stop the whole Hogzilla stack: helper daemons first, then HBase/Thrift,
# then Hadoop.

HADOOP_HOME=/home/hogzilla/hadoop
HBASE_HOME=/home/hogzilla/hbase

# Helper daemons, in the same order as before.
for svc in pigtail hogzilla sflow2hz dbupdates ; do
    /home/hogzilla/bin/stop-$svc.sh
done

$HBASE_HOME/bin/hbase-daemon.sh stop thrift
$HBASE_HOME/bin/stop-hbase.sh
$HADOOP_HOME/sbin/stop-dfs.sh
$HADOOP_HOME/sbin/stop-yarn.sh
15 |
--------------------------------------------------------------------------------
/scripts/hz-utils/stop-dbupdates.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Kill the start-dbupdates.sh background loop.
# xargs -r avoids invoking "kill -9" with no arguments (a usage error)
# when no matching process exists.
ps auxw | grep start-dbupdates.sh | grep -v grep | awk '{print $2}' | xargs -r kill -9
4 |
--------------------------------------------------------------------------------
/scripts/hz-utils/stop-hogzilla.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Kill the start-hogzilla background loop.
# xargs -r avoids invoking "kill -9" with no arguments (a usage error)
# when no matching process exists.
ps auxw | grep start-hogzilla | grep -v grep | awk '{print $2}' | xargs -r kill -9
4 |
--------------------------------------------------------------------------------
/scripts/hz-utils/stop-pigtail.sh:
--------------------------------------------------------------------------------
#!/bin/bash


# Kill the start-pigtail background loop.
# xargs -r avoids invoking "kill -9" with no arguments (a usage error)
# when no matching process exists.
ps auxw | grep start-pigtail | grep -v grep | awk '{print $2}' | xargs -r kill -9
5 |
--------------------------------------------------------------------------------
/scripts/hz-utils/stop-sflow2hz.sh:
--------------------------------------------------------------------------------
#!/bin/bash


# Kill the start-sflow2hz loop plus the sflowtool/sflow2hz pipeline itself.
# xargs -r avoids invoking "kill -9" with no arguments (a usage error)
# when a process is already gone.
ps auxw | grep start-sflow2hz | grep -v grep | awk '{print $2}' | xargs -r kill -9
ps auxw | grep sflowtool | grep -v grep | awk '{print $2}' | xargs -r kill -9
ps auxw | grep sflow2hz | grep -v grep | awk '{print $2}' | xargs -r kill -9
7 |
--------------------------------------------------------------------------------
/scripts/hz-utils/updateReputationList.php:
--------------------------------------------------------------------------------
1 |
5 | *
6 | * This program is free software; you can redistribute it and/or modify
7 | * it under the terms of the GNU General Public License Version 2 as
8 | * published by the Free Software Foundation. You may not use, modify or
9 | * distribute this program under any other version of the GNU General
10 | * Public License.
11 | *
12 | * This program is distributed in the hope that it will be useful,
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | * GNU General Public License for more details.
16 | *
17 | * You should have received a copy of the GNU General Public License
18 | * along with this program; if not, write to the Free Software
19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 | *
21 | * MORE CREDITS
22 | * - Contribute and put your "Name - Contribution" here.
23 | *
24 | * USING THIS SCRIPT
25 | *
26 | * 1. Run
27 | * /usr/bin/php updateReputationList.php list
28 | *
29 | * ATTENTION: This PHP script must run in CLI!
30 | *
31 | * If you have any problems, let us know!
32 | * See how to get help at http://ids-hogzilla.org/post/community/
33 | */
34 |
// Some useful variables
$hbaseHost="localhost"; /* Host or IP of your HBase */
$hbasePort=9090;        /* HBase Thrift server port */

$GLOBALS['THRIFT_ROOT'] = '/usr/share/php';

define("DEBUG",true);

// Thrift stuff
require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/ClassLoader/ThriftClassLoader.php');

$classLoader = new Thrift\ClassLoader\ThriftClassLoader();
$classLoader->registerNamespace('Thrift', $GLOBALS['THRIFT_ROOT']);
$classLoader->register();

require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TSocket.php');
require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TBufferedTransport.php');
require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Protocol/TBinaryProtocol.php');
require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Packages/Hbase/Hbase.php');
require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Packages/Hbase/Types.php');

// Thrift client wired to the local HBase; timeouts are in milliseconds.
$socket = new Thrift\Transport\TSocket($hbaseHost, $hbasePort);
$socket->setSendTimeout(10000);
$socket->setRecvTimeout(20000);
$transport = new Thrift\Transport\TBufferedTransport($socket);
$protocol = new Thrift\Protocol\TBinaryProtocol($transport);
$client = new Hbase\HbaseClient($protocol);


/*
 * BEGIN
 */

// Parse arguments: -t list type, -n list name, -f file with one IP per line.
if(DEBUG) { echo "Parse options\n" ;}
$options = getopt("t:n:f:");
$listType=@$options["t"];
$listName=@$options["n"];
$listFile=@$options["f"];

if(strlen($listType) ==0 || strlen($listName) ==0 || strlen($listFile) ==0 )
{
  echo "Usage: php updateReputationList.php -t ListType -n ListName -f file \n";
  echo "Examples: php updateReputationList.php -t whitelist -n MX -f file_one_ip_per_line.txt \n";
  echo " php updateReputationList.php -t whitelist -n TTalker -f file_one_ip_per_line.txt \n";
  exit(1); // nonzero so callers can detect misuse
}


// Open file
if(DEBUG) { echo "Open file\n" ;}
$fileHandle = fopen($listFile, "r");
if(!$fileHandle) {
  echo "Error opening file $listFile .";
  exit(1); // nonzero so callers can detect the failure
}

// Open connections
if(DEBUG) { echo "Open connection\n" ;}
$transport->open();

// Scan+Filter on HBase: select only the rows belonging to this list.
$filter = array();
$filter[] = "SingleColumnValueFilter('rep', 'list_type', =, 'binary:".$listType."')";
$filter[] = "SingleColumnValueFilter('rep', 'list', =, 'binary:".$listName."')";
$filterString = implode(" AND ", $filter);
$scanFilter = new Hbase\TScan();
$scanFilter->filterString = $filterString;
$scanner = $client->scannerOpenWithScan("hogzilla_reputation", $scanFilter, array());

// Delete rows, iterating, then re-insert the list from the file.
if(DEBUG) { echo "Deleting current list from HBase\n" ;}
try
{
  while (true)
  {
    $row=$client->scannerGet($scanner);
    if(sizeof($row)==0) break;
    if(DEBUG) {
      $values = $row[0]->columns;
      $ip = $values["rep:ip"]->value;
      echo "Deleting $ip from list $listName/$listType\n" ;
    }
    $client->deleteAllRow("hogzilla_reputation", $row[0]->row, array());
  }
  $client->scannerClose($scanner);

  // Iterate file
  while (($ip = fgets($fileHandle)) !== false)
  {
    // Strip CR/LF and surrounding whitespace.
    // BUG FIX: preg_replace() returns the modified string; the original
    // call discarded its result.
    $ip = preg_replace( "/\r|\n/", "", $ip );
    $ip=trim($ip);
    // Create one mutation per column of the reputation row.
    $mutations = array();
    $dataIP = array(
      'column' => "rep:ip",
      'value' => $ip
    );
    $dataListName = array('column' => "rep:list", 'value' => $listName );
    $dataListType = array('column' => "rep:list_type", 'value' => $listType );
    $dataListDesc = array('column' => "rep:description", 'value' => "" );
    $mutations[] = new Hbase\Mutation($dataIP);
    $mutations[] = new Hbase\Mutation($dataListName);
    $mutations[] = new Hbase\Mutation($dataListType);
    $mutations[] = new Hbase\Mutation($dataListDesc);
    // Insert mutations; the row key encodes IP, list name and type.
    $client->mutateRow("hogzilla_reputation", $ip."-".$listName."-".$listType, $mutations, array());
  }
} catch(Exception $e)
{
  echo 'ERROR: ', $e->getMessage(), "\n";
}

// Close file
fclose($fileHandle);

// Close connections (HBase)
$transport->close();

?>
156 |
--------------------------------------------------------------------------------
/scripts/myFuncs:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
function package_install_cmd {
    # Ensure a package is installed, using an arbitrary probe command:
    #   $1 PKGCMD  - command whose success means "already installed"
    #   $2 PKGNAME - apt package to install when the probe fails
    #   $3 PKGDESC - human-readable description for the status messages
    # Dies when the probe still fails after installation.
    PKGCMD=$1
    PKGNAME=$2
    PKGDESC=$3

    if $PKGCMD &>/dev/null ; then
        msg_ok "$PKGDESC installed."
    else
        msg_info "$PKGDESC not installed. Installing now..."
        apt-get --force-yes -y install $PKGNAME
        # Probe again to confirm the installation actually worked.
        if $PKGCMD &>/dev/null ; then
            msg_ok "$PKGDESC installed."
        else
            msg_fail "$PKGDESC installation failed!"
            die 1 "I could NOT install $PKGDESC. Check your sources.list and/or Internet access or try to do it manually!"
        fi
    fi
}
23 |
function package_install {
    # Ensure a Debian package is installed:
    #   $1 PKGNAME - apt package name
    #   $2 PKGDESC - human-readable description for the status messages
    # Dies when the package is still missing after installation.
    PKGNAME=$1
    PKGDESC=$2

    dpkg -l | awk '{print $2}' | grep ^$PKGNAME$ &>/dev/null
    if [ $? -gt 0 ] ; then
        msg_info "$PKGDESC not installed. Installing now..."
        apt-get --force-yes -y install $PKGNAME
        # BUG FIX: the post-install check used an unanchored "grep $PKGNAME",
        # so any package merely containing the name as a substring counted
        # as success. Anchor it like the pre-check above.
        dpkg -l | awk '{print $2}' | grep ^$PKGNAME$ &>/dev/null
        if [ $? -eq 0 ] ; then
            msg_ok "$PKGDESC installed."
        else
            msg_fail "$PKGDESC installation failed!"
            die 1 "I could NOT install $PKGDESC. Check your Internet access or try to do it manually!"
        fi
    else
        msg_ok "$PKGDESC installed."
    fi
}
43 |
function cmd_if_0_info
{
    # Run $2 (via cmd) when the check command $1 succeeds;
    # otherwise just log $3 as an informational message.
    CMDCHECK=$1
    CMD=$2
    ELSEMSG=$3

    if eval $CMDCHECK &>/dev/null ; then
        cmd "$CMD"
    else
        msg_info "$ELSEMSG"
    fi
}
57 |
function cmd_if_n0_info
{
    # Run $2 (via cmd) when the check command $1 FAILS;
    # otherwise just log $3 as an informational message.
    CMDCHECK=$1
    CMD=$2
    ELSEMSG=$3

    if ! eval $CMDCHECK &>/dev/null ; then
        cmd "$CMD"
    else
        msg_info "$ELSEMSG"
    fi
}
71 |
--------------------------------------------------------------------------------
/scripts/run.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Package the compiled classes in ../bin into /tmp/Hogzilla.jar.
# BUG FIX: guard the cd — if ../bin is missing, the original went on to
# jar up whatever happened to be in the current directory.
cd ../bin || { echo "../bin not found" >&2; exit 1; }
jar -cf /tmp/Hogzilla.jar *
5 |
--------------------------------------------------------------------------------
/src/Hogzilla.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | import org.apache.spark.SparkConf
21 | import org.apache.spark.SparkContext
22 | import org.hogzilla.hbase.HogHBaseRDD
23 | import org.hogzilla.initiate.HogInitiate
24 | import org.hogzilla.prepare.HogPrepare
25 | import org.hogzilla.sflow._
26 | import org.hogzilla.http.HogHTTP
27 | import org.hogzilla.auth.HogAuth
28 | import org.hogzilla.dns.HogDNS
29 | import org.hogzilla.snort.HogSnort
30 |
31 | /**
32 | *
33 | * Keep it useful, simple, robust, and scalable.
34 | *
35 | *
36 | */
object Hogzilla {

  def main(args: Array[String])
  {
    // Shared Spark configuration for the batch analysis job.
    val conf = new SparkConf()
      .setAppName("Hogzilla")
      .set("spark.executor.memory", "1g")
      .set("spark.default.parallelism", "160") // 160

    val sc = new SparkContext(conf)

    // Connect to HBase and initialize the Hogzilla tables.
    val flowRDD = HogHBaseRDD.connect(sc)
    HogInitiate.initiate(sc)

    // Pre-process the raw flow data.
    HogPrepare.prepare(flowRDD)

    // General module
    HogSnort.run(flowRDD, sc)

    // Run algorithms for DNS protocol
    HogDNS.run(flowRDD, sc)

    // Run algorithms for HTTP protocol
    HogHTTP.run(flowRDD, sc)

    // Run algorithms for SMTP protocol
    //HogSMTP.run(HogRDD);

    // ============================ Run algorithms for SFlows ============================

    val sflowRDD = HogHBaseRDD.connectSFlow(sc)
    HogSFlow.run(sflowRDD, sc)

    val histogramRDD = HogHBaseRDD.connectHistograms(sc)
    HogSFlowHistograms.run(histogramRDD, sc)

    // Use continuous mode
    //val HogRDDAuth = HogHBaseRDD.connectAuth(spark);
    //HogAuth.run(HogRDDAuth,spark);

    // Stop Spark
    sc.stop()

    // Close the HBase Connection
    HogHBaseRDD.close()
  }

}
--------------------------------------------------------------------------------
/src/HogzillaContinuous.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | import org.apache.spark.SparkConf
21 | import org.apache.spark.SparkContext
22 | import org.hogzilla.hbase.HogHBaseRDD
23 | import org.hogzilla.initiate.HogInitiate
24 | import org.hogzilla.prepare.HogPrepare
25 | import org.hogzilla.sflow._
26 | import org.hogzilla.http.HogHTTP
27 | import org.hogzilla.auth.HogAuth
28 | import org.hogzilla.dns.HogDNS
29 | import org.apache.hadoop.hbase.client.Delete
30 | import scala.concurrent.Await
31 |
32 | /**
33 | *
34 | * Keep it useful, simple, robust, and scalable.
35 | *
36 | *
37 | */
object HogzillaContinuous {

  def main(args: Array[String])
  {
    // Lightweight configuration: this job runs continuously in small batches.
    val conf = new SparkConf()
      .setAppName("HogzillaContinuous")
      .set("spark.executor.memory", "512m")
      .set("spark.default.parallelism", "16")

    val sc = new SparkContext(conf)

    // Get the HBase RDD
    val flowRDD = HogHBaseRDD.connect(sc)

    // Poll the auth table forever, processing (and deleting) what was read.
    while (true) {
      val authRDD = HogHBaseRDD.connectAuth(sc)
      HogAuth.runDeleting(authRDD, sc)
      Thread.sleep(10000) // 10s
    }

    // Unreachable: the loop above never terminates.
    sc.stop()
    HogHBaseRDD.close()
  }

}
--------------------------------------------------------------------------------
/src/HogzillaStream.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | import org.apache.spark.SparkConf
21 | import org.apache.spark.SparkContext
22 | import org.hogzilla.hbase.HogHBaseRDD
23 | import org.hogzilla.initiate.HogInitiate
24 | import org.hogzilla.prepare.HogPrepare
25 | import org.hogzilla.sflow._
26 | import org.hogzilla.http.HogHTTP
27 | import org.hogzilla.auth.HogAuth
28 | import org.hogzilla.dns.HogDNS
29 | import org.apache.spark.streaming.Seconds
30 | import org.apache.spark.streaming.StreamingContext
31 | import org.apache.spark.storage.StorageLevel
32 |
33 | /**
34 | *
35 | * Keep it useful, simple, robust, and scalable.
36 | *
37 | * NOT RUNNING! DEPENDS ON IMPLEMENTATION ON AUTH2HZ!
38 | *
39 | */
object HogzillaStream {

  def main(args: Array[String])
  {
    val sparkConf = new SparkConf()
      .setAppName("HogzillaStream")
      .setMaster("local[2]")
      .set("spark.executor.memory", "512m")
      .set("spark.default.parallelism", "16") // 160

    val ssc = new StreamingContext(sparkConf, Seconds(1))
    // BUG FIX: reuse the SparkContext owned by the StreamingContext.
    // The original built a second SparkContext from the same conf, but only
    // one SparkContext may be active per JVM, so startup failed.
    val spark = ssc.sparkContext

    // Get the HBase RDD
    val HogRDD = HogHBaseRDD.connect(spark);

    val lines = ssc.socketTextStream("localhost", 9999,StorageLevel.MEMORY_AND_DISK_SER)

    val HogRDDAuth = HogHBaseRDD.connectAuth(spark);
    HogAuth.run(HogRDDAuth,spark);

    // Word-count placeholder over the socket stream.
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
    wordCounts.print()


    ssc.start()
    ssc.awaitTermination()


    // Stop Spark (reached only after the streaming context terminates)
    spark.stop()

    // Close the HBase Connection
    HogHBaseRDD.close();

  }

}
--------------------------------------------------------------------------------
/src/org/hogzilla/auth/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
/** Package object for the auth module; currently holds no shared members. */
package object auth {

}
28 |
--------------------------------------------------------------------------------
/src/org/hogzilla/cluster/HogClusterMember.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.cluster
21 |
22 | import org.apache.spark.mllib.linalg.Vector
23 |
24 |
25 | /** clusterIdx,centroidMain,clusterSize,members.filter(_._1.equals(clusterIdx)).map({_._2})
26 | * @author pa
27 | */
/** clusterIdx,centroidMain,clusterSize,members.filter(_._1.equals(clusterIdx)).map({_._2})
 *
 * One member of a behavior cluster: the cluster it belongs to, the cluster
 * centroid/size, and this member's own IP, ports, frequency vector and
 * distance to the centroid.
 * @author pa
 */
case class HogClusterMember(clusterIdx:Int, centroid:List[(Long,Double)], clusterSize:Long, allKeys:List[Long],
                 memberIP:String, ports:Set[Long], frequency_vector:List[(Long,Double)], distance:Double)
{

  /** Headline used when reporting this member's cluster. */
  def formatTitle:String = s"Group information for $memberIP"

}
--------------------------------------------------------------------------------
/src/org/hogzilla/cluster/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
/** Package object for the cluster module; currently holds no shared members. */
package object cluster {

}
--------------------------------------------------------------------------------
/src/org/hogzilla/dns/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
/** Package object for the dns module; currently holds no shared members. */
package object dns {

}
--------------------------------------------------------------------------------
/src/org/hogzilla/event/HogEvent.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.event
21 |
22 | import java.util.HashMap
23 | import java.util.Map
24 | import org.apache.hadoop.hbase.client.Put
25 | import org.apache.hadoop.hbase.util.Bytes
26 | import org.hogzilla.hbase.HogHBaseRDD
27 | import org.hogzilla.util.HogFlow
28 | import java.net.InetAddress
29 |
30 |
/**
 * An alert/event generated by a detection module for a given flow.
 * The fields below are filled in by the caller before alert() persists
 * the event into the hogzilla_events HBase table.
 */
class HogEvent(flow:HogFlow)
{
  var sensorid:Int=0
  var signature_id:Double=0
  var priorityid:Int=0
  var text:String=""           // free-text note stored in event:note
  var data:Map[String,String]=new HashMap()
  var ports:String=""
  var title:String=""
  var username:String=""       // optional; written only when non-empty
  var coords:String=""         // optional; written only when non-empty


  /**
   * Convert a textual IP into the 4-byte form stored in the event row.
   * IPv6 addresses map to the sentinel 255.255.6.6 (Snorby has no IPv6
   * support) and unresolvable input maps to 255.255.1.1.
   */
  def formatIPtoBytes(ip:String):Array[Byte] =
  {
    try {
      // Eca! Snorby doesn't support IPv6 yet. See https://github.com/Snorby/snorby/issues/65
      if(ip.contains(":"))
        InetAddress.getByName("255.255.6.6").getAddress
      else
        InetAddress.getByName(ip).getAddress
    } catch {
      case t: Throwable =>
        // Bogus address!
        InetAddress.getByName("255.255.1.1").getAddress
    }

  }


  /**
   * Persist this event as a row of the hogzilla_events table, keyed by the
   * flow id. IPs are stored both in binary (4-byte) and string form;
   * username/coords columns are only added when non-empty.
   */
  def alert()
  {
    val put = new Put(Bytes.toBytes(flow.get("flow:id")))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("note"), Bytes.toBytes(text))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip"), formatIPtoBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip"), formatIPtoBytes(flow.upper_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip_str"), Bytes.toBytes(flow.lower_ip))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip_str"), Bytes.toBytes(flow.upper_ip))
    // signature_id is stored as its integer string representation
    put.add(Bytes.toBytes("event"), Bytes.toBytes("signature_id"), Bytes.toBytes("%.0f".format(signature_id)))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("time"), Bytes.toBytes(System.currentTimeMillis))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("ports"), Bytes.toBytes(ports))
    put.add(Bytes.toBytes("event"), Bytes.toBytes("title"), Bytes.toBytes(title))

    if(!username.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("username"), Bytes.toBytes(username))
    if(!coords.equals(""))
      put.add(Bytes.toBytes("event"), Bytes.toBytes("coords"), Bytes.toBytes(coords))

    HogHBaseRDD.hogzilla_events.put(put)

    //println(f"ALERT: $text%100s\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
  }
}
84 |
85 |
--------------------------------------------------------------------------------
/src/org/hogzilla/event/HogSignature.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.event
21 |
22 | import org.hogzilla.hbase.HogHBaseRDD
23 | import org.apache.hadoop.hbase.util.Bytes
24 | import org.apache.hadoop.hbase.client.Get
25 | import org.apache.hadoop.hbase.client.Put
26 |
27 |
28 |
29 | /**
30 | * @author pa
31 | */
/**
 * An IDS signature definition.
 * Example: 3,"HZ: Suspicious DNS flow identified by K-Means clustering",2,1,826000001,826
 */
case class HogSignature(signature_class:Int, signature_name:String, signature_priority:Int, signature_revision:Int, signature_id:Double,signature_group_id:Int) {

  /**
   * Insert this signature into the "hogzilla_signatures" HBase table if it
   * is not already present. The row key is the signature id rendered
   * without decimals ("%.0f").
   *
   * @return this signature, so the call can be chained at definition site
   */
  def saveHBase():HogSignature =
  {
    // Row key shared by the existence check and the insert.
    val rowKey = Bytes.toBytes("%.0f".format(signature_id))
    val get = new Get(rowKey)

    // Bug fix: the existence check previously queried hogzilla_sensor,
    // which never holds signature row keys, so the guard never matched and
    // the signature row was rewritten on every call. Check the signatures
    // table itself.
    if(!HogHBaseRDD.hogzilla_signatures.exists(get))
    {
      val put = new Put(rowKey)
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("id"), Bytes.toBytes("%.0f".format(signature_id)))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("class"), Bytes.toBytes(signature_class.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("name"), Bytes.toBytes(signature_name))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("priority"), Bytes.toBytes(signature_priority.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("revision"), Bytes.toBytes(signature_revision.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("group_id"), Bytes.toBytes(signature_group_id.toString()))
      HogHBaseRDD.hogzilla_signatures.put(put)
    }

    this
  }
}
--------------------------------------------------------------------------------
/src/org/hogzilla/event/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
package object event {
  // Intentionally empty: reserved for package-wide helpers and constants of
  // org.hogzilla.event (HogEvent, HogSignature).
}
--------------------------------------------------------------------------------
/src/org/hogzilla/hbase/HogHBaseCluster.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.hbase
21 |
22 | import org.apache.hadoop.hbase.client.Put
23 | import org.apache.hadoop.hbase.util.Bytes
24 | import org.apache.spark.rdd.RDD
25 | import org.apache.spark.mllib.linalg.Vector
26 | import org.apache.hadoop.hbase.client.Get
27 | import org.apache.hadoop.hbase.client.Delete
28 | import org.hogzilla.cluster.HogClusterMember
29 |
30 |
/**
 * Persistence helpers for clustering results: whole clusters live in the
 * "hogzilla_clusters" table and per-IP memberships in
 * "hogzilla_cluster_members".
 */
object HogHBaseCluster {

  /**
   * Render a cluster centroid as a readable title. Ports with a rate of at
   * least ~5% are listed as "port:NN%"; ports between ~1% and 5% are
   * appended in parentheses with a "> 1%" suffix.
   */
  def formatClusterTitle(clusterCentroid: List[(Long,Double)], clusterIdx:Int):String =
  {
    val dominantPorts =
      clusterCentroid
        .filter({case (port,rate) => rate > 4.999})
        .map({case (port,rate) => port.toString()+":"+"%.0f".format(rate)+"%"})

    val mainTitle = "Group "+clusterIdx.toString+" - "+dominantPorts.mkString(", ")

    val onePercentList =
      clusterCentroid
        .filter({case (port,rate) => .9999 < rate & rate < 5})

    if(onePercentList.isEmpty)
      mainTitle
    else
      mainTitle+", "+
        onePercentList
          .map({case (port,rate) => port.toString()})
          .mkString("(",", ",")"+"> 1%")
  }

  /** Remove a cluster row, keyed by its index. */
  def deleteCluster(clusterIdx:Int)=
  {
    HogHBaseRDD.hogzilla_clusters.delete(new Delete(Bytes.toBytes(clusterIdx.toString)))
  }


  /** Remove a cluster-member row, keyed by the member's IP. */
  def deleteClusterMember(memberIP:String)=
  {
    HogHBaseRDD.hogzilla_cluster_members.delete(new Delete(Bytes.toBytes(memberIP)))
  }

  /** Write one cluster (title, size, centroid, member list) keyed by index. */
  def saveCluster(clusterIdx:Int, clusterCentroid:List[(Long,Double)], clusterSize: Long, members:Array[String]) = {

    val infoFamily = Bytes.toBytes("info")
    val row = new Put(Bytes.toBytes(clusterIdx.toString))

    row.add(infoFamily, Bytes.toBytes("title"),    Bytes.toBytes(formatClusterTitle(clusterCentroid,clusterIdx)))
    row.add(infoFamily, Bytes.toBytes("size"),     Bytes.toBytes(clusterSize.toString))
    row.add(infoFamily, Bytes.toBytes("centroid"), Bytes.toBytes(clusterCentroid.mkString("[",",","]")))
    row.add(infoFamily, Bytes.toBytes("members"),  Bytes.toBytes(members.mkString(",")))

    HogHBaseRDD.hogzilla_clusters.put(row)
  }

  /**
   * Write one member's row: its own title/IP/distance plus the cluster's
   * size, centroid and description, and the member's per-port frequencies.
   */
  def saveClusterMember(clusterMember:HogClusterMember) = {

    // "port=NN%" entries for the ports this member actually uses.
    val frequencies =
      clusterMember.frequency_vector
        .filter({case (port,freq) => clusterMember.ports.contains(port)})
        .map({case (port,freq) => port.toString+"="+"%.0f".format(freq)+"%"})
        .mkString(""," ","")

    val row = new Put(Bytes.toBytes(clusterMember.memberIP.toString))

    row.add(Bytes.toBytes("info"),    Bytes.toBytes("title"),       Bytes.toBytes(clusterMember.formatTitle))
    row.add(Bytes.toBytes("cluster"), Bytes.toBytes("size"),        Bytes.toBytes(clusterMember.clusterSize.toString))
    row.add(Bytes.toBytes("cluster"), Bytes.toBytes("centroid"),    Bytes.toBytes(clusterMember.centroid.mkString("[",",","]")))
    row.add(Bytes.toBytes("cluster"), Bytes.toBytes("idx"),         Bytes.toBytes(clusterMember.clusterIdx.toString))
    row.add(Bytes.toBytes("cluster"), Bytes.toBytes("description"), Bytes.toBytes(formatClusterTitle(clusterMember.centroid,clusterMember.clusterIdx)))
    row.add(Bytes.toBytes("member"),  Bytes.toBytes("ports"),       Bytes.toBytes("TCP: "+clusterMember.ports.mkString(""," ","")))
    row.add(Bytes.toBytes("member"),  Bytes.toBytes("frequencies"), Bytes.toBytes("TCP: "+frequencies))
    row.add(Bytes.toBytes("member"),  Bytes.toBytes("ip"),          Bytes.toBytes(clusterMember.memberIP))
    row.add(Bytes.toBytes("member"),  Bytes.toBytes("distance"),    Bytes.toBytes("%.2f".format(clusterMember.distance)))

    HogHBaseRDD.hogzilla_cluster_members.put(row)
  }


}
--------------------------------------------------------------------------------
/src/org/hogzilla/hbase/HogHBaseHistogram.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.hbase
21 |
22 | import scala.collection.mutable.HashMap
23 | import scala.collection.mutable.Map
24 | import scala.collection.mutable.Set
25 | import org.apache.hadoop.hbase.Cell
26 | import org.apache.hadoop.hbase.CellUtil
27 | import org.apache.hadoop.hbase.HBaseConfiguration
28 | import org.apache.hadoop.hbase.client.Delete
29 | import org.apache.hadoop.hbase.client.Get
30 | import org.apache.hadoop.hbase.client.Put
31 | import org.apache.hadoop.hbase.client.Result
32 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat
33 | import org.apache.hadoop.hbase.util.Bytes
34 | import org.apache.spark.SparkContext
35 | import org.hogzilla.histogram.HogHistogram
36 | import org.hogzilla.histogram.Histograms
37 | import org.apache.commons.lang3.StringUtils
38 |
39 |
40 |
/**
 * Read/write helpers for the "hogzilla_histograms" HBase table. Each row is
 * one histogram: family "values" holds the numeric bins, family "labels"
 * optional bin labels, and family "info" the name and size.
 */
object HogHBaseHistogram {

  /**
   * Convert an HBase Result into a pair of maps: qualifier -> bin value
   * (family "values") and qualifier -> label (family "labels"). Cells from
   * any other family are ignored.
   */
  def mapByResult(result:Result):(HashMap[String,Double],HashMap[String,String]) =
  {

    val map=new HashMap[String,Double]
    val mapLabels=new HashMap[String,String]

    if(!result.isEmpty())
    {
      val it = result.listCells().iterator()
      while(it.hasNext())
      {
        val cell = it.next()

        val column          = new String(CellUtil.cloneFamily(cell))
        val columnQualifier = new String(CellUtil.cloneQualifier(cell))
        val value           = new String(CellUtil.cloneValue(cell))

        if(column.equals("values"))
          map.put(columnQualifier,value.toDouble)
        else if (column.equals("labels"))
          mapLabels.put(columnQualifier,value)
      }
    }

    (map,mapLabels)
  }

  /**
   * Load a histogram by row key. A missing row yields an empty histogram of
   * size 0; a missing or empty "info:size" cell yields size 0 with the bins
   * that were found.
   */
  def getHistogram(histName:String):HogHistogram =
  {

    val get1 = new Get(Bytes.toBytes(histName))

    val result = HogHBaseRDD.hogzilla_histograms.get(get1)
    val (map,mapLabels) = mapByResult(result)

    if(map.isEmpty)
    {
      new HogHistogram(histName,0,map,mapLabels)
    }
    else
    {
      val sizeArray = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("size"))
      // Bug fix: getValue returns null when the column is absent; the old
      // code only handled an empty array and would NPE on null.
      if(sizeArray==null || sizeArray.length==0)
        new HogHistogram(histName,0L,map,mapLabels)
      else
        new HogHistogram(histName,Bytes.toString(sizeArray).toLong,map,mapLabels)
    }
  }


  /**
   * Persist a histogram: delete the existing row first (so bins removed
   * from the in-memory histogram do not linger), then write name, size,
   * all bin values and, when present, their labels. Labels are
   * accent-stripped and truncated to 50 characters.
   */
  def saveHistogram(hogHist:HogHistogram) =
  {
    val (histName,size,map,mapLabels) = (hogHist.histName, hogHist.histSize, hogHist.histMap, hogHist.histLabels)

    val put = new Put(Bytes.toBytes(histName))

    put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(histName))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("size"), Bytes.toBytes(hogHist.histSize.toString()))

    // Idiom fix: the original used a ./:(0) fold purely for side effects;
    // foreach states the intent directly.
    map.foreach({ case (port,weight) =>
      put.add(Bytes.toBytes("values"), Bytes.toBytes(port), Bytes.toBytes(weight.toString())) })

    if(mapLabels!=null)
      mapLabels.foreach({ case (key,label) =>
        put.add(Bytes.toBytes("labels"), Bytes.toBytes(key), Bytes.toBytes(StringUtils.stripAccents(label).take(50))) })


    HogHBaseRDD.hogzilla_histograms.delete(new Delete(put.getRow))

    try {
      HogHBaseRDD.hogzilla_histograms.put(put)
    } catch {
      case t: Throwable =>
        // Best-effort diagnostics: dump the histogram that failed to save.
        t.printStackTrace()
        println(hogHist.histName)
        hogHist.histLabels.foreach(println(_))
        hogHist.histMap.foreach({case (key,map) => println(key+" => "+map.toString)})
    }

  }


  /**
   * Scan all histograms and return the set of IPs whose bin for filterPort
   * exceeds Histograms.atypicalThreshold. Ex: FTP Servers.
   */
  def getIPListHIST01(spark: SparkContext,filterPort:String):scala.collection.immutable.Set[String] =
  {
    val table = "hogzilla_histograms"
    val conf = HBaseConfiguration.create()

    conf.set(TableInputFormat.INPUT_TABLE, table)
    conf.set("zookeeper.session.timeout", "600000")
    conf.setInt("hbase.client.scanner.timeout.period", 600000)


    val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

    hBaseRDD
      .map ({ case (id,result) =>
        val port = Bytes.toString(result.getValue(Bytes.toBytes("values"),Bytes.toBytes(filterPort)))
        val name = Bytes.toString(result.getValue(Bytes.toBytes("info"),Bytes.toBytes("name")))
        val size = Bytes.toString(result.getValue(Bytes.toBytes("info"),Bytes.toBytes("size")))
        // Rows lacking the requested port bin contribute a rate of 0 and are
        // filtered out below.
        if(port==null || port.isEmpty())
          (Histograms.getIPFromHistName(name),size,0D)
        else
          (Histograms.getIPFromHistName(name),size,port.toDouble)
      })
      .filter({case (ip,size,port) => port > Histograms.atypicalThreshold})
      .map({case (ip,size,port) => ip})
      .collect
      .toSet
  }

}
--------------------------------------------------------------------------------
/src/org/hogzilla/hbase/HogHBaseInventory.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.hbase
21 |
22 | import org.apache.hadoop.hbase.client.Put
23 | import org.apache.hadoop.hbase.util.Bytes
24 | import org.apache.spark.rdd.RDD
25 | import org.apache.spark.mllib.linalg.Vector
26 | import org.apache.hadoop.hbase.client.Get
27 | import org.apache.hadoop.hbase.client.Delete
28 | import org.hogzilla.cluster.HogClusterMember
29 |
30 |
/**
 * Persistence helpers for the "hogzilla_inventory" HBase table, which
 * records an operating-system guess per IP address.
 */
object HogHBaseInventory {


  /**
   * Delete an inventory row keyed by myIP.toString.
   *
   * NOTE(review): saveInventory keys rows as "<ip>-<os>" (a String), while
   * this method takes an Int and uses it directly as the row key, so the
   * two do not appear to address the same rows — confirm intended usage
   * against callers before relying on this delete.
   */
  def deleteInventory(myIP:Int)=
  {
    val del = new Delete(Bytes.toBytes(myIP.toString))
    HogHBaseRDD.hogzilla_inventory.delete(del)
  }

  /**
   * Upsert one inventory row keyed by "<myIP>-<opSystem>", storing the IP
   * and detected OS in the "info" family. Using the OS in the key lets one
   * IP hold several OS observations side by side.
   */
  def saveInventory(myIP:String, opSystem:String) = {


    val put = new Put(Bytes.toBytes(myIP+"-"+opSystem))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes("Inventory information for "+myIP))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("ip"), Bytes.toBytes(myIP))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("OS"), Bytes.toBytes(opSystem))

    HogHBaseRDD.hogzilla_inventory.put(put)
  }



}
--------------------------------------------------------------------------------
/src/org/hogzilla/hbase/HogHBaseRDD.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.hbase
21 |
22 |
23 | /**
24 | * @author pa
25 | */
26 |
27 | import scala.math.random
28 | import java.lang.Math
29 | import org.apache.spark._
30 | import org.apache.hadoop.hbase.client.HBaseAdmin
31 | import org.apache.hadoop.hbase.util.Bytes
32 | import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
33 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat
34 | import org.apache.spark.mllib.regression.{LabeledPoint,LinearRegressionModel,LinearRegressionWithSGD}
35 | import org.apache.spark.mllib.linalg.Vectors
36 | import org.apache.spark.rdd.RDD
37 | import org.apache.hadoop.hbase.client.HTable
38 | import org.hogzilla.util.HogFeature
39 | import scala.collection.mutable.HashSet
40 |
41 |
42 | object HogHBaseRDD {
43 |
44 | val conf = HBaseConfiguration.create()
45 | val admin = new HBaseAdmin(conf)
46 | val columns = new HashSet()++
47 | List(
48 | new HogFeature("flow:first_seen","u_int64_t",false),
49 | new HogFeature("flow:bittorent_hash","char",false),
50 | new HogFeature("flow:info","char",false),
51 | new HogFeature("flow:host_server_name","char",false),
52 | new HogFeature("flow:ssh_ssl_client_info","char",false),
53 | new HogFeature("flow:ssh_ssl_server_info","char",false),
54 | new HogFeature("flow:src_ip","u_int32_t",false),
55 | new HogFeature("flow:dst_ip","u_int32_t",false),
56 | new HogFeature("flow:src_port","u_int16_t",false),
57 | new HogFeature("flow:dst_port","u_int16_t",false),
58 | new HogFeature("flow:protocol","char",true,false),
59 | // new HogFeature("flow:bidirectional","u_int8_t"),
60 | new HogFeature("flow:src_name","char",false),
61 | new HogFeature("flow:dst_name","char",false),
62 | new HogFeature("flow:bytes","u_int64_t"),
63 | new HogFeature("flow:packets","u_int32_t"),
64 | new HogFeature("flow:payload_bytes","u_int64_t"),
65 | new HogFeature("flow:packets_without_payload","u_int32_t"),
66 | new HogFeature("flow:payload_bytes_first","u_int32_t"),
67 | new HogFeature("flow:flow_duration","u_int64_t"),
68 | new HogFeature("flow:flow_use_time","u_int64_t"),
69 | new HogFeature("flow:flow_idle_time","u_int64_t"),
70 | new HogFeature("flow:src2dst_pay_bytes","u_int64_t"),
71 | new HogFeature("flow:dst2src_pay_bytes","u_int64_t"),
72 | new HogFeature("flow:src2dst_header_bytes","u_int64_t"),
73 | new HogFeature("flow:dst2src_header_bytes","u_int64_t"),
74 | new HogFeature("flow:src2dst_packets","u_int32_t"),
75 | new HogFeature("flow:dst2src_packets","u_int32_t"),
76 | new HogFeature("flow:src2dst_inter_time_avg","u_int64_t"),
77 | new HogFeature("flow:src2dst_inter_time_min","u_int64_t"),
78 | new HogFeature("flow:src2dst_inter_time_max","u_int64_t"),
79 | new HogFeature("flow:src2dst_inter_time_std","u_int64_t"),
80 | new HogFeature("flow:dst2src_inter_time_avg","u_int64_t"),
81 | new HogFeature("flow:dst2src_inter_time_min","u_int64_t"),
82 | new HogFeature("flow:dst2src_inter_time_max","u_int64_t"),
83 | new HogFeature("flow:dst2src_inter_time_std","u_int64_t"),
84 | new HogFeature("flow:src2dst_pay_bytes_avg","u_int64_t"),
85 | new HogFeature("flow:src2dst_pay_bytes_min","u_int64_t"),
86 | new HogFeature("flow:src2dst_pay_bytes_max","u_int64_t"),
87 | new HogFeature("flow:src2dst_pay_bytes_std","u_int64_t"),
88 | new HogFeature("flow:dst2src_pay_bytes_avg","u_int64_t"),
89 | new HogFeature("flow:dst2src_pay_bytes_min","u_int64_t"),
90 | new HogFeature("flow:dst2src_pay_bytes_max","u_int64_t"),
91 | new HogFeature("flow:dst2src_pay_bytes_std","u_int64_t"),
92 | new HogFeature("flow:dst2src_pay_bytes_rate","u_int64_t"),
93 | new HogFeature("flow:src2dst_pay_bytes_rate","u_int64_t"),
94 | new HogFeature("flow:dst2src_packets_rate","u_int64_t"),
95 | new HogFeature("flow:src2dst_packets_rate","u_int64_t"),
96 | new HogFeature("flow:inter_time_avg","u_int64_t"),
97 | new HogFeature("flow:inter_time_min","u_int64_t"),
98 | new HogFeature("flow:inter_time_max","u_int64_t"),
99 | new HogFeature("flow:inter_time_std","u_int64_t"),
100 | new HogFeature("flow:payload_bytes_avg","u_int64_t"),
101 | new HogFeature("flow:payload_bytes_std","u_int64_t"),
102 | new HogFeature("flow:payload_bytes_min","u_int64_t"),
103 | new HogFeature("flow:payload_bytes_max","u_int64_t"),
104 | new HogFeature("flow:src2dst_header_bytes_avg","u_int64_t"),
105 | new HogFeature("flow:src2dst_header_bytes_min","u_int64_t"),
106 | new HogFeature("flow:src2dst_header_bytes_max","u_int64_t"),
107 | new HogFeature("flow:src2dst_header_bytes_std","u_int64_t"),
108 | new HogFeature("flow:dst2src_header_bytes_avg","u_int64_t"),
109 | new HogFeature("flow:dst2src_header_bytes_min","u_int64_t"),
110 | new HogFeature("flow:dst2src_header_bytes_max","u_int64_t"),
111 | new HogFeature("flow:dst2src_header_bytes_std","u_int64_t"),
112 | new HogFeature("flow:packets_syn","u_int32_t"),
113 | new HogFeature("flow:packets_ack","u_int32_t"),
114 | new HogFeature("flow:packets_fin","u_int32_t"),
115 | new HogFeature("flow:packets_rst","u_int32_t"),
116 | new HogFeature("flow:packets_psh","u_int32_t"),
117 | new HogFeature("flow:packets_urg","u_int32_t"),
118 | new HogFeature("flow:tcp_retransmissions","u_int32_t"),
119 | // new HogFeature("flow:payload_size_variation","u_int32_t"),
120 | new HogFeature("flow:C_number_of_contacts","u_int32_t"),
121 | new HogFeature("flow:C_src2dst_pay_bytes_avg","u_int64_t"),
122 | new HogFeature("flow:C_src2dst_pay_bytes_min","u_int64_t"),
123 | new HogFeature("flow:C_src2dst_pay_bytes_max","u_int64_t"),
124 | new HogFeature("flow:C_src2dst_pay_bytes_std","u_int64_t"),
125 | new HogFeature("flow:C_src2dst_header_bytes_avg","u_int64_t"),
126 | new HogFeature("flow:C_src2dst_header_bytes_min","u_int64_t"),
127 | new HogFeature("flow:C_src2dst_header_bytes_max","u_int64_t"),
128 | new HogFeature("flow:C_src2dst_header_bytes_std","u_int64_t"),
129 | new HogFeature("flow:C_src2dst_packets_avg","u_int64_t"),
130 | new HogFeature("flow:C_src2dst_packets_min","u_int64_t"),
131 | new HogFeature("flow:C_src2dst_packets_max","u_int64_t"),
132 | new HogFeature("flow:C_src2dst_packets_std","u_int64_t"),
133 | new HogFeature("flow:C_dst2src_pay_bytes_avg","u_int64_t"),
134 | new HogFeature("flow:C_dst2src_pay_bytes_min","u_int64_t"),
135 | new HogFeature("flow:C_dst2src_pay_bytes_max","u_int64_t"),
136 | new HogFeature("flow:C_dst2src_pay_bytes_std","u_int64_t"),
137 | new HogFeature("flow:C_dst2src_header_bytes_avg","u_int64_t"),
138 | new HogFeature("flow:C_dst2src_header_bytes_min","u_int64_t"),
139 | new HogFeature("flow:C_dst2src_header_bytes_max","u_int64_t"),
140 | new HogFeature("flow:C_dst2src_header_bytes_std","u_int64_t"),
141 | new HogFeature("flow:C_dst2src_packets_avg","u_int64_t"),
142 | new HogFeature("flow:C_dst2src_packets_min","u_int64_t"),
143 | new HogFeature("flow:C_dst2src_packets_max","u_int64_t"),
144 | new HogFeature("flow:C_dst2src_packets_std","u_int64_t"),
145 | new HogFeature("flow:C_packets_syn_avg","u_int64_t"),
146 | new HogFeature("flow:C_packets_syn_min","u_int64_t"),
147 | new HogFeature("flow:C_packets_syn_max","u_int64_t"),
148 | new HogFeature("flow:C_packets_syn_std","u_int64_t"),
149 | new HogFeature("flow:C_packets_ack_avg","u_int64_t"),
150 | new HogFeature("flow:C_packets_ack_min","u_int64_t"),
151 | new HogFeature("flow:C_packets_ack_max","u_int64_t"),
152 | new HogFeature("flow:C_packets_ack_std","u_int64_t"),
153 | new HogFeature("flow:C_packets_fin_avg","u_int64_t"),
154 | new HogFeature("flow:C_packets_fin_min","u_int64_t"),
155 | new HogFeature("flow:C_packets_fin_max","u_int64_t"),
156 | new HogFeature("flow:C_packets_fin_std","u_int64_t"),
157 | new HogFeature("flow:C_packets_rst_avg","u_int64_t"),
158 | new HogFeature("flow:C_packets_rst_min","u_int64_t"),
159 | new HogFeature("flow:C_packets_rst_max","u_int64_t"),
160 | new HogFeature("flow:C_packets_rst_std","u_int64_t"),
161 | new HogFeature("flow:C_packets_psh_avg","u_int64_t"),
162 | new HogFeature("flow:C_packets_psh_min","u_int64_t"),
163 | new HogFeature("flow:C_packets_psh_max","u_int64_t"),
164 | new HogFeature("flow:C_packets_psh_std","u_int64_t"),
165 | new HogFeature("flow:C_packets_urg_avg","u_int64_t"),
166 | new HogFeature("flow:C_packets_urg_min","u_int64_t"),
167 | new HogFeature("flow:C_packets_urg_max","u_int64_t"),
168 | new HogFeature("flow:C_packets_urg_std","u_int64_t"),
169 | new HogFeature("flow:C_tcp_retransmissions_avg","u_int64_t"),
170 | new HogFeature("flow:C_tcp_retransmissions_min","u_int64_t"),
171 | new HogFeature("flow:C_tcp_retransmissions_max","u_int64_t"),
172 | new HogFeature("flow:C_tcp_retransmissions_std","u_int64_t"),
173 | new HogFeature("flow:C_dst2src_pay_bytes_rate_avg","u_int64_t"),
174 | new HogFeature("flow:C_dst2src_pay_bytes_rate_min","u_int64_t"),
175 | new HogFeature("flow:C_dst2src_pay_bytes_rate_max","u_int64_t"),
176 | new HogFeature("flow:C_dst2src_pay_bytes_rate_std","u_int64_t"),
177 | new HogFeature("flow:C_src2dst_pay_bytes_rate_avg","u_int64_t"),
178 | new HogFeature("flow:C_src2dst_pay_bytes_rate_min","u_int64_t"),
179 | new HogFeature("flow:C_src2dst_pay_bytes_rate_max","u_int64_t"),
180 | new HogFeature("flow:C_src2dst_pay_bytes_rate_std","u_int64_t"),
181 | new HogFeature("flow:C_dst2src_packets_rate_avg","u_int64_t"),
182 | new HogFeature("flow:C_dst2src_packets_rate_min","u_int64_t"),
183 | new HogFeature("flow:C_dst2src_packets_rate_max","u_int64_t"),
184 | new HogFeature("flow:C_dst2src_packets_rate_std","u_int64_t"),
185 | new HogFeature("flow:C_src2dst_packets_rate_avg","u_int64_t"),
186 | new HogFeature("flow:C_src2dst_packets_rate_min","u_int64_t"),
187 | new HogFeature("flow:C_src2dst_packets_rate_max","u_int64_t"),
188 | new HogFeature("flow:C_src2dst_packets_rate_std","u_int64_t"),
189 | new HogFeature("flow:C_duration_avg","u_int64_t"),
190 | new HogFeature("flow:C_duration_min","u_int64_t"),
191 | new HogFeature("flow:C_duration_max","u_int64_t"),
192 | new HogFeature("flow:C_duration_std","u_int64_t"),
193 | new HogFeature("flow:C_idletime_avg","u_int64_t"),
194 | new HogFeature("flow:C_idletime_min","u_int64_t"),
195 | new HogFeature("flow:C_idletime_max","u_int64_t"),
196 | new HogFeature("flow:C_idletime_std","u_int64_t"),
197 | new HogFeature("flow:response_rel_time","u_int32_t"),
198 | new HogFeature("flow:detection_completed","u_int8_t"),
199 | new HogFeature("flow:ndpi_risk",",char",false,false,1),
200 | new HogFeature("flow:detected_os","char",false),
201 | new HogFeature("flow:dns_num_queries","u_int32_t"),
202 | new HogFeature("flow:dns_num_answers","u_int32_t"),
203 | new HogFeature("flow:dns_reply_code","u_int32_t"),
204 | new HogFeature("flow:dns_query_type","u_int32_t"),
205 | new HogFeature("flow:dns_query_class","u_int32_t"),
206 | new HogFeature("flow:dns_rsp_type","u_int32_t"),
207 |
208 | new HogFeature("flow:http_url","char",false),
209 | new HogFeature("flow:http_content_type","char",true,false),
210 | new HogFeature("flow:http_method","u_int32_t"),
211 | new HogFeature("flow:http_num_request_headers","u_int32_t"),
212 | new HogFeature("flow:http_num_response_headers","u_int32_t"),
213 | new HogFeature("flow:http_request_version","u_int32_t"),
214 | new HogFeature("flow:http_response_status_code","u_int32_t"),
215 |
216 |
217 | new HogFeature("event:sensor_id","u_int32_t",false),
218 | new HogFeature("event:event_id","u_int32_t",false),
219 | new HogFeature("event:event_second","u_int64_t",false),
220 | new HogFeature("event:event_microsecond","u_int64_t",false),
221 | new HogFeature("event:signature_id","u_int64_t",false,false,1),
222 | new HogFeature("event:generator_id","u_int64_t",false),
223 | new HogFeature("event:classification_id","u_int32_t",false),
224 | new HogFeature("event:priority_id","u_int32_t",false)
225 | )
226 |
227 |
228 | val columnsSFlow = List("flow:IPprotocol","flow:IPsize","flow:agentID","flow:dstIP","flow:dstMAC","flow:dstPort","flow:ethernetType","flow:inVlan","flow:inputPort","flow:ipTos",
229 | "flow:ipTtl","flow:outVlan","flow:outputPort","flow:packetSize","flow:samplingRate","flow:srcIP","flow:srcMAC","flow:srcPort","flow:tcpFlags",
230 | "flow:timestamp")
231 |
232 | // "flow:inter_time-%d","flow:packet_size-%d"
233 |
234 | val hogzilla_flows = new HTable(conf,"hogzilla_flows")
235 | val hogzilla_sflows = new HTable(conf,"hogzilla_sflows")
236 | val hogzilla_events = new HTable(conf,"hogzilla_events")
237 | val hogzilla_sensor = new HTable(conf,"hogzilla_sensor")
238 | val hogzilla_signatures = new HTable(conf,"hogzilla_signatures")
239 | val hogzilla_mynets = new HTable(conf,"hogzilla_mynets")
240 | val hogzilla_reputation = new HTable(conf,"hogzilla_reputation")
241 | val hogzilla_histograms = new HTable(conf,"hogzilla_histograms")
242 | val hogzilla_clusters = new HTable(conf,"hogzilla_clusters")
243 | val hogzilla_cluster_members = new HTable(conf,"hogzilla_cluster_members")
244 | val hogzilla_inventory = new HTable(conf,"hogzilla_inventory")
245 | val hogzilla_authrecords = new HTable(conf,"hogzilla_authrecords")
246 |
247 |
248 | def connect(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
249 | {
250 | val table = "hogzilla_flows"
251 |
252 | conf.set(TableInputFormat.INPUT_TABLE, table)
253 | conf.set("zookeeper.session.timeout", "1800000")
254 | conf.setInt("hbase.client.scanner.timeout.period", 1800000)
255 | // You can limit the SCANNED COLUMNS here
256 | conf.set("hbase.rpc.timeout", "1800000")
257 | //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),
258 |
259 |
260 | if (!admin.isTableAvailable(table)) {
261 | println("Table hogzilla_flows does not exist.")
262 | }
263 |
264 | val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
265 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
266 | classOf[org.apache.hadoop.hbase.client.Result])
267 |
268 | return hBaseRDD
269 | }
270 |
271 | def connectSFlow(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
272 | {
273 | val table = "hogzilla_sflows"
274 |
275 | conf.set(TableInputFormat.INPUT_TABLE, table)
276 | conf.set("zookeeper.session.timeout", "600000")
277 | conf.setInt("hbase.client.scanner.timeout.period", 600000)
278 | //conf.set("hbase.rpc.timeout", "1800000")
279 | // You can limit the SCANNED COLUMNS here
280 | //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),
281 |
282 |
283 | if (!admin.isTableAvailable(table)) {
284 | println("Table hogzilla_sflows does not exist.")
285 | }
286 |
287 | val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
288 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
289 | classOf[org.apache.hadoop.hbase.client.Result])
290 |
291 | return hBaseRDD
292 | }
293 |
294 |
295 |
296 | def connectHistograms(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
297 | {
298 | val table = "hogzilla_histograms"
299 |
300 | conf.set(TableInputFormat.INPUT_TABLE, table)
301 | conf.set("zookeeper.session.timeout", "600000")
302 | conf.setInt("hbase.client.scanner.timeout.period", 600000)
303 | //conf.set("hbase.rpc.timeout", "1800000")
304 | // You can limit the SCANNED COLUMNS here
305 | //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),
306 |
307 |
308 | if (!admin.isTableAvailable(table)) {
309 | println("Table hogzilla_histograms does not exist.")
310 | }
311 |
312 | val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
313 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
314 | classOf[org.apache.hadoop.hbase.client.Result])
315 |
316 | return hBaseRDD
317 | }
318 |
319 |
320 |
321 | def connectAuth(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
322 | {
323 | val table = "hogzilla_authrecords"
324 |
325 | conf.set(TableInputFormat.INPUT_TABLE, table)
326 | conf.set("zookeeper.session.timeout", "600000")
327 | conf.setInt("hbase.client.scanner.timeout.period", 600000)
328 | //conf.set("hbase.rpc.timeout", "1800000")
329 | // You can limit the SCANNED COLUMNS here
330 | //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),
331 |
332 |
333 | if (!admin.isTableAvailable(table)) {
334 | println("Table hogzilla_authrecords does not exist.")
335 | }
336 |
337 | val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
338 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
339 | classOf[org.apache.hadoop.hbase.client.Result])
340 |
341 | return hBaseRDD
342 | }
343 |
344 | def close()
345 | {
346 | admin.close()
347 | }
348 |
349 | }
--------------------------------------------------------------------------------
/src/org/hogzilla/hbase/HogHBaseReputation.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.hbase
21 |
22 |
23 | /**
24 | * @author pa
25 | */
26 |
27 | import scala.math.random
28 | import java.lang.Math
29 | import org.apache.spark._
30 | import org.apache.hadoop.hbase.client.HBaseAdmin
31 | import org.apache.hadoop.hbase.util.Bytes
32 | import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName}
33 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat
34 | import org.apache.spark.mllib.regression.{LabeledPoint,LinearRegressionModel,LinearRegressionWithSGD}
35 | import org.apache.spark.mllib.linalg.Vectors
36 | import org.apache.spark.rdd.RDD
37 | import org.apache.hadoop.hbase.client.HTable
38 | import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
39 | import org.apache.hadoop.hbase.filter.BinaryComparator
40 | import org.apache.hadoop.hbase.filter.FilterList
41 | import org.apache.hadoop.hbase.filter.CompareFilter
42 | import java.util.ArrayList
43 | import org.apache.hadoop.hbase.client.Scan
44 | import org.apache.hadoop.hbase.filter.Filter
45 | import scala.collection.mutable.HashSet
46 | import org.apache.hadoop.hbase.client.Put
47 |
48 |
/**
 * Read/write access to IP reputation lists stored in the hogzilla_reputation
 * HBase table. Rows use column family "rep" with qualifiers "list",
 * "list_type" and "ip".
 */
object HogHBaseReputation {

  /**
   * Returns all IPs belonging to the given reputation list.
   * Ex: listName = MX, listType = whitelist
   *
   * @param listName name of the list ("rep:list")
   * @param listType type of the list ("rep:list_type")
   * @return immutable set of IP strings stored under "rep:ip"
   */
  def getReputationList(listName:String, listType:String):Set[String] =
  {
    val list = new HashSet[String]

    // Keep only rows whose list type AND list name both match.
    val filters: ArrayList[Filter] = new ArrayList();

    val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"),
        CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType)))
    colValFilter1.setFilterIfMissing(false);

    val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"),
        CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName)))
    colValFilter2.setFilterIfMissing(false);

    filters.add(colValFilter1);
    filters.add(colValFilter2);

    val filterList = new FilterList( FilterList.Operator.MUST_PASS_ALL, filters);
    val scan = new Scan()
    scan.setFilter(filterList)

    // FIX: the ResultScanner was never closed, leaking a server-side scanner
    // (and its lease) on every call. Close it in a finally block.
    val scanner = HogHBaseRDD.hogzilla_reputation.getScanner(scan)
    try {
      val it = scanner.iterator()
      while(it.hasNext())
      {
        list.add( Bytes.toString(it.next().getValue(Bytes.toBytes("rep"),Bytes.toBytes("ip"))) )
      }
    } finally {
      scanner.close()
    }

    list.toSet
  }

  /**
   * Persists one IP into the given reputation list. The row key is
   * "ip-listName-listType", so re-saving the same triple is idempotent.
   */
  def saveReputationList(listName:String, listType:String, ip:String) =
  {
    val put = new Put(Bytes.toBytes(ip+"-"+listName+"-"+listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName))
    put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip))

    HogHBaseRDD.hogzilla_reputation.put(put)
  }

}
--------------------------------------------------------------------------------
/src/org/hogzilla/hbase/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
// Package object for org.hogzilla.hbase; currently holds no shared members.
package object hbase {


}
--------------------------------------------------------------------------------
/src/org/hogzilla/histogram/Histograms.scala:
--------------------------------------------------------------------------------
1 | package org.hogzilla.histogram
2 |
3 | import scala.collection.mutable.HashSet
4 | import scala.collection.mutable.Map
5 | import scala.collection.mutable.Set
6 | import scala.math.log
7 |
8 |
9 | /**
10 | * @author pa
11 | */
12 |
/**
 * Operations on probability histograms represented as Map[String,Double]
 * (event key -> relative frequency) and on HogHistogram wrappers.
 */
object Histograms {

  // Frequencies at or below this value are treated as "event never seen".
  val atypicalThreshold = 0.0000001D

  /**
   * Kullback-Leibler divergence KL(P||Q) between two histograms.
   * Terms where p==0 contribute nothing; terms where q==0 are skipped
   * (contribute 0) to avoid division by zero.
   */
  def KullbackLiebler(histogram1:Map[String,Double],histogram2:Map[String,Double]):Double =
  {
    val keys = histogram1.keySet ++ histogram2.keySet

    keys.foldLeft(0.0){ case (ac,key) =>
      val p:Double = histogram1.getOrElse(key, 0D)
      val q:Double = histogram2.getOrElse(key, 0D)
      if(p==0 || q==0)
        ac
      else
        ac + p*log(p/q)
    }
  }

  /**
   * Returns atypical events: keys that occur in histogram2 (current) but are
   * absent/negligible in histogram1 (saved profile).
   */
  def atypical(histogram1:Map[String,Double],histogram2:Map[String,Double]):Set[String] =
  {
    val ret = new HashSet[String]

    // FIX: the condition had been corrupted to "if(patypicalThreshold)"
    // (the comparison operators were lost). Restored to mirror typical():
    // the event occurs now (q above threshold) but not in the profile.
    histogram2.keySet.foreach { key =>
      val p:Double = histogram1.getOrElse(key, 0D)
      val q:Double = histogram2.getOrElse(key, 0D)
      if(p<atypicalThreshold && q>atypicalThreshold)
        ret.add(key)
    }

    ret
  }

  // Return typical events in histogram1 (main saved), which occurred in histogram2 (current)
  def typical(histogram1:Map[String,Double],histogram2:Map[String,Double]):Set[String] =
  {
    val ret = new HashSet[String]

    histogram2.keySet.foreach { key =>
      val p:Double = histogram1.getOrElse(key, 0D)
      val q:Double = histogram2.getOrElse(key, 0D)
      if(p>atypicalThreshold && q>atypicalThreshold)
        ret.add(key)
    }

    ret
  }

  /** True if the event's frequency in histogram1 exceeds the threshold. */
  def isTypicalEvent(histogram1:Map[String,Double],event:String):Boolean=
  {
    histogram1.getOrElse(event, 0D) > atypicalThreshold
  }

  /** Complement of isTypicalEvent. */
  def isAtypicalEvent(histogram1:Map[String,Double],event:String):Boolean=
  {
    !isTypicalEvent(histogram1,event)
  }

  /**
   * Merges histogram2 (current sample) into histogram1 (saved profile),
   * producing frequencies weighted by each histogram's sample size.
   * Mutates histogram1's maps in place and returns a new HogHistogram
   * wrapping them.
   */
  def merge(histogram1:HogHistogram,histogram2:HogHistogram):HogHistogram =
  {
    val keys = histogram1.histMap.keySet ++ histogram2.histMap.keySet
    val keysLabel = histogram1.histLabels.keySet ++ histogram2.histLabels.keySet

    // Age the stored profile: after >1000 samples, halve its weight so that
    // new behaviour can still shift the distribution.
    var div:Double = 1
    if(histogram1.histSize.toDouble > 1000)
      div = 2

    keys.foreach { key =>
      val p:Double = histogram1.histMap.getOrElse(key, 0D)
      val q:Double = histogram2.histMap.getOrElse(key, 0D)

      if(p>0 || q>0)
      {
        val newp = (
            p*histogram1.histSize.toDouble/div+
            q*histogram2.histSize.toDouble
          )/(histogram1.histSize.toDouble/div+histogram2.histSize.toDouble)

        histogram1.histMap.put(key,newp)
      }
    }

    // Keep histogram1's labels; fill in any missing ones from histogram2.
    keysLabel.foreach { key =>
      if(histogram1.histLabels.get(key).isEmpty)
        histogram1.histLabels.put(key,histogram2.histLabels.get(key).get)
    }

    val total = histogram1.histSize/div+histogram2.histSize
    new HogHistogram(histogram1.histName,total.toInt,histogram1.histMap,histogram1.histLabels)
  }

  // It is not exactly a histogram, but... keeps, per key, the maximum of the
  // two values, and sums the sizes.
  def mergeMax(histogram1:HogHistogram,histogram2:HogHistogram):HogHistogram =
  {
    val keys = histogram1.histMap.keySet ++ histogram2.histMap.keySet
    val keysLabel = histogram1.histLabels.keySet ++ histogram2.histLabels.keySet

    keys.foreach { key =>
      val p:Double = histogram1.histMap.getOrElse(key, 0D)
      val q:Double = histogram2.histMap.getOrElse(key, 0D)

      if(p>0 || q>0)
        histogram1.histMap.put(key,p.max(q))
    }

    keysLabel.foreach { key =>
      if(histogram1.histLabels.get(key).isEmpty)
        histogram1.histLabels.put(key,histogram2.histLabels.get(key).get)
    }

    val total = histogram1.histSize+histogram2.histSize
    new HogHistogram(histogram1.histName,total,histogram1.histMap,histogram1.histLabels)
  }

  /**
   * hist1 - hist2: removes histogram2's weighted contribution from
   * histogram1, renormalizing by the size difference. Mutates histogram1's
   * map in place.
   */
  def difference(histogram1:HogHistogram,histogram2:HogHistogram):HogHistogram =
  {
    val keys = histogram2.histMap.keySet

    keys.foreach { key =>
      val p:Double = histogram1.histMap.getOrElse(key, 0D)
      val q:Double = histogram2.histMap.getOrElse(key, 0D)

      if(p>0 || q>0)
      {
        val newp = (
            p*histogram1.histSize.toDouble-
            q*histogram2.histSize.toDouble
          )/(histogram1.histSize.toDouble-histogram2.histSize.toDouble)

        histogram1.histMap.put(key,newp)
      }
    }

    val total = histogram1.histSize-histogram2.histSize
    new HogHistogram(histogram1.histName,total,histogram1.histMap,histogram1.histLabels)
  }

  /** Extracts the IP suffix from a histogram name of the form "prefix-IP". */
  def getIPFromHistName(histogramName:String):String =
  {
    histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString
  }

  /*
  final val EPS = 1e-10

  type DATASET = Iterator[(Double, Double)]

  def execute( xy: DATASET, f: Double => Double): Double = {
    val z = xy.filter{ case(x, y) => abs(y) > EPS}
    - z./:(0.0){ case(s, (x, y)) => s + y*log(f(x)/y)}
  }

  def execute( xy: DATASET, fs: Iterable[Double=>Double]): Iterable[Double] =
    fs.map(execute(xy, _))
  */

}
--------------------------------------------------------------------------------
/src/org/hogzilla/histogram/HogHistogram.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.histogram
21 |
22 | import scala.collection.mutable.Map
23 | import scala.collection.mutable.HashMap
24 |
/**
 * Value holder for one histogram.
 *
 * @param histName   histogram identifier (Histograms.getIPFromHistName expects
 *                   the form "prefix-IP")
 * @param histSize   sample size — presumably the number of observations this
 *                   histogram aggregates (combined by Histograms.merge); confirm
 * @param histMap    event key -> relative frequency
 * @param histLabels event key -> human-readable label (optional, empty by default)
 */
class HogHistogram(val histName:String,val histSize:Long, val histMap:Map[String,Double], val histLabels:Map[String,String]=new HashMap[String,String])
{

}
29 |
--------------------------------------------------------------------------------
/src/org/hogzilla/histogram/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
/**
 * FIX: this previously read "package org.hogzilla.histogram { }", which,
 * nested inside the enclosing "package org.hogzilla" clause, declared the
 * spurious package org.hogzilla.org.hogzilla.histogram. Declared as a
 * package object instead, matching the sibling org.hogzilla.hbase file.
 */
package object histogram {

}
--------------------------------------------------------------------------------
/src/org/hogzilla/http/HogHTTP.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 | /**
20 | * REFERENCES:
21 | * - http://ids-hogzilla.org/xxx/826000101
22 | */
23 |
24 |
25 | package org.hogzilla.http
26 |
27 | import scala.math.random
28 | import org.apache.hadoop.hbase.util.Bytes
29 | import org.apache.spark._
30 | import org.apache.spark.mllib.clustering.KMeans
31 | import org.apache.spark.mllib.linalg.Vectors
32 | import org.apache.spark.mllib.linalg.Vector
33 | import org.apache.spark.rdd.RDD
34 | import org.hogzilla.hbase.HogHBaseRDD
35 | import org.hogzilla.event.{HogEvent, HogSignature}
36 | import java.util.HashSet
37 | import org.apache.spark.mllib.regression.LabeledPoint
38 | import org.apache.spark.mllib.classification.SVMWithSGD
39 | import scala.tools.nsc.doc.base.comment.OrderedList
40 | import org.apache.spark.mllib.optimization.L1Updater
41 | import org.hogzilla.util.HogFlow
42 | import scala.collection.mutable.HashMap
43 | import scala.collection.mutable.Map
44 |
45 | /**
46 | *
47 | */
/**
 * Anomaly detection for HTTP flows: clusters flows with K-Means and raises
 * Hogzilla events for untagged flows that fall into small clusters already
 * containing a proportion of priority-1 ("dirty") flows.
 */
object HogHTTP {

  // Signatures registered in HBase at object initialisation:
  // _1 = K-Means clustering signature, _2 = "SuperBag" signature.
  val signature = (HogSignature(3,"HZ: Suspicious HTTP flow identified by K-Means clustering",2,1,826000101,826).saveHBase(),
                   HogSignature(3,"HZ: Suspicious HTTP flow identified by SuperBag",2,1,826000102,826).saveHBase())

  // K for K-Means.
  val numberOfClusters=32
  // A cluster may be tainted only if it holds less than this fraction of all flows...
  val maxAnomalousClusterProportion=0.05
  // ...and its average "already tagged" indicator is at least this value.
  val minDirtyProportion=0.001

  /**
   * Module entry point.
   *
   * @param HogRDD rows of the hogzilla_flows HBase table
   * @param spark  active SparkContext (unused here; kept for interface stability)
   */
  def run(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext)
  {
    // HTTP K-means clustering
    kmeans(HogRDD)
  }

  /**
   * Fills the human-readable description and signature id of an event
   * produced by the K-Means method. Returns the same (mutated) event.
   */
  def kmeansPopulate(event:HogEvent):HogEvent =
  {
    val centroids:String = event.data.get("centroids")
    val vector:String = event.data.get("vector")
    val clusterLabel:String = event.data.get("clusterLabel")
    val hostname:String = event.data.get("hostname")

    event.text = "This flow was detected by Hogzilla as an anormal activity. In what follows you can see more information.\n"+
                 "Hostname mentioned in HTTP flow: "+hostname+"\n"+
                 "Hogzilla module: HogHTTP, Method: k-means clustering with k="+numberOfClusters+"\n"+
                 "URL for more information: http://ids-hogzilla.org/signature-db/"+"%.0f".format(signature._1.signature_id)+"\n"+""
    //"Centroids:\n"+centroids+"\n"+
    //"Vector: "+vector+"\n"+
    //"(cluster,label nDPI): "+clusterLabel+"\n"

    event.signature_id = signature._1.signature_id

    event
  }

  /**
   * K-Means pipeline: filter HTTP flows, z-score normalize the feature
   * vectors, cluster them, then alert on untagged flows inside small
   * "dirty" clusters.
   */
  def kmeans(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)])
  {
    // Numeric features used for clustering; "-N" columns hold per-packet values.
    val features = Array("flow:avg_packet_size",
        "flow:packets_without_payload",
        "flow:avg_inter_time",
        "flow:flow_duration",
        "flow:max_packet_size",
        "flow:bytes",
        "flow:packets",
        "flow:min_packet_size",
        "flow:packet_size-0",
        "flow:inter_time-0",
        "flow:packet_size-1",
        "flow:inter_time-1",
        "flow:packet_size-2",
        "flow:inter_time-2",
        "flow:packet_size-3",
        "flow:inter_time-3",
        "flow:packet_size-4",
        "flow:inter_time-4",
        "flow:http_method")

    println("Filtering HogRDD...")
    val HttpRDD = HogRDD.
      map { case (id,result) => {
           val map: Map[String,String] = new HashMap[String,String]
           map.put("flow:id",Bytes.toString(id.get).toString())
           HogHBaseRDD.columns.foreach { column =>
             val ret = result.getValue(Bytes.toBytes(column.name.split(":")(0).toString()),Bytes.toBytes(column.name.split(":")(1).toString()))
             map.put(column.name, Bytes.toString(ret))
           }

           // FIX: scala's Map.get returns an Option, which is never null, so
           // the original "map.get(k)==null" checks could not fire and missing
           // columns kept null values, making ".toDouble" below throw.
           // ".orNull==null" catches both absent keys and null values.
           if(map.get("flow:packet_size-1").orNull==null) map.put("flow:packet_size-1","0")
           if(map.get("flow:inter_time-1").orNull==null)  map.put("flow:inter_time-1","0")
           if(map.get("flow:packet_size-2").orNull==null) map.put("flow:packet_size-2","0")
           if(map.get("flow:inter_time-2").orNull==null)  map.put("flow:inter_time-2","0")
           if(map.get("flow:packet_size-3").orNull==null) map.put("flow:packet_size-3","0")
           if(map.get("flow:inter_time-3").orNull==null)  map.put("flow:inter_time-3","0")
           if(map.get("flow:packet_size-4").orNull==null) map.put("flow:packet_size-4","0")
           if(map.get("flow:inter_time-4").orNull==null)  map.put("flow:inter_time-4","0")
           if(map.get("flow:http_method").orNull==null)   map.put("flow:http_method","0")

           val lower_ip = result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("lower_ip"))
           val upper_ip = result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("upper_ip"))
           new HogFlow(map,Bytes.toString(lower_ip),Bytes.toString(upper_ip))
         }
      }.filter(x => ( x.get("flow:lower_port").equals("80") ||
                      x.get("flow:upper_port").equals("80") ||
                      x.get("flow:lower_port").equals("81") ||
                      x.get("flow:upper_port").equals("81")
                    ) && x.get("flow:packets").toDouble.>(1)
                      // only flows older than 100 min (row key begins with epoch millis)
                      && x.get("flow:id").split('.')(0).toLong.<(System.currentTimeMillis()-6000000)
      ).cache

    println("Counting HogRDD...")
    val RDDtotalSize= HttpRDD.count()
    println("Filtered HogRDD has "+RDDtotalSize+" rows!")

    if(RDDtotalSize==0)
      return

    println("Calculating some variables to normalize data...")
    val HttpRDDcount = HttpRDD.map(flow => features.map { feature => flow.get(feature).toDouble }).cache()
    val n = RDDtotalSize
    val numCols = HttpRDDcount.first.length
    val sums = HttpRDDcount.reduce((a,b) => a.zip(b).map(t => t._1 + t._2))
    val sumSquares = HttpRDDcount.fold(
        new Array[Double](numCols)
      )(
        (a,b) => a.zip(b).map(t => t._1 + t._2*t._2)
      )

    // Per-column population standard deviation.
    val stdevs = sumSquares.zip(sums).map{
      case(sumSq,sum) => math.sqrt(n*sumSq - sum*sum)/n
    }

    val means = sums.map(_/n)

    // Z-score normalization; columns with non-positive stdev are only centered.
    def normalize(vector: Vector):Vector = {
      val normArray = (vector.toArray,means,stdevs).zipped.map(
          (value,mean,std) =>
            if(std<=0) (value-mean) else (value-mean)/std)
      Vectors.dense(normArray)
    }

    println("Normalizing data...")
    // Key tuple: (nDPI protocol, 1 if already tagged priority-1, hostname, flow).
    val labelAndData = HttpRDD.map { flow =>
      val vector = Vectors.dense(features.map { feature => flow.get(feature).toDouble })
      ( (flow.get("flow:detected_protocol"),
         if (flow.get("event:priority_id")!=null && flow.get("event:priority_id").equals("1")) 1 else 0 ,
         flow.get("flow:host_server_name"),flow),
        normalize(vector)
      )
    }

    println("Estimating model...")
    val data = labelAndData.values.cache()
    val kmeans = new KMeans()
    kmeans.setK(numberOfClusters)
    val vectorCount = data.count()
    println("Number of vectors: "+vectorCount)
    val model = kmeans.run(data)

    println("Predicting points (ie, find cluster for each point)...")
    val clusterLabel = labelAndData.map({
      case (label,datum) =>
        val cluster = model.predict(datum)
        (cluster,label,datum)
    })

    println("Generating histogram...")
    // Histogram keyed by (cluster, protocol): value = (avg tagged indicator, count).
    val clusterLabelCount = clusterLabel.map({
      case (cluster,label,datum) =>
        val map: Map[(Int,String),(Double,Int)] = new HashMap[(Int,String),(Double,Int)]
        map.put((cluster,label._1), (label._2.toDouble,1))
        map
    }).reduce((a,b) => {
      // Merge b into a, combining (avg,count) pairs as a weighted average.
      b./:(0){
        case (c,((key:(Int,String)),(avg2,count2))) =>
          // FIX: the original unconditionally called a.get(key).get, which
          // throws NoSuchElementException whenever a key exists only in b.
          if(a.contains(key))
          {
            val (avg1,count1) = a.get(key).get
            val avg = (avg1*count1 + avg2*count2)/(count1+count2)
            a.put(key, (avg,count1+count2))
          }
          else
            a.put(key, (avg2,count2))
          0
      }
      a
    })

    println("######################################################################################")
    println("######################################################################################")
    println("######################################################################################")
    println("######################################################################################")
    println("HTTP K-Means Clustering")
    println("Centroids")
    val centroids = ""+model.clusterCenters.mkString(",\n")

    // Dump the (cluster, protocol) histogram for inspection.
    clusterLabelCount./:(0)
    { case (z,(key:(Int,String),(avg,count))) =>
        val cluster = key._1
        val label = key._2
        println(f"Cluster: $cluster%1s\t\tLabel: $label%20s\t\tCount: $count%10s\t\tAvg: $avg%10s")
        0
    }

    val thr=maxAnomalousClusterProportion*RDDtotalSize

    println("Selecting cluster to be tainted...")
    val taintedArray = clusterLabelCount.filter({ case (key:(Int,String),(avg,count)) =>
        (count.toDouble < thr
         && avg.toDouble >= minDirtyProportion )
    }).map(_._1)

    taintedArray.par.map
    {
      tainted =>

        println("######################################################################################")
        println("Tainted flows of: "+tainted.toString())

        println("Generating events into HBase...")
        // Alert only on flows in the tainted cluster that were NOT already tagged.
        clusterLabel.filter({ case (cluster,(group,tagged,hostname,flow),datum) => (cluster,group).equals(tainted) && tagged.equals(0) }).
          foreach{ case (cluster,(group,tagged,hostname,flow),datum) =>
            val event = new HogEvent(flow)
            event.data.put("centroids", centroids)
            event.data.put("vector", datum.toString)
            event.data.put("clusterLabel", "("+cluster.toString()+","+group+")")
            event.data.put("hostname", flow.get("flow:host_server_name")+"/"+flow.get("flow:http_url"))
            kmeansPopulate(event).alert()
          }

        println("######################################################################################")
        println("######################################################################################")
        println("######################################################################################")
        println("######################################################################################")
    }

    if(taintedArray.isEmpty)
    {
      println("No flow matched!")
    }

  }

}
--------------------------------------------------------------------------------
/src/org/hogzilla/http/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
/**
 * FIX: this previously read "package org.hogzilla.http { }", which, nested
 * inside the enclosing "package org.hogzilla" clause, declared the spurious
 * package org.hogzilla.org.hogzilla.http. Declared as a package object
 * instead, matching the sibling org.hogzilla.hbase file.
 */
package object http {

}
--------------------------------------------------------------------------------
/src/org/hogzilla/initiate/HogInitiate.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.initiate
21 |
22 | import org.apache.spark._
23 | import org.hogzilla.hbase.HogHBaseRDD
24 | import org.apache.hadoop.hbase.util.Bytes
25 | import org.apache.hadoop.hbase.client.Get
26 | import org.apache.hadoop.hbase.client.Put
27 |
28 |
29 | object HogInitiate {
30 | 
31 |   // Default sensor metadata written to HBase on the first run.
32 |   val sensor_description="Hogzilla IDS"
33 |   val sensor_hostname="hoghostname"
34 | 
35 |   /**
36 |    * Seeds the hogzilla_sensor table with a single sensor row (row key "1")
37 |    * when it is not present yet. Idempotent: an existing row is left untouched.
38 |    */
39 |   def initiate(spark: SparkContext)
40 |   {
41 |     val rowKey = Bytes.toBytes("1")
42 | 
43 |     if(HogHBaseRDD.hogzilla_sensor.exists(new Get(rowKey)))
44 |       return  // sensor row already initialized; nothing to do
45 | 
46 |     val family = Bytes.toBytes("sensor")
47 |     val row    = new Put(rowKey)
48 |     row.add(family, Bytes.toBytes("description"), Bytes.toBytes(sensor_description))
49 |     row.add(family, Bytes.toBytes("hostname"),    Bytes.toBytes(sensor_hostname))
50 |     HogHBaseRDD.hogzilla_sensor.put(row)
51 |   }
52 | 
53 | }
--------------------------------------------------------------------------------
/src/org/hogzilla/initiate/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 | 
22 | /**
23 |  * Package object for org.hogzilla.initiate.
24 |  *
25 |  * NOTE(review): the previous declaration `package org.hogzilla.initiate { }`
26 |  * nested inside `package org.hogzilla` actually defined the unintended
27 |  * package `org.hogzilla.org.hogzilla.initiate`. A package object matches the
28 |  * sibling files (see prepare/sflow package.scala) and keeps the intended name.
29 |  *
30 |  * @author pa
31 |  */
32 | package object initiate {
33 | 
34 | }
--------------------------------------------------------------------------------
/src/org/hogzilla/prepare/HogPrepare.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.prepare
21 |
22 | import java.util.HashMap
23 | import java.util.Map
24 | import org.apache.hadoop.hbase.util.Bytes
25 | import org.apache.spark.rdd.RDD
26 | import org.hogzilla.hbase.HogHBaseRDD
27 | import org.apache.hadoop.hbase.client.RowMutations
28 | import org.apache.hadoop.hbase.client.Put
29 | import org.apache.hadoop.hbase.client.Delete
30 | import org.apache.hadoop.hbase.client.Scan
31 | import org.apache.hadoop.hbase.filter.Filter
32 | import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
33 | import org.apache.hadoop.hbase.filter.BinaryComparator
34 | import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
35 | import org.apache.hadoop.hbase.filter.CompareFilter
36 |
37 |
38 | object HogPrepare {
39 | 
40 |   /** Purge time unit in ms: 6h. Alternatives: 24h=86400000, 12h=43200000. */
41 |   private val timeUnit: Long = 21600000L
42 | 
43 |   /**
44 |    * Entry point: purges old rows from the HBase tables used by Hogzilla
45 |    * (flows, sflows and auth records). The HogRDD parameter is kept for
46 |    * interface compatibility; the purge works directly on the HBase tables.
47 |    */
48 |   def prepare(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)])
49 |   {
50 |     println("Cleaning HBase...")
51 |     cleanFlows(HogRDD)
52 |     cleanSFlows(HogRDD)
53 |     cleanAuthRecords(HogRDD)
54 |   }
55 | 
56 |   /**
57 |    * Deletes every row of `table` whose timestamp is below `timeSuperior`.
58 |    *
59 |    * The work is split into nSplits+1 parallel scans: split 0 covers everything
60 |    * older than (timeSuperior - denseTime); splits 1..nSplits partition the
61 |    * "dense" window [timeSuperior-denseTime, timeSuperior), where most rows are
62 |    * expected to live.
63 |    *
64 |    * @param table        HBase table to purge
65 |    * @param timeSuperior upper bound (exclusive) for timestamps to delete, in ms
66 |    * @param denseTime    width of the dense window, in ms
67 |    * @param nSplits      number of parallel scans over the dense window
68 |    * @return number of rows deleted
69 |    */
70 |   private def purgeOlderThan(table: org.apache.hadoop.hbase.client.HTableInterface,
71 |                              timeSuperior: Long, denseTime: Long, nSplits: Int): Int =
72 |   {
73 |     val deltaT = denseTime/nSplits
74 | 
75 |     (0 to nSplits).toList.par.map({ k =>
76 | 
77 |       val scan = new Scan
78 | 
79 |       if(k == 0)
80 |         scan.setTimeRange(0, timeSuperior-denseTime)
81 |       else
82 |         scan.setTimeRange(timeSuperior-denseTime + deltaT*(k-1), timeSuperior-denseTime + deltaT*k)
83 | 
84 |       println("TimeRange: "+scan.getTimeRange.toString())
85 | 
86 |       val scanner = table.getScanner(scan).iterator()
87 | 
88 |       var counter=0
89 |       while(scanner.hasNext())
90 |       {
91 |         table.delete(new Delete(scanner.next().getRow))
92 |         counter+=1
93 |       }
94 | 
95 |       counter
96 |     }).reduce( (a,b) => a+b)
97 |   }
98 | 
99 |   /**
100 |    * Purge policy for hogzilla_flows, on a fancy time-line:
101 |    *
102 |    *            Sup1-denseTime    tSup1                   tSup2          now
103 |    *  old flows |  dense period   | training dirty period | don't touch | future
104 |    *  -------------------------------------------------------------------------->
105 |    *  remove all  remove all        remove flows w/o events
106 |    *              in par            priority_id=1, in par
107 |    *
108 |    *  tSup2 = now - timeUnit ; tSup1 = now - 100*timeUnit ; denseTime = 4*timeUnit
109 |    */
110 |   def cleanFlows(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)])
111 |   {
112 |     println("Cleaning hogzilla_flows...")
113 |     val now = System.currentTimeMillis
114 | 
115 |     val timeSuperior1 = now - (timeUnit*100)
116 |     val timeSuperior2 = now - timeUnit
117 |     val nSplits = 4 /* number of parallel tasks */
118 |     val denseTime = timeUnit*4
119 |     val deltaT2 = (timeSuperior2-timeSuperior1)/nSplits
120 | 
121 |     println("Removing all older than "+timeSuperior1)
122 |     val totalOld = purgeOlderThan(HogHBaseRDD.hogzilla_flows, timeSuperior1, denseTime, nSplits)
123 |     println("Old rows dropped: "+totalOld)
124 | 
125 |     println("Removing flows w/o events priority 1, which are between "+timeSuperior1+" and "+timeSuperior2)
126 |     val totalWOEvent = (1 to nSplits).toList.par.map({ k =>
127 | 
128 |       val scan = new Scan
129 |       // Keep only rows whose event:priority_id equals "1"; everything else in
130 |       // the window is deleted. setFilterIfMissing(false) means rows missing
131 |       // the column are also returned by the scan (and therefore deleted).
132 |       val filter = new SingleColumnValueFilter(Bytes.toBytes("event"),
133 |                                                Bytes.toBytes("priority_id"),
134 |                                                CompareOp.valueOf("NOT_EQUAL"),
135 |                                                new BinaryComparator(Bytes.toBytes("1")))
136 | 
137 |       filter.setFilterIfMissing(false)
138 | 
139 |       scan.setTimeRange(timeSuperior1 + deltaT2*(k-1), timeSuperior1 + deltaT2*k)
140 |       scan.setFilter(filter)
141 | 
142 |       println("TimeRange: "+scan.getTimeRange.toString())
143 | 
144 |       val scanner = HogHBaseRDD.hogzilla_flows.getScanner(scan).iterator()
145 | 
146 |       var counter=0
147 |       while(scanner.hasNext())
148 |       {
149 |         HogHBaseRDD.hogzilla_flows.delete(new Delete(scanner.next().getRow))
150 |         counter+=1
151 |       }
152 |       counter
153 |     }).reduce((a,b) => a+b)
154 | 
155 |     println("Flows without event priority 1 dropped: "+totalWOEvent)
156 |   }
157 | 
158 |   /** Purges hogzilla_sflows: drops everything older than (now - timeUnit). */
159 |   def cleanSFlows(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)])
160 |   {
161 |     println("Cleaning hogzilla_sflows...")
162 |     val now = System.currentTimeMillis
163 | 
164 |     val timeSuperior1 = now - timeUnit
165 |     val nSplits = 5 /* number of parallel tasks */
166 |     val denseTime = timeUnit*1
167 | 
168 |     println("Removing all older than "+timeSuperior1)
169 |     val totalOld = purgeOlderThan(HogHBaseRDD.hogzilla_sflows, timeSuperior1, denseTime, nSplits)
170 |     println("Old rows dropped: "+totalOld)
171 |   }
172 | 
173 |   /** Purges hogzilla_authrecords: drops everything older than (now - timeUnit). */
174 |   def cleanAuthRecords(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)])
175 |   {
176 |     println("Cleaning hogzilla_authrecords...")
177 |     val now = System.currentTimeMillis
178 | 
179 |     val timeSuperior1 = now - timeUnit
180 |     val nSplits = 5 /* number of parallel tasks */
181 |     val denseTime = timeUnit*1
182 | 
183 |     println("Removing all older than "+timeSuperior1)
184 |     val totalOld = purgeOlderThan(HogHBaseRDD.hogzilla_authrecords, timeSuperior1, denseTime, nSplits)
185 |     println("Old rows dropped: "+totalOld)
186 |   }
187 | 
188 | }
--------------------------------------------------------------------------------
/src/org/hogzilla/prepare/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
25 | package object prepare {
26 |   // Intentionally empty: placeholder package object for org.hogzilla.prepare.
27 | }
--------------------------------------------------------------------------------
/src/org/hogzilla/sflow/HogSFlowHistograms.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 | /**
20 | * REFERENCES:
21 | * - http://ids-hogzilla.org/xxx/826000101
22 | */
23 |
24 |
25 | package org.hogzilla.sflow
26 |
27 | import java.net.InetAddress
28 | import scala.collection.mutable.HashMap
29 | import scala.collection.mutable.HashSet
30 | import scala.collection.mutable.Map
31 | import scala.math.floor
32 | import scala.math.log
33 | import org.apache.hadoop.hbase.client.Scan
34 | import org.apache.hadoop.hbase.util.Bytes
35 | import org.apache.spark.SparkContext
36 | import org.apache.spark.rdd.PairRDDFunctions
37 | import org.apache.spark.rdd.RDD
38 | import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
39 | import org.hogzilla.event.HogEvent
40 | import org.hogzilla.event.HogSignature
41 | import org.hogzilla.hbase.HogHBaseHistogram
42 | import org.hogzilla.hbase.HogHBaseRDD
43 | import org.hogzilla.hbase.HogHBaseReputation
44 | import org.hogzilla.histogram.Histograms
45 | import org.hogzilla.histogram.HogHistogram
46 | import org.hogzilla.util.HogFlow
47 | import org.apache.commons.math3.analysis.function.Min
48 | import org.apache.spark.mllib.linalg.Vectors
49 | import org.apache.spark.mllib.linalg.Vector
50 | import org.apache.spark.mllib.clustering.KMeans
51 | import org.hogzilla.hbase.HogHBaseCluster
52 | import org.hogzilla.cluster.HogClusterMember
53 |
54 |
55 | /**
56 | *
57 | */
58 | object HogSFlowHistograms {
59 | 
60 |   // Event signature registered in HBase for alerts produced by this module.
61 |   val signature = HogSignature(3,"HZ: Top talker identified" , 2,1,826001101,826).saveHBase() //1
62 | 
63 |   /**
64 |    * Module entry point. Delegates to realRun.
65 |    */
66 |   def run(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext)
67 |   {
68 |     // XXX: Organize it!
69 |     realRun(HogRDD,spark)
70 |   }
71 | 
72 |   /**
73 |    * True if `ip` textually starts with one of the configured network prefixes.
74 |    * NOTE(review): this is plain string-prefix matching, not CIDR matching.
75 |    */
76 |   def isMyIP(ip:String,myNets:Set[String]):Boolean =
77 |   {
78 |     myNets.exists({ net => ip.startsWith(net) })
79 |   }
80 | 
81 |   /**
82 |    * Clusters local hosts by their HIST01 histograms using KMeans and saves
83 |    * the resulting clusters and cluster members into HBase.
84 |    *
85 |    * Steps:
86 |    *  1. Load "my networks" prefixes from hogzilla_mynets.
87 |    *  2. Per local host, keep the relevant histogram keys (numeric keys < 10000
88 |    *     with frequency > 0.001, for HIST01 histograms with size > 20).
89 |    *  3. Vectorize each host's histogram over the union of all kept keys.
90 |    *  4. Run KMeans (k=10) and persist clusters / members via HogHBaseCluster.
91 |    */
92 |   def realRun(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext)
93 |   {
94 | 
95 |     val myNetsTemp = new HashSet[String]
96 | 
97 |     val it = HogHBaseRDD.hogzilla_mynets.getScanner(new Scan()).iterator()
98 |     while(it.hasNext())
99 |     {
100 |       myNetsTemp.add(Bytes.toString(it.next().getValue(Bytes.toBytes("net"),Bytes.toBytes("prefix"))))
101 |     }
102 | 
103 |     val myNets:scala.collection.immutable.Set[String] = myNetsTemp.toSet
104 | 
105 |     // (histogramName, histogramSize, relevantKeys, histogramMap) per histogram.
106 |     // Histogram names look like "HIST01-<ip>".
107 |     val summary1: RDD[(String,Long,Set[Long],HashMap[String,Double])]
108 |                   = HogRDD
109 |       .map ({ case (id,result) =>
110 | 
111 |         val histogramSize = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("size"))).toLong
112 |         val histogramName = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")))
113 |         val histMap       = HogHBaseHistogram.mapByResult(result)._1
114 | 
115 |         // Keep HIST01 keys below 10000 with frequency above 0.001; a
116 |         // non-numeric key falls back to the frequency criterion only.
117 |         val keys:Set[Long] = histMap.filter({ case (key,value) =>
118 |           try {
119 |             histogramName.startsWith("HIST01") & key.toDouble < 10000 & value>0.001
120 |           } catch {
121 |             case t: Throwable =>
122 |               histogramName.startsWith("HIST01") & value>0.001
123 |           }
124 |         })
125 |         .keySet
126 |         .map({ case key =>
127 |           try {
128 |             key.toDouble.toLong
129 |           } catch {
130 |             case t: Throwable => t.printStackTrace() // TODO: handle error
131 |             0L
132 |           }
133 |         })
134 |         .toSet
135 | 
136 |         (histogramName,histogramSize,keys,histMap)
137 |       })
138 |       .filter({case (histogramName,histogramSize,keys,histMap) =>
139 |         // Only local hosts with enough observations.
140 |         histogramSize>20 &
141 |         isMyIP(histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString,myNets)
142 |       })
143 |       .cache
144 | 
145 |     val summary1Count = summary1.count()
146 |     // BUGFIX: count() returns Long, and `summary1Count.equals(0)` compared a
147 |     // boxed Long with a boxed Int, which is always false in Scala, so the
148 |     // empty-input guard never fired and the reduce below threw on empty RDDs.
149 |     if(summary1Count == 0L)
150 |       return
151 | 
152 |     // Union of all relevant keys, sorted: this defines the vector dimensions.
153 |     val allKeys = summary1
154 |       .map(_._3)
155 |       .reduce(_++_)
156 |       .toList
157 |       .sorted
158 | 
159 |     val vectorSize = allKeys.size
160 | 
161 |     // Dense frequency vector (in percent) per host over allKeys.
162 |     val summary: RDD[(String,Long,Set[Long],Vector)]
163 |                  = summary1
164 |       .map({ case (histogramName,histogramSize,keys,histMap) =>
165 |         val vector =
166 |           Vectors.dense({ allKeys.map({ key =>
167 |             if(keys.contains(key))
168 |               histMap.get(key.toString).get*100D
169 |             else
170 |               0D
171 |           }).toArray
172 |         })
173 | 
174 |         (histogramName,histogramSize,keys,vector)
175 |       }).cache
176 | 
177 |     println("Keys: "+allKeys.mkString(","))
178 | 
179 |     val k=10  // number of KMeans clusters
180 | 
181 |     println("Estimating model, k="+k)
182 |     val kmeans = new KMeans()
183 |     kmeans.setK(k)
184 |     val model = kmeans.run(summary.map(_._4))
185 | 
186 |     println("Centroids("+k+"): \n"+model.clusterCenters.mkString(",\n"))
187 | 
188 |     // (cluster, (euclidean distance to centroid, name, size, keys, vector, memberIP))
189 |     val kmeansResult=summary.map({
190 |       case (histogramName,histogramSize,keys,vector) =>
191 |         val cluster  = model.predict(vector)
192 |         val centroid = model.clusterCenters(cluster)
193 | 
194 |         val distance=math.sqrt(vector.toArray.zip(centroid.toArray).map({case (p1,p2) => p1-p2}).map(p => p*p).sum)
195 | 
196 |         val memberIP=histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString
197 | 
198 |         (cluster,(distance,histogramName,histogramSize,keys,vector,memberIP))
199 |     }).cache
200 | 
201 |     val mean   = kmeansResult.map(_._2._1).mean
202 |     val stdDev = kmeansResult.map(_._2._1).stdev
203 |     val max    = kmeansResult.map(_._2._1).max
204 |     val elementsPerCluster = kmeansResult.countByKey().toList.sortBy(_._1).toMap
205 | 
206 |     println("(Mean,StdDev,Max)("+k+"): "+mean+","+stdDev+","+max+".")
207 |     println("Elements per cluster:\n"+elementsPerCluster.mkString(",\n"))
208 | 
209 |     // Delete previously saved clusters before writing the new ones.
210 |     (0 to k by 1).toList.foreach { HogHBaseCluster.deleteCluster(_) }
211 | 
212 |     // (cluster, memberIP) pairs collected to the driver for saveCluster below.
213 |     val members =
214 |       kmeansResult
215 |       .map({case (cluster,(distance,histogramName,histogramSize,keys,vector,memberIP)) =>
216 |         (cluster,histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString)
217 |       }).cache().collect().toArray
218 | 
219 |     // Persist each cluster that has a meaningful centroid (some dimension > 10)
220 |     // and more than 4 members.
221 |     val grouped = kmeansResult.groupByKey()
222 |     grouped
223 |       .foreach({ case ((clusterIdx,iterator)) =>
224 | 
225 |         val centroid     = model.clusterCenters(clusterIdx)
226 |         val centroidMain = allKeys.zip(centroid.toArray)
227 |         val clusterSize  = elementsPerCluster.get(clusterIdx).get
228 | 
229 |         if(centroidMain.filter(_._2>10).size>0 & clusterSize > 4)
230 |         {
231 |           println("################################################################\n"+
232 |               "CLUSTER: "+clusterIdx+"\n"+
233 |               "Centroid:\n"+centroidMain.filter(_._2>10).mkString(",")+"\n"+
234 |               "clusterSize: "+clusterSize+"\n")
235 | 
236 |           HogHBaseCluster.saveCluster(clusterIdx,centroidMain,clusterSize,members.filter(_._1.equals(clusterIdx)).map({_._2}))
237 |         }
238 |       })
239 | 
240 |     // Persist individual members of the saved clusters; members of discarded
241 |     // clusters only get their previous record deleted.
242 |     kmeansResult
243 |       .foreach({
244 |         case (clusterIdx,(distance,histogramName,histogramSize,ports,vector,memberIP)) =>
245 | 
246 |           val clusterSize  = elementsPerCluster.get(clusterIdx).get
247 |           val centroidMain = allKeys.zip(model.clusterCenters(clusterIdx).toArray)
248 | 
249 |           HogHBaseCluster.deleteClusterMember(memberIP)
250 | 
251 |           if(centroidMain.filter(_._2>10).size>0 & clusterSize > 4)
252 |           {
253 |             val frequency_vector = allKeys.zip(vector.toArray)
254 | 
255 |             val clusterMember = new HogClusterMember(clusterIdx, centroidMain, clusterSize, allKeys,
256 |                                                      memberIP, ports, frequency_vector, distance)
257 | 
258 |             HogHBaseCluster.saveClusterMember(clusterMember)
259 |           }
260 |       })
261 | 
262 |   }
263 | 
264 | }
--------------------------------------------------------------------------------
/src/org/hogzilla/sflow/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
25 | package object sflow {
26 |   // Intentionally empty: placeholder package object for org.hogzilla.sflow.
27 | }
28 |
--------------------------------------------------------------------------------
/src/org/hogzilla/snort/HogSnort.scala:
--------------------------------------------------------------------------------
1 |
2 | package org.hogzilla.snort
3 |
4 | import org.apache.hadoop.hbase.client.Put
5 | import org.apache.hadoop.hbase.util.Bytes
6 | import org.apache.spark.SparkContext
7 | import org.apache.spark.ml.Pipeline
8 | import org.apache.spark.ml.PipelineModel
9 | import org.apache.spark.ml.classification.RandomForestClassificationModel
10 | import org.apache.spark.ml.classification.RandomForestClassifier
11 | import org.apache.spark.ml.feature.StringIndexer
12 | import org.apache.spark.ml.feature.VectorAssembler
13 | import org.apache.spark.rdd.RDD
14 | import org.apache.spark.sql.Row
15 | import org.apache.spark.sql.SQLContext
16 | import org.apache.spark.sql.types.DataTypes
17 | import org.apache.spark.sql.types.StructField
18 | import org.apache.spark.sql.types.StructType
19 | import org.hogzilla.hbase.HogHBaseRDD
20 | import org.hogzilla.util.HogFeature
21 | import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
22 | import org.apache.spark.rdd.PairRDDFunctions
23 | import org.hogzilla.event.HogEvent
24 | import org.hogzilla.util.HogFlow
25 | import scala.collection.mutable.HashMap
26 | import org.hogzilla.event.HogSignature
27 |
28 |
29 |
30 |
31 | object HogSnort {
32 |
33 | val signature = ( HogSignature(3,"HZ: Suspicious flow detected by similarity with Snort alerts",2,1,826000001,826).saveHBase(), null )
34 |
35 |
36 |
37 | def run(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext):String = {
38 |
39 | val excludedSigs:Array[(String,String)] = Array() // Ex. Array((1,1),(1,2))
40 | val (maxbin,maxdepth,mtry,malThreshold) = (500,30,90,80)
41 |
42 |
43 | val sqlContext = new SQLContext(spark)
44 | import sqlContext.implicits._
45 |
46 |
47 | val filteredColumns = HogHBaseRDD.columns.filter({_.useOnTrain}).toSeq
48 | val orderedColumns = filteredColumns.zip(0 to filteredColumns.size-1)
49 |
50 |
51 | val convertFn: PartialFunction[Any,Any] = {
52 | case (column:HogFeature,value:String) =>
53 | try {
54 |
55 | if(column.ctype.equals("char"))
56 | value
57 | else if(column.ctype.equals("u_int64_t"))
58 | value.toLong
59 | else
60 | value.toInt
61 | } catch {
62 | case t: Throwable =>
63 | //println("ERROR - column name: "+column.name)
64 | //t.printStackTrace()
65 |
66 | if(column.ctype.equals("char"))
67 | ""
68 | else if(column.ctype.equals("u_int64_t"))
69 | 0L
70 | else
71 | 0
72 |
73 | }
74 | }
75 |
76 |
77 | val labRDD2 = HogRDD.
78 | map { case (id,result) => {
79 |
80 | val rowId = Bytes.toString(id.get).toString()
81 |
82 | val ndpi_risk = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("ndpi_risk")))
83 | val event_signature = Bytes.toString(result.getValue(Bytes.toBytes("event"),Bytes.toBytes("signature_id")))
84 | val event_generator = Bytes.toString(result.getValue(Bytes.toBytes("event"),Bytes.toBytes("generator_id")))
85 | val src_name = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("src_name")))
86 | val dst_name = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("dst_name")))
87 | val ctu_label = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("ctu_label")))
88 | val duration = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("flow_duration")))
89 |
90 | val actualclass = 1 // Not known at this time. Supposing all is an actual intrusion.
91 |
92 | val tuple = orderedColumns
93 | .map ({ case (column,index) =>
94 | val ret = result.getValue(Bytes.toBytes(column.getColumn1()),Bytes.toBytes(column.getColumn2()))
95 | val value = Bytes.toString(ret)
96 | if(value==null||value.equals(""))
97 | (column,"-1")
98 | else
99 | (column,value)
100 | })
101 |
102 |
103 | if(event_signature!=null && !event_signature.isEmpty()
104 | && event_generator!=null && !event_generator.isEmpty())
105 | (1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk)
106 | else if(ndpi_risk!=null && ( ndpi_risk.equals("Safe") || ndpi_risk.equals("Fun") ) )
107 | (0,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk)
108 | else
109 | (-1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) // discard
110 | }
111 | }
112 |
113 | val Signatures:PairRDDFunctions[(String,String),Long] = labRDD2
114 | .map({case (label,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) =>
115 | ((event_generator,event_signature),1L)
116 | })
117 |
118 | val Sarray = Signatures.reduceByKey(_+_).sortBy(_._2, false, 5).collect()
119 | val Sarray_size = Sarray.size
120 |
121 | // Print the found signatures. It may be useful to define what is FP and should be considered to be removed.
122 | Sarray.foreach({case ((gen,sig),count) => println(s"($gen,$sig) => $count")})
123 |
124 |
125 |
126 | val labRDD1 = labRDD2
127 | .map({case (label,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) =>
128 |
129 | if(event_signature!=null && !event_signature.isEmpty()
130 | && event_generator!=null && !event_generator.isEmpty() && !excludedSigs.contains((event_generator,event_signature)))
131 | (1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk)
132 | else if(ndpi_risk!=null && ( ndpi_risk.equals("Safe") || ndpi_risk.equals("Fun") ) )
133 | (0,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk)
134 | else
135 | (-1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) // discard
136 |
137 | })
138 | .map({case (label,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) =>
139 | Row.fromSeq({
140 | Seq(label,rowId,src_name,dst_name,actualclass)++tuple.collect(convertFn)
141 | })
142 |
143 | })
144 |
145 | val ccRDD = labRDD1.filter { x => x.get(0) == 1 }
146 | // val cleanRDD = sqlContext.sparkContext.parallelize(labRDD1.filter { x => x.get(0) == 0 }.takeSample(false, 12000, 123L))
147 | val cleanRDD = labRDD1.filter { x => x.get(0) == 0 }
148 |
149 | val trainRDD = ccRDD++cleanRDD
150 |
151 |
152 | println("0: "+trainRDD.filter { x => x.get(0) == 0 }.count+" 1:"+trainRDD.filter { x => x.get(0) == 1 }.count)
153 |
154 |
155 | val rawFeaturesStructsSeq = orderedColumns.map({case (column,index) =>
156 | if(column.ctype.equals("char"))
157 | StructField(column.name, DataTypes.StringType,true)
158 | else if(column.ctype.equals("u_int64_t"))
159 | StructField(column.name, DataTypes.LongType,false)
160 | else
161 | StructField(column.name, DataTypes.IntegerType,false)
162 | })
163 |
164 | val dataScheme = new StructType(Array(StructField("label", DataTypes.IntegerType,true),
165 | StructField("rowId", DataTypes.StringType,true),
166 | StructField("src_name", DataTypes.StringType,true),
167 | StructField("dst_name", DataTypes.StringType,true),
168 | StructField("actual_class", DataTypes.IntegerType,true))
169 | ++ rawFeaturesStructsSeq)
170 |
171 |
172 | val data = sqlContext.createDataFrame(trainRDD, dataScheme).cache()
173 | val dataSize = data.count
174 | println("Sample size: "+dataSize)
175 |
176 |
177 | val dataOut = sqlContext.createDataFrame(labRDD1.filter { x => x.get(0).toString.toInt < 0 }, dataScheme).cache()
178 | val dataSizeOut = dataOut.count
179 | println("Sample size Out (not labelled): "+dataSizeOut)
180 |
181 | val trainingData = data
182 |
183 | val stringIndexers = Array(new StringIndexer().setInputCol("label").setOutputCol("indexedLabel").setHandleInvalid("keep"))++
184 | Array(new StringIndexer().setInputCol("actual_class").setOutputCol("indexedActual_class"))++
185 | orderedColumns.filter({case (column,index) => column.ctype.equals("char") })
186 | .map({case (column,index) => new StringIndexer().setInputCol(column.name).setOutputCol(column.name+"CAT").setHandleInvalid("skip").fit(data) })
187 |
188 | val selectedFeaturesStringArray = orderedColumns.map({case (column,index) => if(column.ctype.equals("char")) column.name+"CAT" else column.name }).toArray
189 |
190 |
191 | val assembler = new VectorAssembler()
192 | .setInputCols(selectedFeaturesStringArray)
193 | .setOutputCol("rawFeatures")
194 |
195 |
196 |
197 | val rf = new RandomForestClassifier()
198 | .setLabelCol("indexedLabel").setFeaturesCol("rawFeatures").setProbabilityCol("probabilities")
199 | .setNumTrees(100).setImpurity("gini").setPredictionCol("prediction").setRawPredictionCol("rawPrediction")
200 | .setMaxBins(maxbin).setMaxDepth(maxdepth).setFeatureSubsetStrategy(mtry.toString)
201 | .setThresholds(Array((100D-malThreshold.toDouble)/100D,malThreshold.toDouble/100D,0D))
202 |
203 |
204 | val pipeline = new Pipeline().setStages(stringIndexers++Array(assembler,rf))
205 | val model = pipeline.fit(trainingData)
206 |
207 |
208 | // val predictionsOut = model.transform(dataOut.union(testData))
209 | val predictionsOut = model.transform(dataOut)
210 |
211 |
212 | // ALERT
213 | predictionsOut.filter( $"prediction" > 0 ) // prediction==1
214 | .select("src_name","dst_name","flow:src_port","flow:dst_port","prediction")
215 | .foreach({ row =>
216 | val (src,dst,src_port,dst_port,predicted) = (row.get(0),row.get(1),row.get(2),row.get(3),row.get(4))
217 |
218 | val flowMap: scala.collection.mutable.Map[String,String] = new HashMap[String,String]
219 | flowMap.put("flow:id",System.currentTimeMillis.toString)
220 | val event = new HogEvent(new HogFlow(flowMap,src.toString,dst.toString))
221 |
222 | event.title = f"HZ: Suspicious flow detected by similarity with Snort alerts"
223 |
224 | event.ports = ""
225 |
226 | event.text = "This flow was detected by Hogzilla based on its similarities with Snort alerts.\n\n"+
227 | s"$src:$src_port -> $dst:$dst_port"
228 |
229 | event.signature_id = signature._1.signature_id
230 | println("")
231 |
232 | })
233 |
234 | ""
235 |
236 | }
237 |
238 |
239 | }
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
--------------------------------------------------------------------------------
/src/org/hogzilla/snort/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2018 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 | /**
23 | * @author pa
24 | */
package object snort {
  // Intentionally empty: reserved for package-wide helpers and implicits
  // shared by the org.hogzilla.snort package.
}
28 |
--------------------------------------------------------------------------------
/src/org/hogzilla/util/HogConfig.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.util
21 |
22 | import java.security.MessageDigest
23 | import org.apache.hadoop.hbase.util.Bytes
24 | import javax.xml.bind.DatatypeConverter
25 | import math._
26 | import com.typesafe.config.Config
27 | import scala.collection.mutable.HashSet
28 |
29 |
30 | /**
31 | * @author pa
32 | */
object HogConfig {

  // Strips the "Set(" prefix and every ")" from a set-valued entry such as
  // "Set(1,2,3)", leaving the comma-separated payload. Mirrors the original
  // regex-based removal (all occurrences are removed, not just the ends).
  private def stripSetMarkers(value:String):String =
    "\\)".r.replaceAllIn("Set\\(".r.replaceAllIn(value, ""), "")

  /**
   * Fetch the entry `key` from `config` and convert it to the type named by
   * `valueType` ("Int", "Double", "Long", "Set(Int)" or "Set(String)").
   *
   * Returns `default` when the config is null, the entry is empty, the type
   * name is not recognised, or parsing throws (the error is printed).
   */
  def get(config:Config,key:String,valueType:String,default:Any):Any =
  {
    if(config==null)
      return default

    try {

      val value = config.getString(key)

      if(value.isEmpty())
        return default // Return default value

      println(f"Configuration: $key => $value")

      valueType match {
        case "Int"    => value.toInt
        case "Double" => value.toDouble
        case "Long"   => value.toLong
        case "Set(Int)" =>
          if(value.equals("Set()"))
            Set()
          else
            stripSetMarkers(value).split(",").map({x => x.toInt}).toSet
        case "Set(String)" =>
          if(value.equals("Set()"))
            Set()
          else
            // Prints each trimmed element as a side effect, as before.
            stripSetMarkers(value).split(",").map({x => println(x.toString.trim()) ; x.toString.trim()}).toSet
        case _ =>
          default // Unsupported type name: implement it before relying on it
      }

    } catch {
      case t: Throwable =>
        t.printStackTrace()
        println(f"Problem parsing $key . Check if it is ok. Using default value")
        default
    }

  }

  /** `key` as an Int, or `default`. */
  def getInt(config:Config,key:String,default:Any):Int =
  {
    get(config,key,"Int",default).asInstanceOf[Int]
  }

  /** `key` as a Long, or `default`. */
  def getLong(config:Config,key:String,default:Any):Long =
  {
    get(config,key,"Long",default).asInstanceOf[Long]
  }

  /** `key` as a Double, or `default`.
   *
   *  Bug fix: this previously cast to `Long`; unboxing a boxed Double as a
   *  Long throws ClassCastException at runtime whenever the key is set.
   */
  def getDouble(config:Config,key:String,default:Any):Double =
  {
    get(config,key,"Double",default).asInstanceOf[Double]
  }

  /** `key` as a Set[Int], or `default`. */
  def getSetInt(config:Config,key:String,default:Any):Set[Int] =
  {
    get(config,key,"Set(Int)",default).asInstanceOf[Set[Int]]
  }

  /** `key` as a Set[String], or `default`. */
  def getSetString(config:Config,key:String,default:Any):Set[String] =
  {
    get(config,key,"Set(String)",default).asInstanceOf[Set[String]]
  }


}
--------------------------------------------------------------------------------
/src/org/hogzilla/util/HogFeature.scala:
--------------------------------------------------------------------------------
1 | package org.hogzilla.util
2 |
3 | import scala.collection.immutable.HashMap
4 |
5 |
6 |
7 | /**
8 | * @author pa
9 | */
/**
 * One raw flow feature consumed by the classifier. `name` is expected to be
 * of the form "family:qualifier" (e.g. "flow:src_port").
 */
case class HogFeature(name:String,ctype:String,useOnTrain:Boolean = true, isNumeric:Boolean = true /*or categorical*/, label:Int=0) {

  // Position of this feature in the assembled vector; assigned externally.
  var index = 0
  // Known categorical values mapped to indices; populated externally.
  var possibleCategoricalValues: Map[String, Int] = new HashMap

  /** First ':'-separated segment of `name` (the column family). */
  def getColumn1(): String = name.split(":").head

  /** Second ':'-separated segment of `name` (the qualifier). */
  def getColumn2(): String = {
    val segments = name.split(":")
    segments(1)
  }
}
--------------------------------------------------------------------------------
/src/org/hogzilla/util/HogFlow.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.util
21 |
22 | import scala.collection.mutable.Map
23 |
24 |
25 |
26 | /**
27 | * @author pa
28 | */
/**
 * A network flow record: a mutable map of flow attributes plus the two
 * endpoint addresses (lower_ip / upper_ip).
 */
case class HogFlow(map:Map[String,String],lower_ip:String,upper_ip:String) {

  /**
   * Value of the flow attribute `key`.
   *
   * Throws NoSuchElementException when the key is absent — same exception
   * type as the previous `map.get(key).get`, but `map(key)` reports which
   * key was missing instead of the opaque "None.get".
   */
  def get(key:String):String =
  {
    map(key)
  }
}
--------------------------------------------------------------------------------
/src/org/hogzilla/util/HogGeograph.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.util
21 |
22 | import java.security.MessageDigest
23 | import org.apache.hadoop.hbase.util.Bytes
24 | import javax.xml.bind.DatatypeConverter
25 | import math._
26 |
27 |
28 | /**
29 | * @author pa
30 | */
object HogGeograph {

  // Mean Earth radius, kilometres.
  val R = 6372.8 //radius in km

  /**
   * Great-circle distance in kilometres between two points given in decimal
   * degrees, using the haversine formula.
   *
   * The arithmetic expressions are kept exactly as written so results stay
   * bit-identical across refactors.
   */
  def haversineDistance(lat1:Double, lon1:Double, lat2:Double, lon2:Double):Double =
  {
    val deltaLat = (lat2 - lat1).toRadians
    val deltaLon = (lon2 - lon1).toRadians

    // Haversine term, then the central angle between the two points.
    val h = pow(sin(deltaLat/2),2) + pow(sin(deltaLon/2),2) * cos(lat1.toRadians) * cos(lat2.toRadians)
    val centralAngle = 2 * asin(sqrt(h))
    R * centralAngle
  }


  /**
   * Same as haversineDistance, but each coordinate arrives as a "lat,lon"
   * string. Any parse failure yields a very large sentinel distance rather
   * than an exception.
   */
  def haversineDistanceFromStrings(coords1:String, coords2:String):Double =
  {
    try {
      val p1 = coords1.split(",").map({ x => x.toDouble })
      val p2 = coords2.split(",").map({ x => x.toDouble })

      haversineDistance(p1(0), p1(1), p2(0), p2(1))
    } catch {
      case _: Throwable =>
        // Unparseable coordinates: report an effectively infinite distance.
        999999999D
    }
  }

}
--------------------------------------------------------------------------------
/src/org/hogzilla/util/HogStringUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla.util
21 |
22 | import java.security.MessageDigest
23 | import org.apache.hadoop.hbase.util.Bytes
24 | import javax.xml.bind.DatatypeConverter
25 |
26 |
27 | /**
28 | * @author pa
29 | */
object HogStringUtils {

  /**
   * MD5 digest of `string`, rendered as an upper-case hexadecimal string.
   *
   * NOTE(review): `getBytes` uses the platform default charset — confirm
   * UTF-8 is what callers expect on every deployment.
   */
  def md5(string:String):String =
  {
    val digest = MessageDigest.getInstance("MD5").digest(string.getBytes)
    DatatypeConverter.printHexBinary(digest)
  }
}
--------------------------------------------------------------------------------
/src/org/hogzilla/util/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende
3 | *
4 | * This program is free software; you can redistribute it and/or modify
5 | * it under the terms of the GNU General Public License Version 2 as
6 | * published by the Free Software Foundation. You may not use, modify or
7 | * distribute this program under any other version of the GNU General
8 | * Public License.
9 | *
10 | * This program is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public License for more details.
14 | *
15 | * You should have received a copy of the GNU General Public License
16 | * along with this program; if not, write to the Free Software
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */
19 |
20 | package org.hogzilla
21 |
22 |
package object util {
  // Intentionally empty: reserved for package-wide helpers and implicits
  // shared by the org.hogzilla.util package.
}
26 |
--------------------------------------------------------------------------------