├── .gitignore ├── LICENSE ├── README.md ├── docs ├── HBaseCommands.txt └── HbaseQueryExamples.txt ├── scripts ├── auth │ ├── LOCATION.html │ ├── PHPMailerAutoload.php │ ├── UA.html │ ├── class.phpmailer.php │ ├── class.pop3.php │ ├── class.smtp.php │ ├── mail.awk │ └── sendmail.php ├── bsfl ├── conf │ ├── auth.conf │ └── sflow.conf ├── createReputationList.sh ├── create_conf.sh ├── hz-utils │ ├── genCnCList.sh │ ├── getReposList.sh │ ├── start-all.sh │ ├── start-dbupdates.sh │ ├── start-hogzilla.sh │ ├── start-pigtail.sh │ ├── start-sflow2hz.sh │ ├── stop-all.sh │ ├── stop-dbupdates.sh │ ├── stop-hogzilla.sh │ ├── stop-pigtail.sh │ ├── stop-sflow2hz.sh │ └── updateReputationList.php ├── install_hogzilla.sh ├── myFuncs └── run.sh └── src ├── Hogzilla.scala ├── HogzillaContinuous.scala ├── HogzillaStream.scala └── org └── hogzilla ├── auth ├── HogAuth.scala └── package.scala ├── cluster ├── HogClusterMember.scala └── package.scala ├── dns ├── HogDNS.scala └── package.scala ├── event ├── HogEvent.scala ├── HogSignature.scala └── package.scala ├── hbase ├── HogHBaseCluster.scala ├── HogHBaseHistogram.scala ├── HogHBaseInventory.scala ├── HogHBaseRDD.scala ├── HogHBaseReputation.scala └── package.scala ├── histogram ├── Histograms.scala ├── HogHistogram.scala └── package.scala ├── http ├── HogHTTP.scala └── package.scala ├── initiate ├── HogInitiate.scala └── package.scala ├── prepare ├── HogPrepare.scala └── package.scala ├── sflow ├── HogSFlow.scala ├── HogSFlowHistograms.scala └── package.scala ├── snort ├── HogSnort.scala └── package.scala └── util ├── HogConfig.scala ├── HogFeature.scala ├── HogFlow.scala ├── HogGeograph.scala ├── HogStringUtils.scala └── package.scala /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | .settings/ 4 | .cache-main 5 | bin/ 6 | lib/ 7 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. 
And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 
71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 0. SUMMARY 2 | 3 | ### 1. COPYRIGHT 4 | 5 | ### 2. CONTACT 6 | 7 | -------------------------------------------------------------------------------- /docs/HBaseCommands.txt: -------------------------------------------------------------------------------- 1 | 2 | create 'hogzilla_flows','flow','event' 3 | create 'hogzilla_sflows','flow' 4 | create 'hogzilla_events','event' 5 | create 'hogzilla_sensor','sensor' 6 | create 'hogzilla_signatures','signature' 7 | create 'hogzilla_mynets','net' 8 | create 'hogzilla_reputation','rep' 9 | create 'hogzilla_histograms','info','values','labels' 10 | create 'hogzilla_clusters','info' 11 | create 'hogzilla_cluster_members','info','member','cluster' 12 | create 'hogzilla_inventory','info' 13 | create 'hogzilla_authrecords','auth' 14 | 15 | put 'hogzilla_mynets', '10.', 'net:description', 'Intranet 1' 16 | put 'hogzilla_mynets', '10.', 'net:prefix', '10.' 
17 | 18 | put 'hogzilla_reputation', '10.1.1.1', 'rep:description', 'SMTP Server' 19 | put 'hogzilla_reputation', '10.1.1.1', 'rep:ip', '10.1.1.1' 20 | put 'hogzilla_reputation', '10.1.1.1', 'rep:list', 'MX' 21 | put 'hogzilla_reputation', '10.1.1.1', 'rep:list_type', 'whitelist' 22 | 23 | 24 | put 'hogzilla_reputation', '10.1.1.2', 'rep:description', 'Big Talker 1' 25 | put 'hogzilla_reputation', '10.1.1.2', 'rep:ip', '10.1.1.1' 26 | put 'hogzilla_reputation', '10.1.1.2', 'rep:list', 'TTalker' 27 | put 'hogzilla_reputation', '10.1.1.2', 'rep:list_type', 'whitelist' 28 | 29 | put 'hogzilla_reputation', '10.1.111.', 'rep:description', 'DMZ' 30 | put 'hogzilla_reputation', '10.1.111.', 'rep:ip', '10.1.111.' 31 | put 'hogzilla_reputation', '10.1.111.', 'rep:list', 'TTalker' 32 | put 'hogzilla_reputation', '10.1.111.', 'rep:list_type', 'whitelist' 33 | 34 | -------------------------------------------------------------------------------- /docs/HbaseQueryExamples.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:bytes','flow:host_server_name','flow:packets'], 7 | FILTER => "(SingleColumnValueFilter('flow','detected_protocol',=,'regexstring:.*5/DNS.*',true,true)) AND (SingleColumnValueFilter('flow','bytes',=,'regexstring:......*',true,true))", 8 | LIMIT => 10} 9 | 10 | 11 | 12 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:bytes','flow:host_server_name','flow:packets','event:signature_id'], 13 | FILTER => "(SingleColumnValueFilter('flow','detected_protocol',=,'regexstring:.*5.178/DNS.Amazon.*',true,true)) AND (SingleColumnValueFilter('flow','bytes',=,'regexstring:......*',true,true))", 14 | LIMIT => 10} 15 | 16 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:lower_port', 17 | 
'flow:bytes','flow:host_server_name','flow:packets','event:signature_id', 18 | 'flow:dns_num_ret_code'], 19 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true)) AND (SingleColumnValueFilter('flow','dns_num_ret_code',!=,'regexstring:xyz',true,true))", 20 | LIMIT => 10} 21 | 22 | scan 'hogzilla_flows', {COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:lower_port', 23 | 'flow:bytes','flow:host_server_name','flow:packets','event:signature_id', 24 | 'flow:dns_ret_code','flow:packets_without_payload'], 25 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true))", 26 | LIMIT => 10} 27 | 28 | COLUMNS => ['flow:lower_name','flow:upper_name','flow:detected_protocol','flow:lower_port', 29 | 'flow:bytes','flow:host_server_name','flow:packets','event:signature_id', 30 | 'flow:dns_ret_code','flow:packets_without_payload','flow:host_server_name'], 31 | 32 | scan 'hogzilla_flows', { 33 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true)) AND (SingleColumnValueFilter('flow','host_server_name',=,'regexstring:msnsolution.nicaze.net',true,true))", 34 | LIMIT => 1} 35 | 36 | scan 'hogzilla_flows', { 37 | FILTER => "(SingleColumnValueFilter('flow','lower_port',=,'regexstring:53',true,true)) AND (SingleColumnValueFilter('flow','detected_protocol',=,'regexstring:.*5/DNS.*',true,true))", 38 | LIMIT => 1} 39 | 40 | scan 'hogzilla_flows',{LIMIT => 10, FILTER => "(SingleColumnValueFilter('flow','upper_port',=,'regexstring:80',true,true) OR SingleColumnValueFilter('flow','lower_port',=,'regexstring:80',true,true)) AND (SingleColumnValueFilter('flow','upper_name',=,'regexstring:189.9.39.12',true,true) OR SingleColumnValueFilter('flow','lower_name',=,'regexstring:189.9.39.12',true,true)) AND SingleColumnValueFilter('event','priority',=,'regexstring:1',true,true)" } 41 | 42 | scan 'hogzilla_sflows',{LIMIT => 10, FILTER => 
"(SingleColumnValueFilter('flow','tcpFlags',=,'regexstring:0x12',true,true) AND SingleColumnValueFilter('flow','srcIP',=,'regexstring:10.1.1',true,true)) " } 43 | -------------------------------------------------------------------------------- /scripts/auth/LOCATION.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

New login from [LOCATION]

8 | 9 | Dear user, 10 | 11 | You account [USERNAME] .... 12 | 13 | UA: [UA] 14 | Location: [LOCATION] 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /scripts/auth/PHPMailerAutoload.php: -------------------------------------------------------------------------------- 1 | 8 | * @author Jim Jagielski (jimjag) 9 | * @author Andy Prevost (codeworxtech) 10 | * @author Brent R. Matzelle (original founder) 11 | * @copyright 2013 Marcus Bointon 12 | * @copyright 2010 - 2012 Jim Jagielski 13 | * @copyright 2004 - 2009 Andy Prevost 14 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License 15 | * @note This program is distributed in the hope that it will be useful - WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 | * FITNESS FOR A PARTICULAR PURPOSE. 18 | */ 19 | 20 | /** 21 | * PHPMailer SPL autoloader. 22 | * @param string $classname The name of the class to load 23 | */ 24 | function PHPMailerAutoload($classname) 25 | { 26 | //Can't use __DIR__ as it's only in PHP 5.3+ 27 | $filename = dirname(__FILE__).DIRECTORY_SEPARATOR.'class.'.strtolower($classname).'.php'; 28 | if (is_readable($filename)) { 29 | require $filename; 30 | } 31 | } 32 | 33 | if (version_compare(PHP_VERSION, '5.1.2', '>=')) { 34 | //SPL autoloading was introduced in PHP 5.1.2 35 | if (version_compare(PHP_VERSION, '5.3.0', '>=')) { 36 | spl_autoload_register('PHPMailerAutoload', true, true); 37 | } else { 38 | spl_autoload_register('PHPMailerAutoload'); 39 | } 40 | } else { 41 | //Fall back to traditional autoload for old PHP versions 42 | function __autoload($classname) 43 | { 44 | PHPMailerAutoload($classname); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /scripts/auth/UA.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

New login from [LOCATION]

8 | 9 | Dear user, 10 | 11 | You account [USERNAME] .... 12 | 13 | UA: [UA] 14 | Location: [LOCATION] 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /scripts/auth/class.pop3.php: -------------------------------------------------------------------------------- 1 | 9 | * @author Jim Jagielski (jimjag) 10 | * @author Andy Prevost (codeworxtech) 11 | * @author Brent R. Matzelle (original founder) 12 | * @copyright 2013 Marcus Bointon 13 | * @copyright 2010 - 2012 Jim Jagielski 14 | * @copyright 2004 - 2009 Andy Prevost 15 | * @license http://www.gnu.org/copyleft/lesser.html GNU Lesser General Public License 16 | * @note This program is distributed in the hope that it will be useful - WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 | * FITNESS FOR A PARTICULAR PURPOSE. 19 | */ 20 | 21 | /** 22 | * PHPMailer POP-Before-SMTP Authentication Class. 23 | * Specifically for PHPMailer to use for RFC1939 POP-before-SMTP authentication. 24 | * Does not support APOP. 25 | * @package PHPMailer 26 | * @author Richard Davey (original author) 27 | * @author Marcus Bointon (coolbru) 28 | * @author Jim Jagielski (jimjag) 29 | * @author Andy Prevost (codeworxtech) 30 | */ 31 | 32 | class POP3 33 | { 34 | /** 35 | * The POP3 PHPMailer Version number. 36 | * @type string 37 | * @access public 38 | */ 39 | public $Version = '5.2.7'; 40 | 41 | /** 42 | * Default POP3 port number. 43 | * @type int 44 | * @access public 45 | */ 46 | public $POP3_PORT = 110; 47 | 48 | /** 49 | * Default timeout in seconds. 50 | * @type int 51 | * @access public 52 | */ 53 | public $POP3_TIMEOUT = 30; 54 | 55 | /** 56 | * POP3 Carriage Return + Line Feed. 57 | * @type string 58 | * @access public 59 | * @deprecated Use the constant instead 60 | */ 61 | public $CRLF = "\r\n"; 62 | 63 | /** 64 | * Debug display level. 
65 | * Options: 0 = no, 1+ = yes 66 | * @type int 67 | * @access public 68 | */ 69 | public $do_debug = 0; 70 | 71 | /** 72 | * POP3 mail server hostname. 73 | * @type string 74 | * @access public 75 | */ 76 | public $host; 77 | 78 | /** 79 | * POP3 port number. 80 | * @type int 81 | * @access public 82 | */ 83 | public $port; 84 | 85 | /** 86 | * POP3 Timeout Value in seconds. 87 | * @type int 88 | * @access public 89 | */ 90 | public $tval; 91 | 92 | /** 93 | * POP3 username 94 | * @type string 95 | * @access public 96 | */ 97 | public $username; 98 | 99 | /** 100 | * POP3 password. 101 | * @type string 102 | * @access public 103 | */ 104 | public $password; 105 | 106 | /** 107 | * Resource handle for the POP3 connection socket. 108 | * @type resource 109 | * @access private 110 | */ 111 | private $pop_conn; 112 | 113 | /** 114 | * Are we connected? 115 | * @type bool 116 | * @access private 117 | */ 118 | private $connected; 119 | 120 | /** 121 | * Error container. 122 | * @type array 123 | * @access private 124 | */ 125 | private $error; 126 | 127 | /** 128 | * Line break constant 129 | */ 130 | const CRLF = "\r\n"; 131 | 132 | /** 133 | * Constructor. 134 | * @access public 135 | */ 136 | public function __construct() 137 | { 138 | $this->pop_conn = 0; 139 | $this->connected = false; 140 | $this->error = null; 141 | } 142 | 143 | /** 144 | * Simple static wrapper for all-in-one POP before SMTP 145 | * @param $host 146 | * @param bool $port 147 | * @param bool $tval 148 | * @param string $username 149 | * @param string $password 150 | * @param int $debug_level 151 | * @return bool 152 | */ 153 | public static function popBeforeSmtp( 154 | $host, 155 | $port = false, 156 | $tval = false, 157 | $username = '', 158 | $password = '', 159 | $debug_level = 0 160 | ) { 161 | $pop = new POP3; 162 | return $pop->authorise($host, $port, $tval, $username, $password, $debug_level); 163 | } 164 | 165 | /** 166 | * Authenticate with a POP3 server. 
167 | * A connect, login, disconnect sequence 168 | * appropriate for POP-before SMTP authorisation. 169 | * @access public 170 | * @param string $host 171 | * @param bool|int $port 172 | * @param bool|int $tval 173 | * @param string $username 174 | * @param string $password 175 | * @param int $debug_level 176 | * @return bool 177 | */ 178 | public function authorise($host, $port = false, $tval = false, $username = '', $password = '', $debug_level = 0) 179 | { 180 | $this->host = $host; 181 | // If no port value provided, use default 182 | if ($port === false) { 183 | $this->port = $this->POP3_PORT; 184 | } else { 185 | $this->port = $port; 186 | } 187 | // If no timeout value provided, use default 188 | if ($tval === false) { 189 | $this->tval = $this->POP3_TIMEOUT; 190 | } else { 191 | $this->tval = $tval; 192 | } 193 | $this->do_debug = $debug_level; 194 | $this->username = $username; 195 | $this->password = $password; 196 | // Refresh the error log 197 | $this->error = null; 198 | // connect 199 | $result = $this->connect($this->host, $this->port, $this->tval); 200 | if ($result) { 201 | $login_result = $this->login($this->username, $this->password); 202 | if ($login_result) { 203 | $this->disconnect(); 204 | return true; 205 | } 206 | } 207 | // We need to disconnect regardless of whether the login succeeded 208 | $this->disconnect(); 209 | return false; 210 | } 211 | 212 | /** 213 | * Connect to a POP3 server. 214 | * @access public 215 | * @param string $host 216 | * @param bool|int $port 217 | * @param integer $tval 218 | * @return boolean 219 | */ 220 | public function connect($host, $port = false, $tval = 30) 221 | { 222 | // Are we already connected? 223 | if ($this->connected) { 224 | return true; 225 | } 226 | 227 | //On Windows this will raise a PHP Warning error if the hostname doesn't exist. 
228 | //Rather than suppress it with @fsockopen, capture it cleanly instead 229 | set_error_handler(array($this, 'catchWarning')); 230 | 231 | // connect to the POP3 server 232 | $this->pop_conn = fsockopen( 233 | $host, // POP3 Host 234 | $port, // Port # 235 | $errno, // Error Number 236 | $errstr, // Error Message 237 | $tval 238 | ); // Timeout (seconds) 239 | // Restore the error handler 240 | restore_error_handler(); 241 | // Does the Error Log now contain anything? 242 | if ($this->error && $this->do_debug >= 1) { 243 | $this->displayErrors(); 244 | } 245 | // Did we connect? 246 | if ($this->pop_conn == false) { 247 | // It would appear not... 248 | $this->error = array( 249 | 'error' => "Failed to connect to server $host on port $port", 250 | 'errno' => $errno, 251 | 'errstr' => $errstr 252 | ); 253 | if ($this->do_debug >= 1) { 254 | $this->displayErrors(); 255 | } 256 | return false; 257 | } 258 | 259 | // Increase the stream time-out 260 | // Check for PHP 4.3.0 or later 261 | if (version_compare(phpversion(), '5.0.0', 'ge')) { 262 | stream_set_timeout($this->pop_conn, $tval, 0); 263 | } else { 264 | // Does not work on Windows 265 | if (substr(PHP_OS, 0, 3) !== 'WIN') { 266 | socket_set_timeout($this->pop_conn, $tval, 0); 267 | } 268 | } 269 | 270 | // Get the POP3 server response 271 | $pop3_response = $this->getResponse(); 272 | // Check for the +OK 273 | if ($this->checkResponse($pop3_response)) { 274 | // The connection is established and the POP3 server is talking 275 | $this->connected = true; 276 | return true; 277 | } 278 | return false; 279 | } 280 | 281 | /** 282 | * Log in to the POP3 server. 283 | * Does not support APOP (RFC 2828, 4949). 
284 | * @access public 285 | * @param string $username 286 | * @param string $password 287 | * @return boolean 288 | */ 289 | public function login($username = '', $password = '') 290 | { 291 | if ($this->connected == false) { 292 | $this->error = 'Not connected to POP3 server'; 293 | 294 | if ($this->do_debug >= 1) { 295 | $this->displayErrors(); 296 | } 297 | } 298 | if (empty($username)) { 299 | $username = $this->username; 300 | } 301 | if (empty($password)) { 302 | $password = $this->password; 303 | } 304 | 305 | // Send the Username 306 | $this->sendString("USER $username" . self::CRLF); 307 | $pop3_response = $this->getResponse(); 308 | if ($this->checkResponse($pop3_response)) { 309 | // Send the Password 310 | $this->sendString("PASS $password" . self::CRLF); 311 | $pop3_response = $this->getResponse(); 312 | if ($this->checkResponse($pop3_response)) { 313 | return true; 314 | } 315 | } 316 | return false; 317 | } 318 | 319 | /** 320 | * Disconnect from the POP3 server. 321 | * @access public 322 | */ 323 | public function disconnect() 324 | { 325 | $this->sendString('QUIT'); 326 | //The QUIT command may cause the daemon to exit, which will kill our connection 327 | //So ignore errors here 328 | @fclose($this->pop_conn); 329 | } 330 | 331 | /** 332 | * Get a response from the POP3 server. 333 | * $size is the maximum number of bytes to retrieve 334 | * @param integer $size 335 | * @return string 336 | * @access private 337 | */ 338 | private function getResponse($size = 128) 339 | { 340 | $r = fgets($this->pop_conn, $size); 341 | if ($this->do_debug >= 1) { 342 | echo "Server -> Client: $r"; 343 | } 344 | return $r; 345 | } 346 | 347 | /** 348 | * Send raw data to the POP3 server. 
349 | * @param string $string 350 | * @return integer 351 | * @access private 352 | */ 353 | private function sendString($string) 354 | { 355 | if ($this->pop_conn) { 356 | if ($this->do_debug >= 2) { //Show client messages when debug >= 2 357 | echo "Client -> Server: $string"; 358 | } 359 | return fwrite($this->pop_conn, $string, strlen($string)); 360 | } 361 | return 0; 362 | } 363 | 364 | /** 365 | * Checks the POP3 server response. 366 | * Looks for +OK or -ERR. 367 | * @param string $string 368 | * @return boolean 369 | * @access private 370 | */ 371 | private function checkResponse($string) 372 | { 373 | if (substr($string, 0, 3) !== '+OK') { 374 | $this->error = array( 375 | 'error' => "Server reported an error: $string", 376 | 'errno' => 0, 377 | 'errstr' => '' 378 | ); 379 | if ($this->do_debug >= 1) { 380 | $this->displayErrors(); 381 | } 382 | return false; 383 | } else { 384 | return true; 385 | } 386 | } 387 | 388 | /** 389 | * Display errors if debug is enabled. 390 | * @access private 391 | */ 392 | private function displayErrors() 393 | { 394 | echo '
<pre>';
395 |         foreach ($this->error as $single_error) {
396 |             print_r($single_error);
397 |         }
398 |         echo '</pre>
'; 399 | } 400 | 401 | /** 402 | * POP3 connection error handler. 403 | * @param integer $errno 404 | * @param string $errstr 405 | * @param string $errfile 406 | * @param integer $errline 407 | * @access private 408 | */ 409 | private function catchWarning($errno, $errstr, $errfile, $errline) 410 | { 411 | $this->error[] = array( 412 | 'error' => "Connecting to the POP3 server raised a PHP warning: ", 413 | 'errno' => $errno, 414 | 'errstr' => $errstr, 415 | 'errfile' => $errfile, 416 | 'errline' => $errline 417 | ); 418 | } 419 | } 420 | -------------------------------------------------------------------------------- /scripts/auth/mail.awk: -------------------------------------------------------------------------------- 1 | function ltrim(s) { sub(/^[ \t\r\n]+/, "", s); return s } 2 | function rtrim(s) { sub(/[ \t\r\n]+$/, "", s); return s } 3 | function trim(s) { return rtrim(ltrim(s)); } 4 | 5 | $1 ~ /^Username/ { 6 | 7 | username = $2; 8 | getline; 9 | tag=$2 10 | content = gensub(/(.*):(.*)/,"\\2","g",$0); 11 | getline; getline; getline; 12 | location = gensub(/.*Location: (.*), UA:.*/,"\\1","g",$0); 13 | UA = gensub(/.*UA: (.*), AuthMethod:.*/,"\\1","g",$0); 14 | DATE = gensub(/.*TIME: (.*), .*/,"\\1","g",$0); 15 | IP = gensub(/^(.*) =.*/,"\\1","g",$0); 16 | IP = gensub(/(.*)\(.*\)/,"\\1","g",IP); 17 | 18 | 19 | if(tag ~ /Cities/){ 20 | type="LOCATION" 21 | } else 22 | if(tag ~ /UserAgents/){ 23 | type="UA" 24 | } 25 | print type"|"username"|"trim(DATE)"|"trim(IP)"|"trim(location)"|"trim(UA); 26 | 27 | 28 | } 29 | -------------------------------------------------------------------------------- /scripts/auth/sendmail.php: -------------------------------------------------------------------------------- 1 | IsSMTP(); 35 | $mail->Host = "$SMTP"; 36 | $mail->Port = "$PORTASMTP"; 37 | 38 | $mail->SetFrom($EMAILREMETENTE, $NOMEREMETENTE); 39 | //$mail->AddReplyTo($REPLYTO,$REPLYTONOME); 40 | $mail->Subject = $ASSUNTO; 41 | 42 | $mail->AltBody = "use a HTML client"; 
// optional, comment out and test 43 | $mail->CharSet = "UTF-8"; 44 | $mail->MsgHTML($body); 45 | //$mail->AddEmbeddedImage('cab.jpg', 'cab'); 46 | //$mail->AddAttachment("imagem/phpmailer_mini.gif"); // attachment 47 | 48 | $mail->AddAddress($to); 49 | if(!$mail->Send()) { 50 | //echo "Mailer Error: " . $mail->ErrorInfo; 51 | //print_r($mail); 52 | } else { 53 | //echo "Message sent! to $to\n"; 54 | } 55 | 56 | } 57 | ?> 58 | -------------------------------------------------------------------------------- /scripts/bsfl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # ########################### 3 | # Bash Shell Function Library 4 | # ########################### 5 | # 6 | # Author: Louwrentius 7 | # Contributions by: Jani Hurskainen 8 | # 9 | # Copyright © 2010 10 | # 11 | # Released under the curren GPL version. 12 | # 13 | # Description: 14 | # 15 | # This is a shell script library. It contains functions that can be called by 16 | # programs that include (source) this library. 17 | # 18 | # By simply sourcing this library, you can use all available functions as 19 | # documented on the projects page. 20 | # 21 | # 22 | 23 | BSFL_VERSION="2.00-beta-2" 24 | 25 | # 26 | # Do not edit this file. Just source it into your script 27 | # and override the variables to change their value. 28 | # 29 | 30 | init () { 31 | 32 | # 33 | # Debug mode shows more verbose output to screen and log files. 34 | # Value: yes or no (y / n) 35 | # 36 | DEBUG=no 37 | 38 | # 39 | # Syslog style log messages 40 | # 41 | if ! defined LOGDATEFORMAT 42 | then 43 | LOGDATEFORMAT="%b %e %H:%M:%S" 44 | fi 45 | if ! defined LOG_FILE 46 | then 47 | LOG_FILE=$0.log 48 | fi 49 | 50 | # 51 | # Enable / disable logging to a file 52 | # Value: yes or no (y / n) 53 | # 54 | if ! defined LOG_ENABLED 55 | then 56 | LOG_ENABLED=no 57 | fi 58 | if ! defined SYSLOG_ENABLED 59 | then 60 | SYSLOG_ENABLED=no 61 | fi 62 | if ! 
defined SYSLOG_TAG 63 | then 64 | SYSLOG_TAG=$0 65 | fi 66 | 67 | # 68 | # Use colours in output. 69 | # 70 | RED="tput setaf 1" 71 | GREEN="tput setaf 2" 72 | YELLOW="tput setaf 3" 73 | BLUE="tput setaf 4" 74 | MAGENTA="tput setaf 5" 75 | CYAN="tput setaf 6" 76 | LIGHT_BLUE="$CYAN" 77 | BOLD="tput bold" 78 | DEFAULT="tput sgr0" 79 | 80 | RED_BG="tput setab 1" 81 | GREEN_BG="tput setab 2" 82 | YELLOW_BG="tput setab 3" 83 | BLUE_BG="tput setab 4" 84 | MAGENTA_BG="tput setab 5" 85 | CYAN_BG="tput setab 6" 86 | 87 | # 88 | # Bug fix for Bash, parsing exclamation mark. 89 | # 90 | set +o histexpand 91 | # 92 | # returns 0 if a variable is defined (set) 93 | # returns 1 if a variable is unset 94 | # 95 | 96 | } 97 | 98 | function defined { 99 | [[ ${!1-X} == ${!1-Y} ]] 100 | } 101 | 102 | # 103 | # returns 0 if a variable is defined (set) and value's length > 0 104 | # returns 1 otherwise 105 | # 106 | function has_value { 107 | if defined $1; then 108 | if [[ -n ${!1} ]]; then 109 | return 0 110 | fi 111 | fi 112 | return 1 113 | } 114 | 115 | # 116 | # returns 0 if a directory exists 117 | # returns 1 otherwise 118 | # 119 | function directory_exists { 120 | if [[ -d "$1" ]]; then 121 | return 0 122 | fi 123 | return 1 124 | } 125 | 126 | # 127 | # returns 0 if a (regular) file exists 128 | # returns 1 otherwise 129 | # 130 | function file_exists { 131 | if [[ -f "$1" ]]; then 132 | return 0 133 | fi 134 | return 1 135 | } 136 | 137 | # 138 | # returns lowercase string 139 | # 140 | function tolower { 141 | echo "$1" | tr '[:upper:]' '[:lower:]' 142 | } 143 | 144 | # 145 | # returns uppercase string 146 | # 147 | function toupper { 148 | echo "$1" | tr '[:lower:]' '[:upper:]' 149 | } 150 | 151 | # 152 | # Only returns the first part of a string, delimited by tabs or spaces 153 | # 154 | function trim { 155 | echo $1 156 | } 157 | 158 | # 159 | # Dummy function to provide usage instructions. 160 | # Override this function if required. 
161 | # 162 | show_usage () { 163 | 164 | MESSAGE="$1" 165 | echo "$MESSAGE" 166 | exit 1 167 | } 168 | 169 | # 170 | # Checks if a variable is set to "y" or "yes". 171 | # Usefull for detecting if a configurable option is set or not. 172 | # 173 | option_enabled () { 174 | 175 | VAR="$1" 176 | VAR_VALUE=$(eval echo \$$VAR) 177 | if [[ "$VAR_VALUE" == "y" ]] || [[ "$VAR_VALUE" == "yes" ]] 178 | then 179 | return 0 180 | else 181 | return 1 182 | fi 183 | } 184 | 185 | # 186 | # The log funcion just puts a string into a file, prepended with a date & time in 187 | # syslog format. 188 | # 189 | 190 | log2syslog () { 191 | 192 | if option_enabled SYSLOG_ENABLED 193 | then 194 | MESSAGE="$1" 195 | logger -t "$SYSLOG_TAG" " $MESSAGE" #The space is not a typo!" 196 | fi 197 | } 198 | 199 | # 200 | # This function writes messages to a log file and/or syslog 201 | # The only argument is a message that has to be logged. 202 | # 203 | 204 | log () { 205 | 206 | if option_enabled LOG_ENABLED || option_enabled SYSLOG_ENABLED 207 | then 208 | LOG_MESSAGE="$1" 209 | DATE=`date +"$LOGDATEFORMAT"` 210 | 211 | if has_value LOG_MESSAGE 212 | then 213 | LOG_STRING="$DATE $LOG_MESSAGE" 214 | else 215 | LOG_STRING="$DATE -- empty log message, no input received --" 216 | fi 217 | 218 | if option_enabled LOG_ENABLED 219 | then 220 | echo "$LOG_STRING" >> "$LOG_FILE" 221 | fi 222 | 223 | if option_enabled SYSLOG_ENABLED 224 | then 225 | # 226 | # Syslog already prepends a date/time stamp so only the message 227 | # is logged. 228 | # 229 | log2syslog "$LOG_MESSAGE" 230 | fi 231 | fi 232 | } 233 | 234 | 235 | # 236 | # This function basically replaces the 'echo' function in bash scripts. 237 | # The added functionality over echo is logging and using colors. 238 | # 239 | # The first argument is the string / message that must be displayed. 240 | # The second argument is the text color. 241 | 242 | msg () { 243 | 244 | MESSAGE="$1" 245 | COLOR="$2" 246 | 247 | if ! 
has_value COLOR 248 | then 249 | COLOR="$DEFAULT" 250 | fi 251 | 252 | if has_value "MESSAGE" 253 | then 254 | $COLOR 255 | echo "$MESSAGE" 256 | $DEFAULT 257 | log "$MESSAGE" 258 | else 259 | echo "-- no message received --" 260 | log "$MESSAGE" 261 | fi 262 | } 263 | 264 | # 265 | # This function echos a message 266 | # and displays the status at the end of the line. 267 | # 268 | # It can be used to create status messages other 269 | # than the default messages available such as 270 | # OK or FAIL 271 | # 272 | msg_status () { 273 | 274 | MESSAGE="$1" 275 | STATUS="$2" 276 | 277 | msg "$MESSAGE" 278 | display_status "$STATUS" 279 | } 280 | 281 | # 282 | # These functions are just short hand for messages like 283 | # msg_status "this message is ok" OK 284 | # 285 | 286 | # 287 | # The following functions are shorthand for 288 | # msg_status "a message" OK 289 | # msg_status "another message" FAIL 290 | 291 | 292 | msg_emergency () { 293 | 294 | MESSAGE="$1" 295 | STATUS="EMERGENCY" 296 | msg_status "$MESSAGE" "$STATUS" 297 | } 298 | 299 | msg_alert () { 300 | 301 | MESSAGE="$1" 302 | STATUS="ALERT" 303 | msg_status "$MESSAGE" "$STATUS" 304 | } 305 | 306 | msg_critical () { 307 | 308 | MESSAGE="$1" 309 | STATUS="CRITICAL" 310 | msg_status "$MESSAGE" "$STATUS" 311 | } 312 | 313 | msg_error () { 314 | 315 | MESSAGE="$1" 316 | STATUS="ERROR" 317 | msg_status "$MESSAGE" "$STATUS" 318 | } 319 | 320 | msg_warning () { 321 | 322 | MESSAGE="$1" 323 | STATUS="WARNING" 324 | msg_status "$MESSAGE" "$STATUS" 325 | } 326 | 327 | msg_notice () { 328 | MESSAGE="$1" 329 | STATUS="NOTICE" 330 | msg_status "$MESSAGE" "$STATUS" 331 | } 332 | 333 | msg_info () { 334 | MESSAGE="$1" 335 | STATUS="INFO" 336 | msg_status "$MESSAGE" "$STATUS" 337 | } 338 | 339 | msg_debug () { 340 | MESSAGE="$1" 341 | STATUS="DEBUG" 342 | msg_status "$MESSAGE" "$STATUS" 343 | } 344 | 345 | msg_ok () { 346 | 347 | MESSAGE="$1" 348 | STATUS="OK" 349 | msg_status "$MESSAGE" "$STATUS" 350 | } 351 | 352 | 
msg_not_ok () { 353 | 354 | MESSAGE="$1" 355 | STATUS="NOT_OK" 356 | msg_status "$MESSAGE" "$STATUS" 357 | } 358 | 359 | msg_fail () { 360 | 361 | MESSAGE="$1" 362 | STATUS="FAILED" 363 | msg_status "$MESSAGE" "$STATUS" 364 | } 365 | 366 | msg_success () { 367 | MESSAGE="$1" 368 | STATUS="SUCCESS" 369 | msg_status "$MESSAGE" "$STATUS" 370 | } 371 | 372 | msg_passed () { 373 | MESSAGE="$1" 374 | STATUS="PASSED" 375 | msg_status "$MESSAGE" "$STATUS" 376 | } 377 | 378 | check_status () { 379 | 380 | CMD="$1" 381 | STATUS="$2" 382 | 383 | if [ "$STATUS" == "0" ] 384 | then 385 | msg_ok "$CMD" 386 | else 387 | msg_fail "$CMD" 388 | fi 389 | } 390 | 391 | # 392 | # Private function 393 | # 394 | # This is a function that just positions 395 | # the cursor one row up and to the right. 396 | # It then prints a message with specified 397 | # Color 398 | # It is used for displaying colored status messages on the 399 | # Right side of the screen. 400 | # 401 | # ARG1 = "status message (OK / FAIL)" 402 | # ARG2 = The color in which the status is displayed. 403 | # 404 | raw_status () { 405 | 406 | STATUS="$1" 407 | COLOR="$2" 408 | 409 | function position_cursor () { 410 | 411 | let RES_COL=`tput cols`-12 412 | tput cuf $RES_COL 413 | tput cuu1 414 | } 415 | 416 | position_cursor 417 | echo -n "[" 418 | $DEFAULT 419 | $BOLD 420 | $COLOR 421 | echo -n "$STATUS" 422 | $DEFAULT 423 | echo "]" 424 | log "Status = $STATUS" 425 | } 426 | 427 | # 428 | # This function converts a status message to a particular color. 
429 | # 430 | display_status () { 431 | 432 | 433 | STATUS="$1" 434 | 435 | case $STATUS in 436 | 437 | EMERGENCY ) 438 | STATUS="EMERGENCY" 439 | COLOR="$RED" 440 | ;; 441 | ALERT ) 442 | STATUS=" ALERT " 443 | COLOR="$RED" 444 | ;; 445 | CRITICAL ) 446 | STATUS="CRITICAL " 447 | COLOR="$RED" 448 | ;; 449 | ERROR ) 450 | STATUS=" ERROR " 451 | COLOR="$RED" 452 | ;; 453 | 454 | WARNING ) 455 | STATUS=" WARNING " 456 | COLOR="$YELLOW" 457 | ;; 458 | 459 | NOTICE ) 460 | STATUS=" NOTICE " 461 | COLOR="$BLUE" 462 | ;; 463 | INFO ) 464 | STATUS=" INFO " 465 | COLOR="$LIGHT_BLUE" 466 | ;; 467 | DEBUG ) 468 | STATUS=" DEBUG " 469 | COLOR="$DEFAULT" 470 | ;; 471 | 472 | OK ) 473 | STATUS=" OK " 474 | COLOR="$GREEN" 475 | ;; 476 | NOT_OK) 477 | STATUS=" NOT OK " 478 | COLOR="$RED" 479 | ;; 480 | 481 | PASSED ) 482 | STATUS=" PASSED " 483 | COLOR="$GREEN" 484 | ;; 485 | 486 | SUCCESS ) 487 | STATUS=" SUCCESS " 488 | COLOR="$GREEN" 489 | ;; 490 | 491 | FAILURE | FAILED ) 492 | STATUS=" FAILED " 493 | COLOR="$RED" 494 | ;; 495 | 496 | *) 497 | STATUS="UNDEFINED" 498 | COLOR="$YELLOW" 499 | esac 500 | 501 | raw_status "$STATUS" "$COLOR" 502 | } 503 | 504 | # 505 | # Exit with error status 506 | # 507 | bail () { 508 | 509 | ERROR="$?" 510 | MSG="$1" 511 | if [ ! "$ERROR" = "0" ] 512 | then 513 | msg_fail "$MSG" 514 | exit "$ERROR" 515 | fi 516 | } 517 | 518 | # 519 | # This function executes a command provided as a parameter 520 | # The function then displays if the command succeeded or not. 521 | # 522 | cmd () { 523 | 524 | COMMAND="$1" 525 | msg "Executing: $COMMAND" 526 | 527 | RESULT=`eval $COMMAND 2>&1` 528 | msg "$RESULT" 529 | ERROR="$?" 530 | 531 | MSG="Command: ${COMMAND:0:29}..." 
532 | 533 | tput cuu1 534 | 535 | if [ "$ERROR" == "0" ] 536 | then 537 | msg_ok "$MSG" 538 | if [ "$DEBUG" == "1" ] 539 | then 540 | msg "$RESULT" 541 | fi 542 | else 543 | msg_fail "$MSG" 544 | log "$RESULT" 545 | fi 546 | 547 | return "$ERROR" 548 | } 549 | 550 | # 551 | # These functions can be used for timing how long (a) command(s) take to 552 | # execute. 553 | # 554 | now () { 555 | 556 | echo $(date +%s) 557 | } 558 | 559 | elapsed () { 560 | 561 | START="$1" 562 | STOP="$2" 563 | 564 | echo $(( STOP - START )) 565 | } 566 | 567 | # 568 | # Prints an error message ($2) to stderr and exits with the return code ($1). 569 | # The message is also logged. 570 | # 571 | function die { 572 | local -r err_code="$1" 573 | local -r err_msg="$2" 574 | local -r err_caller="${3:-$(caller 0)}" 575 | 576 | msg_fail "ERROR: $err_msg" 577 | msg_fail "ERROR: At line $err_caller" 578 | msg_fail "ERROR: Error code = $err_code" 579 | exit "$err_code" 580 | } >&2 # function writes to stderr 581 | 582 | # 583 | # Check if a return code ($1) indicates an error (i.e. >0) and prints an error 584 | # message ($2) to stderr and exits with the return code ($1). 585 | # The error is also logged. 586 | # 587 | # Die if error code is false. 588 | # 589 | function die_if_false { 590 | local -r err_code=$1 591 | local -r err_msg=$2 592 | local -r err_caller=$(caller 0) 593 | 594 | if [[ "$err_code" != "0" ]] 595 | then 596 | die $err_code "$err_msg" "$err_caller" 597 | fi 598 | } >&2 # function writes to stderr 599 | 600 | # 601 | # Dies when error code is true 602 | # 603 | function die_if_true { 604 | local -r err_code=$1 605 | local -r err_msg=$2 606 | local -r err_caller=$(caller 0) 607 | 608 | if [[ "$err_code" == "0" ]] 609 | then 610 | die $err_code "$err_msg" "$err_caller" 611 | fi 612 | } >&2 # function writes to stderr 613 | 614 | # 615 | # Replace some text inside a string. 
616 | # 617 | function str_replace () { 618 | local ORIG="$1" 619 | local DEST="$2" 620 | local DATA="$3" 621 | 622 | echo "$DATA" | sed "s/$ORIG/$DEST/g" 623 | } 624 | 625 | # 626 | # Replace string of text in file. 627 | # Uses the ed editor to replace the string. 628 | # 629 | # arg1 = string to be matched 630 | # arg2 = new string that replaces matched string 631 | # arg3 = file to operate on. 632 | # 633 | function str_replace_in_file () { 634 | local ORIG="$1" 635 | local DEST="$2" 636 | local FILE="$3" 637 | 638 | has_value FILE 639 | die_if_false $? "Empty argument 'file'" 640 | file_exists "$FILE" 641 | die_if_false $? "File does not exist" 642 | 643 | printf ",s/$ORIG/$DEST/g\nw\nQ" | ed -s "$FILE" > /dev/null 2>&1 644 | return "$?" 645 | } 646 | 647 | cmd_su () { 648 | 649 | USER="$1" 650 | COMMAND="$2" 651 | 652 | msg "Executing: $COMMAND" 653 | 654 | RESULT=`su $USER -c "$COMMAND" 2>&1` 655 | ERROR="$?" 656 | 657 | MSG="Command: ${COMMAND:0:29}..." 658 | 659 | tput cuu1 660 | 661 | if [ "$ERROR" == "0" ] 662 | then 663 | msg_ok "$MSG" 664 | if [ "$DEBUG" == "1" ] 665 | then 666 | msg "$RESULT" 667 | fi 668 | else 669 | msg_fail "$MSG" 670 | log "$RESULT" 671 | fi 672 | 673 | return "$ERROR" 674 | } 675 | 676 | init 677 | 678 | -------------------------------------------------------------------------------- /scripts/conf/auth.conf: -------------------------------------------------------------------------------- 1 | location { 2 | disabled = "0" 3 | allowedRadix = "300" 4 | excludedCities = "Set(Campinas)" 5 | reverseDomainsWhitelist = "Set(google.com,gmail.com)" 6 | } 7 | system { 8 | disabled = "0" 9 | excludedCities = "Set()" 10 | reverseDomainsWhitelist = "Set(google.com,gmail.com)" 11 | } 12 | useragent { 13 | disabled = "0" 14 | excludedCities = "Set()" 15 | reverseDomainsWhitelist = "Set(google.com,gmail.com)" 16 | } 17 | -------------------------------------------------------------------------------- /scripts/conf/sflow.conf: 
-------------------------------------------------------------------------------- 1 | general { 2 | excludeIPs = "Set()" 3 | } 4 | 5 | abusedSMTP { 6 | disabled = "0" 7 | minBytes = "50000000" 8 | excludeIPs = "Set()" 9 | } 10 | 11 | alert { 12 | maxFlowList = "1000" 13 | } 14 | 15 | alien { 16 | disabled = "0" 17 | minPairs = "20" 18 | excludeIPs = "Set()" 19 | } 20 | 21 | atypicalAlienPorts { 22 | disabled = "0" 23 | minPacketsPerFlow = "2" 24 | excludeIPs = "Set()" 25 | } 26 | 27 | atypicalData { 28 | disabled = "0" 29 | minBytes = "5737418240" 30 | excludeIPs = "Set()" 31 | } 32 | 33 | atypicalPairs { 34 | disabled = "0" 35 | minPairs = "300" 36 | excludeIPs = "Set()" 37 | } 38 | 39 | atypicalPorts { 40 | disabled = "0" 41 | excludePorts = "Set(80,443,587,465,993,995)" 42 | minPacketsPerFlow = "2" 43 | excludeIPs = "Set()" 44 | } 45 | 46 | bigProviders { 47 | disabled = "0" 48 | minBytes = "1073741824" 49 | } 50 | 51 | BotNet { 52 | disabled = "0" 53 | minPktsPerFlow = "20" 54 | excludeIPs = "Set()" 55 | } 56 | 57 | DDoS { 58 | disabled = "0" 59 | minFlows = "50" 60 | minPairs = "20" 61 | excludeIPs = "Set()" 62 | } 63 | 64 | dnsTunnel { 65 | disabled = "0" 66 | minBytes = "50000000" 67 | excludeIPs = "Set()" 68 | } 69 | 70 | hPortScan { 71 | disabled = "0" 72 | excludeAlienPorts = "Set(80,443,53)" 73 | excludeAlienPorts = "Set(80,443,587,465,993,995)" 74 | excludeMyPorts = "Set(123)" 75 | minFlows = "100" 76 | excludeIPs = "Set()" 77 | } 78 | 79 | ICMPTunnel { 80 | disabled = "0" 81 | minBytes = "100000000" 82 | minPacket = "200" 83 | excludeIPs = "Set()" 84 | } 85 | 86 | mediaStreaming { 87 | disabled = "0" 88 | maxDuration = "7200" 89 | maxUploadBytes = "10000000" 90 | minDownloadBytes = "1000000" 91 | minDuration = "300" 92 | excludePorts = "Set(1194)" 93 | excludeIPs = "Set()" 94 | } 95 | 96 | p2p { 97 | disabled = "0" 98 | minBytes2nd = "10000000" 99 | minPairs2nd = "10" 100 | minPairs = "5" 101 | minPorts2nd = "10" 102 | minPorts = "4" 103 | excludeIPs = 
"Set()" 104 | } 105 | 106 | SMTPTalkers { 107 | disabled = "0" 108 | minBytes = "20971520" 109 | excludeIPs = "Set()" 110 | } 111 | 112 | topTalkers { 113 | disabled = "0" 114 | threshold = "21474836480" 115 | excludeIPs = "Set()" 116 | } 117 | 118 | vPortScan { 119 | disabled = "0" 120 | maxPortNumber = "1024" 121 | minPorts = "3" 122 | excludeIPs = "Set()" 123 | } 124 | 125 | UDPAmplifier { 126 | disabled = "0" 127 | excludeIPs = "Set()" 128 | minPktsPerFlow = "2000" 129 | } 130 | -------------------------------------------------------------------------------- /scripts/createReputationList.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | FILE=$1 4 | 5 | 6 | #TTalker|whitelist|Big Talker|10.1.2.226 7 | 8 | cat $FILE | while read line ; do 9 | list=`echo $line | cut -d'|' -f1` 10 | listType=`echo $line | cut -d'|' -f2` 11 | description=`echo $line | cut -d'|' -f3` 12 | ip=`echo $line | cut -d'|' -f4` 13 | 14 | cat << EOF 15 | put 'hogzilla_reputation', '$ip', 'rep:description', '$description' 16 | put 'hogzilla_reputation', '$ip', 'rep:ip', '$ip' 17 | put 'hogzilla_reputation', '$ip', 'rep:list', '$list' 18 | put 'hogzilla_reputation', '$ip', 'rep:list_type', '$listType' 19 | 20 | EOF 21 | done 22 | -------------------------------------------------------------------------------- /scripts/create_conf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | grep -r HogConfig.get ../src | sed 's/.*HogConfig.get.*(config[ ]*,[ ]*"\([a-Z0-9.]*\)",\([ a-Z0-9,"()]*\)).*/\1 = \2/' | sort | sed 's/\"//g' | sed 's/\(.*\)\.\(.*\) = \(.*\)/\1 {\n\t\t\2 = "\3"\n\t}/g' 4 | -------------------------------------------------------------------------------- /scripts/hz-utils/genCnCList.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # C&C list 5 | 6 | 
URL="https://rules.emergingthreats.net/blockrules/emerging-botcc.rules" 7 | 8 | 9 | TMPRULES=`mktemp -t rules.XXXXXXX` || exit 1 10 | 11 | wget -q -O $TMPRULES https://rules.emergingthreats.net/blockrules/emerging-botcc.rules 12 | 13 | cat $TMPRULES | grep -v "^#" | cut -d "[" -f2 | cut -d "]" -f1 | sed 's/,/\n/g' 14 | 15 | rm -f $TMPRULES 16 | -------------------------------------------------------------------------------- /scripts/hz-utils/getReposList.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Create repositories IPs list 4 | # 5 | # Usage: ./getReposList.sh 6 | # Ex: ./getReposList.sh windows 7 | 8 | 9 | if test -z $1 10 | then 11 | echo "Usage: ./getReposList.sh " 12 | echo "System: windows, linux, freebsd, apple, or android" 13 | exit 1 14 | fi 15 | 16 | #### URLs utilizadas pelos sistemas 17 | 18 | 19 | # Windows 20 | URLs_windows="windowsupdate.microsoft.com update.microsoft.com windowsupdate.com download.windowsupdate.com download.microsoft.com download.windowsupdate.com ntservicepack.microsoft.com time.windows.com javadl-esd.sun.com fpdownload.adobe.com cache.pack.google.com aus2.mozilla.org aus3.mozilla.org aus4.mozilla.org avast.com files.avast.com" 21 | 22 | # Linux 23 | URLs_linux="security.ubuntu.com security.debian.org mirrorlist.centos.org 0.rhel.pool.ntp.org 1.rhel.pool.ntp.org 2.rhel.pool.ntp.org ntp.ubuntu.com linux.dropbox.com" 24 | 25 | #Android 26 | URLs_android="play.google.com android.clients.google.com" 27 | 28 | # Apple 29 | URLs_ios="phobos.apple.com deimos3.apple.com albert.apple.com gs.apple.com itunes.apple.com ax.itunes.apple.com" 30 | 31 | # FreeBSD 32 | URLs_bsd="ftp.freebsd.org" 33 | 34 | function IP6_to_long() 35 | { 36 | INPUT="$(tr 'A-F' 'a-f' <<< "$@")" 37 | O="" 38 | while [ "$O" != "$INPUT" ]; do 39 | O="$INPUT" 40 | # fill all words with zeroes 41 | INPUT="$( sed 's|:\([0-9a-f]\{3\}\):|:0\1:|g' <<< "$INPUT" )" 42 | INPUT="$( sed 
's|:\([0-9a-f]\{3\}\)$|:0\1|g' <<< "$INPUT")" 43 | INPUT="$( sed 's|^\([0-9a-f]\{3\}\):|0\1:|g' <<< "$INPUT" )" 44 | INPUT="$( sed 's|:\([0-9a-f]\{2\}\):|:00\1:|g' <<< "$INPUT")" 45 | INPUT="$( sed 's|:\([0-9a-f]\{2\}\)$|:00\1|g' <<< "$INPUT")" 46 | INPUT="$( sed 's|^\([0-9a-f]\{2\}\):|00\1:|g' <<< "$INPUT")" 47 | INPUT="$( sed 's|:\([0-9a-f]\):|:000\1:|g' <<< "$INPUT")" 48 | INPUT="$( sed 's|:\([0-9a-f]\)$|:000\1|g' <<< "$INPUT")" 49 | INPUT="$( sed 's|^\([0-9a-f]\):|000\1:|g' <<< "$INPUT")" 50 | done 51 | # now expand the :: 52 | ZEROES="" 53 | grep -qs "::" <<< "$INPUT" 54 | if [ "$?" -eq 0 ]; then 55 | GRPS="$(sed 's|[0-9a-f]||g' <<< "$INPUT" | wc -m)" 56 | ((GRPS--)) # carriage return 57 | ((MISSING=8-GRPS)) 58 | for ((i=0;i<$MISSING;i++)); do 59 | ZEROES="$ZEROES:0000" 60 | done 61 | # be careful where to place the : 62 | INPUT="$( sed 's|\(.\)::\(.\)|\1'$ZEROES':\2|g' <<< "$INPUT")" 63 | INPUT="$( sed 's|\(.\)::$|\1'$ZEROES':0000|g' <<< "$INPUT")" 64 | INPUT="$( sed 's|^::\(.\)|'$ZEROES':0000:\1|g;s|^:||g' <<< "$INPUT")" 65 | 66 | fi 67 | 68 | # an expanded address has 39 chars + CR 69 | if [ $(echo $INPUT | wc -m) != 40 ]; then 70 | echo "invalid IPv6 Address" 71 | fi 72 | 73 | # echo the fully expanded version of the address 74 | echo $INPUT 75 | } 76 | 77 | function GET_IP_WINDOWS() 78 | { 79 | IPs_win="" 80 | for url in $(echo $URLs_windows) 81 | do 82 | IPs_win=`echo $IPs_win & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"` 83 | done 84 | echo $IPs_win | sed 's/ /\n/g' | sort -u | grep -v ":" 85 | IPv6=`echo $IPs_win | sed 's/ /\n/g' | sort -u | grep ":"` 86 | for ip6 in $(echo $IPv6) 87 | do 88 | IP6_to_long $ip6 89 | done 90 | } 91 | function GET_IP_LINUX() 92 | { 93 | IPs_lnx="" 94 | for url in $(echo $URLs_linux) 95 | do 96 | IPs_lnx=`echo $IPs_lnx & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"` 97 | done 98 | echo $IPs_lnx | sed 's/ /\n/g' |sort -u | grep -v ":" 99 | IPv6=`echo $IPs_lnx | sed 's/ /\n/g' | sort -u | 
grep ":"` 100 | for ip6 in $(echo $IPv6) 101 | do 102 | IP6_to_long $ip6 103 | done 104 | 105 | } 106 | 107 | function GET_IP_ANDROID() 108 | { 109 | IPs_andr="" 110 | for url in $(echo $URLs_android) 111 | do 112 | IPs_andr=`echo $IPs_andr & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"` 113 | done 114 | echo $IPs_andr | sed 's/ /\n/g' |sort -u | grep -v ":" 115 | IPv6=`echo $IPs_andr | sed 's/ /\n/g' | sort -u | grep ":"` 116 | for ip6 in $(echo $IPv6) 117 | do 118 | IP6_to_long $ip6 119 | done 120 | 121 | } 122 | function GET_IP_IOS() 123 | { 124 | IPs_ios="" 125 | for url in $(echo $URLs_ios) 126 | do 127 | IPs_ios=`echo $IPs_ios & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"` 128 | done 129 | echo $IPs_ios | sed 's/ /\n/g' |sort -u | grep -v ":" 130 | IPv6=`echo $IPs_ios | sed 's/ /\n/g' | sort -u | grep ":"` 131 | for ip6 in $(echo $IPv6) 132 | do 133 | IP6_to_long $ip6 134 | done 135 | 136 | } 137 | 138 | 139 | function GET_IP_FREEBSD() 140 | { 141 | IPs_bsd="" 142 | for url in $(echo $URLs_bsd) 143 | do 144 | IPs_bsd=`echo $IPs_bsd & host $url | sed 's/IPv6 //g'| cut -d " " -f4 | grep "^[0-9]"` 145 | done 146 | echo $IPs_bsd | sed 's/ /\n/g' |sort -u | grep -v ":" 147 | IPv6=`echo $IPs_bsd | sed 's/ /\n/g' | sort -u | grep ":"` 148 | for ip6 in $(echo $IPv6) 149 | do 150 | IP6_to_long $ip6 151 | done 152 | } 153 | 154 | 155 | case $1 in 156 | windows) GET_IP_WINDOWS;; 157 | linux) GET_IP_LINUX;; 158 | freebsd) GET_IP_FREEBSD;; 159 | android) GET_IP_ANDROID;; 160 | apple) GET_IP_IOS;; 161 | 162 | *) echo "Invalid Option" 163 | ;; 164 | esac 165 | -------------------------------------------------------------------------------- /scripts/hz-utils/start-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HADOOP_HOME=/home/hogzilla/hadoop 4 | HBASE_HOME=/home/hogzilla/hbase 5 | 6 | whoami | grep root > /dev/null 7 | if [ $? 
-eq 0 ] ; then
  # Running as root: drop privileges and start every service as 'hogzilla'.
  su - hogzilla -c "$HADOOP_HOME/sbin/start-dfs.sh"
  su - hogzilla -c "$HADOOP_HOME/sbin/start-yarn.sh"
  su - hogzilla -c "$HBASE_HOME/bin/start-hbase.sh"
  su - hogzilla -c "$HBASE_HOME/bin/hbase-daemon.sh start thrift"
  su - hogzilla -c "/home/hogzilla/bin/start-pigtail.sh"
  su - hogzilla -c "/home/hogzilla/hadoop/bin/hdfs dfsadmin -safemode leave"
  su - hogzilla -c "/home/hogzilla/bin/start-hogzilla.sh"
  su - hogzilla -c "/home/hogzilla/bin/start-sflow2hz.sh"
  su - hogzilla -c "/home/hogzilla/bin/start-dbupdates.sh"
else
  # Already running as the service user: start everything directly.
  $HADOOP_HOME/sbin/start-dfs.sh
  $HADOOP_HOME/sbin/start-yarn.sh
  $HBASE_HOME/bin/start-hbase.sh
  $HBASE_HOME/bin/hbase-daemon.sh start thrift
  /home/hogzilla/bin/start-pigtail.sh
  /home/hogzilla/hadoop/bin/hdfs dfsadmin -safemode leave
  /home/hogzilla/bin/start-hogzilla.sh
  /home/hogzilla/bin/start-sflow2hz.sh
  /home/hogzilla/bin/start-dbupdates.sh
fi
-------------------------------------------------------------------------------- /scripts/hz-utils/start-dbupdates.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Daily background refresher for the HBase reputation lists (C&C blacklist
# plus one OS-repository list per supported OS).

BIN="/home/hogzilla/bin"

cd $BIN

(
while : ; do
   # Use mktemp instead of fixed /tmp names: fixed names are vulnerable to
   # symlink attacks and collide between concurrent runs.
   CCLIST=$(mktemp) || exit 1
   $BIN/genCnCList.sh > "$CCLIST"
   php $BIN/updateReputationList.php -t blacklist -n CCBotNet -f "$CCLIST" &>/dev/null
   rm -f "$CCLIST"

   for os in windows linux freebsd android apple ; do
     OSLIST=$(mktemp) || continue
     $BIN/getReposList.sh $os > "$OSLIST"
     php $BIN/updateReputationList.php -t $os -n OSRepo -f "$OSLIST" &>/dev/null
     rm -f "$OSLIST"
   done

   sleep 86400 # daily
done
)&
-------------------------------------------------------------------------------- /scripts/hz-utils/start-hogzilla.sh: --------------------------------------------------------------------------------
#!/bin/bash

# Check the variables below
HBASE_PATH=/home/hogzilla/hbase
HBASE_VERSION="1.2.6"

# Needed by the AuthModule
HOGDIR="/home/hogzilla/hogzilla"
EXTRAJAR=",$HOGDIR/jars/uap-scala_2.10-0.2.1-SNAPSHOT.jar,$HOGDIR/jars/snakeyaml-1.18.jar"
FILES="--files $HOGDIR/conf/sflow.conf,$HOGDIR/conf/auth.conf"

# Re-submit the Hogzilla Spark job every 6 hours, forever, in the background.
(while : ; do
  #cd /home/hogzilla
  # FIX: --driver-class-path was "./$HBASE_PATH/conf/", which turns the
  # absolute HBase conf dir into a relative path (.//home/...) and silently
  # fails to put hbase-site.xml on the driver classpath.
  /home/hogzilla/spark/bin/spark-submit \
    --master yarn-cluster \
    --num-executors 2 \
    --driver-memory 1g \
    --executor-memory 3g \
    --executor-cores 4 \
    --jars $HBASE_PATH/lib/hbase-annotations-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-annotations-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-client-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-common-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-common-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-examples-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-hadoop2-compat-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-hadoop-compat-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-it-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-it-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-prefix-tree-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-procedure-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-protocol-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-rest-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-server-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-server-$HBASE_VERSION-tests.jar,$HBASE_PATH/lib/hbase-shell-$HBASE_VERSION.jar,$HBASE_PATH/lib/hbase-thrift-$HBASE_VERSION.jar,$HBASE_PATH/lib/htrace-core-3.1.0-incubating.jar,$HBASE_PATH/lib/guava-12.0.1.jar,$HBASE_PATH/lib/metrics-core-2.2.0.jar$EXTRAJAR --driver-class-path $HBASE_PATH/conf/ $FILES --class Hogzilla /home/hogzilla/Hogzilla.jar &> /tmp/hogzilla.log &

  sleep 21600 # 6h

  #rm -rf /tmp/hadoop-hogzilla*

done) &

-------------------------------------------------------------------------------- /scripts/hz-utils/start-pigtail.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Keep pigtail.php running, restarting it 10s after each exit.

(
cd /home/hogzilla/pigtail/
while : ; do
  php /home/hogzilla/pigtail/pigtail.php
  sleep 10
done
)&
-------------------------------------------------------------------------------- /scripts/hz-utils/start-sflow2hz.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Feed sFlow datagrams (UDP 6343) into HBase via sflow2hz; restart the
# pipeline 5 minutes after it dies.

BINPATH="/home/hogzilla/bin"

(
while : ; do
  sflowtool -p 6343 -l | $BINPATH/sflow2hz -h 127.0.0.1 -p 9090 &> /dev/null
  sleep 300
done )>&/dev/null &
-------------------------------------------------------------------------------- /scripts/hz-utils/stop-all.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Stop all Hogzilla components, then the HBase/Hadoop stack underneath them.

HADOOP_HOME=/home/hogzilla/hadoop
HBASE_HOME=/home/hogzilla/hbase

/home/hogzilla/bin/stop-pigtail.sh
/home/hogzilla/bin/stop-hogzilla.sh
/home/hogzilla/bin/stop-sflow2hz.sh
/home/hogzilla/bin/stop-dbupdates.sh

$HBASE_HOME/bin/hbase-daemon.sh stop thrift
$HBASE_HOME/bin/stop-hbase.sh
$HADOOP_HOME/sbin/stop-dfs.sh
$HADOOP_HOME/sbin/stop-yarn.sh
-------------------------------------------------------------------------------- /scripts/hz-utils/stop-dbupdates.sh: --------------------------------------------------------------------------------
#!/bin/bash

# xargs -r: do not run kill at all when no matching process exists.
ps auxw | grep start-dbupdates.sh | grep -v grep | awk '{print $2}' | xargs -r kill -9
-------------------------------------------------------------------------------- /scripts/hz-utils/stop-hogzilla.sh: --------------------------------------------------------------------------------
#!/bin/bash

# xargs -r: do not run kill at all when no matching process exists.
ps auxw | grep start-hogzilla | grep -v grep | awk '{print $2}' | xargs -r kill -9
-------------------------------------------------------------------------------- /scripts/hz-utils/stop-pigtail.sh: --------------------------------------------------------------------------------
#!/bin/bash
| 4 | ps auxw | grep start-pigtail | grep -v grep | awk '{print $2}' | xargs kill -9 5 | -------------------------------------------------------------------------------- /scripts/hz-utils/stop-sflow2hz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | ps auxw | grep start-sflow2hz | grep -v grep | awk '{print $2}' | xargs kill -9 5 | ps auxw | grep sflowtool | grep -v grep | awk '{print $2}' | xargs kill -9 6 | ps auxw | grep sflow2hz | grep -v grep | awk '{print $2}' | xargs kill -9 7 | -------------------------------------------------------------------------------- /scripts/hz-utils/updateReputationList.php: -------------------------------------------------------------------------------- 1 | 5 | * 6 | * This program is free software; you can redistribute it and/or modify 7 | * it under the terms of the GNU General Public License Version 2 as 8 | * published by the Free Software Foundation. You may not use, modify or 9 | * distribute this program under any other version of the GNU General 10 | * Public License. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU General Public License 18 | * along with this program; if not, write to the Free Software 19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 | * 21 | * MORE CREDITS 22 | * - Contribute and put your "Name - Contribution" here. 23 | * 24 | * USING THIS SCRIPT 25 | * 26 | * 1. Run 27 | * /usr/bin/php updateReputationList.php list 28 | * 29 | * ATTENTION: This PHP script must run in CLI! 30 | * 31 | * If you have any problems, let us know! 
32 | * See how to get help at http://ids-hogzilla.org/post/community/ 33 | */ 34 | 35 | // Some useful variables 36 | $hbaseHost="localhost"; /* Host or IP of your HBase */ 37 | $hbasePort=9090; 38 | 39 | $GLOBALS['THRIFT_ROOT'] = '/usr/share/php'; 40 | 41 | define("DEBUG",true); 42 | 43 | // Thrift stuff 44 | require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/ClassLoader/ThriftClassLoader.php'); 45 | 46 | $classLoader = new Thrift\ClassLoader\ThriftClassLoader(); 47 | $classLoader->registerNamespace('Thrift', $GLOBALS['THRIFT_ROOT']); 48 | $classLoader->register(); 49 | 50 | require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TSocket.php'); 51 | require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Transport/TBufferedTransport.php'); 52 | require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Protocol/TBinaryProtocol.php'); 53 | require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Packages/Hbase/Hbase.php'); 54 | require_once($GLOBALS['THRIFT_ROOT'].'/Thrift/Packages/Hbase/Types.php'); 55 | 56 | $socket = new Thrift\Transport\TSocket($hbaseHost, $hbasePort); 57 | $socket->setSendTimeout(10000); 58 | $socket->setRecvTimeout(20000); 59 | $transport = new Thrift\Transport\TBufferedTransport($socket); 60 | $protocol = new Thrift\Protocol\TBinaryProtocol($transport); 61 | $client = new Hbase\HbaseClient($protocol); 62 | 63 | 64 | /* 65 | * BEGIN 66 | */ 67 | 68 | // Parse arguments 69 | if(DEBUG) { echo "Parse options\n" ;} 70 | $options = getopt("t:n:f:"); 71 | $listType=@$options["t"]; 72 | $listName=@$options["n"]; 73 | $listFile=@$options["f"]; 74 | 75 | if(strlen($listType) ==0 || strlen($listName) ==0 || strlen($listFile) ==0 ) 76 | { 77 | echo "Usage: php updateReputationList.php -t ListType -n ListName -f file \n"; 78 | echo "Examples: php updateReputationList.php -t whitelist -n MX -f file_one_ip_per_line.txt \n"; 79 | echo " php updateReputationList.php -t whitelist -n TTalker -f file_one_ip_per_line.txt \n"; 80 | exit; 81 | } 82 | 83 | 84 | // Open file 85 | if(DEBUG) { echo "Open 
file\n" ;} 86 | $fileHandle = fopen($listFile, "r"); 87 | if(!$fileHandle) { 88 | echo "Error opening file $listFile ."; 89 | exit; 90 | } 91 | 92 | // Open connections 93 | if(DEBUG) { echo "Open connection\n" ;} 94 | $transport->open(); 95 | 96 | // Scan+Filter on HBase 97 | $filter = array(); 98 | $filter[] = "SingleColumnValueFilter('rep', 'list_type', =, 'binary:".$listType."')"; 99 | $filter[] = "SingleColumnValueFilter('rep', 'list', =, 'binary:".$listName."')"; 100 | $filterString = implode(" AND ", $filter); 101 | $scanFilter = new Hbase\TScan(); 102 | $scanFilter->filterString = $filterString; 103 | $scanner = $client->scannerOpenWithScan("hogzilla_reputation", $scanFilter, array()); 104 | 105 | // Delete rows, iterating 106 | if(DEBUG) { echo "Deleting current list from HBase\n" ;} 107 | try 108 | { 109 | while (true) 110 | { 111 | $row=$client->scannerGet($scanner); 112 | if(sizeof($row)==0) break; 113 | if(DEBUG) { 114 | $values = $row[0]->columns; 115 | $ip = $values["rep:ip"]->value; 116 | echo "Deleting $ip from list $listName/$listType\n" ; 117 | } 118 | $client->deleteAllRow("hogzilla_reputation", $row[0]->row, array()); 119 | } 120 | $client->scannerClose($scanner); 121 | 122 | // Iterate file 123 | while (($ip = fgets($fileHandle)) !== false) 124 | { 125 | // Parse 126 | preg_replace( "/\r|\n/", "", $ip ); 127 | $ip=trim($ip); 128 | // Create mutation 129 | $mutations = array(); 130 | $dataIP = array( 131 | 'column' => "rep:ip", 132 | 'value' => $ip 133 | ); 134 | $dataListName = array('column' => "rep:list", 'value' => $listName ); 135 | $dataListType = array('column' => "rep:list_type", 'value' => $listType ); 136 | $dataListDesc = array('column' => "rep:description", 'value' => "" ); 137 | $mutations[] = new Hbase\Mutation($dataIP); 138 | $mutations[] = new Hbase\Mutation($dataListName); 139 | $mutations[] = new Hbase\Mutation($dataListType); 140 | $mutations[] = new Hbase\Mutation($dataListDesc); 141 | // Insert mutations 142 | 
$client->mutateRow("hogzilla_reputation", $ip."-".$listName."-".$listType, $mutations, array());
  }
} catch(Exception $e)
{
  echo 'ERROR: ', $e->getMessage(), "\n";
}

// Close file
fclose($fileHandle);

// Close connections (HBase)
$transport->close();

?>
-------------------------------------------------------------------------------- /scripts/myFuncs: --------------------------------------------------------------------------------
#!/bin/bash

# Install PKGNAME via apt-get if probe command PKGCMD fails; die on failure.
function package_install_cmd {
  PKGCMD=$1
  PKGNAME=$2
  PKGDESC=$3

  $PKGCMD &>/dev/null
  if [ $? -gt 0 ] ; then
    msg_info "$PKGDESC not installed. Installing now..."
    apt-get --force-yes -y install $PKGNAME
    $PKGCMD &>/dev/null
    if [ $? -eq 0 ] ; then
      msg_ok "$PKGDESC installed."
    else
      msg_fail "$PKGDESC installation failed!"
      die 1 "I could NOT install $PKGDESC. Check your sources.list and/or Internet access or try to do it manually!"
    fi
  else
    msg_ok "$PKGDESC installed."
  fi
}

# Install PKGNAME via apt-get if dpkg does not list it; die on failure.
function package_install {
  PKGNAME=$1
  PKGDESC=$2

  dpkg -l | awk '{print $2}' | grep ^$PKGNAME$ &>/dev/null
  if [ $? -gt 0 ] ; then
    msg_info "$PKGDESC not installed. Installing now..."
    apt-get --force-yes -y install $PKGNAME
    # FIX: anchor the post-install check like the pre-check above; an
    # unanchored grep matched any package merely containing the name.
    dpkg -l | awk '{print $2}' | grep ^$PKGNAME$ &>/dev/null
    if [ $? -eq 0 ] ; then
      msg_ok "$PKGDESC installed."
    else
      msg_fail "$PKGDESC installation failed!"
      die 1 "I could NOT install $PKGDESC. Check your Internet access or try to do it manually!"
    fi
  else
    msg_ok "$PKGDESC installed."
  fi
}

# Run CMD when check command CMDCHECK succeeds, else print ELSEMSG.
function cmd_if_0_info
{
  CMDCHECK=$1
  CMD=$2
  ELSEMSG=$3

  eval $CMDCHECK &>/dev/null
  if [ $?
-eq 0 ] ; then
    cmd "$CMD"
  else
    msg_info "$ELSEMSG"
  fi
}

# Run CMD when check command CMDCHECK fails, else print ELSEMSG.
# (Mirror image of cmd_if_0_info above.)
function cmd_if_n0_info
{
  CMDCHECK=$1
  CMD=$2
  ELSEMSG=$3

  eval $CMDCHECK &>/dev/null
  if [ $? -gt 0 ] ; then
    cmd "$CMD"
  else
    msg_info "$ELSEMSG"
  fi
}
-------------------------------------------------------------------------------- /scripts/run.sh: --------------------------------------------------------------------------------
#!/bin/bash
# Package the compiled classes into the deployable Hogzilla jar.

cd ../bin
jar -cf /tmp/Hogzilla.jar *
-------------------------------------------------------------------------------- /src/Hogzilla.scala: --------------------------------------------------------------------------------
/*
 * Copyright (C) 2015-2016 Paulo Angelo Alves Resende <pa@pauloangelo.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License Version 2 as
 * published by the Free Software Foundation. You may not use, modify or
 * distribute this program under any other version of the GNU General
 * Public License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */ 19 | 20 | import org.apache.spark.SparkConf 21 | import org.apache.spark.SparkContext 22 | import org.hogzilla.hbase.HogHBaseRDD 23 | import org.hogzilla.initiate.HogInitiate 24 | import org.hogzilla.prepare.HogPrepare 25 | import org.hogzilla.sflow._ 26 | import org.hogzilla.http.HogHTTP 27 | import org.hogzilla.auth.HogAuth 28 | import org.hogzilla.dns.HogDNS 29 | import org.hogzilla.snort.HogSnort 30 | 31 | /** 32 | * 33 | * Keep it useful, simple, robust, and scalable. 34 | * 35 | * 36 | */ 37 | object Hogzilla { 38 | 39 | def main(args: Array[String]) 40 | { 41 | val sparkConf = new SparkConf() 42 | .setAppName("Hogzilla") 43 | .set("spark.executor.memory", "1g") 44 | .set("spark.default.parallelism", "160") // 160 45 | 46 | val spark = new SparkContext(sparkConf) 47 | 48 | // Get the HBase RDD 49 | val HogRDD = HogHBaseRDD.connect(spark); 50 | 51 | // Initiate HogZilla 52 | HogInitiate.initiate(spark); 53 | 54 | 55 | // Prepare the data 56 | HogPrepare.prepare(HogRDD) 57 | 58 | // General module 59 | HogSnort.run(HogRDD,spark) 60 | 61 | // Run algorithms for DNS protocol 62 | HogDNS.run(HogRDD,spark); 63 | 64 | // Run algorithms for HTTP protocol 65 | HogHTTP.run(HogRDD,spark); 66 | 67 | // Run algorithms for SMTP protocol 68 | //HogSMTP.run(HogRDD); 69 | 70 | 71 | // ============================ Run algorithms for SFlows ============================ 72 | 73 | val HogRDDSFlow = HogHBaseRDD.connectSFlow(spark); 74 | HogSFlow.run(HogRDDSFlow,spark); 75 | 76 | 77 | val HogRDDHistograms = HogHBaseRDD.connectHistograms(spark); 78 | HogSFlowHistograms.run(HogRDDHistograms,spark); 79 | 80 | // Use continuous mode 81 | //val HogRDDAuth = HogHBaseRDD.connectAuth(spark); 82 | //HogAuth.run(HogRDDAuth,spark); 83 | 84 | 85 | 86 | // Stop Spark 87 | spark.stop() 88 | 89 | // Close the HBase Connection 90 | HogHBaseRDD.close(); 91 | 92 | } 93 | 94 | } -------------------------------------------------------------------------------- /src/HogzillaContinuous.scala: 
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2015-2016 Paulo Angelo Alves Resende <pa@pauloangelo.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License Version 2 as
 * published by the Free Software Foundation. You may not use, modify or
 * distribute this program under any other version of the GNU General
 * Public License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.hogzilla.hbase.HogHBaseRDD
import org.hogzilla.initiate.HogInitiate
import org.hogzilla.prepare.HogPrepare
import org.hogzilla.sflow._
import org.hogzilla.http.HogHTTP
import org.hogzilla.auth.HogAuth
import org.hogzilla.dns.HogDNS
import org.apache.hadoop.hbase.client.Delete
import scala.concurrent.Await

/**
 *
 * Keep it useful, simple, robust, and scalable.
 *
 * Continuous entry point: re-runs the auth analysis every 10 seconds.
 */
object HogzillaContinuous {

  def main(args: Array[String])
  {
    val sparkConf = new SparkConf()
      .setAppName("HogzillaContinuous")
      .set("spark.executor.memory", "512m")
      .set("spark.default.parallelism", "16")

    val spark = new SparkContext(sparkConf)

    // Main HBase-backed RDD (kept open for the lifetime of the process).
    val HogRDD = HogHBaseRDD.connect(spark)

    // Poll-and-process loop; runs until the process is killed.
    while(true) {
      val HogRDDAuth = HogHBaseRDD.connectAuth(spark)
      val summary = HogAuth.runDeleting(HogRDDAuth, spark)
      Thread.sleep(10000) // 10s between passes
    }

    // NOTE(review): the loop above never exits, so the cleanup below is
    // unreachable; kept to mirror the other entry points.
    spark.stop()
    HogHBaseRDD.close()
  }

} --------------------------------------------------------------------------------
/src/HogzillaStream.scala: --------------------------------------------------------------------------------
/*
 * Copyright (C) 2015-2016 Paulo Angelo Alves Resende <pa@pauloangelo.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License Version 2 as
 * published by the Free Software Foundation. You may not use, modify or
 * distribute this program under any other version of the GNU General
 * Public License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */ 19 | 20 | import org.apache.spark.SparkConf 21 | import org.apache.spark.SparkContext 22 | import org.hogzilla.hbase.HogHBaseRDD 23 | import org.hogzilla.initiate.HogInitiate 24 | import org.hogzilla.prepare.HogPrepare 25 | import org.hogzilla.sflow._ 26 | import org.hogzilla.http.HogHTTP 27 | import org.hogzilla.auth.HogAuth 28 | import org.hogzilla.dns.HogDNS 29 | import org.apache.spark.streaming.Seconds 30 | import org.apache.spark.streaming.StreamingContext 31 | import org.apache.spark.storage.StorageLevel 32 | 33 | /** 34 | * 35 | * Keep it useful, simple, robust, and scalable. 36 | * 37 | * NOT RUNNING! DEPENDS ON IMPLEMENTATION ON AUTH2HZ! 38 | * 39 | */ 40 | object HogzillaStream { 41 | 42 | def main(args: Array[String]) 43 | { 44 | val sparkConf = new SparkConf() 45 | .setAppName("HogzillaStream") 46 | .setMaster("local[2]") 47 | .set("spark.executor.memory", "512m") 48 | .set("spark.default.parallelism", "16") // 160 49 | 50 | val ssc = new StreamingContext(sparkConf, Seconds(1)) 51 | val spark = new SparkContext(sparkConf) 52 | 53 | // Get the HBase RDD 54 | val HogRDD = HogHBaseRDD.connect(spark); 55 | 56 | val lines = ssc.socketTextStream("localhost", 9999,StorageLevel.MEMORY_AND_DISK_SER) 57 | 58 | val HogRDDAuth = HogHBaseRDD.connectAuth(spark); 59 | HogAuth.run(HogRDDAuth,spark); 60 | 61 | val words = lines.flatMap(_.split(" ")) 62 | val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _) 63 | wordCounts.print() 64 | 65 | 66 | ssc.start() 67 | ssc.awaitTermination() 68 | 69 | 70 | // Stop Spark 71 | spark.stop() 72 | 73 | // Close the HBase Connection 74 | HogHBaseRDD.close(); 75 | 76 | } 77 | 78 | } -------------------------------------------------------------------------------- /src/org/hogzilla/auth/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or 
modify
 * it under the terms of the GNU General Public License Version 2 as
 * published by the Free Software Foundation. You may not use, modify or
 * distribute this program under any other version of the GNU General
 * Public License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

package org.hogzilla

/**
 * @author pa
 */
package object auth {

}
-------------------------------------------------------------------------------- /src/org/hogzilla/cluster/HogClusterMember.scala: --------------------------------------------------------------------------------
/*
 * Copyright (C) 2015-2015 Paulo Angelo Alves Resende <pa@pauloangelo.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License Version 2 as
 * published by the Free Software Foundation. You may not use, modify or
 * distribute this program under any other version of the GNU General
 * Public License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

package org.hogzilla.cluster

import org.apache.spark.mllib.linalg.Vector


/** One member of a behavioural cluster: the host, its cluster's centroid and
 *  size, and this host's port-frequency vector plus distance to the centroid.
 *  (clusterIdx,centroidMain,clusterSize,members.filter(_._1.equals(clusterIdx)).map({_._2}))
 * @author pa
 */
case class HogClusterMember(clusterIdx:Int, centroid:List[(Long,Double)], clusterSize:Long, allKeys:List[Long],
                            memberIP:String, ports:Set[Long], frequency_vector:List[(Long,Double)], distance:Double)
{

  // Human-readable headline used when this member is reported as an event.
  def formatTitle:String = s"Group information for $memberIP"

} --------------------------------------------------------------------------------
/src/org/hogzilla/cluster/package.scala: --------------------------------------------------------------------------------
/*
 * Copyright (C) 2015-2015 Paulo Angelo Alves Resende <pa@pauloangelo.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License Version 2 as
 * published by the Free Software Foundation. You may not use, modify or
 * distribute this program under any other version of the GNU General
 * Public License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package object cluster { 26 | 27 | } -------------------------------------------------------------------------------- /src/org/hogzilla/dns/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package object dns { 26 | 27 | } -------------------------------------------------------------------------------- /src/org/hogzilla/event/HogEvent.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 
9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla.event 21 | 22 | import java.util.HashMap 23 | import java.util.Map 24 | import org.apache.hadoop.hbase.client.Put 25 | import org.apache.hadoop.hbase.util.Bytes 26 | import org.hogzilla.hbase.HogHBaseRDD 27 | import org.hogzilla.util.HogFlow 28 | import java.net.InetAddress 29 | 30 | 31 | class HogEvent(flow:HogFlow) 32 | { 33 | var sensorid:Int=0 34 | var signature_id:Double=0 35 | var priorityid:Int=0 36 | var text:String="" 37 | var data:Map[String,String]=new HashMap() 38 | var ports:String="" 39 | var title:String="" 40 | var username:String="" 41 | var coords:String="" 42 | 43 | 44 | def formatIPtoBytes(ip:String):Array[Byte] = 45 | { 46 | try { 47 | // Eca! Snorby doesn't support IPv6 yet. See https://github.com/Snorby/snorby/issues/65 48 | if(ip.contains(":")) 49 | InetAddress.getByName("255.255.6.6").getAddress 50 | else 51 | InetAddress.getByName(ip).getAddress 52 | } catch { 53 | case t: Throwable => 54 | // Bogus address! 
55 | InetAddress.getByName("255.255.1.1").getAddress 56 | } 57 | 58 | } 59 | 60 | 61 | def alert() 62 | { 63 | val put = new Put(Bytes.toBytes(flow.get("flow:id"))) 64 | put.add(Bytes.toBytes("event"), Bytes.toBytes("note"), Bytes.toBytes(text)) 65 | put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip"), formatIPtoBytes(flow.lower_ip)) 66 | put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip"), formatIPtoBytes(flow.upper_ip)) 67 | put.add(Bytes.toBytes("event"), Bytes.toBytes("lower_ip_str"), Bytes.toBytes(flow.lower_ip)) 68 | put.add(Bytes.toBytes("event"), Bytes.toBytes("upper_ip_str"), Bytes.toBytes(flow.upper_ip)) 69 | put.add(Bytes.toBytes("event"), Bytes.toBytes("signature_id"), Bytes.toBytes("%.0f".format(signature_id))) 70 | put.add(Bytes.toBytes("event"), Bytes.toBytes("time"), Bytes.toBytes(System.currentTimeMillis)) 71 | put.add(Bytes.toBytes("event"), Bytes.toBytes("ports"), Bytes.toBytes(ports)) 72 | put.add(Bytes.toBytes("event"), Bytes.toBytes("title"), Bytes.toBytes(title)) 73 | 74 | if(!username.equals("")) 75 | put.add(Bytes.toBytes("event"), Bytes.toBytes("username"), Bytes.toBytes(username)) 76 | if(!coords.equals("")) 77 | put.add(Bytes.toBytes("event"), Bytes.toBytes("coords"), Bytes.toBytes(coords)) 78 | 79 | HogHBaseRDD.hogzilla_events.put(put) 80 | 81 | //println(f"ALERT: $text%100s\n\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") 82 | } 83 | } 84 | 85 | -------------------------------------------------------------------------------- /src/org/hogzilla/event/HogSignature.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 
/**
 * An alert signature (id, name, class, priority, revision, group), persisted
 * in the "hogzilla_signatures" HBase table.
 *
 * Example: 3,"HZ: Suspicious DNS flow identified by K-Means clustering",2,1,826000001,826
 *
 * @author pa
 */
case class HogSignature(signature_class:Int, signature_name:String, signature_priority:Int, signature_revision:Int, signature_id:Double, signature_group_id:Int) {

  /**
   * Persist this signature into the "hogzilla_signatures" table, keyed by the
   * signature id rendered without decimals ("%.0f"). The row is only written
   * when it is not already present, so repeated calls are idempotent.
   *
   * @return this signature, to allow call chaining.
   */
  def saveHBase():HogSignature =
  {
    // Row key and the "signature:id" cell share the same byte rendering.
    val rowKey = Bytes.toBytes("%.0f".format(signature_id))
    val get = new Get(rowKey)

    // BUGFIX: the existence check used to query hogzilla_sensor, which never
    // holds signature rows, so the "already saved" test could not succeed.
    // Check the table we actually write to.
    if(!HogHBaseRDD.hogzilla_signatures.exists(get))
    {
      val put = new Put(rowKey)
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("id"),       rowKey)
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("class"),    Bytes.toBytes(signature_class.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("name"),     Bytes.toBytes(signature_name))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("priority"), Bytes.toBytes(signature_priority.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("revision"), Bytes.toBytes(signature_revision.toString()))
      put.add(Bytes.toBytes("signature"), Bytes.toBytes("group_id"), Bytes.toBytes(signature_group_id.toString()))
      HogHBaseRDD.hogzilla_signatures.put(put)
    }

    this
  }
}
/**
 * Persistence helpers for K-Means cluster results: the cluster rows
 * ("hogzilla_clusters", keyed by cluster index) and the per-host membership
 * rows ("hogzilla_cluster_members", keyed by member IP).
 */
object HogHBaseCluster {

  /**
   * Build a human-readable title for a cluster from its centroid, given as
   * (port, rate%) pairs. Ports with a rate of ~5% or more are listed with
   * their percentage; ports between ~1% and 5% are appended in parentheses.
   */
  def formatClusterTitle(clusterCentroid: List[(Long,Double)], clusterIdx:Int):String =
  {
    val mainTitle =
      "Group "+clusterIdx.toString+" - "+
      clusterCentroid
        .filter({case (port,rate) =>
          rate > 4.999           // ~>= 5%, tolerant of floating-point noise
        })
        .map({case (port,rate) =>
          port.toString()+":"+"%.0f".format(rate)+"%"
        }).mkString(", ")

    val onePercentList =
      clusterCentroid
        .filter({case (port,rate) =>
          // && (short-circuit) instead of the original bitwise-style &
          .9999 < rate && rate < 5
        })

    if(onePercentList.nonEmpty)
    {
      mainTitle+", "+
      onePercentList.map({case (port,rate) =>
        port.toString()
      }).mkString("(",", ",")"+"> 1%")
    }else
    {
      mainTitle
    }
  }

  /** Delete the cluster row keyed by the cluster index. */
  def deleteCluster(clusterIdx:Int)=
  {
    val del = new Delete(Bytes.toBytes(clusterIdx.toString))
    HogHBaseRDD.hogzilla_clusters.delete(del)
  }

  /** Delete the cluster-member row keyed by the member's IP. */
  def deleteClusterMember(memberIP:String)=
  {
    val del = new Delete(Bytes.toBytes(memberIP))
    HogHBaseRDD.hogzilla_cluster_members.delete(del)
  }

  /**
   * Persist one cluster: title (via formatClusterTitle), size, centroid and
   * the comma-joined member list, keyed by the cluster index.
   */
  def saveCluster(clusterIdx:Int, clusterCentroid:List[(Long,Double)], clusterSize: Long, members:Array[String]) = {

    val memberString = members.mkString(",")

    val put = new Put(Bytes.toBytes(clusterIdx.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("title"),    Bytes.toBytes(formatClusterTitle(clusterCentroid,clusterIdx)))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("size"),     Bytes.toBytes(clusterSize.toString))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("centroid"), Bytes.toBytes(clusterCentroid.mkString("[",",","]")))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("members"),  Bytes.toBytes(memberString))

    HogHBaseRDD.hogzilla_clusters.put(put)
  }

  /**
   * Persist one cluster member, keyed by its IP: info/cluster metadata plus
   * the member's TCP ports and per-port frequencies (only frequencies for
   * ports present in clusterMember.ports are stored).
   */
  def saveClusterMember(clusterMember:HogClusterMember) = {

    val put = new Put(Bytes.toBytes(clusterMember.memberIP.toString))
    put.add(Bytes.toBytes("info"),   Bytes.toBytes("title"),       Bytes.toBytes(clusterMember.formatTitle))
    put.add(Bytes.toBytes("cluster"),Bytes.toBytes("size"),        Bytes.toBytes(clusterMember.clusterSize.toString))
    put.add(Bytes.toBytes("cluster"),Bytes.toBytes("centroid"),    Bytes.toBytes(clusterMember.centroid.mkString("[",",","]")))
    put.add(Bytes.toBytes("cluster"),Bytes.toBytes("idx"),         Bytes.toBytes(clusterMember.clusterIdx.toString))
    put.add(Bytes.toBytes("cluster"),Bytes.toBytes("description"), Bytes.toBytes(formatClusterTitle(clusterMember.centroid,clusterMember.clusterIdx)))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("ports"),       Bytes.toBytes("TCP: "+clusterMember.ports.mkString(""," ","")))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("frequencies"), Bytes.toBytes("TCP: "+
        clusterMember.frequency_vector
          .filter({case (port,freq) => clusterMember.ports.contains(port)})
          .map({case (port,freq) => port.toString+"="+
            "%.0f".format(freq)+"%"
          })
          .mkString(""," ","")
        ))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("ip"),          Bytes.toBytes(clusterMember.memberIP))
    put.add(Bytes.toBytes("member"), Bytes.toBytes("distance"),    Bytes.toBytes("%.2f".format(clusterMember.distance)))

    HogHBaseRDD.hogzilla_cluster_members.put(put)
  }

}
/**
 * Persistence helpers for Hogzilla histograms stored in the
 * "hogzilla_histograms" table: families "info" (name, size), "values"
 * (qualifier -> Double) and "labels" (qualifier -> String).
 */
object HogHBaseHistogram {

  /**
   * Split an HBase Result into its two histogram maps.
   *
   * @return (values, labels): cells of family "values" parsed as Doubles and
   *         cells of family "labels" kept as Strings; both empty when the
   *         Result is empty.
   */
  def mapByResult(result:Result):(HashMap[String,Double],HashMap[String,String]) =
  {

    val map=new HashMap[String,Double]
    val mapLabels=new HashMap[String,String]

    if(!result.isEmpty())
    {
      val cells = result.listCells()

      val it = cells.iterator()
      while(it.hasNext())
      {
        val cell = it.next()

        val column = new String(CellUtil.cloneFamily(cell))
        val columnQualifier = new String(CellUtil.cloneQualifier(cell))
        val value = new String(CellUtil.cloneValue(cell))

        if(column.equals("values"))
          map.put(columnQualifier,value.toDouble)
        else if (column.equals("labels")) {
          mapLabels.put(columnQualifier,value)
        }
      }
    }

    (map,mapLabels)
  }

  /**
   * Load the histogram named histName. Returns an empty histogram (size 0)
   * when the row does not exist or has no "values" cells.
   */
  def getHistogram(histName:String):HogHistogram =
  {

    val get1 = new Get(Bytes.toBytes(histName))

    val result = HogHBaseRDD.hogzilla_histograms.get(get1)
    val (map,mapLabels) = mapByResult(result)

    if(!map.isEmpty)
    {
      val sizeArray = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("size"))
      // BUGFIX: getValue returns null when the "info:size" cell is absent; the
      // previous sizeArray.length check threw a NullPointerException then.
      if(sizeArray==null || sizeArray.length==0)
      {
        new HogHistogram(histName,0L,map,mapLabels)
      }
      else
      {
        new HogHistogram(histName,Bytes.toString(sizeArray).toLong,map,mapLabels)
      }

    }else
    {
      new HogHistogram(histName,0,map,mapLabels)
    }
  }


  /**
   * Persist a histogram: deletes any existing row for the same name, then
   * writes name/size plus every value and (accent-stripped, 50-char-truncated)
   * label. A failed put is logged with the offending histogram, not rethrown
   * (deliberate best-effort, preserved from the original).
   */
  def saveHistogram(hogHist:HogHistogram) =
  {
    val (histName,size,map,mapLabels) = (hogHist.histName, hogHist.histSize, hogHist.histMap, hogHist.histLabels)

    val put = new Put(Bytes.toBytes(histName))

    put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(histName))
    put.add(Bytes.toBytes("info"), Bytes.toBytes("size"), Bytes.toBytes(hogHist.histSize.toString()))

    // foreach instead of the original ./:(0) fold — the accumulator was unused.
    map.foreach{ case (port,weight) =>
      put.add(Bytes.toBytes("values"), Bytes.toBytes(port), Bytes.toBytes(weight.toString()))
    }
    if(mapLabels!=null)
      mapLabels.foreach{ case (key,label) =>
        put.add(Bytes.toBytes("labels"), Bytes.toBytes(key), Bytes.toBytes(StringUtils.stripAccents(label).take(50)))
      }

    // Remove any stale row so old qualifiers do not linger alongside new ones.
    HogHBaseRDD.hogzilla_histograms.delete(new Delete(put.getRow))

    try {
      HogHBaseRDD.hogzilla_histograms.put(put)
    } catch {
      case t: Throwable =>
        t.printStackTrace()
        println(hogHist.histName)
        hogHist.histLabels.foreach(println(_))
        hogHist.histMap.foreach({case (key,map) => println(key+" => "+map.toString)})
    }

  }


  /**
   * Scan all histograms and return the IPs whose value for filterPort exceeds
   * Histograms.atypicalThreshold (e.g. hosts acting as FTP servers).
   */
  def getIPListHIST01(spark: SparkContext,filterPort:String):scala.collection.immutable.Set[String] =
  {
    val table = "hogzilla_histograms"
    val conf = HBaseConfiguration.create()

    conf.set(TableInputFormat.INPUT_TABLE, table)
    conf.set("zookeeper.session.timeout", "600000")
    conf.setInt("hbase.client.scanner.timeout.period", 600000)


    val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

    hBaseRDD
      .map ({ case (id,result) =>
        val port = Bytes.toString(result.getValue(Bytes.toBytes("values"),Bytes.toBytes(filterPort)))
        val name = Bytes.toString(result.getValue(Bytes.toBytes("info"),Bytes.toBytes("name")))
        val size = Bytes.toString(result.getValue(Bytes.toBytes("info"),Bytes.toBytes("size")))
        if(port==null || port.isEmpty())
          (Histograms.getIPFromHistName(name),size,0D)
        else
          (Histograms.getIPFromHistName(name),size,port.toDouble)
      })
      .filter({case (ip,size,port) => port > Histograms.atypicalThreshold})
      .map({case (ip,size,port) => ip})
      .collect
      .toSet
  }

}
14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla.hbase 21 | 22 | import org.apache.hadoop.hbase.client.Put 23 | import org.apache.hadoop.hbase.util.Bytes 24 | import org.apache.spark.rdd.RDD 25 | import org.apache.spark.mllib.linalg.Vector 26 | import org.apache.hadoop.hbase.client.Get 27 | import org.apache.hadoop.hbase.client.Delete 28 | import org.hogzilla.cluster.HogClusterMember 29 | 30 | 31 | object HogHBaseInventory { 32 | 33 | 34 | def deleteInventory(myIP:Int)= 35 | { 36 | val del = new Delete(Bytes.toBytes(myIP.toString)) 37 | HogHBaseRDD.hogzilla_inventory.delete(del) 38 | } 39 | 40 | def saveInventory(myIP:String, opSystem:String) = { 41 | 42 | 43 | val put = new Put(Bytes.toBytes(myIP+"-"+opSystem)) 44 | put.add(Bytes.toBytes("info"), Bytes.toBytes("title"), Bytes.toBytes("Inventory information for "+myIP)) 45 | put.add(Bytes.toBytes("info"), Bytes.toBytes("ip"), Bytes.toBytes(myIP)) 46 | put.add(Bytes.toBytes("info"), Bytes.toBytes("OS"), Bytes.toBytes(opSystem)) 47 | 48 | HogHBaseRDD.hogzilla_inventory.put(put) 49 | } 50 | 51 | 52 | 53 | } -------------------------------------------------------------------------------- /src/org/hogzilla/hbase/HogHBaseRDD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 
/**
 * Shared HBase access layer for Hogzilla: owns the HBase configuration and
 * admin handle, one open HTable handle per Hogzilla table (created eagerly at
 * object initialization), the flow/sFlow column schemas, and helpers that
 * expose tables to Spark as RDDs of (row key, Result).
 */
object HogHBaseRDD {

  val conf = HBaseConfiguration.create()
  val admin = new HBaseAdmin(conf)

  // Feature schema of the "hogzilla_flows" table, "family:qualifier" plus a
  // C-style type name. NOTE(review): the extra HogFeature constructor
  // arguments (booleans, and sometimes a trailing Int) are flags whose
  // semantics are not visible from this file — see HogFeature.scala.
  val columns = new HashSet()++
  List(
    new HogFeature("flow:first_seen","u_int64_t",false),
    new HogFeature("flow:bittorent_hash","char",false),
    new HogFeature("flow:info","char",false),
    new HogFeature("flow:host_server_name","char",false),
    new HogFeature("flow:ssh_ssl_client_info","char",false),
    new HogFeature("flow:ssh_ssl_server_info","char",false),
    new HogFeature("flow:src_ip","u_int32_t",false),
    new HogFeature("flow:dst_ip","u_int32_t",false),
    new HogFeature("flow:src_port","u_int16_t",false),
    new HogFeature("flow:dst_port","u_int16_t",false),
    new HogFeature("flow:protocol","char",true,false),
    // new HogFeature("flow:bidirectional","u_int8_t"),
    new HogFeature("flow:src_name","char",false),
    new HogFeature("flow:dst_name","char",false),
    new HogFeature("flow:bytes","u_int64_t"),
    new HogFeature("flow:packets","u_int32_t"),
    new HogFeature("flow:payload_bytes","u_int64_t"),
    new HogFeature("flow:packets_without_payload","u_int32_t"),
    new HogFeature("flow:payload_bytes_first","u_int32_t"),
    new HogFeature("flow:flow_duration","u_int64_t"),
    new HogFeature("flow:flow_use_time","u_int64_t"),
    new HogFeature("flow:flow_idle_time","u_int64_t"),
    new HogFeature("flow:src2dst_pay_bytes","u_int64_t"),
    new HogFeature("flow:dst2src_pay_bytes","u_int64_t"),
    new HogFeature("flow:src2dst_header_bytes","u_int64_t"),
    new HogFeature("flow:dst2src_header_bytes","u_int64_t"),
    new HogFeature("flow:src2dst_packets","u_int32_t"),
    new HogFeature("flow:dst2src_packets","u_int32_t"),
    new HogFeature("flow:src2dst_inter_time_avg","u_int64_t"),
    new HogFeature("flow:src2dst_inter_time_min","u_int64_t"),
    new HogFeature("flow:src2dst_inter_time_max","u_int64_t"),
    new HogFeature("flow:src2dst_inter_time_std","u_int64_t"),
    new HogFeature("flow:dst2src_inter_time_avg","u_int64_t"),
    new HogFeature("flow:dst2src_inter_time_min","u_int64_t"),
    new HogFeature("flow:dst2src_inter_time_max","u_int64_t"),
    new HogFeature("flow:dst2src_inter_time_std","u_int64_t"),
    new HogFeature("flow:src2dst_pay_bytes_avg","u_int64_t"),
    new HogFeature("flow:src2dst_pay_bytes_min","u_int64_t"),
    new HogFeature("flow:src2dst_pay_bytes_max","u_int64_t"),
    new HogFeature("flow:src2dst_pay_bytes_std","u_int64_t"),
    new HogFeature("flow:dst2src_pay_bytes_avg","u_int64_t"),
    new HogFeature("flow:dst2src_pay_bytes_min","u_int64_t"),
    new HogFeature("flow:dst2src_pay_bytes_max","u_int64_t"),
    new HogFeature("flow:dst2src_pay_bytes_std","u_int64_t"),
    new HogFeature("flow:dst2src_pay_bytes_rate","u_int64_t"),
    new HogFeature("flow:src2dst_pay_bytes_rate","u_int64_t"),
    new HogFeature("flow:dst2src_packets_rate","u_int64_t"),
    new HogFeature("flow:src2dst_packets_rate","u_int64_t"),
    new HogFeature("flow:inter_time_avg","u_int64_t"),
    new HogFeature("flow:inter_time_min","u_int64_t"),
    new HogFeature("flow:inter_time_max","u_int64_t"),
    new HogFeature("flow:inter_time_std","u_int64_t"),
    new HogFeature("flow:payload_bytes_avg","u_int64_t"),
    new HogFeature("flow:payload_bytes_std","u_int64_t"),
    new HogFeature("flow:payload_bytes_min","u_int64_t"),
    new HogFeature("flow:payload_bytes_max","u_int64_t"),
    new HogFeature("flow:src2dst_header_bytes_avg","u_int64_t"),
    new HogFeature("flow:src2dst_header_bytes_min","u_int64_t"),
    new HogFeature("flow:src2dst_header_bytes_max","u_int64_t"),
    new HogFeature("flow:src2dst_header_bytes_std","u_int64_t"),
    new HogFeature("flow:dst2src_header_bytes_avg","u_int64_t"),
    new HogFeature("flow:dst2src_header_bytes_min","u_int64_t"),
    new HogFeature("flow:dst2src_header_bytes_max","u_int64_t"),
    new HogFeature("flow:dst2src_header_bytes_std","u_int64_t"),
    new HogFeature("flow:packets_syn","u_int32_t"),
    new HogFeature("flow:packets_ack","u_int32_t"),
    new HogFeature("flow:packets_fin","u_int32_t"),
    new HogFeature("flow:packets_rst","u_int32_t"),
    new HogFeature("flow:packets_psh","u_int32_t"),
    new HogFeature("flow:packets_urg","u_int32_t"),
    new HogFeature("flow:tcp_retransmissions","u_int32_t"),
    // new HogFeature("flow:payload_size_variation","u_int32_t"),
    // "C_"-prefixed features are per-contact aggregates (avg/min/max/std).
    new HogFeature("flow:C_number_of_contacts","u_int32_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_avg","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_min","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_max","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_std","u_int64_t"),
    new HogFeature("flow:C_src2dst_header_bytes_avg","u_int64_t"),
    new HogFeature("flow:C_src2dst_header_bytes_min","u_int64_t"),
    new HogFeature("flow:C_src2dst_header_bytes_max","u_int64_t"),
    new HogFeature("flow:C_src2dst_header_bytes_std","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_avg","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_min","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_max","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_std","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_avg","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_min","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_max","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_std","u_int64_t"),
    new HogFeature("flow:C_dst2src_header_bytes_avg","u_int64_t"),
    new HogFeature("flow:C_dst2src_header_bytes_min","u_int64_t"),
    new HogFeature("flow:C_dst2src_header_bytes_max","u_int64_t"),
    new HogFeature("flow:C_dst2src_header_bytes_std","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_avg","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_min","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_max","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_std","u_int64_t"),
    new HogFeature("flow:C_packets_syn_avg","u_int64_t"),
    new HogFeature("flow:C_packets_syn_min","u_int64_t"),
    new HogFeature("flow:C_packets_syn_max","u_int64_t"),
    new HogFeature("flow:C_packets_syn_std","u_int64_t"),
    new HogFeature("flow:C_packets_ack_avg","u_int64_t"),
    new HogFeature("flow:C_packets_ack_min","u_int64_t"),
    new HogFeature("flow:C_packets_ack_max","u_int64_t"),
    new HogFeature("flow:C_packets_ack_std","u_int64_t"),
    new HogFeature("flow:C_packets_fin_avg","u_int64_t"),
    new HogFeature("flow:C_packets_fin_min","u_int64_t"),
    new HogFeature("flow:C_packets_fin_max","u_int64_t"),
    new HogFeature("flow:C_packets_fin_std","u_int64_t"),
    new HogFeature("flow:C_packets_rst_avg","u_int64_t"),
    new HogFeature("flow:C_packets_rst_min","u_int64_t"),
    new HogFeature("flow:C_packets_rst_max","u_int64_t"),
    new HogFeature("flow:C_packets_rst_std","u_int64_t"),
    new HogFeature("flow:C_packets_psh_avg","u_int64_t"),
    new HogFeature("flow:C_packets_psh_min","u_int64_t"),
    new HogFeature("flow:C_packets_psh_max","u_int64_t"),
    new HogFeature("flow:C_packets_psh_std","u_int64_t"),
    new HogFeature("flow:C_packets_urg_avg","u_int64_t"),
    new HogFeature("flow:C_packets_urg_min","u_int64_t"),
    new HogFeature("flow:C_packets_urg_max","u_int64_t"),
    new HogFeature("flow:C_packets_urg_std","u_int64_t"),
    new HogFeature("flow:C_tcp_retransmissions_avg","u_int64_t"),
    new HogFeature("flow:C_tcp_retransmissions_min","u_int64_t"),
    new HogFeature("flow:C_tcp_retransmissions_max","u_int64_t"),
    new HogFeature("flow:C_tcp_retransmissions_std","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_rate_avg","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_rate_min","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_rate_max","u_int64_t"),
    new HogFeature("flow:C_dst2src_pay_bytes_rate_std","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_rate_avg","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_rate_min","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_rate_max","u_int64_t"),
    new HogFeature("flow:C_src2dst_pay_bytes_rate_std","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_rate_avg","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_rate_min","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_rate_max","u_int64_t"),
    new HogFeature("flow:C_dst2src_packets_rate_std","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_rate_avg","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_rate_min","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_rate_max","u_int64_t"),
    new HogFeature("flow:C_src2dst_packets_rate_std","u_int64_t"),
    new HogFeature("flow:C_duration_avg","u_int64_t"),
    new HogFeature("flow:C_duration_min","u_int64_t"),
    new HogFeature("flow:C_duration_max","u_int64_t"),
    new HogFeature("flow:C_duration_std","u_int64_t"),
    new HogFeature("flow:C_idletime_avg","u_int64_t"),
    new HogFeature("flow:C_idletime_min","u_int64_t"),
    new HogFeature("flow:C_idletime_max","u_int64_t"),
    new HogFeature("flow:C_idletime_std","u_int64_t"),
    new HogFeature("flow:response_rel_time","u_int32_t"),
    new HogFeature("flow:detection_completed","u_int8_t"),
    // NOTE(review): ",char" (leading comma) differs from every other type
    // string in this list — possible typo for "char"; confirm against how
    // HogFeature consumes its type argument before changing it.
    new HogFeature("flow:ndpi_risk",",char",false,false,1),
    new HogFeature("flow:detected_os","char",false),
    new HogFeature("flow:dns_num_queries","u_int32_t"),
    new HogFeature("flow:dns_num_answers","u_int32_t"),
    new HogFeature("flow:dns_reply_code","u_int32_t"),
    new HogFeature("flow:dns_query_type","u_int32_t"),
    new HogFeature("flow:dns_query_class","u_int32_t"),
    new HogFeature("flow:dns_rsp_type","u_int32_t"),

    new HogFeature("flow:http_url","char",false),
    new HogFeature("flow:http_content_type","char",true,false),
    new HogFeature("flow:http_method","u_int32_t"),
    new HogFeature("flow:http_num_request_headers","u_int32_t"),
    new HogFeature("flow:http_num_response_headers","u_int32_t"),
    new HogFeature("flow:http_request_version","u_int32_t"),
    new HogFeature("flow:http_response_status_code","u_int32_t"),


    new HogFeature("event:sensor_id","u_int32_t",false),
    new HogFeature("event:event_id","u_int32_t",false),
    new HogFeature("event:event_second","u_int64_t",false),
    new HogFeature("event:event_microsecond","u_int64_t",false),
    new HogFeature("event:signature_id","u_int64_t",false,false,1),
    new HogFeature("event:generator_id","u_int64_t",false),
    new HogFeature("event:classification_id","u_int32_t",false),
    new HogFeature("event:priority_id","u_int32_t",false)
  )


  // Column names used when reading sFlow samples from "hogzilla_sflows".
  val columnsSFlow = List("flow:IPprotocol","flow:IPsize","flow:agentID","flow:dstIP","flow:dstMAC","flow:dstPort","flow:ethernetType","flow:inVlan","flow:inputPort","flow:ipTos",
      "flow:ipTtl","flow:outVlan","flow:outputPort","flow:packetSize","flow:samplingRate","flow:srcIP","flow:srcMAC","flow:srcPort","flow:tcpFlags",
      "flow:timestamp")

  // "flow:inter_time-%d","flow:packet_size-%d"

  // One open HTable handle per Hogzilla table; opened eagerly when this
  // object is first referenced, and shared by the whole application.
  val hogzilla_flows           = new HTable(conf,"hogzilla_flows")
  val hogzilla_sflows          = new HTable(conf,"hogzilla_sflows")
  val hogzilla_events          = new HTable(conf,"hogzilla_events")
  val hogzilla_sensor          = new HTable(conf,"hogzilla_sensor")
  val hogzilla_signatures      = new HTable(conf,"hogzilla_signatures")
  val hogzilla_mynets          = new HTable(conf,"hogzilla_mynets")
  val hogzilla_reputation      = new HTable(conf,"hogzilla_reputation")
  val hogzilla_histograms      = new HTable(conf,"hogzilla_histograms")
  val hogzilla_clusters        = new HTable(conf,"hogzilla_clusters")
  val hogzilla_cluster_members = new HTable(conf,"hogzilla_cluster_members")
  val hogzilla_inventory       = new HTable(conf,"hogzilla_inventory")
  val hogzilla_authrecords     = new HTable(conf,"hogzilla_authrecords")


  /**
   * Expose "hogzilla_flows" as a Spark RDD of (row key, Result), with long
   * (30-minute) scan/session timeouts. If the table is unavailable this only
   * prints a warning and still attempts the scan.
   */
  def connect(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
  {
    val table = "hogzilla_flows"

    conf.set(TableInputFormat.INPUT_TABLE, table)
    conf.set("zookeeper.session.timeout", "1800000")
    conf.setInt("hbase.client.scanner.timeout.period", 1800000)
    // You can limit the SCANNED COLUMNS here
    conf.set("hbase.rpc.timeout", "1800000")
    //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),


    if (!admin.isTableAvailable(table)) {
      println("Table hogzilla_flows does not exist.")
    }

    val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

    return hBaseRDD
  }

  /**
   * Expose "hogzilla_sflows" as a Spark RDD of (row key, Result), with
   * 10-minute timeouts. Warns (does not fail) when the table is unavailable.
   */
  def connectSFlow(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
  {
    val table = "hogzilla_sflows"

    conf.set(TableInputFormat.INPUT_TABLE, table)
    conf.set("zookeeper.session.timeout", "600000")
    conf.setInt("hbase.client.scanner.timeout.period", 600000)
    //conf.set("hbase.rpc.timeout", "1800000")
    // You can limit the SCANNED COLUMNS here
    //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),


    if (!admin.isTableAvailable(table)) {
      println("Table hogzilla_sflows does not exist.")
    }

    val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

    return hBaseRDD
  }



  /**
   * Expose "hogzilla_histograms" as a Spark RDD of (row key, Result), with
   * 10-minute timeouts. Warns (does not fail) when the table is unavailable.
   */
  def connectHistograms(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
  {
    val table = "hogzilla_histograms"

    conf.set(TableInputFormat.INPUT_TABLE, table)
    conf.set("zookeeper.session.timeout", "600000")
    conf.setInt("hbase.client.scanner.timeout.period", 600000)
    //conf.set("hbase.rpc.timeout", "1800000")
    // You can limit the SCANNED COLUMNS here
    //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),


    if (!admin.isTableAvailable(table)) {
      println("Table hogzilla_histograms does not exist.")
    }

    val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

    return hBaseRDD
  }



  /**
   * Expose "hogzilla_authrecords" as a Spark RDD of (row key, Result), with
   * 10-minute timeouts. Warns (does not fail) when the table is unavailable.
   */
  def connectAuth(spark: SparkContext):RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]=
  {
    val table = "hogzilla_authrecords"

    conf.set(TableInputFormat.INPUT_TABLE, table)
    conf.set("zookeeper.session.timeout", "600000")
    conf.setInt("hbase.client.scanner.timeout.period", 600000)
    //conf.set("hbase.rpc.timeout", "1800000")
    // You can limit the SCANNED COLUMNS here
    //conf.set(TableInputFormat.SCAN_COLUMNS, "flow:packets,flow:detected_protocol"),


    if (!admin.isTableAvailable(table)) {
      println("Table hogzilla_authrecords does not exist.")
    }

    val hBaseRDD = spark.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

    return hBaseRDD
  }

  // Closes only the admin handle; the HTable handles above are left open.
  def close()
  {
    admin.close()
  }

}
You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla.hbase 21 | 22 | 23 | /** 24 | * @author pa 25 | */ 26 | 27 | import scala.math.random 28 | import java.lang.Math 29 | import org.apache.spark._ 30 | import org.apache.hadoop.hbase.client.HBaseAdmin 31 | import org.apache.hadoop.hbase.util.Bytes 32 | import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor, TableName} 33 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 34 | import org.apache.spark.mllib.regression.{LabeledPoint,LinearRegressionModel,LinearRegressionWithSGD} 35 | import org.apache.spark.mllib.linalg.Vectors 36 | import org.apache.spark.rdd.RDD 37 | import org.apache.hadoop.hbase.client.HTable 38 | import org.apache.hadoop.hbase.filter.SingleColumnValueFilter 39 | import org.apache.hadoop.hbase.filter.BinaryComparator 40 | import org.apache.hadoop.hbase.filter.FilterList 41 | import org.apache.hadoop.hbase.filter.CompareFilter 42 | import java.util.ArrayList 43 | import org.apache.hadoop.hbase.client.Scan 44 | import org.apache.hadoop.hbase.filter.Filter 45 | import scala.collection.mutable.HashSet 46 | import org.apache.hadoop.hbase.client.Put 47 | 48 | 49 | object HogHBaseReputation { 50 | 51 | // Ex: MX, whitelist 52 | def getReputationList(listName:String, listType:String):Set[String] = 53 | { 54 | val list = new HashSet[String] 55 | 56 | 57 | val filters: ArrayList[Filter] = new 
ArrayList(); 58 | 59 | val colValFilter1 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), 60 | CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listType))) 61 | colValFilter1.setFilterIfMissing(false); 62 | 63 | val colValFilter2 = new SingleColumnValueFilter(Bytes.toBytes("rep"), Bytes.toBytes("list"), 64 | CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(listName))) 65 | colValFilter2.setFilterIfMissing(false); 66 | 67 | filters.add(colValFilter1); 68 | filters.add(colValFilter2); 69 | 70 | val filterList = new FilterList( FilterList.Operator.MUST_PASS_ALL, filters); 71 | val scan = new Scan() 72 | scan.setFilter(filterList) 73 | 74 | val scanner = HogHBaseRDD.hogzilla_reputation.getScanner(scan); val it = scanner.iterator() 75 | 76 | while(it.hasNext()) 77 | { 78 | list.add( Bytes.toString(it.next().getValue(Bytes.toBytes("rep"),Bytes.toBytes("ip"))) ) 79 | } 80 | scanner.close() /* ResultScanner holds client/server scan resources; must be closed after the read loop */ 81 | list.toSet 82 | 83 | } 84 | 85 | def saveReputationList(listName:String, listType:String, ip:String) = 86 | { 87 | val put = new Put(Bytes.toBytes(ip+"-"+listName+"-"+listType)) 88 | put.add(Bytes.toBytes("rep"), Bytes.toBytes("list_type"), Bytes.toBytes(listType)) 89 | put.add(Bytes.toBytes("rep"), Bytes.toBytes("list"), Bytes.toBytes(listName)) 90 | put.add(Bytes.toBytes("rep"), Bytes.toBytes("ip"), Bytes.toBytes(ip)) 91 | 92 | HogHBaseRDD.hogzilla_reputation.put(put) 93 | } 94 | 95 | } -------------------------------------------------------------------------------- /src/org/hogzilla/hbase/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation.
You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package object hbase { 26 | 27 | 28 | } -------------------------------------------------------------------------------- /src/org/hogzilla/histogram/Histograms.scala: -------------------------------------------------------------------------------- 1 | package org.hogzilla.histogram 2 | 3 | import scala.collection.mutable.HashSet 4 | import scala.collection.mutable.Map 5 | import scala.collection.mutable.Set 6 | import scala.math.log 7 | 8 | 9 | /** 10 | * @author pa 11 | */ 12 | 13 | object Histograms { 14 | 15 | 16 | val atypicalThreshold = 0.0000001D 17 | 18 | def KullbackLiebler(histogram1:Map[String,Double],histogram2:Map[String,Double]):Double = 19 | { 20 | 21 | val keys = histogram1.keySet ++ histogram2.keySet 22 | 23 | keys./:(0.0){ case (ac,key) => 24 | val p:Double = { if(histogram1.get(key).isEmpty) 0 else histogram1.get(key).get } 25 | val q:Double = { if(histogram2.get(key).isEmpty) 0 else histogram2.get(key).get } 26 | if(p==0) 27 | ac 28 | else 29 | { 30 | if(q==0) 31 | ac + 0 32 | else 33 | ac + p*log(p/q) 34 | } 35 | } 36 | } 37 | 38 | 39 | def atypical(histogram1:Map[String,Double],histogram2:Map[String,Double]):Set[String] = 40 | { 41 | 42 | val ret = new HashSet[String] 43 | 44 | val keys = histogram2.keySet 45 | 46 | keys./:(0.0){ case (ac,key) => 47 | val 
p:Double = { if(histogram1.get(key).isEmpty) 0 else histogram1.get(key).get } 48 | val q:Double = { if(histogram2.get(key).isEmpty) 0 else histogram2.get(key).get } 49 | if(p<atypicalThreshold && q>atypicalThreshold) /* below threshold in saved profile, significant in current window */ 50 | { 51 | ret.add(key) 52 | ac+1 53 | } 54 | else 55 | 0 56 | } 57 | 58 | ret 59 | } 60 | 61 | // Return typical events in histogram1 (main saved), which occurred in histogram2 (current) 62 | def typical(histogram1:Map[String,Double],histogram2:Map[String,Double]):Set[String] = 63 | { 64 | 65 | val ret = new HashSet[String] 66 | 67 | val keys = histogram2.keySet 68 | 69 | keys./:(0.0){ case (ac,key) => 70 | val p:Double = { if(histogram1.get(key).isEmpty) 0 else histogram1.get(key).get } 71 | val q:Double = { if(histogram2.get(key).isEmpty) 0 else histogram2.get(key).get } 72 | if(p>atypicalThreshold && q>atypicalThreshold) 73 | { 74 | ret.add(key) 75 | ac+1 76 | } 77 | else 78 | 0 79 | } 80 | 81 | ret 82 | } 83 | 84 | def isTypicalEvent(histogram1:Map[String,Double],event:String):Boolean= 85 | { 86 | 87 | val p:Double = { if(histogram1.get(event).isEmpty) 0 else histogram1.get(event).get } 88 | if(p>atypicalThreshold) 89 | { 90 | true 91 | } 92 | else 93 | false 94 | 95 | } 96 | 97 | def isAtypicalEvent(histogram1:Map[String,Double],event:String):Boolean= 98 | { 99 | !isTypicalEvent(histogram1,event) 100 | } 101 | 102 | 103 | def merge(histogram1:HogHistogram,histogram2:HogHistogram):HogHistogram = 104 | { 105 | 106 | val keys = histogram1.histMap.keySet ++ histogram2.histMap.keySet 107 | val keysLabel = histogram1.histLabels.keySet ++ histogram2.histLabels.keySet 108 | var div:Double = 1 109 | if(histogram1.histSize.toDouble > 1000) 110 | div = 2 111 | 112 | keys./:(0.0){ case (ac,key) => 113 | val p:Double = { if(histogram1.histMap.get(key).isEmpty) 0 else histogram1.histMap.get(key).get } 114 | val q:Double = { if(histogram2.histMap.get(key).isEmpty) 0 else histogram2.histMap.get(key).get } 115 | 116 | if(p>0 || q>0) 117 | { 118 | val newp = ( 119 |
p*histogram1.histSize.toDouble/div+ 120 | q*histogram2.histSize.toDouble 121 | 122 | )/(histogram1.histSize.toDouble/div+histogram2.histSize.toDouble) 123 | 124 | histogram1.histMap.put(key,newp) 125 | } 126 | 0D 127 | } 128 | 129 | keysLabel./:(0.0){ case (ac,key) => 130 | if(histogram1.histLabels.get(key).isEmpty) 131 | histogram1.histLabels.put(key,histogram2.histLabels.get(key).get) 132 | 133 | 0D 134 | } 135 | 136 | val total = histogram1.histSize/div+histogram2.histSize 137 | new HogHistogram(histogram1.histName,total.toInt,histogram1.histMap,histogram1.histLabels) 138 | } 139 | 140 | // It is not exactly a histogram, but... 141 | def mergeMax(histogram1:HogHistogram,histogram2:HogHistogram):HogHistogram = 142 | { 143 | 144 | 145 | val keys = histogram1.histMap.keySet ++ histogram2.histMap.keySet 146 | val keysLabel = histogram1.histLabels.keySet ++ histogram2.histLabels.keySet 147 | 148 | keys./:(0.0){ case (ac,key) => 149 | val p:Double = { if(histogram1.histMap.get(key).isEmpty) 0 else histogram1.histMap.get(key).get } 150 | val q:Double = { if(histogram2.histMap.get(key).isEmpty) 0 else histogram2.histMap.get(key).get } 151 | 152 | if(p>0 || q>0) 153 | { 154 | histogram1.histMap.put(key,p.max(q)) 155 | } 156 | 0D 157 | } 158 | 159 | keysLabel./:(0.0){ case (ac,key) => 160 | if(histogram1.histLabels.get(key).isEmpty) 161 | histogram1.histLabels.put(key,histogram2.histLabels.get(key).get) 162 | 163 | 0D 164 | } 165 | 166 | val total = histogram1.histSize+histogram2.histSize 167 | new HogHistogram(histogram1.histName,total,histogram1.histMap,histogram1.histLabels) 168 | } 169 | 170 | 171 | // hist1 - hist2 172 | def difference(histogram1:HogHistogram,histogram2:HogHistogram):HogHistogram = 173 | { 174 | 175 | val keys = histogram2.histMap.keySet // ++ histogram2.histMap.keySet 176 | 177 | keys./:(0.0){ case (ac,key) => 178 | val p:Double = { if(histogram1.histMap.get(key).isEmpty) 0 else histogram1.histMap.get(key).get } 179 | val q:Double = { 
if(histogram2.histMap.get(key).isEmpty) 0 else histogram2.histMap.get(key).get } 180 | 181 | if(p>0 || q>0) 182 | { 183 | val newp = ( 184 | 185 | p*histogram1.histSize.toDouble- 186 | q*histogram2.histSize.toDouble 187 | 188 | )/(histogram1.histSize.toDouble-histogram2.histSize.toDouble) 189 | 190 | histogram1.histMap.put(key,newp) 191 | } 192 | 0D 193 | } 194 | 195 | val total = histogram1.histSize-histogram2.histSize 196 | new HogHistogram(histogram1.histName,total,histogram1.histMap,histogram1.histLabels) 197 | } 198 | 199 | 200 | def getIPFromHistName(histogramName:String):String = 201 | { 202 | histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString 203 | } 204 | 205 | /* 206 | final val EPS = 1e-10 207 | 208 | type DATASET = Iterator[(Double, Double)] 209 | 210 | def execute( xy: DATASET, f: Double => Double): Double = { 211 | val z = xy.filter{ case(x, y) => abs(y) > EPS} 212 | - z./:(0.0){ case(s, (x, y)) => s + y*log(f(x)/y)} 213 | } 214 | 215 | def execute( xy: DATASET, fs: Iterable[Double=>Double]): Iterable[Double] = 216 | fs.map(execute(xy, _)) 217 | 218 | 219 | */ 220 | 221 | 222 | } -------------------------------------------------------------------------------- /src/org/hogzilla/histogram/HogHistogram.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla.histogram 21 | 22 | import scala.collection.mutable.Map 23 | import scala.collection.mutable.HashMap 24 | 25 | class HogHistogram(val histName:String,val histSize:Long, val histMap:Map[String,Double], val histLabels:Map[String,String]=new HashMap[String,String]) 26 | { 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/org/hogzilla/histogram/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package org.hogzilla.histogram { 26 | 27 | } -------------------------------------------------------------------------------- /src/org/hogzilla/http/HogHTTP.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
18 | */ 19 | /** 20 | * REFERENCES: 21 | * - http://ids-hogzilla.org/xxx/826000101 22 | */ 23 | 24 | 25 | package org.hogzilla.http 26 | 27 | import scala.math.random 28 | import org.apache.hadoop.hbase.util.Bytes 29 | import org.apache.spark._ 30 | import org.apache.spark.mllib.clustering.KMeans 31 | import org.apache.spark.mllib.linalg.Vectors 32 | import org.apache.spark.mllib.linalg.Vector 33 | import org.apache.spark.rdd.RDD 34 | import org.hogzilla.hbase.HogHBaseRDD 35 | import org.hogzilla.event.{HogEvent, HogSignature} 36 | import java.util.HashSet 37 | import org.apache.spark.mllib.regression.LabeledPoint 38 | import org.apache.spark.mllib.classification.SVMWithSGD 39 | import scala.tools.nsc.doc.base.comment.OrderedList 40 | import org.apache.spark.mllib.optimization.L1Updater 41 | import org.hogzilla.util.HogFlow 42 | import scala.collection.mutable.HashMap 43 | import scala.collection.mutable.Map 44 | 45 | /** 46 | * 47 | */ 48 | object HogHTTP { 49 | 50 | val signature = (HogSignature(3,"HZ: Suspicious HTTP flow identified by K-Means clustering",2,1,826000101,826).saveHBase(), 51 | HogSignature(3,"HZ: Suspicious HTTP flow identified by SuperBag",2,1,826000102,826).saveHBase()) 52 | 53 | val numberOfClusters=32 54 | val maxAnomalousClusterProportion=0.05 55 | val minDirtyProportion=0.001 56 | 57 | /** 58 | * 59 | * 60 | * 61 | */ 62 | def run(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext) 63 | { 64 | 65 | // HTTP K-means clustering 66 | kmeans(HogRDD) 67 | 68 | } 69 | 70 | 71 | /** 72 | * 73 | * 74 | * 75 | */ 76 | def kmeansPopulate(event:HogEvent):HogEvent = 77 | { 78 | val centroids:String = event.data.get("centroids") 79 | val vector:String = event.data.get("vector") 80 | val clusterLabel:String = event.data.get("clusterLabel") 81 | val hostname:String = event.data.get("hostname") 82 | 83 | 84 | event.text = "This flow was detected by Hogzilla as an anormal activity. 
In what follows you can see more information.\n"+ 85 | "Hostname mentioned in HTTP flow: "+hostname+"\n"+ 86 | "Hogzilla module: HogHTTP, Method: k-means clustering with k="+numberOfClusters+"\n"+ 87 | "URL for more information: http://ids-hogzilla.org/signature-db/"+"%.0f".format(signature._1.signature_id)+"\n"+"" 88 | //"Centroids:\n"+centroids+"\n"+ 89 | //"Vector: "+vector+"\n"+ 90 | //"(cluster,label nDPI): "+clusterLabel+"\n" 91 | 92 | event.signature_id = signature._1.signature_id 93 | 94 | event 95 | } 96 | 97 | 98 | /** 99 | * 100 | * 101 | * 102 | */ 103 | def kmeans(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]) 104 | { 105 | 106 | val features = Array("flow:avg_packet_size", 107 | "flow:packets_without_payload", 108 | "flow:avg_inter_time", 109 | "flow:flow_duration", 110 | "flow:max_packet_size", 111 | "flow:bytes", 112 | "flow:packets", 113 | "flow:min_packet_size", 114 | "flow:packet_size-0", 115 | "flow:inter_time-0", 116 | "flow:packet_size-1", 117 | "flow:inter_time-1", 118 | "flow:packet_size-2", 119 | "flow:inter_time-2", 120 | "flow:packet_size-3", 121 | "flow:inter_time-3", 122 | "flow:packet_size-4", 123 | "flow:inter_time-4", 124 | "flow:http_method") 125 | 126 | println("Filtering HogRDD...") 127 | val HttpRDD = HogRDD. 
128 | map { case (id,result) => { 129 | val map: Map[String,String] = new HashMap[String,String] 130 | map.put("flow:id",Bytes.toString(id.get).toString()) 131 | HogHBaseRDD.columns.foreach { column => 132 | 133 | val ret = result.getValue(Bytes.toBytes(column.name.split(":")(0).toString()),Bytes.toBytes(column.name.split(":")(1).toString())) 134 | map.put(column.name, Bytes.toString(ret)) 135 | } 136 | if(map.get("flow:packet_size-1")==null) map.put("flow:packet_size-1","0") 137 | if(map.get("flow:inter_time-1")==null) map.put("flow:inter_time-1","0") 138 | if(map.get("flow:packet_size-2")==null) map.put("flow:packet_size-2","0") 139 | if(map.get("flow:inter_time-2")==null) map.put("flow:inter_time-2","0") 140 | if(map.get("flow:packet_size-3")==null) map.put("flow:packet_size-3","0") 141 | if(map.get("flow:inter_time-3")==null) map.put("flow:inter_time-3","0") 142 | if(map.get("flow:packet_size-4")==null) map.put("flow:packet_size-4","0") 143 | if(map.get("flow:inter_time-4")==null) map.put("flow:inter_time-4","0") 144 | if(map.get("flow:http_method")==null) map.put("flow:http_method","0") 145 | 146 | val lower_ip = result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("lower_ip")) 147 | val upper_ip = result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("upper_ip")) 148 | new HogFlow(map,Bytes.toString(lower_ip),Bytes.toString(upper_ip)) 149 | } 150 | }.filter(x => ( x.get("flow:lower_port").equals("80") || 151 | x.get("flow:upper_port").equals("80") || 152 | x.get("flow:lower_port").equals("81") || 153 | x.get("flow:upper_port").equals("81") 154 | ) && x.get("flow:packets").toDouble.>(1) 155 | && x.get("flow:id").split('.')(0).toLong.<(System.currentTimeMillis()-6000000) 156 | ).cache 157 | 158 | println("Counting HogRDD...") 159 | val RDDtotalSize= HttpRDD.count() 160 | println("Filtered HogRDD has "+RDDtotalSize+" rows!") 161 | 162 | if(RDDtotalSize==0) 163 | return 164 | 165 | println("Calculating some variables to normalize data...") 166 | val HttpRDDcount = 
HttpRDD.map(flow => features.map { feature => flow.get(feature).toDouble }).cache() 167 | val n = RDDtotalSize 168 | val numCols = HttpRDDcount.first.length 169 | val sums = HttpRDDcount.reduce((a,b) => a.zip(b).map(t => t._1 + t._2)) 170 | val sumSquares = HttpRDDcount.fold( 171 | new Array[Double](numCols) 172 | )( 173 | (a,b) => a.zip(b).map(t => t._1 + t._2*t._2) 174 | ) 175 | 176 | val stdevs = sumSquares.zip(sums).map{ 177 | case(sumSq,sum) => math.sqrt(n*sumSq - sum*sum)/n 178 | } 179 | 180 | val means = sums.map(_/n) 181 | 182 | def normalize(vector: Vector):Vector = { 183 | val normArray = (vector.toArray,means,stdevs).zipped.map( 184 | (value,mean,std) => 185 | if(std<=0) (value-mean) else (value-mean)/std) 186 | return Vectors.dense(normArray) 187 | } 188 | 189 | println("Normalizing data...") 190 | val labelAndData = HttpRDD.map { flow => 191 | val vector = Vectors.dense(features.map { feature => flow.get(feature).toDouble }) 192 | ( (flow.get("flow:detected_protocol"), 193 | if (flow.get("event:priority_id")!=null && flow.get("event:priority_id").equals("1")) 1 else 0 , 194 | flow.get("flow:host_server_name"),flow), 195 | normalize(vector) 196 | ) 197 | } 198 | 199 | println("Estimating model...") 200 | val data = labelAndData.values.cache() 201 | val kmeans = new KMeans() 202 | kmeans.setK(numberOfClusters) 203 | val vectorCount = data.count() 204 | println("Number of vectors: "+vectorCount) 205 | val model = kmeans.run(data) 206 | 207 | println("Predicting points (ie, find cluster for each point)...") 208 | val clusterLabel = labelAndData.map({ 209 | case (label,datum) => 210 | val cluster = model.predict(datum) 211 | (cluster,label,datum) 212 | }) 213 | 214 | println("Generating histogram...") 215 | val clusterLabelCount = clusterLabel.map({ 216 | case (cluster,label,datum) => 217 | val map: Map[(Int,String),(Double,Int)] = new HashMap[(Int,String),(Double,Int)] 218 | map.put((cluster,label._1), (label._2.toDouble,1)) 219 | map 220 | }).reduce((a,b) 
=> { 221 | 222 | b./:(0){ 223 | case (c,((key:(Int,String)),(avg2,count2))) => 224 | 225 | val avg = (a.get(key).get._1*a.get(key).get._2 + b.get(key).get._1*b.get(key).get._2)/ 226 | (a.get(key).get._2+b.get(key).get._2) 227 | 228 | a.put(key, (avg,a.get(key).get._2+b.get(key).get._2)) 229 | 230 | 0 231 | } 232 | /* 233 | b.keySet().toArray() 234 | .map { 235 | case key: (Int,String) => 236 | if (a.containsKey(key)) 237 | { 238 | val avg = (a.get(key)._1*a.get(key)._2 + b.get(key)._1*b.get(key)._2)/ 239 | (a.get(key)._2+b.get(key)._2) 240 | 241 | a.put(key, (avg,a.get(key)._2+b.get(key)._2)) 242 | }else 243 | a.put(key,b.get(key)) 244 | }*/ 245 | a 246 | }) 247 | 248 | println("######################################################################################") 249 | println("######################################################################################") 250 | println("######################################################################################") 251 | println("######################################################################################") 252 | println("HTTP K-Means Clustering") 253 | println("Centroids") 254 | val centroids = ""+model.clusterCenters.mkString(",\n") 255 | //model.clusterCenters.foreach { center => centroids.concat("\n"+center.toString) } 256 | 257 | clusterLabelCount./:(0) 258 | { case (z,(key:(Int,String),(avg,count))) => 259 | val cluster = key._1 260 | val label = key._2 261 | //val count =clusterLabelCount.get(key).get._2.toString 262 | //val avg = clusterLabelCount.get(key).get._1.toString 263 | println(f"Cluster: $cluster%1s\t\tLabel: $label%20s\t\tCount: $count%10s\t\tAvg: $avg%10s") 264 | 0 265 | } 266 | 267 | val thr=maxAnomalousClusterProportion*RDDtotalSize 268 | 269 | println("Selecting cluster to be tainted...") 270 | val taintedArray = clusterLabelCount.filter({ case (key:(Int,String),(avg,count)) => 271 | (count.toDouble < thr 272 | && avg.toDouble >= minDirtyProportion ) 273 | }).map(_._1) 274 | //. 
275 | // sortBy ({ case (cluster:Int,label:String) => clusterLabelCount.get((cluster,label))._1.toDouble }).reverse 276 | 277 | taintedArray.par.map 278 | { 279 | tainted => 280 | 281 | //val tainted = taintedArray.apply(0) 282 | 283 | println("######################################################################################") 284 | println("Tainted flows of: "+tainted.toString()) 285 | 286 | println("Generating events into HBase...") 287 | clusterLabel.filter({ case (cluster,(group,tagged,hostname,flow),datum) => (cluster,group).equals(tainted) && tagged.equals(0) }). 288 | foreach{ case (cluster,(group,tagged,hostname,flow),datum) => 289 | val event = new HogEvent(flow) 290 | event.data.put("centroids", centroids) 291 | event.data.put("vector", datum.toString) 292 | event.data.put("clusterLabel", "("+cluster.toString()+","+group+")") 293 | event.data.put("hostname", flow.get("flow:host_server_name")+"/"+flow.get("flow:http_url")) 294 | kmeansPopulate(event).alert() 295 | } 296 | 297 | /* 298 | (1 to 9).map{ k => 299 | println("######################################################################################") 300 | println(f"Hosts from cluster $k%1s") 301 | clusterLabel.filter(_._1.equals(k)).foreach{ case (cluster,label,datum) => 302 | print(label._3+"|") 303 | } 304 | println("") 305 | } 306 | */ 307 | println("######################################################################################") 308 | println("######################################################################################") 309 | println("######################################################################################") 310 | println("######################################################################################") 311 | 312 | } 313 | 314 | if(taintedArray.isEmpty) 315 | { 316 | println("No flow matched!") 317 | } 318 | 319 | } 320 | 321 | 322 | 323 | } -------------------------------------------------------------------------------- 
/src/org/hogzilla/http/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package org.hogzilla.http { 26 | 27 | } -------------------------------------------------------------------------------- /src/org/hogzilla/initiate/HogInitiate.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 
14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla.initiate 21 | 22 | import org.apache.spark._ 23 | import org.hogzilla.hbase.HogHBaseRDD 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.hadoop.hbase.client.Get 26 | import org.apache.hadoop.hbase.client.Put 27 | 28 | 29 | object HogInitiate { 30 | 31 | val sensor_description="Hogzilla IDS" 32 | val sensor_hostname="hoghostname" 33 | 34 | 35 | def initiate(spark: SparkContext) 36 | { 37 | 38 | val get = new Get(Bytes.toBytes("1")) 39 | 40 | if(!HogHBaseRDD.hogzilla_sensor.exists(get)) 41 | { 42 | val put = new Put(Bytes.toBytes("1")) 43 | put.add(Bytes.toBytes("sensor"), Bytes.toBytes("description"), Bytes.toBytes(sensor_description)) 44 | put.add(Bytes.toBytes("sensor"), Bytes.toBytes("hostname"), Bytes.toBytes(sensor_hostname)) 45 | HogHBaseRDD.hogzilla_sensor.put(put) 46 | } 47 | 48 | } 49 | 50 | } -------------------------------------------------------------------------------- /src/org/hogzilla/initiate/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 
14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package org.hogzilla.initiate { 26 | 27 | } -------------------------------------------------------------------------------- /src/org/hogzilla/prepare/HogPrepare.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
18 | */ 19 | 20 | package org.hogzilla.prepare 21 | 22 | import java.util.HashMap 23 | import java.util.Map 24 | import org.apache.hadoop.hbase.util.Bytes 25 | import org.apache.spark.rdd.RDD 26 | import org.hogzilla.hbase.HogHBaseRDD 27 | import org.apache.hadoop.hbase.client.RowMutations 28 | import org.apache.hadoop.hbase.client.Put 29 | import org.apache.hadoop.hbase.client.Delete 30 | import org.apache.hadoop.hbase.client.Scan 31 | import org.apache.hadoop.hbase.filter.Filter 32 | import org.apache.hadoop.hbase.filter.SingleColumnValueFilter 33 | import org.apache.hadoop.hbase.filter.BinaryComparator 34 | import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp 35 | import org.apache.hadoop.hbase.filter.CompareFilter 36 | 37 | 38 | object HogPrepare { 39 | 40 | def prepare(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]) 41 | { 42 | println("Cleaning HBase...") 43 | cleanFlows(HogRDD) 44 | cleanSFlows(HogRDD) 45 | cleanAuthRecords(HogRDD) 46 | } 47 | 48 | def cleanFlows(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]) 49 | { 50 | 51 | 52 | /** 53 | * This is an illustration of the purge process in a fancy time-line. 54 | * 55 | * 56 | * Sup1-denseTime tSup1 tSup2 now 57 | * old flows | dense period | training dirty period | don't touch | future 58 | * -------------------------------------------------------------------------------------------------------------> 59 | * remove all remove all Remove flows w/o events 60 | * in par priority_id=1 in par 61 | * 62 | * You can change this, but the time below are reasonable 63 | * 64 | * tSup2 = now - timeUnit 65 | * tSup1 = now - 100*timeUnit 66 | * denseTime = 2*timeUnit 67 | * 68 | * 24h = 86400000 69 | * 12h = 43200000 70 | * 06h = 21600000 71 | */ 72 | 73 | // Delete old data from HBase 86400 is one day. You should need even more, depends on your available resources. 
74 | 75 | println("Cleaning hogzilla_flows...") 76 | val now = System.currentTimeMillis 77 | 78 | val timeUnit:Long = 21600000 /* maybe one day (86400000) or half (43200000) */ 79 | val timeSuperior1 = now - (timeUnit*100) 80 | val timeSuperior2 = now - timeUnit 81 | val nSplits = 4 /* number of parallel tasks */ 82 | val denseTime = timeUnit*4 83 | val deltaT1 = denseTime/nSplits 84 | val deltaT2 = (timeSuperior2-timeSuperior1)/nSplits 85 | 86 | println("Removing all older than "+timeSuperior1) 87 | val totalOld = (0 to nSplits).toList.par.map({ k => 88 | 89 | val scan = new Scan 90 | 91 | if(k.equals(0)) 92 | scan.setTimeRange(0, timeSuperior1-denseTime) 93 | else 94 | scan.setTimeRange(timeSuperior1-denseTime + deltaT1*(k-1), timeSuperior1-denseTime + deltaT1*k) 95 | 96 | 97 | println("TimeRange: "+scan.getTimeRange.toString()) 98 | 99 | val scanner = HogHBaseRDD.hogzilla_flows.getScanner(scan).iterator() 100 | 101 | var counter=0; 102 | while(scanner.hasNext()) 103 | { 104 | HogHBaseRDD.hogzilla_flows.delete(new Delete(scanner.next().getRow)) 105 | counter+=1 106 | } 107 | 108 | counter 109 | }).reduce( (a,b) => a+b) 110 | 111 | println("Old rows dropped: "+totalOld) 112 | 113 | println("Removing flows w/o events priority 1, which are between "+timeSuperior1+" and "+timeSuperior2) 114 | val totalWOEvent = (1 to nSplits).toList.par.map({ k => 115 | 116 | val scan = new Scan 117 | val filter = new SingleColumnValueFilter(Bytes.toBytes("event"), 118 | Bytes.toBytes("priority_id"), 119 | CompareOp.valueOf("NOT_EQUAL"), 120 | new BinaryComparator(Bytes.toBytes("1"))) 121 | 122 | filter.setFilterIfMissing(false) 123 | 124 | scan.setTimeRange(timeSuperior1 + deltaT2*(k-1), timeSuperior1 + deltaT2*k) 125 | 126 | scan.setFilter(filter) 127 | 128 | println("TimeRange: "+scan.getTimeRange.toString()) 129 | 130 | val scanner = HogHBaseRDD.hogzilla_flows.getScanner(scan).iterator() 131 | 132 | var counter=0; 133 | while(scanner.hasNext()) 134 | { 135 | 
HogHBaseRDD.hogzilla_flows.delete(new Delete(scanner.next().getRow)) 136 | counter+=1 137 | } 138 | counter 139 | }).reduce((a,b) => a+b) 140 | 141 | println("Flows without event priority 1 dropped: "+totalWOEvent) 142 | 143 | /* 144 | 145 | 146 | //scan.setStartRow(Bytes.toBytes("0")) 147 | //scan.setStopRow(Bytes.toBytes(time)) 148 | * 149 | //THIS CODE HAS BUGS 150 | 151 | // TODO HZ: Update flow:inter_time_stddev and flow:packet_size_stddev using "flow:inter_time-%d","flow:packet_size-%d" 152 | 153 | 154 | val prepareRDD = HogRDD. 155 | map { case (id,result) => { 156 | val map: Map[String,String] = new HashMap[String,String] 157 | map.put("flow:id",Bytes.toString(id.get).toString()) 158 | HogHBaseRDD.columns.foreach { column => map.put(column, 159 | Bytes.toString(result.getValue(Bytes.toBytes(column.split(":")(0).toString()),Bytes.toBytes(column.split(":")(1).toString())))) 160 | } 161 | map 162 | } 163 | } 164 | 165 | prepareRDD.filter(_.get("flow:packet_size_stddev").isEmpty()).map({ 166 | map => 167 | val avg=map.get("flow:avg_packet_size").toDouble 168 | var total:Double =0 169 | for(i <- 0 to map.get("flow:packets").toInt-1) 170 | { 171 | total=total+ (map.get("flow:packet_size-"+i.toString).toDouble-avg) * (map.get("flow:packet_size-"+i.toString).toDouble-avg) 172 | } 173 | // TODO HZ: Salve in HBase here 174 | // ID: map.get("flow:id") 175 | map.put("flow:packet_size_stddev",total.toString()) 176 | 177 | val mutation = new RowMutations() 178 | val put = new Put(Bytes.toBytes(map.get("flow:id"))) 179 | put.add(Bytes.toBytes("flow"), Bytes.toBytes("packet_size_stddev"), Bytes.toBytes(Math.sqrt(total))) 180 | mutation.add(put) 181 | HogHBaseRDD.hogzilla_flows.mutateRow(mutation) 182 | }) 183 | */ 184 | 185 | } 186 | 187 | 188 | 189 | def cleanSFlows(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]) 190 | { 191 | 192 | 193 | /** 194 | * This is an illustration of the purge process in a fancy time-line. 
195 | * 196 | * 197 | * Sup1-denseTime tSup1 now 198 | * old flows | dense period | don't touch | future 199 | * -------------------------------------------------------------------------------------------------> 200 | * remove all remove all 201 | * in par 202 | * 203 | * You can change this, but the time below are reasonable 204 | * 205 | * tSup2 = now - timeUnit 206 | * tSup1 = now - 100*timeUnit 207 | * denseTime = 2*timeUnit 208 | * 209 | * 24h = 86400000 210 | * 12h = 43200000 211 | * 06h = 21600000 212 | */ 213 | 214 | // Delete old data from HBase 86400 is one day. You should need even more, depends on your available resources. 215 | 216 | println("Cleaning hogzilla_sflows...") 217 | val now = System.currentTimeMillis 218 | 219 | val timeUnit:Long = 21600000 /* maybe one day (86400000) or half (43200000) or quarter (21600000) */ 220 | val timeSuperior1 = now - timeUnit 221 | val nSplits = 5 /* number of parallel tasks */ 222 | val denseTime = timeUnit*1 223 | val deltaT1 = denseTime/nSplits 224 | //val deltaT2 = (timeSuperior2-timeSuperior1)/nSplits 225 | 226 | println("Removing all older than "+timeSuperior1) 227 | val totalOld = (0 to nSplits).toList.par.map({ k => 228 | 229 | val scan = new Scan 230 | 231 | if(k.equals(0)) 232 | scan.setTimeRange(0, timeSuperior1-denseTime) 233 | else 234 | scan.setTimeRange(timeSuperior1-denseTime + deltaT1*(k-1), timeSuperior1-denseTime + deltaT1*k) 235 | 236 | 237 | println("TimeRange: "+scan.getTimeRange.toString()) 238 | 239 | val scanner = HogHBaseRDD.hogzilla_sflows.getScanner(scan).iterator() 240 | 241 | var counter=0; 242 | while(scanner.hasNext()) 243 | { 244 | HogHBaseRDD.hogzilla_sflows.delete(new Delete(scanner.next().getRow)) 245 | counter+=1 246 | } 247 | 248 | counter 249 | }).reduce( (a,b) => a+b) 250 | 251 | println("Old rows dropped: "+totalOld) 252 | 253 | 254 | } 255 | 256 | 257 | 258 | 259 | 260 | def cleanAuthRecords(HogRDD: 
RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)]) 261 | { 262 | 263 | 264 | 265 | // Delete old data from HBase 86400 is one day. You should need even more, depends on your available resources. 266 | 267 | println("Cleaning hogzilla_authrecords...") 268 | val now = System.currentTimeMillis 269 | 270 | val timeUnit:Long = 21600000 /* maybe one day (86400000) or half (43200000) or quarter (21600000) */ 271 | val timeSuperior1 = now - timeUnit 272 | val nSplits = 5 /* number of parallel tasks */ 273 | val denseTime = timeUnit*1 274 | val deltaT1 = denseTime/nSplits 275 | //val deltaT2 = (timeSuperior2-timeSuperior1)/nSplits 276 | 277 | println("Removing all older than "+timeSuperior1) 278 | val totalOld = (0 to nSplits).toList.par.map({ k => 279 | 280 | val scan = new Scan 281 | 282 | if(k.equals(0)) 283 | scan.setTimeRange(0, timeSuperior1-denseTime) 284 | else 285 | scan.setTimeRange(timeSuperior1-denseTime + deltaT1*(k-1), timeSuperior1-denseTime + deltaT1*k) 286 | 287 | 288 | println("TimeRange: "+scan.getTimeRange.toString()) 289 | 290 | val scanner = HogHBaseRDD.hogzilla_authrecords.getScanner(scan).iterator() 291 | 292 | var counter=0; 293 | while(scanner.hasNext()) 294 | { 295 | HogHBaseRDD.hogzilla_authrecords.delete(new Delete(scanner.next().getRow)) 296 | counter+=1 297 | } 298 | 299 | counter 300 | }).reduce( (a,b) => a+b) 301 | 302 | println("Old rows dropped: "+totalOld) 303 | 304 | 305 | } 306 | 307 | } -------------------------------------------------------------------------------- /src/org/hogzilla/prepare/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. 
You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package object prepare { 26 | 27 | } -------------------------------------------------------------------------------- /src/org/hogzilla/sflow/HogSFlowHistograms.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
18 | */ 19 | /** 20 | * REFERENCES: 21 | * - http://ids-hogzilla.org/xxx/826000101 22 | */ 23 | 24 | 25 | package org.hogzilla.sflow 26 | 27 | import java.net.InetAddress 28 | import scala.collection.mutable.HashMap 29 | import scala.collection.mutable.HashSet 30 | import scala.collection.mutable.Map 31 | import scala.math.floor 32 | import scala.math.log 33 | import org.apache.hadoop.hbase.client.Scan 34 | import org.apache.hadoop.hbase.util.Bytes 35 | import org.apache.spark.SparkContext 36 | import org.apache.spark.rdd.PairRDDFunctions 37 | import org.apache.spark.rdd.RDD 38 | import org.apache.spark.rdd.RDD.rddToPairRDDFunctions 39 | import org.hogzilla.event.HogEvent 40 | import org.hogzilla.event.HogSignature 41 | import org.hogzilla.hbase.HogHBaseHistogram 42 | import org.hogzilla.hbase.HogHBaseRDD 43 | import org.hogzilla.hbase.HogHBaseReputation 44 | import org.hogzilla.histogram.Histograms 45 | import org.hogzilla.histogram.HogHistogram 46 | import org.hogzilla.util.HogFlow 47 | import org.apache.commons.math3.analysis.function.Min 48 | import org.apache.spark.mllib.linalg.Vectors 49 | import org.apache.spark.mllib.linalg.Vector 50 | import org.apache.spark.mllib.clustering.KMeans 51 | import org.hogzilla.hbase.HogHBaseCluster 52 | import org.hogzilla.cluster.HogClusterMember 53 | 54 | 55 | /** 56 | * 57 | */ 58 | object HogSFlowHistograms { 59 | 60 | 61 | val signature = HogSignature(3,"HZ: Top talker identified" , 2,1,826001101,826).saveHBase() //1 62 | 63 | 64 | 65 | /** 66 | * 67 | * 68 | * 69 | */ 70 | def run(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext) 71 | { 72 | 73 | // XXX: Organize it! 
74 | realRun(HogRDD,spark) 75 | 76 | } 77 | 78 | 79 | def isMyIP(ip:String,myNets:Set[String]):Boolean = 80 | { 81 | myNets.map ({ net => if( ip.startsWith(net) ) 82 | { true } 83 | else{false} 84 | }).contains(true) 85 | } 86 | 87 | 88 | /** 89 | * 90 | * 91 | * 92 | */ 93 | def realRun(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext) 94 | { 95 | 96 | val myNetsTemp = new HashSet[String] 97 | 98 | val it = HogHBaseRDD.hogzilla_mynets.getScanner(new Scan()).iterator() 99 | while(it.hasNext()) 100 | { 101 | myNetsTemp.add(Bytes.toString(it.next().getValue(Bytes.toBytes("net"),Bytes.toBytes("prefix")))) 102 | } 103 | 104 | val myNets:scala.collection.immutable.Set[String] = myNetsTemp.toSet 105 | 106 | 107 | val summary1: RDD[(String,Long,Set[Long],HashMap[String,Double])] 108 | = HogRDD 109 | .map ({ case (id,result) => 110 | 111 | val histogramSize = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("size"))).toLong 112 | val histogramName = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))) 113 | val histMap = HogHBaseHistogram.mapByResult(result)._1 114 | 115 | val keys:Set[Long] = histMap.filter({ case (key,value) => 116 | 117 | 118 | try { 119 | histogramName.startsWith("HIST01") & key.toDouble < 10000 & value>0.001 120 | } catch { 121 | case t: Throwable => // t.printStackTrace() // TODO: handle error 122 | histogramName.startsWith("HIST01") & value>0.001 123 | } 124 | 125 | }) 126 | .keySet 127 | .map({ case key => 128 | try { 129 | key.toDouble.toLong 130 | } catch { 131 | case t: Throwable => t.printStackTrace() // TODO: handle error 132 | 0L 133 | } 134 | 135 | }) 136 | .toSet 137 | //"HIST01-"+myIP 138 | 139 | (histogramName,histogramSize,keys,histMap) 140 | }) 141 | .filter({case (histogramName,histogramSize,keys,histMap) => 142 | histogramSize>20 & 143 | isMyIP(histogramName.subSequence(histogramName.lastIndexOf("-")+1, 
histogramName.length()).toString,myNets) 144 | }) 145 | .cache 146 | 147 | val summary1Count = summary1.count() 148 | if(summary1Count.equals(0)) 149 | return 150 | 151 | 152 | val allKeys = summary1 153 | .map(_._3) 154 | .reduce(_++_) 155 | .toList 156 | .sorted 157 | 158 | val vectorSize = allKeys.size 159 | 160 | val summary: RDD[(String,Long,Set[Long],Vector)] 161 | = summary1 162 | .map({ case (histogramName,histogramSize,keys,histMap) => 163 | val vector = 164 | Vectors.dense({ allKeys.map({ key => 165 | 166 | if(keys.contains(key)) 167 | histMap.get(key.toString).get*100D 168 | else 169 | 0D 170 | }).toArray 171 | }) 172 | 173 | (histogramName,histogramSize,keys,vector) 174 | }).cache 175 | 176 | println("Keys: "+allKeys.mkString(",")) 177 | 178 | //(5 to 30 by 5).toList.par 179 | 180 | val k=10 181 | 182 | println("Estimating model, k="+k) 183 | val kmeans = new KMeans() 184 | kmeans.setK(k) 185 | val model = kmeans.run(summary.map(_._4)) 186 | 187 | println("Centroids("+k+"): \n"+model.clusterCenters.mkString(",\n")) 188 | 189 | val kmeansResult=summary.map({ 190 | case (histogramName,histogramSize,keys,vector) => 191 | val cluster = model.predict(vector) 192 | val centroid = model.clusterCenters(cluster) 193 | 194 | val distance=math.sqrt(vector.toArray.zip(centroid.toArray).map({case (p1,p2) => p1-p2}).map(p => p*p).sum) 195 | 196 | val memberIP=histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString 197 | 198 | (cluster,(distance,histogramName,histogramSize,keys,vector,memberIP)) 199 | }).cache 200 | 201 | val mean = kmeansResult.map(_._2._1).mean 202 | val stdDev = kmeansResult.map(_._2._1).stdev 203 | val max = kmeansResult.map(_._2._1).max 204 | val elementsPerCluster = kmeansResult.countByKey().toList.sortBy(_._1).toMap 205 | 206 | println("(Mean,StdDev,Max)("+k+"): "+mean+","+stdDev+","+max+".") 207 | println("Elements per cluster:\n"+elementsPerCluster.mkString(",\n")) 208 | 209 | // Delete saved clusters 210 
| (0 to k by 1).toList.foreach { HogHBaseCluster.deleteCluster(_) } 211 | 212 | 213 | 214 | val members = 215 | kmeansResult 216 | .map({case (cluster,(distance,histogramName,histogramSize,keys,vector,memberIP)) => 217 | (cluster,histogramName.subSequence(histogramName.lastIndexOf("-")+1, histogramName.length()).toString) 218 | }).cache().collect().toArray 219 | 220 | 221 | val grouped = kmeansResult.groupByKey() 222 | grouped 223 | .foreach({ case ((clusterIdx,iterator)) => 224 | 225 | val centroid = model.clusterCenters(clusterIdx) 226 | val centroidMain = allKeys.zip(centroid.toArray)//.filter(_._2>10) 227 | val clusterSize = elementsPerCluster.get(clusterIdx).get 228 | 229 | if(centroidMain.filter(_._2>10).size>0 & clusterSize > 4) 230 | { 231 | println("################################################################\n"+ 232 | "CLUSTER: "+clusterIdx+"\n"+ 233 | "Centroid:\n"+centroidMain.filter(_._2>10).mkString(",")+"\n"+ 234 | "clusterSize: "+clusterSize+"\n") 235 | 236 | HogHBaseCluster.saveCluster(clusterIdx,centroidMain,clusterSize,members.filter(_._1.equals(clusterIdx)).map({_._2})) 237 | } 238 | }) 239 | 240 | 241 | 242 | /* 243 | * Save members 244 | * 245 | */ 246 | 247 | kmeansResult 248 | .foreach({ 249 | case (clusterIdx,(distance,histogramName,histogramSize,ports,vector,memberIP)) => 250 | 251 | val clusterSize = elementsPerCluster.get(clusterIdx).get 252 | val centroidMain = allKeys.zip(model.clusterCenters(clusterIdx).toArray)//.filter(_._2>10) 253 | 254 | HogHBaseCluster.deleteClusterMember(memberIP) 255 | 256 | if(centroidMain.filter(_._2>10).size>0 & clusterSize > 4) 257 | { 258 | val frequency_vector = allKeys.zip(vector.toArray) 259 | 260 | val clusterMember = new HogClusterMember(clusterIdx, centroidMain, clusterSize, allKeys, 261 | memberIP, ports, frequency_vector, distance) 262 | 263 | HogHBaseCluster.saveClusterMember(clusterMember) 264 | } 265 | }) 266 | 267 | /* 268 | grouped 269 | .foreach({ case ((clusterIdx,iterator)) => 270 | 271 
| val centroid = model.clusterCenters(clusterIdx) 272 | val centroidMain = allKeys.zip(centroid.toArray).filter(_._2>20) 273 | val clusterSize = elementsPerCluster.get(clusterIdx).get 274 | 275 | if(clusterSize>10 & centroidMain.size>0) 276 | { 277 | val group=iterator 278 | .map({ case (distance,histogramName,histogramSize,keys,vector) => 279 | val hogAccessHistogram = HogHBaseHistogram 280 | .getHistogram("HIST02" 281 | +histogramName 282 | .subSequence(histogramName.lastIndexOf("-"), histogramName.length())) 283 | (distance,histogramName,histogramSize,keys,vector,hogAccessHistogram) 284 | }) 285 | 286 | 287 | val groupHistogram = 288 | group 289 | .map({case (distance,histogramName,histogramSize,keys,vector,hogAccessHistogram) => hogAccessHistogram}) 290 | .reduce({(hogAccessHistogram1,hogAccessHistogram2) => 291 | Histograms.merge(hogAccessHistogram1,hogAccessHistogram2) 292 | }) 293 | 294 | group 295 | .filter({ case (distance,histogramName,histogramSize,keys,vector,hogAccessHistogram) => 296 | hogAccessHistogram.histSize>20 297 | }) 298 | .map({ case (distance,histogramName,histogramSize,keys,vector,hogAccessHistogram) => 299 | 300 | val groupHistogramMinus = Histograms.difference(groupHistogram,hogAccessHistogram) 301 | 302 | val atypical = Histograms.atypical(groupHistogramMinus.histMap, hogAccessHistogram.histMap) 303 | 304 | if(atypical.size>0) 305 | { 306 | println("################################################################\n"+ 307 | "CLUSTER: "+clusterIdx+"\n"+ 308 | "Centroid:\n"+centroidMain.mkString(",\n")+"\n"+ 309 | "HistSize mean: "+(groupHistogram.histSize/clusterSize)+"\n"+ 310 | "HistSize:"+hogAccessHistogram.histSize+"\n"+ 311 | "Atypicals: "+atypical.mkString(",")+"\n"+ 312 | "Histogram: "+hogAccessHistogram.histName+"\n"+ 313 | hogAccessHistogram.histMap.mkString(",\n")+"\n"+ 314 | "GroupHistogram:\n"+groupHistogram.histMap.mkString(",\n")+"\n") 315 | } 316 | }) 317 | 318 | } 319 | }) 320 | */ 321 | 322 | 323 | 324 | } 325 | 326 | 327 
| } -------------------------------------------------------------------------------- /src/org/hogzilla/sflow/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2015 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package object sflow { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/org/hogzilla/snort/HogSnort.scala: -------------------------------------------------------------------------------- 1 | 2 | package org.hogzilla.snort 3 | 4 | import org.apache.hadoop.hbase.client.Put 5 | import org.apache.hadoop.hbase.util.Bytes 6 | import org.apache.spark.SparkContext 7 | import org.apache.spark.ml.Pipeline 8 | import org.apache.spark.ml.PipelineModel 9 | import org.apache.spark.ml.classification.RandomForestClassificationModel 10 | import org.apache.spark.ml.classification.RandomForestClassifier 11 | import org.apache.spark.ml.feature.StringIndexer 12 | import org.apache.spark.ml.feature.VectorAssembler 13 | import org.apache.spark.rdd.RDD 14 | import org.apache.spark.sql.Row 15 | import org.apache.spark.sql.SQLContext 16 | import org.apache.spark.sql.types.DataTypes 17 | import org.apache.spark.sql.types.StructField 18 | import org.apache.spark.sql.types.StructType 19 | import org.hogzilla.hbase.HogHBaseRDD 20 | import org.hogzilla.util.HogFeature 21 | import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator 22 | import org.apache.spark.rdd.PairRDDFunctions 23 | import org.hogzilla.event.HogEvent 24 | import org.hogzilla.util.HogFlow 25 | import scala.collection.mutable.HashMap 26 | import org.hogzilla.event.HogSignature 27 | 28 | 29 | 30 | 31 | object HogSnort { 32 | 33 | val signature = ( HogSignature(3,"HZ: Suspicious flow detected by similarity with Snort alerts",2,1,826000001,826).saveHBase(), null ) 34 | 35 | 36 | 37 | def run(HogRDD: RDD[(org.apache.hadoop.hbase.io.ImmutableBytesWritable,org.apache.hadoop.hbase.client.Result)],spark:SparkContext):String = { 38 | 39 | val excludedSigs:Array[(String,String)] = Array() // Ex. 
Array((1,1),(1,2)) 40 | val (maxbin,maxdepth,mtry,malThreshold) = (500,30,90,80) 41 | 42 | 43 | val sqlContext = new SQLContext(spark) 44 | import sqlContext.implicits._ 45 | 46 | 47 | val filteredColumns = HogHBaseRDD.columns.filter({_.useOnTrain}).toSeq 48 | val orderedColumns = filteredColumns.zip(0 to filteredColumns.size-1) 49 | 50 | 51 | val convertFn: PartialFunction[Any,Any] = { 52 | case (column:HogFeature,value:String) => 53 | try { 54 | 55 | if(column.ctype.equals("char")) 56 | value 57 | else if(column.ctype.equals("u_int64_t")) 58 | value.toLong 59 | else 60 | value.toInt 61 | } catch { 62 | case t: Throwable => 63 | //println("ERROR - column name: "+column.name) 64 | //t.printStackTrace() 65 | 66 | if(column.ctype.equals("char")) 67 | "" 68 | else if(column.ctype.equals("u_int64_t")) 69 | 0L 70 | else 71 | 0 72 | 73 | } 74 | } 75 | 76 | 77 | val labRDD2 = HogRDD. 78 | map { case (id,result) => { 79 | 80 | val rowId = Bytes.toString(id.get).toString() 81 | 82 | val ndpi_risk = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("ndpi_risk"))) 83 | val event_signature = Bytes.toString(result.getValue(Bytes.toBytes("event"),Bytes.toBytes("signature_id"))) 84 | val event_generator = Bytes.toString(result.getValue(Bytes.toBytes("event"),Bytes.toBytes("generator_id"))) 85 | val src_name = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("src_name"))) 86 | val dst_name = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("dst_name"))) 87 | val ctu_label = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("ctu_label"))) 88 | val duration = Bytes.toString(result.getValue(Bytes.toBytes("flow"),Bytes.toBytes("flow_duration"))) 89 | 90 | val actualclass = 1 // Not known at this time. Supposing all is an actual intrusion. 
91 | 92 | val tuple = orderedColumns 93 | .map ({ case (column,index) => 94 | val ret = result.getValue(Bytes.toBytes(column.getColumn1()),Bytes.toBytes(column.getColumn2())) 95 | val value = Bytes.toString(ret) 96 | if(value==null||value.equals("")) 97 | (column,"-1") 98 | else 99 | (column,value) 100 | }) 101 | 102 | 103 | if(event_signature!=null && !event_signature.isEmpty() 104 | && event_generator!=null && !event_generator.isEmpty()) 105 | (1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) 106 | else if(ndpi_risk!=null && ( ndpi_risk.equals("Safe") || ndpi_risk.equals("Fun") ) ) 107 | (0,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) 108 | else 109 | (-1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) // discard 110 | } 111 | } 112 | 113 | val Signatures:PairRDDFunctions[(String,String),Long] = labRDD2 114 | .map({case (label,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) => 115 | ((event_generator,event_signature),1L) 116 | }) 117 | 118 | val Sarray = Signatures.reduceByKey(_+_).sortBy(_._2, false, 5).collect() 119 | val Sarray_size = Sarray.size 120 | 121 | // Print the found signatures. It may be useful to define what is FP and should be considered to be removed. 
122 | Sarray.foreach({case ((gen,sig),count) => println(s"($gen,$sig) => $count")}) 123 | 124 | 125 | 126 | val labRDD1 = labRDD2 127 | .map({case (label,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) => 128 | 129 | if(event_signature!=null && !event_signature.isEmpty() 130 | && event_generator!=null && !event_generator.isEmpty() && !excludedSigs.contains((event_generator,event_signature))) 131 | (1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) 132 | else if(ndpi_risk!=null && ( ndpi_risk.equals("Safe") || ndpi_risk.equals("Fun") ) ) 133 | (0,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) 134 | else 135 | (-1,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) // discard 136 | 137 | }) 138 | .map({case (label,rowId,src_name,dst_name,actualclass,tuple,event_generator,event_signature,ndpi_risk) => 139 | Row.fromSeq({ 140 | Seq(label,rowId,src_name,dst_name,actualclass)++tuple.collect(convertFn) 141 | }) 142 | 143 | }) 144 | 145 | val ccRDD = labRDD1.filter { x => x.get(0) == 1 } 146 | // val cleanRDD = sqlContext.sparkContext.parallelize(labRDD1.filter { x => x.get(0) == 0 }.takeSample(false, 12000, 123L)) 147 | val cleanRDD = labRDD1.filter { x => x.get(0) == 0 } 148 | 149 | val trainRDD = ccRDD++cleanRDD 150 | 151 | 152 | println("0: "+trainRDD.filter { x => x.get(0) == 0 }.count+" 1:"+trainRDD.filter { x => x.get(0) == 1 }.count) 153 | 154 | 155 | val rawFeaturesStructsSeq = orderedColumns.map({case (column,index) => 156 | if(column.ctype.equals("char")) 157 | StructField(column.name, DataTypes.StringType,true) 158 | else if(column.ctype.equals("u_int64_t")) 159 | StructField(column.name, DataTypes.LongType,false) 160 | else 161 | StructField(column.name, DataTypes.IntegerType,false) 162 | }) 163 | 164 | val dataScheme = new StructType(Array(StructField("label", DataTypes.IntegerType,true), 165 | StructField("rowId", 
DataTypes.StringType,true), 166 | StructField("src_name", DataTypes.StringType,true), 167 | StructField("dst_name", DataTypes.StringType,true), 168 | StructField("actual_class", DataTypes.IntegerType,true)) 169 | ++ rawFeaturesStructsSeq) 170 | 171 | 172 | val data = sqlContext.createDataFrame(trainRDD, dataScheme).cache() 173 | val dataSize = data.count 174 | println("Sample size: "+dataSize) 175 | 176 | 177 | val dataOut = sqlContext.createDataFrame(labRDD1.filter { x => x.get(0).toString.toInt < 0 }, dataScheme).cache() 178 | val dataSizeOut = dataOut.count 179 | println("Sample size Out (not labelled): "+dataSizeOut) 180 | 181 | val trainingData = data 182 | 183 | val stringIndexers = Array(new StringIndexer().setInputCol("label").setOutputCol("indexedLabel").setHandleInvalid("keep"))++ 184 | Array(new StringIndexer().setInputCol("actual_class").setOutputCol("indexedActual_class"))++ 185 | orderedColumns.filter({case (column,index) => column.ctype.equals("char") }) 186 | .map({case (column,index) => new StringIndexer().setInputCol(column.name).setOutputCol(column.name+"CAT").setHandleInvalid("skip").fit(data) }) 187 | 188 | val selectedFeaturesStringArray = orderedColumns.map({case (column,index) => if(column.ctype.equals("char")) column.name+"CAT" else column.name }).toArray 189 | 190 | 191 | val assembler = new VectorAssembler() 192 | .setInputCols(selectedFeaturesStringArray) 193 | .setOutputCol("rawFeatures") 194 | 195 | 196 | 197 | val rf = new RandomForestClassifier() 198 | .setLabelCol("indexedLabel").setFeaturesCol("rawFeatures").setProbabilityCol("probabilities") 199 | .setNumTrees(100).setImpurity("gini").setPredictionCol("prediction").setRawPredictionCol("rawPrediction") 200 | .setMaxBins(maxbin).setMaxDepth(maxdepth).setFeatureSubsetStrategy(mtry.toString) 201 | .setThresholds(Array((100D-malThreshold.toDouble)/100D,malThreshold.toDouble/100D,0D)) 202 | 203 | 204 | val pipeline = new Pipeline().setStages(stringIndexers++Array(assembler,rf)) 205 | 
val model = pipeline.fit(trainingData) 206 | 207 | 208 | // val predictionsOut = model.transform(dataOut.union(testData)) 209 | val predictionsOut = model.transform(dataOut) 210 | 211 | 212 | // ALERT 213 | predictionsOut.filter( $"prediction" > 0 ) // prediction==1 214 | .select("src_name","dst_name","flow:src_port","flow:dst_port","prediction") 215 | .foreach({ row => 216 | val (src,dst,src_port,dst_port,predicted) = (row.get(0),row.get(1),row.get(2),row.get(3),row.get(4)) 217 | 218 | val flowMap: scala.collection.mutable.Map[String,String] = new HashMap[String,String] 219 | flowMap.put("flow:id",System.currentTimeMillis.toString) 220 | val event = new HogEvent(new HogFlow(flowMap,src.toString,dst.toString)) 221 | 222 | event.title = f"HZ: Suspicious flow detected by similarity with Snort alerts" 223 | 224 | event.ports = "" 225 | 226 | event.text = "This flow was detected by Hogzilla based on its similarities with Snort alerts.\n\n"+ 227 | s"$src:$src_port -> $dst:$dst_port" 228 | 229 | event.signature_id = signature._1.signature_id 230 | println("") 231 | 232 | }) 233 | 234 | "" 235 | 236 | } 237 | 238 | 239 | } 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /src/org/hogzilla/snort/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2018 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 | */ 19 | 20 | package org.hogzilla 21 | 22 | /** 23 | * @author pa 24 | */ 25 | package object snort { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /src/org/hogzilla/util/HogConfig.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2016 Paulo Angelo Alves Resende 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License Version 2 as 6 | * published by the Free Software Foundation. You may not use, modify or 7 | * distribute this program under any other version of the GNU General 8 | * Public License. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program; if not, write to the Free Software 17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
/**
 * @author pa
 * Helpers for reading typed values out of a Typesafe Config object,
 * falling back to a caller-supplied default when the key is absent,
 * empty, or cannot be parsed.
 */
object HogConfig {

  /**
   * Read `key` from `config` and convert it to the type named by
   * `valueType`: "Int", "Double", "Long", "Set(Int)" or "Set(String)".
   * Returns `default` when the config is null, the value is empty,
   * the type name is unknown, or conversion fails (the failure is
   * printed, never rethrown).
   */
  def get(config:Config,key:String,valueType:String,default:Any):Any =
  {
    if(config==null)
      return default

    try {

      val value = config.getString(key)

      if(value.isEmpty())
        return default // Return default value

      println(f"Configuration: $key => $value")

      // Strip the "Set(" prefix and ")" suffix, e.g. "Set(1, 2)" -> "1, 2"
      def stripSet(v:String):String =
        ("\\)".r).replaceAllIn(("Set\\(".r).replaceAllIn(v, ""), "")

      valueType match {
        case "Int"    => value.toInt
        case "Double" => value.toDouble
        case "Long"   => value.toLong
        case "Set(Int)" =>
          if(value.equals("Set()"))
            Set()
          else
            stripSet(value).split(",").map({x => x.toInt}).toSet
        case "Set(String)" =>
          if(value.equals("Set()"))
            Set()
          else
            // The println of each trimmed element is kept from the original
            // (debug output some operators may rely on).
            stripSet(value).split(",").map({x => println(x.toString.trim()) ; x.toString.trim()}).toSet
        case _ =>
          default // Unsupported type name: keep the default. Create type first.
      }

    } catch {
      case t: Throwable => t.printStackTrace()
        println(f"Problem parsing $key . Check if it is ok. Using default value")
        return default
    }

  }

  /** `get` specialised to Int. */
  def getInt(config:Config,key:String,default:Any):Int =
  {
    get(config,key,"Int",default).asInstanceOf[Int]
  }

  /** `get` specialised to Long. */
  def getLong(config:Config,key:String,default:Any):Long =
  {
    get(config,key,"Long",default).asInstanceOf[Long]
  }

  /** `get` specialised to Double. */
  def getDouble(config:Config,key:String,default:Any):Double =
  {
    // BUG FIX: was asInstanceOf[Long], which throws ClassCastException for
    // any java.lang.Double produced by get(..., "Double", ...).
    get(config,key,"Double",default).asInstanceOf[Double]
  }

  /** `get` specialised to Set[Int]. */
  def getSetInt(config:Config,key:String,default:Any):Set[Int] =
  {
    get(config,key,"Set(Int)",default).asInstanceOf[Set[Int]]
  }

  /** `get` specialised to Set[String]. */
  def getSetString(config:Config,key:String,default:Any):Set[String] =
  {
    get(config,key,"Set(String)",default).asInstanceOf[Set[String]]
  }

}

/**
 * @author pa
 * One feature column used by Hogzilla's ML pipelines. `name` is expected
 * to be "family:qualifier" (HBase-style); getColumn1/getColumn2 split it.
 */
case class HogFeature(name:String,ctype:String,useOnTrain:Boolean = true, isNumeric:Boolean = true /*or categorical*/, label:Int=0) {

  // Position of this feature in the assembled feature vector.
  var index=0;
  // Maps each observed categorical value to its numeric encoding.
  var possibleCategoricalValues:Map[String,Int] = new HashMap;

  // Part of `name` before ':' (column family).
  def getColumn1():String = {
    name.split(":")(0).toString()
  }

  // Part of `name` after ':' (column qualifier).
  // NOTE(review): assumes `name` always contains a ':' — confirm callers.
  def getColumn2():String = {
    name.split(":")(1).toString()
  }
}
/**
 * @author pa
 * Lightweight container for one network flow: a mutable attribute map
 * plus the two endpoint addresses.
 */
case class HogFlow(map:Map[String,String],lower_ip:String,upper_ip:String) {

  /**
   * Look up `key` in the flow's attribute map.
   * Throws NoSuchElementException when the key is absent (same contract
   * as the previous Option.get-based lookup).
   */
  def get(key:String):String =
  {
    map(key)
  }
}
/**
 * @author pa
 * Great-circle distance helpers based on the haversine formula.
 */
object HogGeograph {

  val R = 6372.8 //radius in km

  /**
   * Haversine distance in km between (lat1,lon1) and (lat2,lon2),
   * all in decimal degrees.
   */
  def haversineDistance(lat1:Double, lon1:Double, lat2:Double, lon2:Double):Double =
  {
    val halfDLat = (lat2 - lat1).toRadians / 2
    val halfDLon = (lon2 - lon1).toRadians / 2

    val a = sin(halfDLat) * sin(halfDLat) +
            sin(halfDLon) * sin(halfDLon) * cos(lat1.toRadians) * cos(lat2.toRadians)
    2 * R * asin(sqrt(a))
  }


  /**
   * Same as haversineDistance, but each argument is a "lat,lon" string.
   * Any parse failure yields a very large sentinel distance instead of
   * an exception (deliberate best-effort behavior).
   */
  def haversineDistanceFromStrings(coords1:String, coords2:String):Double =
  {
    try {
      val Array(lat1, lon1) = coords1.split(",").map({ x => x.toDouble })
      val Array(lat2, lon2) = coords2.split(",").map({ x => x.toDouble })

      haversineDistance(lat1, lon1, lat2, lon2)
    } catch {
      case t: Throwable => // t.printStackTrace()
        // Return a large distance
        999999999D
    }
  }

}
/**
 * @author pa
 * Small string helpers.
 */
object HogStringUtils {

  /**
   * Uppercase hexadecimal MD5 digest of `string`.
   * Replaces javax.xml.bind.DatatypeConverter.printHexBinary, which was
   * removed from the JDK in Java 11; output is byte-identical.
   */
  def md5(string:String):String =
  {
    // NOTE(review): keeps the original getBytes() call, which uses the
    // platform default charset — confirm this is acceptable if non-ASCII
    // input is possible.
    MessageDigest.getInstance("MD5")
      .digest(string.getBytes)
      .map({ b => f"$b%02X" })
      .mkString
  }
}
package org.hogzilla


// Empty package object for org.hogzilla.util; reserved for future
// package-wide helpers shared by the util classes.
package object util {

}