├── README.md ├── LICENSE └── yeti_to_elasticsearch.py /README.md: -------------------------------------------------------------------------------- 1 | # YetiToElastic 2 | YETI (Your Everyday Threat Intelligence) Integration to Elastic Stack. 3 | 4 | Additional Information in [enSilo's BreakingMalware Blog](https://blog.ensilo.com/yeti-eslasticstack). 5 | 6 | Usage Example: 7 | 8 | Bash: 9 | ```bash 10 | python3 yeti_to_elasticsearch.py "HOSTNAME/IP" --elastic_index="yeti-index" --elastic_use_ssl 11 | ``` 12 | 13 | Python: 14 | ```python 15 | from yeti_to_elasticsearch import YetiFeedSender, set_logging 16 | 17 | set_logging() 18 | sender = YetiFeedSender("yeti-feeds", excluded_feeds=("AsproxTracker"), 19 | elastic_hostname="="", 20 | elastic_port=) 21 | sender.extract_and_send() 22 | ``` 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 BreakingMalwareResearch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /yeti_to_elasticsearch.py: -------------------------------------------------------------------------------- 1 | # Author: Chen Erlich (Twitter - @chen_erlich) 2 | # YETI Version: 1 3 | # Elastic Stack Version: 6.7 4 | 5 | import logging 6 | import sys 7 | import textwrap 8 | import time 9 | 10 | from logging.handlers import RotatingFileHandler 11 | from bson.json_util import DEFAULT_JSON_OPTIONS 12 | from pymongo import MongoClient, errors 13 | from bson import json_util 14 | from datetime import datetime 15 | from elasticsearch import Elasticsearch 16 | from pymongo.errors import CursorNotFound, AutoReconnect 17 | from ssl import SSLWantReadError 18 | 19 | 20 | # Logging config 21 | def set_logging(): 22 | global logger 23 | 24 | logging.basicConfig(format='%(asctime)s - %(lineno)d - %(funcName)s - %(levelname)s - %(message)s', 25 | level=logging.INFO) 26 | DEFAULT_JSON_OPTIONS.datetime_representation = 2 27 | 28 | logger = logging.getLogger("yeti_to_elastic") 29 | 30 | formatter = logging.Formatter('%(asctime)s - %(lineno)d - %(funcName)s - %(levelname)s - %(message)s') 31 | 32 | # You may change here the path for the log file 33 | handler = RotatingFileHandler('yeti_to_elastic.log', maxBytes=20000, backupCount=5) 34 | handler.setFormatter(formatter) 35 | handler.setLevel(logging.INFO) 36 | 37 | logger.addHandler(handler) 38 | 39 | 40 | class YetiFeedSender(object): 41 | def __init__(self, elastic_index, excluded_feeds=set(), mongo_client=None, mongo_hostname="localhost", 42 | elastic_instance=None, elastic_hostname=None, elastic_port=9200, elastic_user=None, elastic_pass=None, 43 | elastic_use_ssl=None, elastic_verify_certs=None): 44 | """ 45 | This class connects to YETI's MongoDB and to Elasticsearch. 46 | It parses the observable collection in YETI's MongoDB and sends to Elasticsearch. 47 | :param elastic_index: Elastic Stack index name. 48 | :param excluded_feeds: Set that includes feeds to exclude from indexing. 49 | :param mongo_client: Mongodb client. 50 | :param mongo_hostname: Mongodb hostname. 51 | :param elastic_instance: Elastic Stack connection instance. 52 | :param elastic_hostname: Elastic Stack hostname. 53 | :param elastic_port: Elastic Stack indexing port. 54 | :param elastic_user: Elastic Stack user. 55 | :param elastic_pass: Elastic Stack password. 56 | :param elastic_use_ssl: Boolean. Flag to determine if the connection to Elastic Stack should use SSL. 57 | :param elastic_verify_certs: Boolean. Flag to determine if the connection to Elastic Stack should verify the certificate. 58 | """ 59 | 60 | self.elastic_index = elastic_index 61 | self.excluded_feeds = excluded_feeds 62 | 63 | if mongo_client: 64 | self.mongo_client = mongo_client 65 | else: 66 | mongo_hostname = mongo_hostname 67 | self.create_mongo_connection(mongo_hostname) 68 | 69 | if elastic_instance: 70 | self.elastic_instance = elastic_instance 71 | else: 72 | elastic_hostname = elastic_hostname 73 | elastic_port = elastic_port 74 | elastic_user = elastic_user 75 | elastic_pass = elastic_pass 76 | elastic_use_ssl = elastic_use_ssl 77 | elastic_verify_certs = elastic_verify_certs 78 | 79 | self.create_elastic_connection(elastic_hostname, elastic_port, use_ssl=elastic_use_ssl, 80 | verify_certs=elastic_verify_certs, username=elastic_user, 81 | password=elastic_pass) 82 | 83 | def create_mongo_connection(self, hostname="localhost"): 84 | """ 85 | Creates a connection to YETI's MongoDB. 86 | :param hostname: Hostname to connect to. Default is "localhost" 87 | :return: None 88 | """ 89 | 90 | try: 91 | # Try connecting to MongoDB for 10ms 92 | self.mongo_client = MongoClient('mongodb://{}:27017/'.format(hostname), serverSelectionTimeoutMS=10) 93 | self.mongo_client.server_info() 94 | except errors.ServerSelectionTimeoutError as mongo_conn_err: 95 | logger.exception(("MongoDB connection issue occurred. " 96 | "Error message: " + str(mongo_conn_err))) 97 | sys.exit(1) 98 | 99 | def create_elastic_connection(self, hostname, port, use_ssl=True, verify_certs=False, username=None, password=None): 100 | """ 101 | Creates an Elasticsearch connection. 102 | :param hostname: Elasticsearch hostname/ip address 103 | :param port: Elasticsearch indexing port 104 | :param use_ssl: Is the server uses ssl or not 105 | :param verify_certs: Should the request verify the certification 106 | :param username: Username in order to connect to Elasticsearch 107 | :param password: Password in order to connect to Elasticsearch 108 | :return: None 109 | """ 110 | 111 | if username and password: 112 | if use_ssl: 113 | self.elastic_instance = Elasticsearch( 114 | hosts=[{'host': hostname, 'port': port}], 115 | http_auth=(username, password), 116 | use_ssl=use_ssl, 117 | verify_certs=verify_certs) 118 | else: 119 | self.elastic_instance = Elasticsearch(hosts=[{'host': hostname, 'port': port}], 120 | http_auth=(username, password)) 121 | else: 122 | if use_ssl: 123 | self.elastic_instance = Elasticsearch(hosts=[{'host': hostname, 'port': port}], 124 | use_ssl=use_ssl, 125 | verify_certs=verify_certs) 126 | else: 127 | self.elastic_instance = Elasticsearch(hosts=[{'host': hostname, 'port': port}]) 128 | 129 | # Check if there is a connection to elastic 130 | if not self.elastic_instance.ping(): 131 | logger.error("Elastic Stack connection issue occurred.") 132 | raise ConnectionError 133 | 134 | @staticmethod 135 | def format_observable(observable, excluded_feeds=()): 136 | """ 137 | Formats an observable to Elasticsearch accepted structure 138 | :param observable: observable dict 139 | :param excluded_feeds: excluded_feeds set 140 | :return: deserialized_json str 141 | """ 142 | 143 | formatted_dict = dict() 144 | formatted_dict["@timestamp"] = datetime.now().isoformat() 145 | 146 | # Loop observable dictionary 147 | for key in observable.keys(): 148 | if key == "_id": 149 | formatted_dict["id_generation_time"] = observable[key].generation_time.isoformat() 150 | elif key == "parsed_url": 151 | for parsed_url_key in observable[key].keys(): 152 | formatted_dict["parsed_url.{}".format(parsed_url_key)] = observable[key][parsed_url_key] 153 | elif key == "created": 154 | formatted_dict["created"] = observable[key].isoformat() 155 | elif key == "_cls": 156 | formatted_dict["cls"] = observable[key] 157 | elif key == "tags": 158 | index = 0 159 | while index < len(observable[key]): 160 | observable[key][index]["first_seen"] = observable[key][index]["first_seen"].isoformat() 161 | observable[key][index]["last_seen"] = observable[key][index]["last_seen"].isoformat() 162 | index += 1 163 | formatted_dict[key] = observable[key] 164 | elif key == "last_tagged": 165 | formatted_dict[key] = observable[key].isoformat() 166 | elif key == "context": 167 | for context_entry_dict in observable[key]: 168 | 169 | if context_entry_dict["source"] in excluded_feeds: 170 | observable[key].remove(context_entry_dict) 171 | 172 | # If we excluded all feeds, return an empty string 173 | if not observable[key]: 174 | logger.warning("The value: {} from the date {} was not indexed".format(observable["value"], 175 | formatted_dict["created"])) 176 | return '' 177 | 178 | formatted_dict[key] = observable[key] 179 | else: 180 | 181 | # Check for doc values of FILES. 182 | # If it's a FILE, remove the "FILE:" prefix from the value 183 | if key == "value" and str(observable[key]).startswith("FILE:"): 184 | observable[key] = observable[key][5:] 185 | 186 | formatted_dict[key] = observable[key] 187 | 188 | # Format the dict to json. Supports mongodb structure representation 189 | json_to_elastic = json_util.dumps(formatted_dict) 190 | 191 | return json_to_elastic 192 | 193 | def extract_and_send(self, elastic_index=None): 194 | """ 195 | This method extracts data out of the mongodb and sends in to elasticsearch. 196 | :param elastic_index: Used if there is a need to change the elastic index 197 | :return: None 198 | """ 199 | 200 | if elastic_index: 201 | self.elastic_index = elastic_index 202 | 203 | db = self.mongo_client.yeti 204 | observables = db.observable 205 | 206 | response = '' 207 | processed = 0 208 | 209 | while True: 210 | try: 211 | 212 | # Loop observables 213 | for observable in observables.find(no_cursor_timeout=True).skip(processed): 214 | 215 | processed += 1 216 | json_to_index = self.format_observable(observable, excluded_feeds=self.excluded_feeds) 217 | 218 | # If the json to index is empty, don't index 219 | if not json_to_index: 220 | continue 221 | 222 | try: 223 | 224 | # Index to elasticsearch 225 | response = self.elastic_instance.index(index=self.elastic_index, doc_type="yeti_feed", 226 | id=observable.get("_id"), 227 | body=json_to_index, 228 | request_timeout=30) 229 | except TypeError as type_error: 230 | logger.warning(type_error) 231 | except SSLWantReadError as ssl_error: 232 | logger.error(ssl_error) 233 | except Exception as e: 234 | logger.error(str(e)) 235 | 236 | if response.get("result") == "created": 237 | logger.info( 238 | "Created {} in index {} - Processed: {}".format(response.get("_id"), self.elastic_index, 239 | processed)) 240 | elif response.get("result") == "updated": 241 | logger.info( 242 | "Updated {} in index {} - Processed: {}".format(response.get("_id"), self.elastic_index, 243 | processed)) 244 | else: 245 | logger.warning( 246 | "Failed to index {} in index {} - Processed: {}".format(response.get("_id"), 247 | self.elastic_index, 248 | processed)) 249 | 250 | logger.info("Finished processing all events. Sleeping for 30 seconds.") 251 | time.sleep(30) 252 | 253 | except CursorNotFound: 254 | logger.warning("Lost cursor. Retry with skip") 255 | except AutoReconnect as e: 256 | logger.error("Connection Error: " + str(e)) 257 | except Exception as e: 258 | logger.error("Unknown Error: {}".format(str(e))) 259 | 260 | 261 | def main(): 262 | import argparse 263 | set_logging() 264 | 265 | parser = argparse.ArgumentParser( 266 | prog='YetiToElastic', 267 | formatter_class=argparse.RawDescriptionHelpFormatter, 268 | epilog=textwrap.dedent('''\ 269 | Example: 270 | sender = YetiFeedSender("yeti-feeds", 271 | elastic_hostname="" 272 | excluded_feeds=("AsproxTracker", "UrlHaus"), 273 | elastic_user="ChenErlich", 274 | elastic_pass="YETI", 275 | elastic_use_ssl) 276 | sender.extract_and_send() 277 | 278 | ''')) 279 | parser.add_argument('--elastic_index', type=str, default="yeti-feeds", help='Elastic Stack index name') 280 | parser.add_argument('--excluded_feeds', type=set, default=set(), help='Set of feeds to exclude from indexing') 281 | parser.add_argument('--mongo_hostname', type=str, default="localhost", help='Mongodb hostname') 282 | parser.add_argument('elastic_hostname', type=str, help='Elastic Stack hostname/ip') 283 | parser.add_argument('--elastic_port', type=int, default=9200, help='Elastic Stack index name') 284 | parser.add_argument('--elastic_user', type=str, help='Elastic Stack user') 285 | parser.add_argument('--elastic_pass', type=str, help='Elastic Stack password') 286 | parser.add_argument('--elastic_use_ssl', action="store_true", default=False, 287 | help='Flag to determine if the connection to Elastic Stack should use SSL') 288 | parser.add_argument('--elastic_verify_certs', action="store_true", default=False, 289 | help='Flag to determine if the connection to Elastic Stack should verify the certificate') 290 | try: 291 | args = parser.parse_args() 292 | except SystemExit: 293 | parser.print_help() 294 | exit() 295 | 296 | # Note: There are elastic_instance and mongo_client arguments that can be delivered which are not 297 | # present. They are relevant if the YetiFeedSender will be called from a 3rd party and not directly from main. 298 | 299 | sender = YetiFeedSender(args.elastic_index, 300 | excluded_feeds=args.excluded_feeds, 301 | mongo_hostname=args.mongo_hostname, 302 | elastic_hostname=args.elastic_hostname, 303 | elastic_port=args.elastic_port, 304 | elastic_user=args.elastic_user, 305 | elastic_pass=args.elastic_pass, 306 | elastic_use_ssl=args.elastic_use_ssl, 307 | elastic_verify_certs=args.elastic_verify_certs) 308 | 309 | sender.extract_and_send() 310 | 311 | 312 | if __name__ == '__main__': 313 | main() 314 | --------------------------------------------------------------------------------