├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── config.ini ├── icapserver.py ├── requirements.txt ├── rules └── .gitkeep ├── server.py ├── test ├── test_base_icap_request_handler.py └── test_icap_error.py └── yarascanner.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io/api/python,sublimetext 2 | 3 | ### Python ### 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv/ 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | ### SublimeText ### 97 | # cache files for sublime text 98 | *.tmlanguage.cache 99 | *.tmPreferences.cache 100 | *.stTheme.cache 101 | 102 | # workspace files are user-specific 103 | *.sublime-workspace 104 | 105 | # project files should be checked into the repository, unless a significant 106 | # proportion of contributors will probably not be using SublimeText 107 | # *.sublime-project 108 | 109 | # sftp configuration file 110 | sftp-config.json 111 | 112 | # Package control specific files 113 | Package Control.last-run 114 | Package Control.ca-list 115 | Package Control.ca-bundle 116 | Package Control.system-ca-bundle 117 | Package Control.cache/ 118 | Package Control.ca-certs/ 119 | Package Control.merged-ca-bundle 120 | Package Control.user-ca-bundle 121 | oscrypto-ca-bundle.crt 122 | bh_unicode_properties.cache 123 | 124 | # Sublime-github package stores a github token in this file 125 | # https://packagecontrol.io/packages/sublime-github 126 | GitHub.sublime-settings 127 | 128 | # End of https://www.gitignore.io/api/python,sublimetext 129 | 130 | ## Application Specific 131 | rules/*.yar 132 | yaraicap.log 133 | squid.conf 134 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Ramadhan Amizudin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Yara Python ICAP Server 2 | ===================== 3 | An ICAP Server with yara scanner for URL and content. 4 | 5 | ## Requirement 6 | * Squid Proxy 3.5 7 | * Python 3 8 | 9 | ## Squid Configuration 10 | ``` 11 | icap_enable on 12 | icap_preview_enable off 13 | icap_send_client_ip on 14 | icap_send_client_username on 15 | icap_service service_resp respmod_precache bypass=1 icap://127.0.0.1:1344/yara 16 | adaptation_access service_resp allow all 17 | ``` 18 | 19 | ## Running 20 | ``` 21 | $ git clone https://github.com/RamadhanAmizudin/python-icap-yara 22 | $ pip install -r requirements.txt 23 | $ python server.py 24 | ``` 25 | 26 | ## Config File 27 | ``` 28 | [config] 29 | content_rules = 30 | url_rules = 31 | content_dir = 32 | ``` 33 | 34 | ## Log Content 35 | ``` 36 | { 37 | "content": "", 38 | "request_header": { 39 | "accept": [ 40 | "*/*" 41 | ], 42 | "host": [ 43 | "blog.honeynet.org.my" 44 | ], 45 | "user-agent": [ 46 | "curl/7.47.0" 47 | ] 48 | }, 49 | "response_header": { 50 | "content-type": [ 51 | "text/html; charset=UTF-8" 52 | ], 53 | "date": [ 54 | "Mon, 06 Feb 2017 15:55:31 GMT" 55 | ], 56 | "link": [ 57 | "; rel=\"https://api.w.org/\"", 58 | "; rel=shortlink" 59 | ], 60 | "server": [ 61 | "Apache/2.2.22 (Ubuntu)" 62 | ], 63 | "vary": [ 64 | "Accept-Encoding" 65 | ] 66 | }, 67 | "rules": [ 68 | "list of rules triggered" 69 | ] 70 | } 71 | ``` 72 | 73 | Questions & Comments 74 | ===================== 75 | If you encounter a bug, please feel free to post it on GitHub. For questions or comments. 76 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [config] 2 | content_rules = ./rules/content.yar 3 | url_rules = ./rules/url.yar 4 | content_dir = /tmp/save/ 5 | scan_url = True 6 | -------------------------------------------------------------------------------- /icapserver.py: -------------------------------------------------------------------------------- 1 | #-*-: coding: utf-8 2 | """ 3 | # ===================================================================================================================== # 4 | # Implements an ICAP server framework # 5 | # For the ICAP specification, see RFC 3507 # 6 | # ===================================================================================================================== # 7 | # Project Name: ICAP Server framework 8 | # Version: 1.2 9 | # Author: Nikolay Ivanov 10 | # Author Email: peoplecantfly@gmail.com 11 | # License: MIT License 12 | # Project URL: https://github.com/Peoplecantfly/icapserver 13 | """ 14 | 15 | import sys 16 | import time 17 | import random 18 | import socket 19 | import string 20 | import logging 21 | import urllib.parse 22 | import socketserver 23 | 24 | __version__ = "1.2" 25 | 26 | __all__ = ['ICAPServer', 'BaseICAPRequestHandler', 'ICAPError'] 27 | 28 | LOG = logging.getLogger(__name__) 29 | level = logging.INFO 30 | logging.basicConfig(level=level, format="[%(asctime)s][%(name)s][%(levelname)s] %(message)s", filename="") 31 | 32 | class ICAPError(Exception): 33 | """ 34 | Signals a protocol error. 35 | """ 36 | def __init__(self, code=500, message=None): 37 | if message is None: 38 | if code not in BaseICAPRequestHandler._responses: 39 | message = 'Unknown error code' 40 | else: 41 | short, long = BaseICAPRequestHandler._responses[code] 42 | message = short 43 | 44 | super(ICAPError, self).__init__(message) 45 | self.code = code 46 | msg = 'Code: %d Message: %s' % (code, message) 47 | LOG.error(msg) 48 | 49 | class ICAPServer(socketserver.TCPServer): 50 | """ 51 | ICAP Server 52 | This is a simple TCPServer, that allows address reuse. 53 | """ 54 | allow_reuse_address = 1 55 | 56 | class BaseICAPRequestHandler(socketserver.StreamRequestHandler): 57 | """ 58 | ICAP request handler base class. 59 | You have to subclass it and provide methods for each service 60 | endpoint. Every endpoint MUST have an _OPTION method, 61 | and _REQMOD or a _RESPMOD method or both. 62 | """ 63 | # The version of the Python. 64 | _sys_version = "Python/" + sys.version.split()[0] 65 | 66 | # The version of the ICAP protocol. 67 | _protocol_version = "ICAP/1.0" 68 | 69 | # The server software version. 70 | _server_version = "ICAP/" + __version__ 71 | 72 | # Table mapping response codes to messages; entries have the 73 | # form {code: (shortmessage, longmessage)}. 74 | # See RFC 2616 and RFC 3507 75 | _responses = { 76 | 100: ('Continue', 'Request received, please continue'), 77 | 101: ('Switching Protocols', 'Switching to new protocol; obey Upgrade header'), 78 | 79 | 200: ('OK', 'Request fulfilled, document follows'), 80 | 201: ('Created', 'Document created, URL follows'), 81 | 202: ('Accepted', 'Request accepted, processing continues off-line'), 82 | 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), 83 | 204: ('No Content', 'Request fulfilled, nothing follows'), 84 | 205: ('Reset Content', 'Clear input form for further input.'), 85 | 206: ('Partial Content', 'Partial content follows.'), 86 | 87 | 300: ('Multiple Choices', 'Object has several resources -- see URI list'), 88 | 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), 89 | 302: ('Found', 'Object moved temporarily -- see URI list'), 90 | 303: ('See Other', 'Object moved -- see Method and URL list'), 91 | 304: ('Not Modified', 'Document has not changed since given time'), 92 | 305: ('Use Proxy', 'You must use proxy specified in Location to access this resource.'), 93 | 307: ('Temporary Redirect', 'Object moved temporarily -- see URI list'), 94 | 95 | 400: ('Bad Request', 'Bad request syntax or unsupported method'), 96 | 401: ('Unauthorized', 'No permission - see authorization schemes'), 97 | 402: ('Payment Required', 'No payment - see charging schemes'), 98 | 403: ('Forbidden', 'Request forbidden - authorization will not help'), 99 | 404: ('Not Found', 'Nothing matches the given URI'), 100 | 405: ('Method Not Allowed', 'Specified method is invalid for this resource.'), 101 | 406: ('Not Acceptable', 'URI not available in preferred format.'), 102 | 407: ('Proxy Authentication Required', 'You must authenticate with this proxy before proceeding.'), 103 | 408: ('Request Timeout', 'Request timed out; try again later.'), 104 | 409: ('Conflict', 'Request conflict.'), 105 | 410: ('Gone', 'URI no longer exists and has been permanently removed.'), 106 | 411: ('Length Required', 'Client must specify Content-Length.'), 107 | 412: ('Precondition Failed', 'Precondition in headers is false.'), 108 | 413: ('Request Entity Too Large', 'Entity is too large.'), 109 | 414: ('Request-URI Too Long', 'URI is too long.'), 110 | 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), 111 | 416: ('Requested Range Not Satisfiable', 'Cannot satisfy request range.'), 112 | 417: ('Expectation Failed', 'Expected condition could not be satisfied.'), 113 | 451: ('451 Unavailable For Legal Reasons', 'Resource access is denied for legal reasons, \ 114 | e.g. censorship or government-mandated blocked access.'), 115 | 116 | 500: ('Internal Server Error', 'Server got itself in trouble'), 117 | 501: ('Not Implemented', 'Server does not support this operation'), 118 | 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), 119 | 503: ('Service Unavailable', 'The server cannot process the request due to a high load'), 120 | 504: ('Gateway Timeout', 'The gateway server did not receive a timely response'), 121 | 505: ('Protocol Version Not Supported', 'Cannot fulfill request.'), 122 | } 123 | 124 | _weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 125 | 126 | _monthname = [None, 127 | 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 128 | 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 129 | 130 | def _read_status(self): 131 | """ 132 | Read a HTTP or ICAP status line from input stream. 133 | """ 134 | status = self.rfile.readline().strip().split(' ', 2) 135 | LOG.debug(status) 136 | return status 137 | 138 | def _read_request(self): 139 | """ Read a HTTP or ICAP request line from input stream. 140 | """ 141 | request = self.rfile.readline().strip().split(' ', 2) 142 | LOG.debug(request) 143 | return request 144 | 145 | def _read_headers(self): 146 | """ 147 | Read a sequence of header lines. 148 | """ 149 | 150 | headers = {} 151 | while True: 152 | line = self.rfile.readline().strip() 153 | if line == '': 154 | break 155 | k, v = line.split(':', 1) 156 | headers[k.lower()] = headers.get(k.lower(), []) + [v.strip()] 157 | LOG.debug(headers) 158 | return headers 159 | 160 | def read_chunk(self): 161 | """ 162 | Read a ICAP chunk 163 | Also handles the ieof chunk extension defined by the ICAP 164 | protocol by setting the ieof variable to True. It returns an 165 | empty line if the last chunk is read. Reading after the last 166 | chunks will return empty strings. 167 | """ 168 | 169 | # Don't try to read when there's no body 170 | if not self.has_body or self.eob: 171 | self.eob = True 172 | return '' 173 | 174 | line = self.rfile.readline() 175 | if line == '': 176 | # Connection was probably closed 177 | self.eob = True 178 | return '' 179 | 180 | line = line.strip() 181 | 182 | arr = line.split(';', 1) 183 | 184 | chunk_size = 0 185 | try: 186 | chunk_size = int(arr[0], 16) 187 | except ValueError: 188 | raise ICAPError(400, 'Protocol error, could not read chunk') 189 | 190 | # Look for ieof chunk extension 191 | if len(arr) > 1 and arr[1].strip() == 'ieof': 192 | self.ieof = True 193 | 194 | value = self.rfile.read(chunk_size) 195 | self.rfile.read(2) 196 | 197 | if value == '': 198 | self.eob = True 199 | 200 | return value 201 | 202 | def send_chunk(self, data): 203 | """ 204 | Send a chunk of data 205 | When finished writing, an empty chunk with data='' must be written. 206 | """ 207 | 208 | l = hex(len(data))[2:] 209 | self.wfile.write(l + '\r\n' + data + '\r\n') 210 | 211 | def cont(self): 212 | """ 213 | Send a 100 continue reply 214 | Useful when the client sends a preview request, and we have 215 | to read the entire message body. After this command, read_chunk 216 | can safely be called again. 217 | """ 218 | 219 | if self.ieof: 220 | raise ICAPError(500, 'Tried to continue on ieof condition') 221 | 222 | self.wfile.write(self._protocol_version + ' ' + '100 Continue\r\n\r\n') 223 | 224 | self.eob = False 225 | 226 | def set_enc_status(self, status): 227 | """ 228 | Set encapsulated status in response 229 | ICAP responses can only contain one encapsulated header section. 230 | Such section is either an encapsulated HTTP request, or a 231 | response. This method can be called to set encapsulated HTTP 232 | response's status line. 233 | """ 234 | 235 | self.enc_status = status 236 | msg = 'Encapsulated status: %s' % status 237 | LOG.debug(msg) 238 | 239 | def set_enc_request(self, request): 240 | """ 241 | Set encapsulated request line in response. 242 | ICAP responses can only contain one encapsulated header section. 243 | Such section is either an encapsulated HTTP request, or a 244 | response. This method can be called to set encapsulated HTTP 245 | request's request line. 246 | """ 247 | 248 | self.enc_request = request 249 | msg = 'Encapsulated request: %s' % request 250 | LOG.debug(msg) 251 | 252 | def set_enc_header(self, header, value): 253 | """ 254 | Set an encapsulated header to the given value. 255 | Multiple sets will cause the header to be sent multiple times. 256 | """ 257 | if not isinstance(header, str): 258 | raise ICAPError(500, 'Header must be a string, not %s.' % type(header)) 259 | 260 | if not isinstance(value, str): 261 | raise ICAPError(500, 'Header value must be a string, not %s.' % type(value)) 262 | 263 | self.enc_headers[header] = self.enc_headers.get(header, []) + [value] 264 | msg = 'Encapsulated header: %s : %s' % (header, value) 265 | LOG.debug(msg) 266 | 267 | def update_enc_header(self, header, value): 268 | """ 269 | Update an encapsulated header to the given value 270 | """ 271 | 272 | if not isinstance(header, str): 273 | raise ICAPError(500, 'Header must be a string, not %s.' % type(header)) 274 | 275 | if not isinstance(value, str): 276 | raise ICAPError(500, 'Header value must be a string, not %s.' % type(value)) 277 | 278 | for k in self.enc_headers: 279 | if k.lower() == header.lower(): 280 | self.enc_headers[k] = [value] 281 | msg = 'Encapsulated header %s updated to the new value %s' % (header, value) 282 | LOG.debug(msg) 283 | return 284 | 285 | msg = 'Encapsulated header %s not found' % header 286 | LOG.error(msg) 287 | 288 | def delete_enc_header(self, header): 289 | """ 290 | Delete an encapsulated header. 291 | """ 292 | 293 | if not isinstance(header, str): 294 | raise ICAPError(500, 'Header must be a string, not %s.' % type(header)) 295 | 296 | for k in self.enc_headers: 297 | if k.lower() == header.lower(): 298 | del self.enc_headers[k] 299 | msg = 'Encapsulated header %s deleted' % header 300 | LOG.debug(msg) 301 | return 302 | 303 | msg = 'Encapsulated header %s not found' % header 304 | LOG.error(msg) 305 | 306 | def set_icap_response(self, code, message=None): 307 | """ 308 | Sets the ICAP response's status line and response code. 309 | """ 310 | 311 | try: 312 | short, long = self._responses[code] 313 | except KeyError: 314 | short, long = '???', '???' 315 | if message is None: 316 | message = short 317 | 318 | if not isinstance(message, str): 319 | raise ICAPError(500) 320 | 321 | self.icap_response = self._protocol_version + ' ' + str(code) + ' ' + message 322 | self.icap_response_code = code 323 | msg = 'ICAP response: %s' % self.icap_response 324 | LOG.debug(msg) 325 | 326 | def set_icap_header(self, header, value): 327 | """ 328 | Set an ICAP header to the given value 329 | Multiple sets will cause the header to be sent multiple times. 330 | """ 331 | 332 | if not isinstance(header, str): 333 | raise ICAPError(500, 'Header must be a string, not %s.' % type(header)) 334 | 335 | if not isinstance(value, str): 336 | raise ICAPError(500, 'Header value must be a string, not %s.' % type(value)) 337 | 338 | self.icap_headers[header] = self.icap_headers.get(header, []) + [value] 339 | msg = 'ICAP header: %s : %s' % (header, value) 340 | LOG.debug(msg) 341 | 342 | def update_icap_header(self, header, value): 343 | """ 344 | Update an ICAP header to the given value 345 | """ 346 | 347 | if not isinstance(header, str): 348 | raise ICAPError(500, 'Header must be a string, not %s.' % type(header)) 349 | 350 | if not isinstance(value, str): 351 | raise ICAPError(500, 'Header value must be a string, not %s.' % type(value)) 352 | 353 | for k in self.icap_headers: 354 | if k.lower() == header.lower(): 355 | self.icap_headers[k] = [value] 356 | msg = 'ICAP header %s updated to the new value %s' % (header, value) 357 | LOG.debug(msg) 358 | return 359 | 360 | msg = 'ICAP header %s not found' % header 361 | LOG.error(msg) 362 | 363 | def delete_icap_header(self, header): 364 | """ 365 | Delete an existing ICAP header. 366 | """ 367 | 368 | if not isinstance(header, str): 369 | raise ICAPError(500, 'Header must be a string, not %s.' % type(header)) 370 | 371 | for k in self.icap_headers: 372 | if k.lower() == header.lower(): 373 | del self.icap_headers[k] 374 | msg = 'ICAP header %s deleted' % header 375 | LOG.debug(msg) 376 | return 377 | 378 | msg = 'ICAP header %s not found' % header 379 | LOG.error(msg) 380 | 381 | def send_headers(self, has_body=False): 382 | """ 383 | Send ICAP and encapsulated headers. 384 | Assembles the Encapsulated header, so it's need the information 385 | of wether an encapsulated message body is present. 386 | """ 387 | 388 | enc_header = None 389 | enc_req_stat = '' 390 | if self.enc_request != None: 391 | enc_header = 'req-hdr=0' 392 | enc_body = 'req-body=' 393 | enc_req_stat = self.enc_request + '\r\n' 394 | elif self.enc_status != None: 395 | enc_header = 'res-hdr=0' 396 | enc_body = 'res-body=' 397 | enc_req_stat = self.enc_status + '\r\n' 398 | 399 | if not has_body: 400 | enc_body = 'null-body=' 401 | 402 | if not self.icap_headers.has_key('ISTag'): 403 | self.set_icap_header('ISTag', ''.join([random.choice(string.ascii_uppercase \ 404 | + string.digits) for x in xrange(32)])) 405 | 406 | if not self.icap_headers.has_key('Date'): 407 | self.set_icap_header('Date', self.date_time_string()) 408 | 409 | if not self.icap_headers.has_key('Server'): 410 | self.set_icap_header('Server', self.version_string()) 411 | 412 | enc_header_str = enc_req_stat 413 | for k in self.enc_headers: 414 | for v in self.enc_headers[k]: 415 | enc_header_str += k + ': ' + v + '\r\n' 416 | if enc_header_str != '': 417 | enc_header_str += '\r\n' 418 | 419 | body_offset = len(enc_header_str) 420 | 421 | if enc_header: 422 | enc = enc_header + ', ' + enc_body + str(body_offset) 423 | self.set_icap_header('Encapsulated', enc) 424 | 425 | icap_header_str = '' 426 | for k in self.icap_headers: 427 | for v in self.icap_headers[k]: 428 | icap_header_str += k + ': ' + v + '\r\n' 429 | if k.lower() == 'connection' and v.lower() == 'close': 430 | self.close_connection = True 431 | if k.lower() == 'connection' and v.lower() == 'keep-alive': 432 | self.close_connection = False 433 | icap_header_str += '\r\n' 434 | 435 | self.wfile.write(self.icap_response + '\r\n' + icap_header_str + enc_header_str) 436 | 437 | def parse_request(self): 438 | """ 439 | Parse a request (internal). 440 | The request should be stored in self.raw_requestline; the results 441 | are in self.command, self.request_uri, self.request_version and self.headers. 442 | Return True for success, False for failure; on failure, an error is sent back. 443 | """ 444 | 445 | self.command = None 446 | self.request_version = version = self._protocol_version 447 | 448 | # Default behavior is to leave connection open 449 | self.close_connection = False 450 | 451 | requestline = self.raw_requestline.rstrip('\r\n') 452 | self.requestline = requestline 453 | 454 | words = requestline.split() 455 | if len(words) != 3: 456 | raise ICAPError(400, "Bad request syntax (%r)" % requestline) 457 | 458 | command, request_uri, version = words 459 | 460 | if version[:5] != 'ICAP/': 461 | raise ICAPError(400, "Bad request protocol, only accepting ICAP") 462 | 463 | if command not in ['OPTIONS', 'REQMOD', 'RESPMOD']: 464 | raise ICAPError(501, "command %r is not implemented" % command) 465 | 466 | try: 467 | base_version_number = version.split('/', 1)[1] 468 | version_number = base_version_number.split(".") 469 | # RFC 2145 section 3.1 says there can be only one "." and 470 | # - major and minor numbers MUST be treated as 471 | # separate integers; 472 | # - ICAP/2.4 is a lower version than ICAP/2.13, which in 473 | # turn is lower than ICAP/12.3; 474 | # - Leading zeros MUST be ignored by recipients. 475 | if len(version_number) != 2: 476 | raise ValueError 477 | version_number = int(version_number[0]), int(version_number[1]) 478 | except (ValueError, IndexError): 479 | raise ICAPError(400, "Bad request version (%r)" % version) 480 | 481 | if version_number != (1, 0): 482 | raise ICAPError(505, "Invalid ICAP Version (%s)" % base_version_number) 483 | 484 | self.command, self.request_uri, self.request_version = command, request_uri, version 485 | 486 | # Examine the headers and look for a Connection directive 487 | self.headers = self._read_headers() 488 | 489 | conntype = self.headers.get('connection', [''])[0] 490 | if conntype.lower() == 'close': 491 | self.close_connection = True 492 | 493 | self.encapsulated = {} 494 | if self.command in ['RESPMOD', 'REQMOD']: 495 | _encapsulated = self.headers.get('encapsulated', [''])[0].split(',') 496 | if not _encapsulated: 497 | raise ICAPError(500, "Encapsulated is empty.") 498 | for enc in _encapsulated: 499 | try: 500 | k, v = enc.strip().split('=') 501 | except: 502 | raise ICAPError(500, "Encapsulated is malformed.") 503 | self.encapsulated[k] = int(v) 504 | 505 | self.preview = self.headers.get('preview', [None])[0] 506 | self.allow = [x.strip() for x in self.headers.get('allow', [''])[0].split(',')] 507 | 508 | if self.command == 'REQMOD': 509 | if self.encapsulated.has_key('req-hdr'): 510 | self.enc_req = self._read_request() 511 | self.enc_req_headers = self._read_headers() 512 | if self.encapsulated.has_key('req-body'): 513 | self.has_body = True 514 | elif self.command == 'RESPMOD': 515 | if self.encapsulated.has_key('req-hdr'): 516 | self.enc_req = self._read_request() 517 | self.enc_req_headers = self._read_headers() 518 | if self.encapsulated.has_key('res-hdr'): 519 | self.enc_res_status = self._read_status() 520 | self.enc_res_headers = self._read_headers() 521 | if self.encapsulated.has_key('res-body'): 522 | self.has_body = True 523 | # Else: OPTIONS. No encapsulation. 524 | 525 | # Parse service name 526 | self.servicename = urlparse.urlparse(self.request_uri)[2].strip('/') 527 | 528 | def handle(self): 529 | """ 530 | Handles a connection. 531 | Connection: keep-alive is the default behavior. 532 | """ 533 | 534 | self.close_connection = False 535 | while not self.close_connection: 536 | self.handle_one_request() 537 | 538 | def handle_one_request(self): 539 | """ 540 | Handle a single ICAP request. 541 | """ 542 | 543 | # Initialize handler state 544 | self.enc_req = None 545 | self.enc_req_headers = {} 546 | self.enc_res_status = None 547 | self.enc_res_headers = {} 548 | self.has_body = False 549 | self.servicename = None 550 | self.encapsulated = {} 551 | self.ieof = False 552 | self.eob = False 553 | self.methos = None 554 | self.preview = None 555 | self.allow = set() 556 | 557 | self.icap_headers = {} 558 | self.enc_headers = {} 559 | self.enc_status = None 560 | self.enc_request = None 561 | 562 | self.icap_response_code = None 563 | 564 | try: 565 | self.raw_requestline = self.rfile.readline(65537) 566 | 567 | if not self.raw_requestline: 568 | self.close_connection = True 569 | return 570 | 571 | self.parse_request() 572 | 573 | mname = self.servicename + '_' + self.command 574 | if not hasattr(self, mname): 575 | raise ICAPError(404) 576 | 577 | method = getattr(self, mname) 578 | if not callable(method): 579 | raise ICAPError(404) 580 | 581 | method() 582 | self.wfile.flush() 583 | msg = '[%s] "%s" %d' % (self.client_address[0], self.requestline, self.icap_response_code) 584 | LOG.info(msg) 585 | except socket.timeout as e: 586 | msg = 'Request timed out: %r', e 587 | LOG.error(msg) 588 | self.close_connection = True 589 | except ICAPError as e: 590 | self.send_error(e.code, e.message) 591 | except: 592 | self.send_error(500) 593 | 594 | def send_error(self, code, message=None): 595 | """ 596 | Send and log an error reply. 597 | Arguments are the error code, and a detailed message. 598 | The detailed message defaults to the short entry matching the 599 | response code. 600 | This sends an error response (so it must be called before any 601 | output has been generated), logs the error, and finally sends 602 | a piece of HTML explaining the error to the user. 603 | """ 604 | 605 | try: 606 | short, long = self._responses[code] 607 | except KeyError: 608 | short, long = '???', '???' 609 | if message is None: 610 | message = short 611 | 612 | if not isinstance(message, str): 613 | raise ICAPError(500, 'Message must be a string.') 614 | 615 | msg = '[Sending Error] Code: %d, Message: %s' % (code, message) 616 | LOG.error(msg) 617 | 618 | # No encapsulation 619 | self.enc_req = None 620 | self.enc_res_stats = None 621 | 622 | self.set_icap_response(code, message) 623 | self.set_icap_header('Connection', 'close') 624 | self.send_headers() 625 | 626 | def send_enc_error(self, code, message=None, body='', contenttype='text/html'): 627 | """ 628 | Send an encapsulated error reply. 629 | Arguments are the error code, and a detailed message. 630 | The detailed message defaults to the short entry matching the 631 | response code. 632 | This sends an encapsulated error response (so it must be called 633 | before any output has been generated), logs the error, and 634 | finally sends a piece of HTML explaining the error to the user. 635 | """ 636 | 637 | try: 638 | short, long = self._responses[code] 639 | except KeyError: 640 | short, long = '???', '???' 641 | if message is None: 642 | message = short 643 | 644 | if not isinstance(message, str): 645 | raise ICAPError(500, 'Message must be a string.') 646 | 647 | # No encapsulation 648 | self.enc_req = None 649 | 650 | self.set_icap_response(200) 651 | self.set_enc_status('HTTP/1.1 %s %s' % (str(code), message)) 652 | self.set_enc_header('Content-Type', contenttype) 653 | self.set_enc_header('Content-Length', str(len(body))) 654 | self.send_headers(has_body=True) 655 | if len(body) > 0: 656 | self.send_chunk(body) 657 | self.send_chunk('') 658 | 659 | def version_string(self): 660 | """ 661 | Return the server software version string. 662 | """ 663 | 664 | return self._server_version + ' ' + self._sys_version 665 | 666 | def date_time_string(self, timestamp=None): 667 | """ 668 | Return the current date and time formatted for a message header. 669 | """ 670 | 671 | if timestamp is None: 672 | timestamp = time.time() 673 | year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) 674 | dts = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (self._weekdayname[wd], 675 | day, self._monthname[month], year, 676 | hh, mm, ss) 677 | return dts 678 | 679 | def address_string(self): 680 | """ 681 | Return the client address formatted for logging. 682 | This version looks up the full hostname using gethostbyaddr(), 683 | and tries to find a name that contains at least one dot. 684 | """ 685 | 686 | host, port = self.client_address[:2] 687 | return socket.getfqdn(host) 688 | 689 | def no_adaptation_required(self): 690 | """ 691 | Tells the client to leave the message unaltered 692 | If the client allows 204, or this is a preview request than 693 | a 204 preview response is sent. Otherwise a copy of the message 694 | is returned to the client. 695 | """ 696 | 697 | if '204' in self.allow or self.preview != None: 698 | # We MUST read everything the client sent us 699 | if self.has_body: 700 | while True: 701 | if self.read_chunk() == '': 702 | break 703 | self.set_icap_response(204) 704 | self.send_headers() 705 | else: 706 | # We have to copy everything, 707 | # but it's sure there's no preview 708 | self.set_icap_response(200) 709 | 710 | self.set_enc_status(' '.join(self.enc_res_status)) 711 | for h in self.enc_res_headers: 712 | for v in self.enc_res_headers[h]: 713 | self.set_enc_header(h, v) 714 | 715 | if not self.has_body: 716 | self.send_headers(False) 717 | msg = '[%s] "%s" %d' % (self.client_address[0], self.requestline, 200) 718 | LOG.info(msg) 719 | return 720 | 721 | self.send_headers(True) 722 | while True: 723 | chunk = self.read_chunk() 724 | self.send_chunk(chunk) 725 | if chunk == '': 726 | break 727 | 728 | def main(HandlerClass = BaseICAPRequestHandler, ServerClass = ICAPServer): 729 | """ 730 | This runs an ICAP server on port 13440 (or the first command line argument). 731 | """ 732 | 733 | def example_OPTIONS(self): 734 | self.set_icap_response(200) 735 | self.set_icap_header('Methods', 'RESPMOD, REQMOD') 736 | self.set_icap_header('Service', 'ICAP Server' + ' ' + self._server_version) 737 | self.set_icap_header('Options-TTL', '3600') 738 | self.send_headers(False) 739 | 740 | def example_REQMOD(self): 741 | self.no_adaptation_required() 742 | 743 | def example_RESPMOD(self): 744 | self.no_adaptation_required() 745 | 746 | HandlerClass.example_OPTIONS = example_OPTIONS 747 | HandlerClass.example_REQMOD = example_REQMOD 748 | HandlerClass.example_RESPMOD = example_RESPMOD 749 | 750 | if sys.argv[1:]: 751 | port = int(sys.argv[1]) 752 | else: 753 | port = 13440 754 | server_address = ('', port) 755 | 756 | icap_server = ServerClass(server_address, HandlerClass) 757 | 758 | sa = icap_server.socket.getsockname() 759 | print("Serving ICAP on", sa[0], "port", sa[1]) 760 | icap_server.serve_forever() 761 | 762 | 763 | if __name__ == '__main__': 764 | main() 765 | 766 | # ===================================================================================================================== # 767 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | yara==1.7.7 2 | -------------------------------------------------------------------------------- /rules/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RamadhanAmizudin/python-icap-yara/7d3c3c5f4908e212e837540e26133909c1ff8183/rules/.gitkeep -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import os, datetime, string, sys 4 | import time, threading 5 | import logging, logging.handlers 6 | import socket, socketserver 7 | from yarascanner import * 8 | from icapserver import * 9 | 10 | def dump(obj): 11 | for attr in dir(obj): 12 | print("obj.%s = %s") % (attr, getattr(obj, attr)) 13 | 14 | yara = YaraScanner() 15 | 16 | class ThreadingSimpleServer(socketserver.ThreadingMixIn, ICAPServer): 17 | pass 18 | 19 | class YaraICAPHandler(BaseICAPRequestHandler): 20 | 21 | def yara_OPTIONS(self): 22 | self.set_icap_response(200) 23 | self.set_icap_header('Methods', 'RESPMOD') 24 | self.set_icap_header('Service', 'ICAP Server' + ' ' + self._server_version) 25 | self.set_icap_header('Options-TTL', '3600') 26 | self.set_icap_header('Preview', '0') 27 | self.send_headers(False) 28 | 29 | def yara_REQMOD(self): 30 | self.no_adaptation_required() 31 | 32 | def yara_RESPMOD(self): 33 | self.set_icap_response(200) 34 | self.set_enc_status(' '.join(self.enc_res_status)) 35 | for h in self.enc_res_headers: 36 | for v in self.enc_res_headers[h]: 37 | self.set_enc_header(h, v) 38 | 39 | if not self.has_body: 40 | self.send_headers(False) 41 | return 42 | 43 | self.send_headers(True) 44 | content = '' 45 | while True: 46 | chunk = self.read_chunk() 47 | self.send_chunk(chunk) 48 | content += chunk 49 | if chunk == '': 50 | break 51 | 52 | yara.Scan(content, self.enc_req, self.enc_req_headers, self.enc_res_headers, self.headers['x-client-ip']) 53 | 54 | try: 55 | print('Use Control-C to exit') 56 | server = ThreadingSimpleServer(('127.0.0.1', 1344), YaraICAPHandler) 57 | server.serve_forever() 58 | except KeyboardInterrupt: 59 | server.shutdown() 60 | server.server_close() 61 | print("Finished") 62 | -------------------------------------------------------------------------------- /test/test_base_icap_request_handler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RamadhanAmizudin/python-icap-yara/7d3c3c5f4908e212e837540e26133909c1ff8183/test/test_base_icap_request_handler.py -------------------------------------------------------------------------------- /test/test_icap_error.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from icapserver import ICAPError 3 | 4 | class TestICAPError(unittest.TestCase): 5 | 6 | def test_ICAPError_init_default_messages(self): 7 | sut = ICAPError() 8 | self.assertEqual("Internal Server Error", sut.message) 9 | 10 | def test_ICAPError_init_default_code(self): 11 | sut = ICAPError() 12 | self.assertEqual(500, sut.code) 13 | 14 | def test_ICAPError_init_non_default_message(self): 15 | sut = ICAPError(400) 16 | self.assertEqual("Bad Request", sut.message) 17 | 18 | def test_ICAPError_init_non_default_code(self): 19 | sut = ICAPError(404) 20 | self.assertEqual(404, sut.code) 21 | 22 | def test_ICAPError_init_invalid_http_error_code(self): 23 | sut = ICAPError(999) 24 | self.assertEqual(999, sut.code) 25 | 26 | def test_ICAPError_init_invalid_http_error_code_message(self): 27 | sut = ICAPError(999) 28 | self.assertEqual("Unknown error code", sut.message) 29 | 30 | def test_ICAPError_init_invalid_http_error_code_negative(self): 31 | sut = ICAPError(-8) 32 | self.assertEqual(-8, sut.code) 33 | 34 | def test_ICAPError_init_invalid_http_error_code_negative_message(self): 35 | sut = ICAPError(-8) 36 | self.assertEqual("Unknown error code", sut.message) 37 | 38 | -------------------------------------------------------------------------------- /yarascanner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import os, sys, configparser, yara 4 | import logging, logging.handlers 5 | import hashlib, json, binascii 6 | 7 | class YaraScanner(): 8 | def __init__(self): 9 | self.maindir = os.path.dirname(os.path.abspath(__file__)) 10 | self.log_path = os.path.join(self.maindir, 'yaraicap.log') 11 | self.config_path = os.path.join(self.maindir, 'config.ini') 12 | self.config = configparser.ConfigParser() 13 | self.config.read(self.config_path) 14 | self.initLogging() 15 | self.initYara() 16 | 17 | def initLogging(self): 18 | logging.captureWarnings(True) 19 | self.logger = logging.getLogger('YaraICAP') 20 | self.logger.setLevel(logging.DEBUG) 21 | 22 | # log file header 23 | fileHandler = logging.handlers.RotatingFileHandler(self.log_path, maxBytes=2000000, backupCount=5) 24 | formatter = logging.Formatter('[%(asctime)s][%(name)s][%(levelname)s] %(message)s') 25 | fileHandler.setFormatter(formatter) 26 | self.logger.addHandler(fileHandler) 27 | 28 | # log console handler 29 | consoleHandler = logging.StreamHandler() 30 | consoleHandler.setFormatter(formatter) 31 | self.logger.addHandler(consoleHandler) 32 | 33 | def initYara(self): 34 | content_rules = self.config.get('config', 'content_rules') 35 | url_rules = self.config.get('config', 'url_rules') 36 | if not os.path.isfile(content_rules): 37 | self.logger.error('Content YARA rules not found') 38 | exit() 39 | 40 | if not os.path.isfile(url_rules): 41 | self.logger.error('URL YARA rules not found') 42 | exit() 43 | 44 | self.cyara = yara.compile(content_rules) 45 | self.uyara = yara.compile(url_rules) 46 | 47 | def SaveContent(self, chash, content, request_header, response_header, sig): 48 | writepath = "{path}/{hash}.json".format(path=self.config.get('config', 'content_dir'), hash=chash) 49 | if not os.path.isfile(writepath): 50 | data = {} 51 | data['rules'] = ','.join(str(x) for x in sig).split(',') 52 | data['request_header'] = request_header 53 | data['response_header'] = response_header 54 | data['content'] = binascii.hexlify(content) 55 | f = open(writepath, 'w') 56 | f.write(json.dumps(data, indent=4, sort_keys=True)) 57 | f.close() 58 | 59 | def Scan(self, content, request, request_header, response_header, clientip): 60 | if self.config.getboolean('config', 'scan_url'): 61 | self.__ScanUrl(content, request, request_header, response_header, clientip) 62 | 63 | self.__ScanContent(content, request, request_header, response_header, clientip) 64 | 65 | def __ScanUrl(self, content, request, request_header, response_header, clientip): 66 | url = str(request[1]) 67 | murl = self.uyara.match(data=url) 68 | murl_total = len(murl) 69 | if murl_total > 0: 70 | contentmd5 = hashlib.md5(url).hexdigest() 71 | self.SaveContent(contentmd5, content, request_header, response_header, murl) 72 | self.logger.info("[URL][{hash}][{rules}] {clientip} - {url}".format(hash=contentmd5, clientip=str(clientip[0]), url=url, rules=','.join(str(x) for x in murl))); 73 | 74 | def __ScanContent(self, content, request, request_header, response_header, clientip): 75 | url = str(request[1]) 76 | mcontent = self.cyara.match(data=content) 77 | mcontent_total = len(mcontent) 78 | if mcontent_total > 0: 79 | contentmd5 = hashlib.md5(content).hexdigest() 80 | self.SaveContent(contentmd5, content, request_header, response_header, mcontent) 81 | self.logger.info("[Content][{hash}][{rules}] {clientip} - {url}".format(hash=contentmd5, clientip=str(clientip[0]), url=url, rules=','.join(str(x) for x in mcontent))); 82 | 83 | 84 | if __name__ == '__main__': 85 | pass 86 | --------------------------------------------------------------------------------