├── .gitignore ├── .travis.yml ├── LICENSE.md ├── README.rst ├── cmreslogging ├── __init__.py ├── handlers.py └── serializers.py ├── requirements ├── requirements_py27.txt └── requirements_py36.txt ├── setup.cfg ├── setup.py ├── sonar-project.properties ├── tests ├── __init__.py ├── test_cmreshandler.py └── test_cmresserializer.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .idea 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | pylint.txt 29 | coverage.xml 30 | pylint_* 31 | .pylint.d/ 32 | .sonar/ 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | htmlcov/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask instance folder 65 | instance/ 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # IPython Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.6" 6 | 7 | addons: 8 | apt: 9 | packages: 10 | - oracle-java8-installer 11 | 12 | before_install: 13 | - sudo update-java-alternatives -s java-8-oracle 14 | - export JAVA_HOME=/usr/lib/jvm/java-8-oracle/jre 15 | - java -version 16 | 17 | install: 18 | - pip install tox-travis 19 | - pip install codecov 20 | - mkdir /tmp/elasticsearch 21 | - wget -O - https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-5.5.0.tar.gz | tar xz --directory=/tmp/elasticsearch --strip-components=1 22 | - /tmp/elasticsearch/bin/elasticsearch -d -E script.inline=true -E path.repo=/tmp -E repositories.url.allowed_urls='http://*' -E node.attr.testattr=test 23 | 24 | script: tox 25 | 26 | after_success: 27 | - codecov 28 | 29 | notifications: 30 | email: false -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2016 Carlos Manzanedo Rueda 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | =============== 3 | CMRESHandler.py 4 | =============== 5 | 6 | | |license| |versions| |status| |downloads| 7 | | |ci_status| |codecov| |gitter| 8 | 9 | 10 | **Note: Maintainers needed : Those that committed in the past code to this repo or are presenting new PRs and have experience and interest on helping to maintain repos & python libraries (code quality, testing, integration, etc). If you are intereted on getting our PR's through and helping others to contribute to the library, please get in touch.** 11 | 12 | 13 | Python Elasticsearch Log handler 14 | ******************************** 15 | 16 | This library provides an Elasticsearch logging appender compatible with the 17 | python standard `logging `_ library. 
18 | 19 | The code source is in github at `https://github.com/cmanaha/python-elasticsearch-logger 20 | `_ 21 | 22 | 23 | Installation 24 | ============ 25 | Install using pip:: 26 | 27 | pip install CMRESHandler 28 | 29 | Requirements Python 2 30 | ===================== 31 | This library requires the following dependencies 32 | - elasticsearch 33 | - requests 34 | - enum 35 | 36 | 37 | Requirements Python 3 38 | ===================== 39 | This library requires the following dependencies 40 | - elasticsearch 41 | - requests 42 | 43 | Additional requirements for Kerberos support 44 | ============================================ 45 | Additionally, the package support optionally kerberos authentication by adding the following dependecy 46 | - requests-kerberos 47 | 48 | Additional requirements for AWS IAM user authentication (request signing) 49 | ========================================================================= 50 | Additionally, the package support optionally AWS IAM user authentication by adding the following dependecy 51 | - requests-aws4auth 52 | 53 | Using the handler in your program 54 | ================================== 55 | To initialise and create the handler, just add the handler to your logger as follow :: 56 | 57 | from cmreslogging.handlers import CMRESHandler 58 | handler = CMRESHandler(hosts=[{'host': 'localhost', 'port': 9200}], 59 | auth_type=CMRESHandler.AuthType.NO_AUTH, 60 | es_index_name="my_python_index") 61 | log = logging.getLogger("PythonTest") 62 | log.setLevel(logging.INFO) 63 | log.addHandler(handler) 64 | 65 | You can add fields upon initialisation, providing more data of the execution context :: 66 | 67 | from cmreslogging.handlers import CMRESHandler 68 | handler = CMRESHandler(hosts=[{'host': 'localhost', 'port': 9200}], 69 | auth_type=CMRESHandler.AuthType.NO_AUTH, 70 | es_index_name="my_python_index", 71 | es_additional_fields={'App': 'MyAppName', 'Environment': 'Dev'}) 72 | log = logging.getLogger("PythonTest") 73 | 
log.setLevel(logging.INFO) 74 | log.addHandler(handler) 75 | 76 | This additional fields will be applied to all logging fields and recorded in elasticsearch 77 | 78 | To log, use the regular commands from the logging library :: 79 | 80 | log.info("This is an info statement that will be logged into elasticsearch") 81 | 82 | Your code can also dump additional extra fields on a per log basis that can be used to instrument 83 | operations. For example, when reading information from a database you could do something like:: 84 | 85 | start_time = time.time() 86 | database_operation() 87 | db_delta = time.time() - start_time 88 | log.debug("DB operation took %.3f seconds" % db_delta, extra={'db_execution_time': db_delta}) 89 | 90 | The code above executes the DB operation, measures the time it took and logs an entry that contains 91 | in the message the time the operation took as string and for convenience, it creates another field 92 | called db_execution_time with a float that can be used to plot the time this operations are taking using 93 | Kibana on top of elasticsearch 94 | 95 | Initialisation parameters 96 | ========================= 97 | The constructors takes the following parameters: 98 | - hosts: The list of hosts that elasticsearch clients will connect, multiple hosts are allowed, for example :: 99 | 100 | [{'host':'host1','port':9200}, {'host':'host2','port':9200}] 101 | 102 | 103 | - auth_type: The authentication currently support CMRESHandler.AuthType = NO_AUTH, BASIC_AUTH, KERBEROS_AUTH 104 | - auth_details: When CMRESHandler.AuthType.BASIC_AUTH is used this argument must contain a tuple of string with the user and password that will be used to authenticate against the Elasticsearch servers, for example ('User','Password') 105 | - aws_access_key: When ``CMRESHandler.AuthType.AWS_SIGNED_AUTH`` is used this argument must contain the AWS key id of the the AWS IAM user 106 | - aws_secret_key: When ``CMRESHandler.AuthType.AWS_SIGNED_AUTH`` is used this argument 
must contain the AWS secret key of the the AWS IAM user 107 | - aws_region: When ``CMRESHandler.AuthType.AWS_SIGNED_AUTH`` is used this argument must contain the AWS region of the the AWS Elasticsearch servers, for example ``'us-east'`` 108 | - use_ssl: A boolean that defines if the communications should use SSL encrypted communication 109 | - verify_ssl: A boolean that defines if the SSL certificates are validated or not 110 | - buffer_size: An int, Once this size is reached on the internal buffer results are flushed into ES 111 | - flush_frequency_in_sec: A float representing how often and when the buffer will be flushed 112 | - es_index_name: A string with the prefix of the elasticsearch index that will be created. Note a date with 113 | YYYY.MM.dd, ``python_logger`` used by default 114 | - index_name_frequency: The frequency to use as part of the index naming. Currently supports 115 | CMRESHandler.IndexNameFrequency.DAILY, CMRESHandler.IndexNameFrequency.WEEKLY, 116 | CMRESHandler.IndexNameFrequency.MONTHLY, CMRESHandler.IndexNameFrequency.YEARLY by default the daily rotation 117 | is used 118 | - es_doc_type: A string with the name of the document type that will be used ``python_log`` used by default 119 | - es_additional_fields: A dictionary with all the additional fields that you would like to add to the logs 120 | 121 | Django Integration 122 | ================== 123 | It is also very easy to integrate the handler to `Django `_ And what is even 124 | better, at DEBUG level django logs information such as how long it takes for DB connections to return so 125 | they can be plotted on Kibana, or the SQL statements that Django executed. 
:: 126 | 127 | from cmreslogging.handlers import CMRESHandler 128 | LOGGING = { 129 | 'version': 1, 130 | 'disable_existing_loggers': False, 131 | 'handlers': { 132 | 'file': { 133 | 'level': 'DEBUG', 134 | 'class': 'logging.handlers.RotatingFileHandler', 135 | 'filename': './debug.log', 136 | 'maxBytes': 102400, 137 | 'backupCount': 5, 138 | }, 139 | 'elasticsearch': { 140 | 'level': 'DEBUG', 141 | 'class': 'cmreslogging.handlers.CMRESHandler', 142 | 'hosts': [{'host': 'localhost', 'port': 9200}], 143 | 'es_index_name': 'my_python_app', 144 | 'es_additional_fields': {'App': 'Test', 'Environment': 'Dev'}, 145 | 'auth_type': CMRESHandler.AuthType.NO_AUTH, 146 | 'use_ssl': False, 147 | }, 148 | }, 149 | 'loggers': { 150 | 'django': { 151 | 'handlers': ['file','elasticsearch'], 152 | 'level': 'DEBUG', 153 | 'propagate': True, 154 | }, 155 | }, 156 | } 157 | 158 | There is more information about how Django logging works in the 159 | `Django documentation `_ 160 | 161 | 162 | Building the sources & Testing 163 | ------------------------------ 164 | To create the package follow the standard python setup.py to compile. 165 | To test, just execute the python tests within the test folder 166 | 167 | Why using an appender rather than logstash or beats 168 | --------------------------------------------------- 169 | In some cases is quite useful to provide all the information available within the LogRecords as it contains 170 | things such as exception information, the method, file, log line where the log was generated. 171 | 172 | If you are interested on understanding more about the differences between the agent vs handler 173 | approach, I'd suggest reading `this conversation thread `_ 174 | 175 | The same functionality can be implemented in many other different ways. For example, consider the integration 176 | using `SysLogHandler `_ and 177 | `logstash syslog plugin `_. 
178 | 179 | 180 | Contributing back 181 | ----------------- 182 | Feel free to use this as is or even better, feel free to fork and send your pull requests over. 183 | 184 | 185 | .. |downloads| image:: https://img.shields.io/pypi/dd/CMRESHandler.svg 186 | :target: https://pypi.python.org/pypi/CMRESHandler 187 | :alt: Daily PyPI downloads 188 | .. |versions| image:: https://img.shields.io/pypi/pyversions/CMRESHandler.svg 189 | :target: https://pypi.python.org/pypi/CMRESHandler 190 | :alt: Python versions supported 191 | .. |status| image:: https://img.shields.io/pypi/status/CMRESHandler.svg 192 | :target: https://pypi.python.org/pypi/CMRESHandler 193 | :alt: Package stability 194 | .. |license| image:: https://img.shields.io/pypi/l/CMRESHandler.svg 195 | :target: https://pypi.python.org/pypi/CMRESHandler 196 | :alt: License 197 | .. |ci_status| image:: https://travis-ci.org/cmanaha/python-elasticsearch-logger.svg?branch=master 198 | :target: https://travis-ci.org/cmanaha/python-elasticsearch-logger 199 | :alt: Continuous Integration Status 200 | .. |codecov| image:: https://codecov.io/github/cmanaha/python-elasticsearch-logger/coverage.svg?branch=master 201 | :target: http://codecov.io/github/cmanaha/python-elasticsearch-logger?branch=master 202 | :alt: Coverage! 203 | .. 
""" Elasticsearch logging handler
"""

import logging
import datetime
import socket
from threading import Timer, Lock
from enum import Enum
from elasticsearch import helpers as eshelpers
from elasticsearch import Elasticsearch, RequestsHttpConnection

try:
    from requests_kerberos import HTTPKerberosAuth, DISABLED
    CMR_KERBEROS_SUPPORTED = True
except ImportError:
    CMR_KERBEROS_SUPPORTED = False

try:
    from requests_aws4auth import AWS4Auth
    AWS4AUTH_SUPPORTED = True
except ImportError:
    AWS4AUTH_SUPPORTED = False

from cmreslogging.serializers import CMRESSerializer


class CMRESHandler(logging.Handler):
    """ Elasticsearch log handler

    Allows to log to elasticsearch into json format.
    All LogRecord fields are serialised and inserted
    """

    class AuthType(Enum):
        """ Authentication types supported

        The handler supports
         - No authentication
         - Basic authentication
         - Kerberos or SSO authentication (on windows and linux)
         - AWS IAM signed requests
        """
        NO_AUTH = 0
        BASIC_AUTH = 1
        KERBEROS_AUTH = 2
        AWS_SIGNED_AUTH = 3

    class IndexNameFrequency(Enum):
        """ Index rotation frequencies supported

        The handler supports
         - Daily indices
         - Weekly indices
         - Monthly indices
         - Yearly indices
        """
        DAILY = 0
        WEEKLY = 1
        MONTHLY = 2
        YEARLY = 3

    # Defaults for the class
    __DEFAULT_ELASTICSEARCH_HOST = [{'host': 'localhost', 'port': 9200}]
    __DEFAULT_AUTH_USER = ''
    __DEFAULT_AUTH_PASSWD = ''
    __DEFAULT_AWS_ACCESS_KEY = ''
    __DEFAULT_AWS_SECRET_KEY = ''
    __DEFAULT_AWS_REGION = ''
    __DEFAULT_USE_SSL = False
    __DEFAULT_VERIFY_SSL = True
    __DEFAULT_AUTH_TYPE = AuthType.NO_AUTH
    __DEFAULT_INDEX_FREQUENCY = IndexNameFrequency.DAILY
    __DEFAULT_BUFFER_SIZE = 1000
    __DEFAULT_FLUSH_FREQ_INSEC = 1
    __DEFAULT_ADDITIONAL_FIELDS = {}
    __DEFAULT_ES_INDEX_NAME = 'python_logger'
    __DEFAULT_ES_DOC_TYPE = 'python_log'
    __DEFAULT_RAISE_ON_EXCEPTION = False
    __DEFAULT_TIMESTAMP_FIELD_NAME = "timestamp"

    # LogRecord attributes that are redundant with the timestamp field and
    # are therefore not copied into the elasticsearch document.
    __LOGGING_FILTER_FIELDS = ['msecs',
                               'relativeCreated',
                               'levelno',
                               'created']

    @staticmethod
    def _get_daily_index_name(es_index_name):
        """ Returns elasticsearch index name for daily rotation
        :param es_index_name: the prefix to be used in the index
        :return: A string containing the elasticsearch index name postfixed with YYYY.MM.dd
        """
        return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m.%d'))

    @staticmethod
    def _get_weekly_index_name(es_index_name):
        """ Returns elasticsearch index name for weekly rotation

        The postfix is the date of the Monday starting the current week, so
        every record emitted within one week lands in the same index.

        :param es_index_name: the prefix to be used in the index
        :return: A string containing the elasticsearch index name postfixed with the week start date
        """
        current_date = datetime.datetime.now()
        start_of_the_week = current_date - datetime.timedelta(days=current_date.weekday())
        return "{0!s}-{1!s}".format(es_index_name, start_of_the_week.strftime('%Y.%m.%d'))

    @staticmethod
    def _get_monthly_index_name(es_index_name):
        """ Returns elasticsearch index name for monthly rotation
        :param es_index_name: the prefix to be used in the index
        :return: A string containing the elasticsearch index name postfixed with YYYY.MM
        """
        return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m'))

    @staticmethod
    def _get_yearly_index_name(es_index_name):
        """ Returns elasticsearch index name for yearly rotation
        :param es_index_name: the prefix to be used in the index
        :return: A string containing the elasticsearch index name postfixed with YYYY
        """
        return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y'))

    # Dispatch table from rotation frequency to the naming helper above.
    # The values are staticmethod objects, hence the __func__ call in flush().
    _INDEX_FREQUENCY_FUNCION_DICT = {
        IndexNameFrequency.DAILY: _get_daily_index_name,
        IndexNameFrequency.WEEKLY: _get_weekly_index_name,
        IndexNameFrequency.MONTHLY: _get_monthly_index_name,
        IndexNameFrequency.YEARLY: _get_yearly_index_name
    }

    def __init__(self,
                 hosts=__DEFAULT_ELASTICSEARCH_HOST,
                 auth_details=(__DEFAULT_AUTH_USER, __DEFAULT_AUTH_PASSWD),
                 aws_access_key=__DEFAULT_AWS_ACCESS_KEY,
                 aws_secret_key=__DEFAULT_AWS_SECRET_KEY,
                 aws_region=__DEFAULT_AWS_REGION,
                 auth_type=__DEFAULT_AUTH_TYPE,
                 use_ssl=__DEFAULT_USE_SSL,
                 verify_ssl=__DEFAULT_VERIFY_SSL,
                 buffer_size=__DEFAULT_BUFFER_SIZE,
                 flush_frequency_in_sec=__DEFAULT_FLUSH_FREQ_INSEC,
                 es_index_name=__DEFAULT_ES_INDEX_NAME,
                 index_name_frequency=__DEFAULT_INDEX_FREQUENCY,
                 es_doc_type=__DEFAULT_ES_DOC_TYPE,
                 es_additional_fields=__DEFAULT_ADDITIONAL_FIELDS,
                 raise_on_indexing_exceptions=__DEFAULT_RAISE_ON_EXCEPTION,
                 default_timestamp_field_name=__DEFAULT_TIMESTAMP_FIELD_NAME):
        """ Handler constructor

        :param hosts: The list of hosts that elasticsearch clients will connect. The list can be provided
                    in the format ```[{'host':'host1','port':9200}, {'host':'host2','port':9200}]``` to
                    make sure the client supports failover of one of the instertion nodes
        :param auth_details: When ```CMRESHandler.AuthType.BASIC_AUTH``` is used this argument must contain
                    a tuple of string with the user and password that will be used to authenticate against
                    the Elasticsearch servers, for example```('User','Password')
        :param aws_access_key: When ```CMRESHandler.AuthType.AWS_SIGNED_AUTH``` is used this argument must contain
                    the AWS key id of the AWS IAM user
        :param aws_secret_key: When ```CMRESHandler.AuthType.AWS_SIGNED_AUTH``` is used this argument must contain
                    the AWS secret key of the AWS IAM user
        :param aws_region: When ```CMRESHandler.AuthType.AWS_SIGNED_AUTH``` is used this argument must contain
                    the AWS region of the AWS Elasticsearch servers, for example```'us-east'
        :param auth_type: The authentication type to be used in the connection ```CMRESHandler.AuthType```
                    Currently, NO_AUTH, BASIC_AUTH, KERBEROS_AUTH are supported
        :param use_ssl: A boolean that defines if the communications should use SSL encrypted communication
        :param verify_ssl: A boolean that defines if the SSL certificates are validated or not
        :param buffer_size: An int, Once this size is reached on the internal buffer results are flushed into ES
        :param flush_frequency_in_sec: A float representing how often and when the buffer will be flushed, even
                    if the buffer_size has not been reached yet
        :param es_index_name: A string with the prefix of the elasticsearch index that will be created. Note a
                    date with YYYY.MM.dd, ```python_logger``` used by default
        :param index_name_frequency: Defines what the date used in the postfix of the name would be. available values
                    are selected from the IndexNameFrequency class (IndexNameFrequency.DAILY,
                    IndexNameFrequency.WEEKLY, IndexNameFrequency.MONTHLY, IndexNameFrequency.YEARLY). By default
                    it uses daily indices.
        :param es_doc_type: A string with the name of the document type that will be used ```python_log``` used
                    by default
        :param es_additional_fields: A dictionary with all the additional fields that you would like to add
                    to the logs, such the application, environment, etc.
        :param raise_on_indexing_exceptions: A boolean, True only for debugging purposes to raise exceptions
                    caused when indexing fails; when False indexing errors are silently dropped
        :param default_timestamp_field_name: A string with the name of the field that will carry the record
                    timestamp, ```timestamp``` by default
        :return: A ready to be used CMRESHandler.
        """
        logging.Handler.__init__(self)

        self.hosts = hosts
        self.auth_details = auth_details
        self.aws_access_key = aws_access_key
        self.aws_secret_key = aws_secret_key
        self.aws_region = aws_region
        self.auth_type = auth_type
        self.use_ssl = use_ssl
        self.verify_certs = verify_ssl
        self.buffer_size = buffer_size
        self.flush_frequency_in_sec = flush_frequency_in_sec
        self.es_index_name = es_index_name
        self.index_name_frequency = index_name_frequency
        self.es_doc_type = es_doc_type
        # Copy so the caller's dict (or the shared class default) is never mutated
        self.es_additional_fields = es_additional_fields.copy()
        self.es_additional_fields.update({'host': socket.gethostname(),
                                          'host_ip': socket.gethostbyname(socket.gethostname())})
        self.raise_on_indexing_exceptions = raise_on_indexing_exceptions
        self.default_timestamp_field_name = default_timestamp_field_name

        self._client = None
        self._buffer = []
        self._buffer_lock = Lock()
        self._timer = None
        self._index_name_func = CMRESHandler._INDEX_FREQUENCY_FUNCION_DICT[self.index_name_frequency]
        self.serializer = CMRESSerializer()

    def __schedule_flush(self):
        """ Starts the flush timer if one is not already pending """
        if self._timer is None:
            self._timer = Timer(self.flush_frequency_in_sec, self.flush)
            # daemon so a pending flush never blocks interpreter shutdown
            # (setDaemon() is the deprecated spelling of this assignment)
            self._timer.daemon = True
            self._timer.start()

    def __get_es_client(self):
        """ Returns an elasticsearch client matching the configured auth type

        The client is created lazily and cached, except for Kerberos where a
        fresh client is returned on every call so tokens stay up to date.

        :return: An ```Elasticsearch``` client instance
        :raises EnvironmentError: when the optional auth dependency is missing
        :raises ValueError: when ```auth_type``` is not a supported value
        """
        if self.auth_type == CMRESHandler.AuthType.NO_AUTH:
            if self._client is None:
                self._client = Elasticsearch(hosts=self.hosts,
                                             use_ssl=self.use_ssl,
                                             verify_certs=self.verify_certs,
                                             connection_class=RequestsHttpConnection,
                                             serializer=self.serializer)
            return self._client

        if self.auth_type == CMRESHandler.AuthType.BASIC_AUTH:
            if self._client is None:
                # Cache the client; previously a new client was built on
                # every call and never stored in self._client
                self._client = Elasticsearch(hosts=self.hosts,
                                             http_auth=self.auth_details,
                                             use_ssl=self.use_ssl,
                                             verify_certs=self.verify_certs,
                                             connection_class=RequestsHttpConnection,
                                             serializer=self.serializer)
            return self._client

        if self.auth_type == CMRESHandler.AuthType.KERBEROS_AUTH:
            if not CMR_KERBEROS_SUPPORTED:
                raise EnvironmentError("Kerberos module not available. Please install \"requests-kerberos\"")
            # For kerberos we return a new client each time to make sure the tokens are up to date
            return Elasticsearch(hosts=self.hosts,
                                 use_ssl=self.use_ssl,
                                 verify_certs=self.verify_certs,
                                 connection_class=RequestsHttpConnection,
                                 http_auth=HTTPKerberosAuth(mutual_authentication=DISABLED),
                                 serializer=self.serializer)

        if self.auth_type == CMRESHandler.AuthType.AWS_SIGNED_AUTH:
            if not AWS4AUTH_SUPPORTED:
                raise EnvironmentError("AWS4Auth not available. Please install \"requests-aws4auth\"")
            if self._client is None:
                awsauth = AWS4Auth(self.aws_access_key, self.aws_secret_key, self.aws_region, 'es')
                self._client = Elasticsearch(
                    hosts=self.hosts,
                    http_auth=awsauth,
                    use_ssl=self.use_ssl,
                    # honour the verify_ssl constructor argument; this branch
                    # previously hard-coded verify_certs=True
                    verify_certs=self.verify_certs,
                    connection_class=RequestsHttpConnection,
                    serializer=self.serializer
                )
            return self._client

        raise ValueError("Authentication method not supported")

    def test_es_source(self):
        """ Returns True if the handler can ping the Elasticsearch servers

        Can be used to confirm the setup of a handler has been properly done and confirm
        that things like the authentication is working properly

        :return: A boolean, True if the connection against elasticserach host was successful
        """
        return self.__get_es_client().ping()

    @staticmethod
    def __get_es_datetime_str(timestamp):
        """ Returns elasticsearch utc formatted time for an epoch timestamp

        :param timestamp: epoch, including milliseconds
        :return: A string valid for elasticsearch time record
        """
        current_date = datetime.datetime.utcfromtimestamp(timestamp)
        return "{0!s}.{1:03d}Z".format(current_date.strftime('%Y-%m-%dT%H:%M:%S'), int(current_date.microsecond / 1000))

    def flush(self):
        """ Flushes the buffer into ES
        :return: None
        """
        if self._timer is not None and self._timer.is_alive():
            self._timer.cancel()
        self._timer = None

        if self._buffer:
            try:
                # Swap the buffer under the lock, then index outside of it so
                # concurrent emit() calls are not blocked by the bulk request
                with self._buffer_lock:
                    logs_buffer = self._buffer
                    self._buffer = []
                actions = (
                    {
                        '_index': self._index_name_func.__func__(self.es_index_name),
                        '_type': self.es_doc_type,
                        '_source': log_record
                    }
                    for log_record in logs_buffer
                )
                eshelpers.bulk(
                    client=self.__get_es_client(),
                    actions=actions,
                    stats_only=True
                )
            except Exception as exception:
                if self.raise_on_indexing_exceptions:
                    raise exception

    def close(self):
        """ Flushes the buffer and release any outstanding resource

        :return: None
        """
        if self._timer is not None:
            self.flush()
        self._timer = None
        # Unregister from the logging machinery as well
        logging.Handler.close(self)

    def emit(self, record):
        """ Emit overrides the abstract logging.Handler logRecord emit method

        Format and records the log

        :param record: A class of type ```logging.LogRecord```
        :return: None
        """
        self.format(record)

        rec = self.es_additional_fields.copy()
        for key, value in record.__dict__.items():
            if key not in CMRESHandler.__LOGGING_FILTER_FIELDS:
                if key == "args":
                    # record.args is normally a tuple, but the logging library
                    # also accepts a single mapping (and None); stringify the
                    # elements either way so the document stays serialisable
                    if isinstance(value, (list, tuple)):
                        value = tuple(str(arg) for arg in value)
                    elif value is not None:
                        value = str(value)
                rec[key] = "" if value is None else value
        rec[self.default_timestamp_field_name] = self.__get_es_datetime_str(record.created)
        with self._buffer_lock:
            self._buffer.append(rec)

        if len(self._buffer) >= self.buffer_size:
            self.flush()
        else:
            self.__schedule_flush()
""" JSON serializer for Elasticsearch use
"""
from elasticsearch.serializer import JSONSerializer


class CMRESSerializer(JSONSerializer):
    """ JSON serializer that tolerates non-serialisable log values

    Extends the stock elasticsearch JSON serializer so that any object the
    base class cannot encode (for instance the exception type carried in
    ``record.exc_info``) is stored as its string representation instead of
    raising an error.
    """

    def default(self, data):
        """ Fallback encoder invoked for types unknown to the base serializer

        :param data: the value to serialize before sending it to elasticsearch
        :return: the base class encoding when possible, otherwise ``str(data)``
        """
        try:
            return JSONSerializer.default(self, data)
        except TypeError:
            # Unknown types (classes, tracebacks, ...) are stored as text
            return str(data)
2 | See: 3 | https://packaging.python.org/en/latest/distributing.html 4 | https://github.com/pypa/sampleproject 5 | """ 6 | 7 | # Always prefer setuptools over distutils 8 | from setuptools import setup, find_packages 9 | # To use a consistent encoding 10 | from codecs import open 11 | from os import path 12 | import sys 13 | 14 | here = path.abspath(path.dirname(__file__)) 15 | 16 | # Get the long description from the README file 17 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 18 | long_description = f.read() 19 | 20 | dependencies = [ 21 | 'elasticsearch', 22 | 'requests' 23 | ] 24 | 25 | # If python version is above 3.4 (built in enums supported enums) 26 | if sys.version_info <= (3,4): 27 | dependencies.append('enum') 28 | 29 | print("List of dependencies : {0}".format(str(dependencies))) 30 | 31 | setup( 32 | name='CMRESHandler', 33 | 34 | # Versions should comply with PEP440. For a discussion on single-sourcing 35 | # the version across setup.py and the project code, see 36 | # https://packaging.python.org/en/latest/single_source_version.html 37 | version='1.0.0', 38 | 39 | description='Elasticsearch Log handler for the logging library', 40 | long_description=long_description, 41 | 42 | # The project's main homepage. 43 | url='https://github.com/cmanaha/python-elasticsearch-logger', 44 | 45 | # Author details 46 | author='Carlos Manzanedo Rueda', 47 | author_email='c.manaha@gmail.com', 48 | 49 | # Choose your license 50 | license='Apache2', 51 | 52 | classifiers=[ 53 | # How mature is this project? 
Common values are 54 | # 3 - Alpha 55 | # 4 - Beta 56 | # 5 - Production/Stable 57 | 'Development Status :: 5 - Production/Stable', 58 | 59 | # Indicate who your project is intended for 60 | 'Intended Audience :: Developers', 61 | 'Topic :: System :: Logging', 62 | 'Topic :: Software Development :: Libraries', 63 | 'Topic :: Internet :: Log Analysis', 64 | 65 | # Pick your license as you wish (should match "license" above) 66 | 'License :: OSI Approved :: Apache Software License', 67 | 68 | # Specify the Python versions you support here. In particular, ensure 69 | # that you indicate whether you support Python 2, Python 3 or both. 70 | 'Programming Language :: Python :: 2.7', 71 | 'Programming Language :: Python :: 3.6', 72 | ], 73 | 74 | # What does your project relate to? 75 | keywords='logging elasticsearch handler log django instrumentation', 76 | 77 | # You can just specify the packages manually here if your project is 78 | # simple. Or you can use find_packages(). 79 | packages=find_packages(exclude=['dist', 'docs', 'build']), 80 | 81 | # Alternatively, if you want to distribute just a my_module.py, uncomment 82 | # this: 83 | # py_modules=["my_module"], 84 | 85 | # List run-time dependencies here. These will be installed by pip when 86 | # your project is installed. For an analysis of "install_requires" vs pip's 87 | # requirements files see: 88 | # https://packaging.python.org/en/latest/requirements.html 89 | install_requires=dependencies, 90 | 91 | # List additional groups of dependencies here (e.g. development 92 | # dependencies). You can install these using the following syntax, 93 | # for example: 94 | # $ pip install -e .[dev,test] 95 | extras_require={ 96 | 'dev': ['check-manifest', 'six', 'pylint'], 97 | 'test': ['coverage'], 98 | }, 99 | 100 | # If there are data files included in your packages that need to be 101 | # installed, specify them here. If using Python 2.6 or less, then these 102 | # have to be included in MANIFEST.in as well. 
103 | package_data={ 104 | }, 105 | 106 | # Although 'package_data' is the preferred approach, in some case you may 107 | # need to place data files outside of your packages. See: 108 | # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa 109 | # In this case, 'data_file' will be installed into '/my_data' 110 | data_files=[], 111 | 112 | # To provide executable scripts, use entry points in preference to the 113 | # "scripts" keyword. Entry points provide cross-platform support and allow 114 | # pip to create the appropriate form of executable for the target platform. 115 | entry_points={ 116 | }, 117 | ) 118 | -------------------------------------------------------------------------------- /sonar-project.properties: -------------------------------------------------------------------------------- 1 | sonar.projectKey=cmr.python:python-elasticsearch-logger 2 | sonar.projectName=Python Elasticsearch Logger 3 | sonar.projectVersion=1.0.0b4 4 | sonar.verbose=DEBUG 5 | sonar.language=py 6 | sonar.sources=cmreslogging 7 | sonar.tests=tests 8 | sonar.python.coverage.reportPath=coverage.xml 9 | sonar.sourceEncoding=UTF-8 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmanaha/python-elasticsearch-logger/27ee809b4eba309d198a8909d9c559754e257197/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_cmreshandler.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import logging 3 | import time 4 | import os 5 | import sys 6 | sys.path.insert(0, os.path.abspath('.')) 7 | from cmreslogging.handlers import CMRESHandler 8 | 9 | 10 | class CMRESHandlerTestCase(unittest.TestCase): 11 | DEFAULT_ES_SERVER = 'localhost' 12 | 
DEFAULT_ES_PORT = 9200 13 | 14 | def getESHost(self): 15 | return os.getenv('TEST_ES_SERVER',CMRESHandlerTestCase.DEFAULT_ES_SERVER) 16 | 17 | def getESPort(self): 18 | try: 19 | return int(os.getenv('TEST_ES_PORT',CMRESHandlerTestCase.DEFAULT_ES_PORT)) 20 | except ValueError: 21 | return CMRESHandlerTestCase.DEFAULT_ES_PORT 22 | 23 | def setUp(self): 24 | self.log = logging.getLogger("MyTestCase") 25 | test_handler = logging.StreamHandler(stream=sys.stderr) 26 | self.log.addHandler(test_handler) 27 | 28 | def tearDown(self): 29 | del self.log 30 | 31 | def test_ping(self): 32 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 33 | auth_type=CMRESHandler.AuthType.NO_AUTH, 34 | es_index_name="pythontest", 35 | use_ssl=False, 36 | raise_on_indexing_exceptions=True) 37 | es_test_server_is_up = handler.test_es_source() 38 | self.assertEqual(True, es_test_server_is_up) 39 | 40 | def test_buffered_log_insertion_flushed_when_buffer_full(self): 41 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 42 | auth_type=CMRESHandler.AuthType.NO_AUTH, 43 | use_ssl=False, 44 | buffer_size=2, 45 | flush_frequency_in_sec=1000, 46 | es_index_name="pythontest", 47 | es_additional_fields={'App': 'Test', 'Environment': 'Dev'}, 48 | raise_on_indexing_exceptions=True) 49 | 50 | es_test_server_is_up = handler.test_es_source() 51 | self.log.info("ES services status is: {0!s}".format(es_test_server_is_up)) 52 | self.assertEqual(True, es_test_server_is_up) 53 | 54 | log = logging.getLogger("PythonTest") 55 | log.setLevel(logging.DEBUG) 56 | log.addHandler(handler) 57 | log.warning("First Message") 58 | log.info("Seccond Message") 59 | self.assertEqual(0, len(handler._buffer)) 60 | handler.close() 61 | 62 | def test_es_log_extra_argument_insertion(self): 63 | self.log.info("About to test elasticsearch insertion") 64 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 65 | 
auth_type=CMRESHandler.AuthType.NO_AUTH, 66 | use_ssl=False, 67 | es_index_name="pythontest", 68 | es_additional_fields={'App': 'Test', 'Environment': 'Dev'}, 69 | raise_on_indexing_exceptions=True) 70 | 71 | es_test_server_is_up = handler.test_es_source() 72 | self.log.info("ES services status is: {0!s}".format(es_test_server_is_up)) 73 | self.assertEqual(True, es_test_server_is_up) 74 | 75 | log = logging.getLogger("PythonTest") 76 | log.addHandler(handler) 77 | log.warning("Extra arguments Message", extra={"Arg1": 300, "Arg2": 400}) 78 | self.assertEqual(1, len(handler._buffer)) 79 | self.assertEqual(handler._buffer[0]['Arg1'], 300) 80 | self.assertEqual(handler._buffer[0]['Arg2'], 400) 81 | self.assertEqual(handler._buffer[0]['App'], 'Test') 82 | self.assertEqual(handler._buffer[0]['Environment'], 'Dev') 83 | handler.flush() 84 | self.assertEqual(0, len(handler._buffer)) 85 | 86 | def test_buffered_log_insertion_after_interval_expired(self): 87 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 88 | auth_type=CMRESHandler.AuthType.NO_AUTH, 89 | use_ssl=False, 90 | flush_frequency_in_sec=0.1, 91 | es_index_name="pythontest", 92 | es_additional_fields={'App': 'Test', 'Environment': 'Dev'}, 93 | raise_on_indexing_exceptions=True) 94 | 95 | es_test_server_is_up = handler.test_es_source() 96 | self.log.info("ES services status is: {0!s}".format(es_test_server_is_up)) 97 | self.assertEqual(True, es_test_server_is_up) 98 | 99 | log = logging.getLogger("PythonTest") 100 | log.addHandler(handler) 101 | log.warning("Extra arguments Message", extra={"Arg1": 300, "Arg2": 400}) 102 | self.assertEqual(1, len(handler._buffer)) 103 | self.assertEqual(handler._buffer[0]['Arg1'], 300) 104 | self.assertEqual(handler._buffer[0]['Arg2'], 400) 105 | self.assertEqual(handler._buffer[0]['App'], 'Test') 106 | self.assertEqual(handler._buffer[0]['Environment'], 'Dev') 107 | time.sleep(1) 108 | self.assertEqual(0, len(handler._buffer)) 109 | 110 | def 
test_fast_insertion_of_hundred_logs(self): 111 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 112 | auth_type=CMRESHandler.AuthType.NO_AUTH, 113 | use_ssl=False, 114 | buffer_size=500, 115 | flush_frequency_in_sec=0.5, 116 | es_index_name="pythontest", 117 | raise_on_indexing_exceptions=True) 118 | log = logging.getLogger("PythonTest") 119 | log.setLevel(logging.DEBUG) 120 | log.addHandler(handler) 121 | for i in range(100): 122 | log.info("Logging line {0:d}".format(i), extra={'LineNum': i}) 123 | handler.flush() 124 | self.assertEqual(0, len(handler._buffer)) 125 | 126 | def test_index_name_frequency_functions(self): 127 | index_name = "pythontest" 128 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 129 | auth_type=CMRESHandler.AuthType.NO_AUTH, 130 | es_index_name=index_name, 131 | use_ssl=False, 132 | index_name_frequency=CMRESHandler.IndexNameFrequency.DAILY, 133 | raise_on_indexing_exceptions=True) 134 | self.assertEqual( 135 | handler._index_name_func.__func__(index_name), 136 | CMRESHandler._get_daily_index_name(index_name) 137 | ) 138 | 139 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 140 | auth_type=CMRESHandler.AuthType.NO_AUTH, 141 | es_index_name=index_name, 142 | use_ssl=False, 143 | index_name_frequency=CMRESHandler.IndexNameFrequency.WEEKLY, 144 | raise_on_indexing_exceptions=True) 145 | self.assertEqual( 146 | handler._index_name_func.__func__(index_name), 147 | CMRESHandler._get_weekly_index_name(index_name) 148 | ) 149 | 150 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 151 | auth_type=CMRESHandler.AuthType.NO_AUTH, 152 | es_index_name=index_name, 153 | use_ssl=False, 154 | index_name_frequency=CMRESHandler.IndexNameFrequency.MONTHLY, 155 | raise_on_indexing_exceptions=True) 156 | self.assertEqual( 157 | handler._index_name_func.__func__(index_name), 158 | 
CMRESHandler._get_monthly_index_name(index_name) 159 | ) 160 | 161 | handler = CMRESHandler(hosts=[{'host': self.getESHost(), 'port': self.getESPort()}], 162 | auth_type=CMRESHandler.AuthType.NO_AUTH, 163 | es_index_name=index_name, 164 | use_ssl=False, 165 | index_name_frequency=CMRESHandler.IndexNameFrequency.YEARLY, 166 | raise_on_indexing_exceptions=True) 167 | self.assertEqual( 168 | handler._index_name_func.__func__(index_name), 169 | CMRESHandler._get_yearly_index_name(index_name) 170 | ) 171 | 172 | 173 | if __name__ == '__main__': 174 | unittest.main() 175 | -------------------------------------------------------------------------------- /tests/test_cmresserializer.py: -------------------------------------------------------------------------------- 1 | """ Test class for the serializers module 2 | """ 3 | import unittest 4 | import logging 5 | import datetime 6 | import os 7 | import sys 8 | import decimal 9 | 10 | sys.path.insert(0, os.path.abspath('.')) 11 | from cmreslogging.serializers import CMRESSerializer 12 | 13 | 14 | class CMRESSerializerTestCase(unittest.TestCase): 15 | """ CMRESSerializer test class 16 | """ 17 | 18 | def setUp(self): 19 | """ Set up the test 20 | 21 | Set up the log and the formatter to get asctime and exc_text fields 22 | """ 23 | self.log = logging.getLogger("MyTestCase") 24 | self.formatter = logging.Formatter('%(asctime)s') 25 | 26 | def tearDown(self): 27 | """ Delete the log and the formatter 28 | """ 29 | del self.log 30 | del self.formatter 31 | 32 | def test_dumps_classic_log(self): 33 | """ Test the classic log serialization 34 | """ 35 | serializer = CMRESSerializer() 36 | record = self.log.makeRecord(name=self.log.name, 37 | level=logging.INFO, 38 | fn=self.__class__.__name__, 39 | lno=58, msg="dump_classic_log", 40 | args=None, 41 | exc_info=False, 42 | func=None, 43 | extra=None) 44 | self.formatter.format(record) 45 | for value in record.__dict__.values(): 46 | try: 47 | serializer.dumps(value) 48 | except 
TypeError: 49 | self.fail("Serializer raised a TypeError exception") 50 | 51 | def test_dumps_exception_log(self): 52 | """ Test the exception log serialization with the exc_info field 53 | """ 54 | serializer = CMRESSerializer() 55 | try: 56 | bad_idea = 1/0 57 | except ZeroDivisionError: 58 | record = self.log.makeRecord(name=self.log.name, 59 | level=logging.ERROR, 60 | fn=self.__class__.__name__, 61 | lno=58, msg="dump_exception_log", 62 | args=None, 63 | exc_info=sys.exc_info(), 64 | func=None, 65 | extra=None) 66 | self.formatter.format(record) 67 | for value in record.__dict__.values(): 68 | try: 69 | serializer.dumps(value) 70 | except TypeError: 71 | self.fail("Serializer raised a TypeError exception") 72 | 73 | def test_dumps_log_with_extras_and_args(self): 74 | """ Test the log serialization with arguments and extras complex parameters 75 | """ 76 | serializer = CMRESSerializer() 77 | record = self.log.makeRecord(name=self.log.name, 78 | level=logging.ERROR, 79 | fn=self.__class__.__name__, 80 | lno=58, msg="dump_%s_log", 81 | args="args", 82 | exc_info=False, 83 | func=None, 84 | extra={'complexvalue1': datetime.date.today(), 85 | 'complexvalue2': decimal.Decimal('3.0')}) 86 | self.formatter.format(record) 87 | for value in record.__dict__.values(): 88 | try: 89 | serializer.dumps(value) 90 | except TypeError: 91 | self.fail("Serializer raised a TypeError exception") 92 | 93 | 94 | if __name__ == '__main__': 95 | unittest.main() 96 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = {py27,py36} 3 | 4 | [testenv] 5 | passenv = 6 | TEST_ES_SERVER 7 | TEST_ES_PORT 8 | basepython = 9 | py27: python2.7 10 | py36: python3.6 11 | deps = 12 | check-manifest 13 | docutils 14 | {py27}: readme_renderer 15 | pylint 16 | flake8 17 | pytest 18 | coverage 19 | commands = 20 | python setup.py check -m -r -s 21 | flake8 
--max-line-length=120 cmreslogging/ 22 | # pylint ./cmreslogging -r n --files-output=y '--msg-template="\{path\}:\{line\}: [\{msg_id\}(\{symbol\}), \{obj\}] \{msg\}"' 23 | coverage erase 24 | coverage run -a --source=./cmreslogging --branch tests/test_cmreshandler.py 25 | coverage run -a --source=./cmreslogging --branch tests/test_cmresserializer.py 26 | coverage xml -i 27 | coverage html 28 | 29 | [flake8] 30 | exclude = .tox,*.egg,build,data 31 | select = E,W,F 32 | --------------------------------------------------------------------------------