├── lala ├── version.py ├── __init__.py ├── conf.py └── WebLogs.py ├── docs ├── makehtml.sh ├── _static │ ├── images │ │ ├── logo.png │ │ ├── report.jpeg │ │ ├── pw_maze_dark.png │ │ └── dataframe_example.png │ └── css │ │ └── main.css ├── ref │ └── ref.rst ├── README.md ├── examples │ ├── report_example.rst │ └── basic_example.rst ├── index.rst ├── Makefile ├── make.bat └── conf.py ├── MANIFEST.in ├── examples ├── report_example.pdf ├── basic_example_piechart.png ├── basic_example_timeline.png ├── basic_example_worldmap.png ├── basic_example_frequent_visitors.png ├── report_example.py ├── basic_example.py └── data │ └── example_template.pug ├── .gitignore ├── setup.py ├── .travis.yml ├── LICENCE.txt ├── tests ├── test_basics.py └── data │ └── template.pug ├── README.rst └── ez_setup.py /lala/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.4" 2 | -------------------------------------------------------------------------------- /docs/makehtml.sh: -------------------------------------------------------------------------------- 1 | make html 2 | firefox ../../built_docs/html/index.html 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | recursive-include examples *.txt *.py 3 | include ez_setup.py 4 | -------------------------------------------------------------------------------- /docs/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/logo.png -------------------------------------------------------------------------------- /examples/report_example.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/report_example.pdf -------------------------------------------------------------------------------- /docs/_static/images/report.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/report.jpeg -------------------------------------------------------------------------------- /docs/_static/images/pw_maze_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/pw_maze_dark.png -------------------------------------------------------------------------------- /examples/basic_example_piechart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_piechart.png -------------------------------------------------------------------------------- /examples/basic_example_timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_timeline.png -------------------------------------------------------------------------------- /examples/basic_example_worldmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_worldmap.png -------------------------------------------------------------------------------- /docs/_static/images/dataframe_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/dataframe_example.png 
-------------------------------------------------------------------------------- /examples/basic_example_frequent_visitors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_frequent_visitors.png -------------------------------------------------------------------------------- /docs/ref/ref.rst: -------------------------------------------------------------------------------- 1 | .. _reference: 2 | 3 | lala Reference manual 4 | ========================== 5 | 6 | 7 | .. autoclass:: lala.WebLogs 8 | :members: 9 | -------------------------------------------------------------------------------- /lala/__init__.py: -------------------------------------------------------------------------------- 1 | """ dna_sequencing_viewer/__init__.py """ 2 | 3 | # __all__ = [] 4 | 5 | from .conf import conf 6 | from .WebLogs import WebLogs 7 | from .version import __version__ 8 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | This directory contains the sources of the documentation. 
2 | 3 | To be able to compile the source, install the dependencies with 4 | :: 5 | sudo pip install sphinx sphinx_rtd_theme numpydoc sphinxcontrib-mermaid 6 | -------------------------------------------------------------------------------- /lala/conf.py: -------------------------------------------------------------------------------- 1 | import appdirs 2 | import os 3 | 4 | data_dir = appdirs.user_data_dir('lala', 'EGF') 5 | conf = { 6 | 'data_dir': data_dir, 7 | 'geolite_url': "http://geolite.maxmind.com/download/" 8 | "geoip/database/GeoLiteCity.dat.gz", 9 | 'geolite_path': os.path.join(data_dir, 'GeoLiteCity.dat') 10 | } 11 | -------------------------------------------------------------------------------- /docs/examples/report_example.rst: -------------------------------------------------------------------------------- 1 | .. _report_example: 2 | 3 | Report example 4 | --------------- 5 | 6 | An minimal report example: 7 | 8 | .. literalinclude:: ../../examples/report_example.py 9 | 10 | Output (`file link `_): 11 | 12 | .. 
image:: ../_static/images/report.jpeg 13 | :alt: [report] 14 | :align: center 15 | :width: 550px 16 | -------------------------------------------------------------------------------- /examples/report_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lala import WebLogs 3 | 4 | example_logs_path = os.path.join('data', 'example_logs.txt') 5 | template_path = os.path.join('data', 'example_template.pug') 6 | 7 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(example_logs_path) 8 | 9 | print ("Now identifying IP addresses") 10 | weblogs.identify_ips_domains() 11 | 12 | print ("Now writing the report") 13 | weblogs.write_report(template_path=template_path, target="report_example.pdf") 14 | -------------------------------------------------------------------------------- /docs/_static/css/main.css: -------------------------------------------------------------------------------- 1 | body, h1, h2, h3 { 2 | font-family: Raleway; 3 | } 4 | 5 | .wy-nav-content-wrap { 6 | background: none 7 | } 8 | 9 | .wy-nav-side { 10 | background-image: url("../images/pw_maze_dark.png"); 11 | } 12 | 13 | .wy-menu-vertical a { 14 | color: black 15 | } 16 | 17 | .wy-menu-vertical a:hover { 18 | background-color: #e7cfd4 19 | } 20 | 21 | .wy-side-nav-search, .wy-nav-top { 22 | background-color: #d0d0e7; 23 | background-image: url("../images/pw_maze_dark.png"); 24 | } 25 | 26 | .edgeLabel { 27 | background-color: #fcfcfc; 28 | } 29 | 30 | 31 | .section { 32 | opacity: 1.0 !important; 33 | } 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | *.tar.gz 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | __pycache__ 22 | 23 | # 
Sublime 24 | .sublime-project 25 | 26 | # Installer logs 27 | pip-log.txt 28 | 29 | # Unit test / coverage reports 30 | .coverage 31 | .tox 32 | nosetests.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Temp files 43 | 44 | *~ 45 | 46 | # Pipy codes 47 | 48 | .pypirc 49 | 50 | examples/reports 51 | examples/features/downloaded_data 52 | 53 | .cache 54 | -------------------------------------------------------------------------------- /docs/examples/basic_example.rst: -------------------------------------------------------------------------------- 1 | .. _basic_example: 2 | 3 | Basic example 4 | ------------- 5 | 6 | An example showcasing some Lala routines. 7 | 8 | .. literalinclude:: ../../examples/basic_example.py 9 | 10 | **Outputs:** 11 | 12 | .. image:: ../../examples/basic_example_piechart.png 13 | :alt: [piechart] 14 | :align: center 15 | :width: 300px 16 | 17 | .. image:: ../../examples/basic_example_timeline.png 18 | :alt: [piechart] 19 | :align: center 20 | :width: 550px 21 | 22 | .. image:: ../../examples/basic_example_worldmap.png 23 | :alt: [piechart] 24 | :align: center 25 | :width: 550px 26 | 27 | .. 
image:: ../../examples/basic_example_frequent_visitors.png 28 | :alt: [piechart] 29 | :align: center 30 | :width: 350px 31 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import ez_setup 2 | 3 | ez_setup.use_setuptools() 4 | 5 | from setuptools import setup, find_packages 6 | 7 | exec(open("lala/version.py").read()) # loads __version__ 8 | 9 | setup( 10 | name="python-lala", 11 | version=__version__, 12 | author="Zulko", 13 | description="Library of web access log analysis", 14 | long_description=open("README.rst").read(), 15 | long_description_content_type="text/x-rst", 16 | license="MIT", 17 | keywords="access log analysis website webservice stats", 18 | packages=find_packages(exclude="docs"), 19 | install_requires=[ 20 | "appdirs", 21 | "numpy", 22 | "matplotlib", 23 | "Pillow", 24 | "pygeoip", 25 | "pandas", 26 | "scipy", 27 | "proglog", 28 | "pdf_reports", 29 | ], 30 | ) 31 | -------------------------------------------------------------------------------- /examples/basic_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lala import WebLogs 3 | 4 | # LOAD ALL RECORDS TO ANALYSE AND AVAILABLE PRIMERS 5 | logs_path = os.path.join('data', 'example_logs.txt') 6 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(logs_path) 7 | 8 | # PLOT COUNTRIES PIE CHART 9 | ax, country_values = weblogs.plot_piechart('country_name') 10 | ax.figure.set_size_inches((5, 5)) 11 | ax.figure.savefig('basic_example_piechart.png', bbox_inches='tight') 12 | 13 | # PLOT COUNTRIES MAP 14 | ax = weblogs.plot_geo_positions() 15 | ax.figure.savefig('basic_example_worldmap.png', bbox_inches='tight') 16 | 17 | # PLOT UK CONNECTIONS TIMELINE 18 | ag_entries = weblogs[weblogs.country_name == 'Argentina'] 19 | ax = ag_entries.plot_timeline(bins_per_day=2) 20 | 
ax.figure.savefig('basic_example_timeline.png', bbox_inches='tight') 21 | 22 | # PLOT MOST FREQUENT VISITORS 23 | most_frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 24 | ax = weblogs.plot_most_frequent_visitors(n_visitors=5) 25 | ax.figure.savefig('basic_example_frequent_visitors.png', bbox_inches='tight') 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | # command to install dependencies. Credit where credit is due: 5 | # https://laszukdawid.com/2017/06/04/installing-cartopy-on-ubuntu-14-04-or-travis-ci/ 6 | before_install: 7 | - sudo apt-get -qq update 8 | - sudo apt-get install libproj-dev proj-bin proj-data libgeos-dev 9 | - sudo apt-get install -y python-pyproj 10 | - sudo apt-get install python-scipy 11 | - sudo apt-get install -y libc6 12 | - wget http://es.archive.ubuntu.com/ubuntu/pool/universe/p/proj/libproj9_4.9.2-2_amd64.deb 13 | - sudo dpkg -i libproj9_4.9.2-2_amd64.deb 14 | - wget http://es.archive.ubuntu.com/ubuntu/pool/universe/p/proj/libproj-dev_4.9.2-2_amd64.deb 15 | - sudo dpkg -i libproj-dev_4.9.2-2_amd64.deb 16 | install: 17 | - pip install coveralls pytest-cov pytest 18 | - pip install --no-binary shapely shapely 19 | - pip install cython 20 | - pip install cartopy 21 | - pip install -e . 
22 | # command to run tests 23 | script: 24 | - python -m pytest -v --cov lala --cov-report term-missing 25 | 26 | after_success: 27 | - coveralls 28 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | [OSI Approved License] 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2018 Edinburgh Genome Foundry 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | -------------------------------------------------------------------------------- /tests/test_basics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | from lala import WebLogs 5 | 6 | access_log_path = os.path.join('tests', 'data', "test_logs.txt") 7 | template_path = os.path.join('tests', 'data', "template.pug") 8 | 9 | def test_basics(tmpdir): 10 | 11 | # LOAD ALL RECORDS TO ANALYSE AND AVAILABLE PRIMERS 12 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(access_log_path) 13 | 14 | # PLOT COUNTRIES PIE CHART 15 | ax, country_values = weblogs.plot_piechart('country_name') 16 | 17 | # PLOT COUNTRIES MAP 18 | weblogs.plot_geo_positions() 19 | 20 | # PLOT UK CONNECTIONS TIMELINE 21 | ag_weblogs = weblogs[weblogs.country_name == 'Argentina'] 22 | ax = ag_weblogs.plot_timeline(bins_per_day=2) 23 | 24 | # COMPUTE THE VISITORS/VISITS 25 | visitors = weblogs.visitors_and_visits() 26 | assert len(visitors) == 88 27 | 28 | # visitors_locations = weblogs.visitors_locations() 29 | most_frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 30 | assert len(most_frequent_visitors) == 5 31 | weblogs.plot_most_frequent_visitors() 32 | 33 | sub_weblogs = weblogs[-50:] 34 | sub_weblogs.identify_ips_domains() 35 | filtered_weblogs = sub_weblogs.filter_by_text_search( 36 | terms=['googlebot', 'spider.yandex', 'baidu', 'msnbot'], 37 | not_in='domain' 38 | ) 39 | assert len(filtered_weblogs) == 50 40 | 41 | def test_template(tmpdir): 42 | # LOAD ALL RECORDS TO ANALYSE AND AVAILABLE PRIMERS 43 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(access_log_path) 44 | sub_weblogs = weblogs[-50:] 45 | sub_weblogs.identify_ips_domains() 46 | target_path = os.path.join(str(tmpdir), "output.pdf") 47 | sub_weblogs.write_report(template_path=template_path, target=target_path) 48 | -------------------------------------------------------------------------------- 
/tests/data/template.pug: -------------------------------------------------------------------------------- 1 | #sidebar: p Generated using Lala for Python, on {{ pdf_tools.now() }} 2 | 3 | h1(style="margin-top: 0") 30-day server logs analysis 4 | 5 | .ui.piled.segment(style="margin-top: 0") 6 | :markdown 7 | These are statistics from the logs of the [EGF-CUBA](http://cuba.genomefoundry.org/) 8 | website. While the website doest not use cookies or collect personal data or files, 9 | the Django server logs provide interesting website usage information. 10 | 11 | - var blacklist = ['googlebot', 'spider.yandex', 'baidu', 'msnbot'] 12 | - var weblogs = weblogs.filter_by_text_search(terms=blacklist, not_in='domain') 13 | - var requests = weblogs.filter_by_text_search(terms=['start/'], are_in='request') 14 | 15 | 16 | .ui.grid 17 | .statistics.two.wide.column 18 | .ui.statistic.tiny 19 | .value= weblogs.index.size 20 | .label Visits 21 | .ui.statistic.tiny 22 | .value= weblogs.IP.unique().size 23 | .label Unique visitors 24 | .ui.statistic.tiny 25 | .value= requests.index.size 26 | .label Requests 27 | .twelve.wide.column 28 | - var figure = weblogs.plot_geo_positions() 29 | img(src="{{ pdf_tools.figure_data(figure) }}") 30 | 31 | 32 | h3(style='margin-top: 0') Visitors per day 33 | 34 | - var figure = weblogs.plot_timeline(bins_per_day=1) 35 | img(src="{{ pdf_tools.figure_data(figure, (12, 1.5)) }}") 36 | 37 | 38 | .tables 39 | 40 | h3 Requests 41 | 42 | - var request_counts = requests.url.value_counts().to_frame() 43 | {{ pdf_tools.dataframe_to_html(request_counts, index=1, header=0) }} 44 | 45 | h3 Requests by country 46 | 47 | - var countries_counts = requests.country_name.value_counts().to_frame() 48 | {{ pdf_tools.dataframe_to_html(countries_counts, index=1, header=0) }} 49 | 50 | h3 Requests by city 51 | 52 | - var city_counts = requests.city.value_counts().to_frame() 53 | {{ pdf_tools.dataframe_to_html(city_counts, index=1, header=0) }} 54 | 55 | 56 | style. 
57 | h3 { 58 | margin-top: 0 !important; 59 | } 60 | img { 61 | max-width: 100% !important; 62 | } 63 | .ui.statistic { 64 | font-size: 0.7em !important; 65 | margin-bottom: 1.5em !important; 66 | display: block !important; 67 | margin: 0 auto; 68 | } 69 | .tables { 70 | column-count: 2; 71 | margin-top: 1em; 72 | } 73 | -------------------------------------------------------------------------------- /examples/data/example_template.pug: -------------------------------------------------------------------------------- 1 | #sidebar: p Generated using Lala for Python, on {{ pdf_tools.now() }} 2 | 3 | h1 30-day server logs analysis 4 | 5 | .ui.piled.segment 6 | :markdown 7 | These are anonymized statistics (fake IP addresses) from the logs of the 8 | [EGF-CUBA](http://cuba.genomefoundry.org/) website. 9 | While the website doest not use cookies or collect personal data or files, 10 | the Django server logs provide interesting website usage information. 11 | 12 | - var blacklist = ['googlebot', 'spider.yandex', 'baidu', 'msnbot'] 13 | - var weblogs = weblogs.filter_by_text_search(terms=blacklist, not_in='domain') 14 | - var requests = weblogs.filter_by_text_search(terms=['start/'], are_in='request') 15 | 16 | 17 | .ui.grid 18 | .statistics.two.wide.column 19 | .ui.statistic.tiny 20 | .value= weblogs.index.size 21 | .label Visits 22 | .ui.statistic.tiny 23 | .value= weblogs.IP.unique().size 24 | .label Unique visitors 25 | .ui.statistic.tiny 26 | .value= requests.index.size 27 | .label Requests 28 | .twelve.wide.column 29 | - var figure = weblogs.plot_geo_positions() 30 | img(src="{{ pdf_tools.figure_data(figure) }}") 31 | 32 | 33 | h3(style='margin-top: 0') Visitors per day 34 | 35 | - var figure = weblogs.plot_timeline(bins_per_day=1) 36 | img(src="{{ pdf_tools.figure_data(figure, (12, 1.5)) }}") 37 | 38 | 39 | .tables 40 | 41 | h3 Requests 42 | 43 | - var request_counts = requests.url.value_counts().to_frame() 44 | {{ pdf_tools.dataframe_to_html(request_counts, 
index=1, header=0) }} 45 | 46 | h3 Requests by country 47 | 48 | - var countries_counts = requests.country_name.value_counts().to_frame() 49 | {{ pdf_tools.dataframe_to_html(countries_counts, index=1, header=0) }} 50 | 51 | h3 Requests by provider 52 | 53 | - var city_counts = requests.domain.value_counts().to_frame() 54 | {{ pdf_tools.dataframe_to_html(city_counts, index=1, header=0) }} 55 | 56 | 57 | style. 58 | h3 { 59 | margin-top: 0 !important; 60 | } 61 | img { 62 | max-width: 100% !important; 63 | } 64 | .ui.statistic { 65 | font-size: 0.7em !important; 66 | margin-bottom: 1.5em !important; 67 | display: block !important; 68 | margin: 0 auto; 69 | } 70 | .tables { 71 | column-count: 2; 72 | margin-top: 1em; 73 | } 74 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/docs/_static/images/logo.png 2 | :width: 200 px 3 | :alt: alternate text 4 | :align: center 5 | 6 | | 7 | 8 | .. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/lala.svg?branch=master 9 | :target: https://travis-ci.org/Edinburgh-Genome-Foundry/lala 10 | :alt: Travis CI build status 11 | 12 | .. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/lala/badge.svg?branch=master 13 | :target: https://coveralls.io/github/Edinburgh-Genome-Foundry/lala?branch=master 14 | 15 | 16 | Lala is a Python library for access log analysis. It provides a set of methods to retrieve, parse and analyze access logs (only from NGINX for now), and makes it easy to plot geo-localization or time-series data. Think of it as a simpler, Python-automatable version of Google Analytics, to make reports like this: 17 | 18 | .. 
image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/docs/_static/images/report.jpeg 19 | :width: 550 px 20 | :alt: alternate text 21 | :align: center 22 | 23 | 24 | Usage 25 | ----- 26 | 27 | .. code:: python 28 | 29 | from lala import WebLogs 30 | weblogs, errored_lines = WebLogs.from_nginx_weblogs('access_logs.txt') 31 | 32 | Similarly, to fetch logs on a distant server (for which you have access keys) 33 | you would write: 34 | 35 | .. code:: python 36 | 37 | from lala import get_remote_file_content, WebLogs 38 | 39 | logs= lala.get_remote_file_content( 40 | host="cuba.genomefoundry.org", user='root', 41 | filename='/var/log/nginx_cuba/access.log' 42 | ) 43 | weblogs, errors = WebLogs.from_nginx_weblogs(logs.split('\n')) 44 | 45 | Now ``weblogs`` is a scpecial kind of `Pandas `_ dataframe where each row is one server access, with fields such as ``IP``, ``date``, ``referrer``, ``country_name``, etc. 46 | 47 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/docs/_static/images/dataframe_example.png 48 | :width: 800 px 49 | :alt: alternate text 50 | :align: center 51 | 52 | The web logs can therefore be analyzed using any of Pandas' built-in filtering and plotting functions. The ``WebLogs`` class also provides additional methods which are particularly useful to analyse web logs, for instance to plot pie-charts: 53 | 54 | .. code:: python 55 | 56 | ax, country_values = weblogs.plot_piechart('country_name') 57 | 58 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_piechart.png 59 | :width: 300 px 60 | :alt: alternate text 61 | :align: center 62 | 63 | Next we plot the location (cities) providing the most connexions: 64 | 65 | .. code:: python 66 | 67 | ax = weblogs.plot_geo_positions() 68 | 69 | .. 
image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_worldmap.png 70 | :width: 700 px 71 | :alt: alternate text 72 | :align: center 73 | 74 | We can also restrict the entries to the UK, and plot a timeline of connexions: 75 | 76 | .. code:: python 77 | 78 | uk_entries = weblogs[weblogs.country_name == 'United Kingdom'] 79 | ax = uk_entries.plot_timeline(bins_per_day=2) 80 | 81 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_timeline.png 82 | :width: 700 px 83 | :alt: alternate text 84 | :align: center 85 | 86 | Here is how to get the visitors a list of visitors and visits, sort out the most frequent visitors, find their locations, and plot it all: 87 | 88 | .. code:: python 89 | 90 | visitors = weblogs.visitors_and_visits() 91 | visitors_locations = weblogs.visitors_locations() 92 | frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 93 | ax = weblogs.plot_most_frequent_visitors(n_visitors=5) 94 | 95 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_frequent_visitors.png 96 | :width: 450 px 97 | :alt: alternate text 98 | :align: center 99 | 100 | Lala can do more, such as identifying the domain name of the visitors, which can be used to filter out the robots of search engines: 101 | 102 | 103 | .. code:: python 104 | 105 | weblogs.identify_ips_domains() 106 | filtered_entries = weblogs.filter_by_text_search( 107 | terms=['googlebot', 'spider.yandex', 'baidu', 'msnbot'], 108 | not_in='domain' 109 | ) 110 | 111 | Lala also plays nicely with the `PDF Reports `_ library to let you define report templates such as `this one `_ (written in Pug), and then generate `this PDF report `_ with the following code: 112 | 113 | .. 
code:: python 114 | 115 | weblogs.write_report(template_path="path/to/template.pug", 116 | target="report_example.pdf") 117 | 118 | Installation 119 | ------------- 120 | 121 | You can install lala through PIP 122 | 123 | .. code:: bash 124 | 125 | sudo pip install python-lala 126 | 127 | Alternatively, you can unzip the sources in a folder and type 128 | 129 | .. code:: bash 130 | 131 | sudo python setup.py install 132 | 133 | For plotting maps you will need Cartopy which is not always easy to install - it may depend on your system. If you are on Ubuntu 16+, first install the dependencies with: 134 | 135 | .. code:: bash 136 | 137 | sudo apt-get install libproj-dev proj-bin proj-data libgeos-dev 138 | sudo pip install cython 139 | 140 | License = MIT 141 | -------------- 142 | 143 | lala is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ under the MIT licence (Copyright 2018 Edinburgh Genome Foundry). 144 | 145 | Everyone is welcome to contribute! 146 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. image:: _static/images/logo.png 3 | :alt: [logo] 4 | :align: center 5 | :width: 200px 6 | 7 | Lala 8 | ---- 9 | 10 | .. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/lala.svg?branch=master 11 | :target: https://travis-ci.org/Edinburgh-Genome-Foundry/lala 12 | :alt: Travis CI build status 13 | 14 | .. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/lala/badge.svg?branch=master 15 | :target: https://coveralls.io/github/Edinburgh-Genome-Foundry/lala?branch=master 16 | 17 | 18 | 19 | Lala is a Python library for access log analysis. It provides a set of methods to retrieve, parse and analyze access logs (only from NGINX for now), and makes it easy to plot geo-localization or time-series data. 
Think of it as a simpler, Python-automatable version of Google Analytics, to make reports like this: 20 | 21 | .. image:: _static/images/report.jpeg 22 | :alt: [screenshot] 23 | :align: center 24 | :width: 550px 25 | 26 | Usage 27 | ----- 28 | 29 | .. code:: python 30 | 31 | from lala import WebLogs 32 | weblogs, errored_lines = WebLogs.from_nginx_weblogs('access_logs.txt') 33 | 34 | Similarly, to fetch logs on a distant server (for which you have access keys) 35 | you would write: 36 | 37 | .. code:: python 38 | 39 | from lala import get_remote_file_content, WebLogs 40 | 41 | logs= lala.get_remote_file_content( 42 | host="cuba.genomefoundry.org", user='root', 43 | filename='/var/log/nginx_cuba/access.log' 44 | ) 45 | weblogs, errors = WebLogs.from_nginx_weblogs(logs.split('\n')) 46 | 47 | Now ``weblogs`` is a scpecial kind of `Pandas `_ dataframe where each row is one server access, with fields such as ``IP``, ``date``, ``referrer``, ``country_name``, etc. 48 | 49 | .. raw:: html 50 | 51 |

52 | 53 |

54 | 55 | The web logs can therefore be analyzed using any of Pandas' built-in filtering and plotting functions. The ``WebLogs`` class also provides additional methods which are particularly useful to analyse web logs, for instance to plot pie-charts: 56 | 57 | .. code:: python 58 | 59 | ax, country_values = weblogs.plot_piechart('country_name') 60 | 61 | .. raw:: html 62 | 63 |

64 | 65 |

66 | 67 | Next we plot the location (cities) providing the most connexions: 68 | 69 | .. code:: python 70 | 71 | ax = weblogs.plot_geo_positions() 72 | 73 | .. raw:: html 74 | 75 |

76 | 77 |

78 | 79 | We can also restrict the entries to the UK, and plot a timeline of connexions: 80 | 81 | .. code:: python 82 | 83 | uk_entries = weblogs[weblogs.country_name == 'United Kingdom'] 84 | ax = uk_entries.plot_timeline(bins_per_day=2) 85 | 86 | .. raw:: html 87 | 88 |

89 | 90 |

91 | 92 | Here is how to get the visitors a list of visitors and visits, sort out the most frequent visitors, find their locations, and plot it all: 93 | 94 | .. code:: python 95 | 96 | visitors = weblogs.visitors_and_visits() 97 | visitors_locations = weblogs.visitors_locations() 98 | frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 99 | ax = weblogs.plot_most_frequent_visitors(n_visitors=5) 100 | 101 | .. raw:: html 102 | 103 |

104 | 105 |

106 | 107 | Lala can do more, such as identifying the domain name of the visitors, which can be used to filter out the robots of search engines: 108 | 109 | 110 | .. code:: python 111 | 112 | weblogs.identify_ips_domains() 113 | filtered_entries = weblogs.filter_by_text_search( 114 | terms=['googlebot', 'spider.yandex', 'baidu', 'msnbot'], 115 | not_in='domain' 116 | ) 117 | 118 | Lala also plays nicely with the `PDF Reports `_ library to let you define report templates such as `this one `_ (written in Pug), and then generate `this PDF report `_ with the following code: 119 | 120 | .. code:: python 121 | 122 | weblogs.write_report(template_path="path/to/template.pug", 123 | target="report_example.pdf") 124 | 125 | Installation 126 | ------------- 127 | 128 | You can install lala through PIP 129 | 130 | .. code:: 131 | 132 | sudo pip install python-lala 133 | 134 | Alternatively, you can unzip the sources in a folder and type 135 | 136 | .. code:: 137 | 138 | sudo python setup.py install 139 | 140 | For plotting maps you will need Cartopy which is not always easy to install - it may depend on your system. If you are on Ubuntu 16+, first install the dependencies with : 141 | 142 | .. code:: 143 | 144 | sudo apt-get install libproj-dev proj-bin proj-data libgeos-dev 145 | sudo pip install cython 146 | 147 | License = MIT 148 | -------------- 149 | 150 | lala is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ under the MIT licence (¢ Edinburg Genome Foundry). 151 | 152 | Everyone is welcome to contribute ! 153 | 154 | 155 | .. raw:: html 156 | 157 | 160 | 164 | 166 | 167 | 168 | 169 | 170 | .. toctree:: 171 | :hidden: 172 | :maxdepth: 3 173 | 174 | self 175 | 176 | .. toctree:: 177 | :hidden: 178 | :caption: Reference 179 | :maxdepth: 3 180 | 181 | ref/ref 182 | 183 | .. toctree:: 184 | :caption: Examples 185 | 186 | examples/basic_example 187 | examples/report_example 188 | 189 | .. 
_PYPI: https://pypi.python.org/pypi/lala 190 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = -E -a 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = ../../built_docs 9 | PDFBUILDDIR = /tmp 10 | PDF = ../../manual.pdf 11 | 12 | # User-friendly check for sphinx-build 13 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 14 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 15 | endif 16 | 17 | # Internal variables. 18 | PAPEROPT_a4 = -D latex_paper_size=a4 19 | PAPEROPT_letter = -D latex_paper_size=letter 20 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | # the i18n builder cannot share the environment and doctrees with the others 22 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
23 | 24 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 25 | 26 | help: 27 | @echo "Please use \`make ' where is one of" 28 | @echo " html to make standalone HTML files" 29 | @echo " dirhtml to make HTML files named index.html in directories" 30 | @echo " singlehtml to make a single large HTML file" 31 | @echo " pickle to make pickle files" 32 | @echo " json to make JSON files" 33 | @echo " htmlhelp to make HTML files and a HTML help project" 34 | @echo " qthelp to make HTML files and a qthelp project" 35 | @echo " devhelp to make HTML files and a Devhelp project" 36 | @echo " epub to make an epub" 37 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 38 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 39 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 40 | @echo " text to make text files" 41 | @echo " man to make manual pages" 42 | @echo " texinfo to make Texinfo files" 43 | @echo " info to make Texinfo files and run them through makeinfo" 44 | @echo " gettext to make PO message catalogs" 45 | @echo " changes to make an overview of all changed/added/deprecated items" 46 | @echo " xml to make Docutils-native XML files" 47 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 48 | @echo " linkcheck to check all external links for integrity" 49 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lala.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lala.qhc" 93 | 94 | devhelp: 95 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 96 | @echo 97 | @echo "Build finished." 98 | @echo "To view the help file:" 99 | @echo "# mkdir -p $$HOME/.local/share/devhelp/lala" 100 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lala" 101 | @echo "# devhelp" 102 | 103 | epub: 104 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 105 | @echo 106 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 107 | 108 | latex: 109 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 110 | @echo 111 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 112 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 113 | "(use \`make latexpdf' here to do that automatically)." 
114 | 115 | latexpdf: 116 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(PDFBUILDDIR)/latex 117 | @echo "Running LaTeX files through pdflatex..." 118 | $(MAKE) -C $(PDFBUILDDIR)/latex all-pdf 119 | cp $(PDFBUILDDIR)/latex/*.pdf $(PDF) 120 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 121 | 122 | latexpdfja: 123 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 124 | @echo "Running LaTeX files through platex and dvipdfmx..." 125 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 126 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 127 | 128 | text: 129 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 130 | @echo 131 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 132 | 133 | man: 134 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 135 | @echo 136 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 137 | 138 | texinfo: 139 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 140 | @echo 141 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 142 | @echo "Run \`make' in that directory to run these through makeinfo" \ 143 | "(use \`make info' here to do that automatically)." 144 | 145 | info: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo "Running Texinfo files through makeinfo..." 148 | make -C $(BUILDDIR)/texinfo info 149 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 150 | 151 | gettext: 152 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 153 | @echo 154 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 155 | 156 | changes: 157 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 158 | @echo 159 | @echo "The overview file is in $(BUILDDIR)/changes." 
160 | 161 | linkcheck: 162 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 163 | @echo 164 | @echo "Link check complete; look for any errors in the above output " \ 165 | "or in $(BUILDDIR)/linkcheck/output.txt." 166 | 167 | doctest: 168 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 169 | @echo "Testing of doctests in the sources finished, look at the " \ 170 | "results in $(BUILDDIR)/doctest/output.txt." 171 | 172 | xml: 173 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 174 | @echo 175 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 176 | 177 | pseudoxml: 178 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 179 | @echo 180 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 181 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. 
epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\lala.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\lala.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /ez_setup.py: -------------------------------------------------------------------------------- 1 | 2 | #!python 3 | """Bootstrap setuptools installation 4 | 5 | If you want to use setuptools in your package's setup.py, just include this 6 | file in the same directory with it, and add this to the top of your setup.py:: 7 | 8 | from ez_setup import use_setuptools 9 | use_setuptools() 10 | 11 | If you want to require a specific version of setuptools, set a download 12 | mirror, or use an alternate download directory, you can do so by supplying 13 | the appropriate options to ``use_setuptools()``. 14 | 15 | This file can also be run as a script to install or upgrade setuptools. 
16 | """ 17 | import os 18 | import shutil 19 | import sys 20 | import tempfile 21 | import tarfile 22 | import optparse 23 | import subprocess 24 | 25 | from distutils import log 26 | 27 | try: 28 | from site import USER_SITE 29 | except ImportError: 30 | USER_SITE = None 31 | 32 | DEFAULT_VERSION = "0.9.6" 33 | DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" 34 | 35 | def _python_cmd(*args): 36 | args = (sys.executable,) + args 37 | return subprocess.call(args) == 0 38 | 39 | def _install(tarball, install_args=()): 40 | # extracting the tarball 41 | tmpdir = tempfile.mkdtemp() 42 | log.warn('Extracting in %s', tmpdir) 43 | old_wd = os.getcwd() 44 | try: 45 | os.chdir(tmpdir) 46 | tar = tarfile.open(tarball) 47 | _extractall(tar) 48 | tar.close() 49 | 50 | # going in the directory 51 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 52 | os.chdir(subdir) 53 | log.warn('Now working in %s', subdir) 54 | 55 | # installing 56 | log.warn('Installing Setuptools') 57 | if not _python_cmd('setup.py', 'install', *install_args): 58 | log.warn('Something went wrong during the installation.') 59 | log.warn('See the error message above.') 60 | # exitcode will be 2 61 | return 2 62 | finally: 63 | os.chdir(old_wd) 64 | shutil.rmtree(tmpdir) 65 | 66 | 67 | def _build_egg(egg, tarball, to_dir): 68 | # extracting the tarball 69 | tmpdir = tempfile.mkdtemp() 70 | log.warn('Extracting in %s', tmpdir) 71 | old_wd = os.getcwd() 72 | try: 73 | os.chdir(tmpdir) 74 | tar = tarfile.open(tarball) 75 | _extractall(tar) 76 | tar.close() 77 | 78 | # going in the directory 79 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 80 | os.chdir(subdir) 81 | log.warn('Now working in %s', subdir) 82 | 83 | # building an egg 84 | log.warn('Building a Setuptools egg in %s', to_dir) 85 | _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) 86 | 87 | finally: 88 | os.chdir(old_wd) 89 | shutil.rmtree(tmpdir) 90 | # returning the result 91 | log.warn(egg) 92 | if 
not os.path.exists(egg): 93 | raise IOError('Could not build the egg.') 94 | 95 | 96 | def _do_download(version, download_base, to_dir, download_delay): 97 | egg = os.path.join(to_dir, 'setuptools-%s-py%d.%d.egg' 98 | % (version, sys.version_info[0], sys.version_info[1])) 99 | if not os.path.exists(egg): 100 | tarball = download_setuptools(version, download_base, 101 | to_dir, download_delay) 102 | _build_egg(egg, tarball, to_dir) 103 | sys.path.insert(0, egg) 104 | import setuptools 105 | setuptools.bootstrap_install_from = egg 106 | 107 | 108 | def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, 109 | to_dir=os.curdir, download_delay=15): 110 | # making sure we use the absolute path 111 | to_dir = os.path.abspath(to_dir) 112 | was_imported = 'pkg_resources' in sys.modules or \ 113 | 'setuptools' in sys.modules 114 | try: 115 | import pkg_resources 116 | except ImportError: 117 | return _do_download(version, download_base, to_dir, download_delay) 118 | try: 119 | pkg_resources.require("setuptools>=" + version) 120 | return 121 | except pkg_resources.VersionConflict: 122 | e = sys.exc_info()[1] 123 | if was_imported: 124 | sys.stderr.write( 125 | "The required version of setuptools (>=%s) is not available,\n" 126 | "and can't be installed while this script is running. Please\n" 127 | "install a more recent version first, using\n" 128 | "'easy_install -U setuptools'." 
129 | "\n\n(Currently using %r)\n" % (version, e.args[0])) 130 | sys.exit(2) 131 | else: 132 | del pkg_resources, sys.modules['pkg_resources'] # reload ok 133 | return _do_download(version, download_base, to_dir, 134 | download_delay) 135 | except pkg_resources.DistributionNotFound: 136 | return _do_download(version, download_base, to_dir, 137 | download_delay) 138 | 139 | 140 | def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, 141 | to_dir=os.curdir, delay=15): 142 | """Download setuptools from a specified location and return its filename 143 | 144 | `version` should be a valid setuptools version number that is available 145 | as an egg for download under the `download_base` URL (which should end 146 | with a '/'). `to_dir` is the directory where the egg will be downloaded. 147 | `delay` is the number of seconds to pause before an actual download 148 | attempt. 149 | """ 150 | # making sure we use the absolute path 151 | to_dir = os.path.abspath(to_dir) 152 | try: 153 | from urllib.request import urlopen 154 | except ImportError: 155 | from urllib2 import urlopen 156 | tgz_name = "setuptools-%s.tar.gz" % version 157 | url = download_base + tgz_name 158 | saveto = os.path.join(to_dir, tgz_name) 159 | src = dst = None 160 | if not os.path.exists(saveto): # Avoid repeated downloads 161 | try: 162 | log.warn("Downloading %s", url) 163 | src = urlopen(url) 164 | # Read/write all in one block, so we don't create a corrupt file 165 | # if the download is interrupted. 166 | data = src.read() 167 | dst = open(saveto, "wb") 168 | dst.write(data) 169 | finally: 170 | if src: 171 | src.close() 172 | if dst: 173 | dst.close() 174 | return os.path.realpath(saveto) 175 | 176 | 177 | def _extractall(self, path=".", members=None): 178 | """Extract all members from the archive to the current working 179 | directory and set owner, modification time and permissions on 180 | directories afterwards. `path' specifies a different directory 181 | to extract to. 
`members' is optional and must be a subset of the 182 | list returned by getmembers(). 183 | """ 184 | import copy 185 | import operator 186 | from tarfile import ExtractError 187 | directories = [] 188 | 189 | if members is None: 190 | members = self 191 | 192 | for tarinfo in members: 193 | if tarinfo.isdir(): 194 | # Extract directories with a safe mode. 195 | directories.append(tarinfo) 196 | tarinfo = copy.copy(tarinfo) 197 | tarinfo.mode = 448 # decimal for oct 0700 198 | self.extract(tarinfo, path) 199 | 200 | # Reverse sort directories. 201 | if sys.version_info < (2, 4): 202 | def sorter(dir1, dir2): 203 | return cmp(dir1.name, dir2.name) 204 | directories.sort(sorter) 205 | directories.reverse() 206 | else: 207 | directories.sort(key=operator.attrgetter('name'), reverse=True) 208 | 209 | # Set correct owner, mtime and filemode on directories. 210 | for tarinfo in directories: 211 | dirpath = os.path.join(path, tarinfo.name) 212 | try: 213 | self.chown(tarinfo, dirpath) 214 | self.utime(tarinfo, dirpath) 215 | self.chmod(tarinfo, dirpath) 216 | except ExtractError: 217 | e = sys.exc_info()[1] 218 | if self.errorlevel > 1: 219 | raise 220 | else: 221 | self._dbg(1, "tarfile: %s" % e) 222 | 223 | 224 | def _build_install_args(options): 225 | """ 226 | Build the arguments to 'python setup.py install' on the setuptools package 227 | """ 228 | install_args = [] 229 | if options.user_install: 230 | if sys.version_info < (2, 6): 231 | log.warn("--user requires Python 2.6 or later") 232 | raise SystemExit(1) 233 | install_args.append('--user') 234 | return install_args 235 | 236 | def _parse_args(): 237 | """ 238 | Parse the command line for options 239 | """ 240 | parser = optparse.OptionParser() 241 | parser.add_option( 242 | '--user', dest='user_install', action='store_true', default=False, 243 | help='install in user site package (requires Python 2.6 or later)') 244 | parser.add_option( 245 | '--download-base', dest='download_base', metavar="URL", 246 | 
default=DEFAULT_URL, 247 | help='alternative URL from where to download the setuptools package') 248 | options, args = parser.parse_args() 249 | # positional arguments are ignored 250 | return options 251 | 252 | def main(version=DEFAULT_VERSION): 253 | """Install or upgrade setuptools and EasyInstall""" 254 | options = _parse_args() 255 | tarball = download_setuptools(download_base=options.download_base) 256 | return _install(tarball, _build_install_args(options)) 257 | 258 | if __name__ == '__main__': 259 | sys.exit(main()) 260 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # lala documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Jul 13 14:47:48 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
28 | extensions = [ 29 | 'sphinx.ext.autodoc', 30 | 'sphinx.ext.todo', 31 | 'sphinx.ext.viewcode', 32 | 'numpydoc', 33 | 'sphinxcontrib.mermaid' 34 | ] 35 | numpydoc_show_class_members = False 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix of source filenames. 40 | source_suffix = ['.rst'] 41 | 42 | # The encoding of source files. 43 | #source_encoding = 'utf-8-sig' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'lala' 50 | copyright = u'2017, Edinburgh Genome Foundry' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = '0.1.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = '0.1.0' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all documents. 76 | #default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | #add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 
83 | #add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | #show_authors = False 88 | 89 | # A list of ignored prefixes for module index sorting. 90 | #modindex_common_prefix = [] 91 | 92 | # If true, keep warnings as "system message" paragraphs in the built documents. 93 | #keep_warnings = False 94 | 95 | 96 | # -- Options for HTML output --------------------------------------------------- 97 | 98 | # The theme to use for HTML and HTML Help pages. See the documentation for 99 | # a list of builtin themes. 100 | 101 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 102 | 103 | if not on_rtd: # only import and set the theme if we're building docs locally 104 | import sphinx_rtd_theme 105 | html_theme = 'sphinx_rtd_theme' 106 | html_theme_path = sphinx_rtd_theme.get_html_theme_path() 107 | def setup(app): 108 | app.add_stylesheet('css/main.css') 109 | else: 110 | html_context = { 111 | 'css_files': [ 112 | 'https://media.readthedocs.org/css/sphinx_rtd_theme.css', 113 | 'https://media.readthedocs.org/css/readthedocs-doc-embed.css', 114 | '_static/css/main.css', 115 | ], 116 | } 117 | #sys.path.append(os.path.abspath('_themes')) 118 | # Theme options are theme-specific and customize the look and feel of a theme 119 | # further. For a list of options available for each theme, see the 120 | # documentation. 121 | #html_theme_options = {} 122 | 123 | # Add any paths that contain custom themes here, relative to this directory. 124 | #html_theme_path = [] 125 | 126 | # The name for this set of Sphinx documents. If None, it defaults to 127 | # " v documentation". 128 | #html_title = None 129 | 130 | # A shorter title for the navigation bar. Default is the same as html_title. 131 | #html_short_title = None 132 | 133 | # The name of an image file (relative to this directory) to place at the top 134 | # of the sidebar. 
135 | # html_logo = '_static/images/logo.png' 136 | 137 | # The name of an image file (within the static path) to use as favicon of the 138 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 139 | # pixels large. 140 | #html_favicon = None 141 | 142 | # Add any paths that contain custom static files (such as style sheets) here, 143 | # relative to this directory. They are copied after the builtin static files, 144 | # so a file named "default.css" will overwrite the builtin "default.css". 145 | html_static_path = ['_static'] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 148 | # using the given strftime format. 149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names to 159 | # template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 175 | #html_show_sphinx = True 176 | 177 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 178 | #html_show_copyright = True 179 | 180 | # If true, an OpenSearch description file will be output, and all pages will 181 | # contain a tag referring to it. The value of this option must be the 182 | # base URL from which the finished HTML is served. 
183 | #html_use_opensearch = '' 184 | 185 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 186 | #html_file_suffix = None 187 | 188 | # Output file base name for HTML help builder. 189 | htmlhelp_basename = 'laladoc' 190 | 191 | 192 | # -- Options for LaTeX output -------------------------------------------------- 193 | 194 | latex_elements = { 195 | # The paper size ('letterpaper' or 'a4paper'). 196 | #'papersize': 'letterpaper', 197 | 198 | # The font size ('10pt', '11pt' or '12pt'). 199 | #'pointsize': '10pt', 200 | 201 | # Additional stuff for the LaTeX preamble. 202 | #'preamble': '', 203 | } 204 | 205 | # Grouping the document tree into LaTeX files. List of tuples 206 | # (source start file, target name, title, author, documentclass [howto/manual]). 207 | latex_documents = [ 208 | ('index', 'lala.tex', u'lala Documentation', 209 | u'Zulko', 'manual'), 210 | ] 211 | 212 | # The name of an image file (relative to this directory) to place at the top of 213 | # the title page. 214 | #latex_logo = None 215 | 216 | # For "manual" documents, if this is true, then toplevel headings are parts, 217 | # not chapters. 218 | #latex_use_parts = False 219 | 220 | # If true, show page references after internal links. 221 | #latex_show_pagerefs = False 222 | 223 | # If true, show URL addresses after external links. 224 | #latex_show_urls = False 225 | 226 | # Documents to append as an appendix to all manuals. 227 | #latex_appendices = [] 228 | 229 | # If false, no module index is generated. 230 | #latex_domain_indices = True 231 | 232 | 233 | # -- Options for manual page output -------------------------------------------- 234 | 235 | # One entry per manual page. List of tuples 236 | # (source start file, name, description, authors, manual section). 237 | man_pages = [ 238 | ('index', 'lala', u'PACKAGE_NAME Documentation', 239 | [u'Zulko'], 1) 240 | ] 241 | 242 | # If true, show URL addresses after external links. 
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
# NOTE(review): the description below is still the sphinx-quickstart
# placeholder text — consider filling it in.
texinfo_documents = [
    ('index', 'lala', u'lala Documentation',
     u'Zulko', 'lala', 'One line description of project.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# -- Options for Epub output ---------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = u'lala'
epub_author = u'Zulko'
epub_publisher = u'Zulko'
epub_copyright = u'2016, Zulko'

# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''

# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''

# A unique identification for the text.
#epub_uid = ''

# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()

# A sequence of (type, uri, title) tuples for the guide element of content.opf.
#epub_guide = ()

# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
300 | #epub_pre_files = [] 301 | 302 | # HTML files shat should be inserted after the pages created by sphinx. 303 | # The format is a list of tuples containing the path and title. 304 | #epub_post_files = [] 305 | 306 | # A list of files that should not be packed into the epub file. 307 | #epub_exclude_files = [] 308 | 309 | # The depth of the table of contents in toc.ncx. 310 | #epub_tocdepth = 3 311 | 312 | # Allow duplicate toc entries. 313 | #epub_tocdup = True 314 | 315 | # Fix unsupported image types using the PIL. 316 | #epub_fix_images = False 317 | 318 | # Scale large images. 319 | #epub_max_image_width = 0 320 | 321 | # If 'no', URL addresses will not be shown. 322 | #epub_show_urls = 'inline' 323 | 324 | # If false, no index is generated. 325 | #epub_use_index = True 326 | 327 | #autodoc_member_order = 'bysource' 328 | -------------------------------------------------------------------------------- /lala/WebLogs.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import re 4 | import time 5 | import subprocess as sp 6 | import urllib 7 | import os 8 | import gzip 9 | from io import BytesIO 10 | import socket 11 | 12 | import pygeoip 13 | import pandas 14 | import proglog 15 | from pdf_reports import pug_to_html, write_report 16 | 17 | from .conf import conf 18 | 19 | import numpy as np 20 | from matplotlib import cm 21 | import matplotlib.pyplot as plt 22 | from matplotlib.ticker import MaxNLocator 23 | 24 | try: 25 | import cartopy 26 | import cartopy.io.shapereader as shpreader 27 | import cartopy.crs as ccrs 28 | shpfilename = shpreader.natural_earth(resolution='110m', 29 | category='cultural', 30 | name='admin_0_countries') 31 | reader = shpreader.Reader(shpfilename) 32 | countries = list(reader.records()) 33 | name_to_geometry = { 34 | country.attributes[e]: country.geometry 35 | for country in countries 36 | for e in ('ADM0_A3', 'BRK_NAME') 37 | } 38 | name_to_extent = { 39 
| name: geometry.bounds 40 | for name, geometry in name_to_geometry.items() 41 | } 42 | CARTOPY_INSTALLED = True 43 | 44 | except ImportError: 45 | name_to_geometry = None 46 | name_to_extent = None 47 | cartopy = None 48 | ccrs = None 49 | CARTOPY_INSTALLED = False 50 | 51 | 52 | if not os.path.exists(conf['geolite_path']): 53 | response = urllib.request.urlopen(conf['geolite_url']) 54 | geolite_gz = response.read() 55 | geolite_bites = BytesIO(geolite_gz) 56 | with gzip.open(geolite_bites, 'rb') as f: 57 | geolite_content = f.read() 58 | if not os.path.exists(conf['data_dir']): 59 | os.makedirs(conf['data_dir']) 60 | with open(conf['geolite_path'], 'wb') as f: 61 | f.write(geolite_content) 62 | 63 | geoip = pygeoip.GeoIP(conf['geolite_path']) 64 | 65 | durations = { 66 | 'second': 1, 67 | 'minute': 60, 68 | 'hour': 60 * 60, 69 | 'day': 60 * 60 * 24, 70 | 'week': 60 * 60 * 24 * 7, 71 | 'month': 60 * 60 * 24 * 30, 72 | 'year': 60 * 60 * 24 * 365, 73 | } 74 | def time_of_last(num, duration): 75 | """Returns the EPOCH time (in seconds) of XX ago (relative to the present). 76 | 77 | Examples 78 | -------- 79 | 80 | >>> time_of_last(2, 'week') # => EPOCH time of two weeks ago 81 | >>> time_of_last(5, 'hour') # => EPOCH time of five hours ago 82 | """ 83 | return time.time() - num * durations[duration] 84 | 85 | def get_remote_file_content(filename='/var/log/nginx/access.log', 86 | host='localhost', user='root', decode='utf8', 87 | target=None): 88 | """ 89 | Parameters 90 | ---------- 91 | 92 | filename 93 | path to the file in the host machine 94 | 95 | host 96 | IP address or domain name of the host. 97 | 98 | user 99 | Username on the host. 100 | 101 | decode 102 | If not None, the file content received from the server will be 103 | decoded into a string using this format. 
104 | """ 105 | proc = sp.Popen(['ssh', '%s@%s' % (user, host), 'cat %s' % filename], 106 | stderr=sp.PIPE, stdout=sp.PIPE) 107 | out, err = proc.communicate() 108 | if len(err): 109 | raise IOError(err) 110 | if decode is not None: 111 | out = out.decode(decode) 112 | if target is not None: 113 | with open(target, "w") as f: 114 | f.write(out) 115 | return out 116 | 117 | def init_map(figsize=(12, 8), extent=(-150, 60, -25, 60)): 118 | """Initialize a world map with the given dimensions. 119 | 120 | ``figsize`` is the figure's size in inches. ``extent`` is the boundaries 121 | of the map, in its own PlateCarree coordinates. 122 | """ 123 | if not CARTOPY_INSTALLED: 124 | raise ImportError('This feature requires Cartopy installed.') 125 | ax = plt.axes(projection=cartopy.crs.PlateCarree()) 126 | ax.add_feature(cartopy.feature.LAND) 127 | ax.add_feature(cartopy.feature.OCEAN) 128 | ax.add_feature(cartopy.feature.COASTLINE) 129 | ax.add_feature(cartopy.feature.BORDERS, linestyle='-', alpha=.5) 130 | 131 | ax.set_extent(extent) 132 | ax.figure.set_size_inches(figsize) 133 | return ax 134 | 135 | class WebLogs(pandas.DataFrame): 136 | "Custom Pandas dataframe class for reading web logs." 137 | def __init__(self, *args, **kw): 138 | super(WebLogs, self).__init__(*args, **kw) 139 | 140 | @property 141 | def _constructor(self): 142 | return WebLogs 143 | 144 | @staticmethod 145 | def from_nginx_weblogs(filepath=None, log_lines=None): 146 | """Return a dataframe of access log entries, from lines of NGINX logs. 147 | 148 | The log_lines are a list of strings, each representing one access 149 | logged by NGINX. 
150 | """ 151 | if log_lines is None: 152 | with open(filepath, 'r') as f: 153 | log_lines = f.read().split("\n") 154 | regexpr = r'(.*) -(.*) - \[(.*)\] "(.*)" (\d+) (\d+) "(.*)" "(.*)"' 155 | regexpr = re.compile(regexpr) 156 | errored_lines = [] 157 | records = [] 158 | for i, line in enumerate(log_lines): 159 | match = re.match(regexpr, line) 160 | fields = ('IP', 'stuff', 'date', 'request', 'response', 'status', 161 | 'referrer', 'browser') 162 | if match is None: 163 | errored_lines.append(i) 164 | else: 165 | records.append(dict(zip(fields, match.groups()))) 166 | weblogs = WebLogs.from_records(records) 167 | weblogs['parsed_date'] = [ 168 | datetime.strptime(s, '%d/%b/%Y:%H:%M:%S %z') 169 | for s in weblogs['date'] 170 | ] 171 | weblogs['timestamp'] = [x.timestamp() 172 | for x in weblogs['parsed_date']] 173 | fields = ['country_name', 'city', 'country_code3', 'latitude', 174 | 'longitude'] 175 | d = {f: [] for f in fields} 176 | for ip in weblogs.IP: 177 | rec = geoip.record_by_addr(ip) 178 | if rec is None: 179 | rec = {field: None for field in fields} 180 | for field in fields: 181 | d[field].append(rec[field]) 182 | for field in fields: 183 | weblogs[field] = d[field] 184 | 185 | methods, urls, https = zip(*[ 186 | request.split() 187 | if len(request.split()) == 3 188 | else (None, None, None) 189 | for request in weblogs.request 190 | ]) 191 | for name, data in [('method', methods), 192 | ('url', urls), 193 | ('http', https)]: 194 | weblogs[name] = data 195 | 196 | return weblogs, errored_lines 197 | 198 | @staticmethod 199 | def from_weblogs_spreadsheet(filepath=None): 200 | if filepath.lower().endswith((".csv")): 201 | dataframe = pandas.read_csv(filepath) 202 | else: 203 | dataframe = pandas.read_excel(filepath) 204 | return WebLogs(dataframe) 205 | 206 | 207 | def identify_ips_domains(self, logger='bar', known_ips=None): 208 | """Add a `ip_owner` column to self.""" 209 | if isinstance(known_ips, pandas.DataFrame): 210 | known_ips = { 211 | row.IP: 
row.domain
                for i, row in known_ips.iterrows()
            }
        if known_ips is None:
            known_ips = {}
        if logger == 'bar':
            logger = proglog.TqdmProgressBarLogger()

        ips_domains = {}
        for ip in logger.iter_bar(ip=list(set(self.IP))):
            if ip in known_ips:
                ips_domains[ip] = known_ips[ip]
            else:
                try:
                    # NOTE(review): socket.getfqdn usually returns the input
                    # unchanged on failure rather than raising — the herror
                    # branch below may be dead code; confirm.
                    ips_domains[ip] = known_ips[ip] = socket.getfqdn(ip)
                except socket.herror:
                    ips_domains[ip] = 'Unknown'
        self.loc[:, 'domain'] = [ips_domains[ip] for ip in self.IP]
        return known_ips

    def blacklist_ips(self, ips_blacklist):
        """Return a new version of self minus the blacklisted ips."""
        ips_set = set(self.IP)
        blacklisted_ips = set([
            ip for ip in ips_set
            if ip in ips_blacklist
        ])
        return self[[
            ip not in blacklisted_ips
            for ip in self.IP
        ]]

    def entries_last(self, num, duration):
        """Returns the weblogs of the latest entries up to XX ago.

        ``duration`` must be a key of the module-level ``durations`` dict
        (singular form, e.g. 'hour', 'day', 'week').

        Examples
        --------

        >>> # Filter out all entries more than 1 hour old
        >>> last_hour_weblogs = self.entries_last(1, 'hour')
        >>> # Filter out all entries more than 5 days old
        >>> last_days_weblogs = self.entries_last(5, 'day')
        """
        return self[self.timestamp >= time_of_last(num, duration)]

    def filter_by_text_search(self, terms, are_in=None, not_in=None):
        """Return a filtered version of self based on searched terms.
258 | """ 259 | 260 | if not_in is not None: 261 | field = not_in 262 | def filtr(v): 263 | return (v is not None) and isinstance(v, str) and not any([ 264 | term in v for term in terms 265 | ]) 266 | else: 267 | field = are_in 268 | def filtr(v): 269 | return (v is not None) and isinstance(v, str) and any([ 270 | term in v for term in terms 271 | ]) 272 | field_dict = { 273 | val: filtr(val) 274 | for val in set(self[field]) 275 | } 276 | indices = [field_dict[v] for v in self[field]] 277 | return self[indices] 278 | 279 | def cluster_dates(self, max_interval=60): 280 | dates_intervals = [[self.parsed_date[0], self.parsed_date[0]]] 281 | for date in self.parsed_date[1:]: 282 | interval = (date - dates_intervals[-1][-1]).total_seconds() 283 | if interval < max_interval: 284 | dates_intervals[-1][-1] = date 285 | else: 286 | dates_intervals.append([date, date]) 287 | return dates_intervals 288 | 289 | def visitors_and_visits(self, max_visits_interval=60, per='IP'): 290 | return { 291 | ip: df.cluster_dates(max_interval=max_visits_interval) 292 | for ip, df in self.groupby(per) 293 | if ip is not None 294 | } 295 | 296 | def most_frequent_visitors(self, criterion='n_visits', n_visitors='all', 297 | max_visits_interval=60, per='IP'): 298 | visitors = self.visitors_and_visits( 299 | max_visits_interval=max_visits_interval, per=per) 300 | if n_visitors == 'all': 301 | n_visitors = len(visitors.keys()) 302 | 303 | criterion_function = { 304 | 'n_visits': lambda visits: len(visits), 305 | 'time_spent': lambda visits: sum([(v[1] - v[0]).total_seconds() 306 | for v in visits]) / 60.0 307 | }[criterion] 308 | 309 | return sorted([ 310 | (criterion_function(visits), visitor) 311 | for visitor, visits in visitors.items() 312 | ])[::-1][:n_visitors] 313 | 314 | def visitors_locations(self): 315 | return { 316 | ip: " ".join([ 317 | df.iloc[0].city if df.iloc[0].city else "", 318 | df.iloc[0].country_name if df.iloc[0].country_name else "" 319 | ]) 320 | for ip, df in 
self.groupby('IP')
        }


    def countries_colormap(self, mini='auto', maxi='auto', ax=None):
        """Plot a colormap of different countries, return the Matplotlib ax.

        Countries are shaded according to their number of entries (value
        counts of the ``country_name`` column).

        Parameters
        ----------

        mini, maxi
            Extreme values leading to red or white colors. Leave to auto to
            adjust this range to the observed country counts.

        ax
            A Matplotlib ax with a representation of the world. If None, one is
            created automatically
        """
        if not CARTOPY_INSTALLED:
            raise ImportError('This feature requires Cartopy installed.')
        country_values = self.country_name.value_counts()
        countries = country_values.index
        values = country_values.values
        if mini == 'auto':
            mini = values.min()
        if maxi == 'auto':
            maxi = values.max()
        # NOTE(review): divides by zero when maxi == mini (e.g. all counts
        # equal, or a single country) — confirm acceptable.
        values = (values - mini) / (maxi - mini)
        country_values = zip(countries, values)

        if ax is None:
            ax = init_map(figsize=(12, 8), extent=(-150, 60, -25, 60))
        for (country_name, value) in country_values:
            # Countries absent from the Natural Earth lookup are skipped.
            if country_name not in name_to_geometry:
                continue
            color = cm.YlOrBr(value)
            ax.add_geometries(name_to_geometry[country_name], ccrs.PlateCarree(),
                              facecolor=color)
        return ax


    def plot_geo_positions(self, ax=None, country_colors=True):
        """Plot circles on a map around positions of the entries in the access log.

        Parameters
        ----------

        ax
            Matplotlib ax with a representation of the world.

        country_colors
            If True, countries are first shaded by entry counts using
            countries_colormap.
371 | """ 372 | if not CARTOPY_INSTALLED: 373 | raise ImportError('This feature requires Cartopy installed.') 374 | if ax is None: 375 | ax = init_map(figsize=(12, 8), extent=(-150, 60, -25, 60)) 376 | if country_colors: 377 | self.countries_colormap(mini='auto', maxi='auto', ax=ax) 378 | 379 | counts = [ 380 | (len(dataframe_), ll) 381 | for (ll, dataframe_) in self.groupby(['longitude', 'latitude']) 382 | ] 383 | counts, xy = zip(*(sorted(counts)[::-1])) 384 | counts = 1.0 * np.array(counts) 385 | counts = np.maximum(5, 600 * counts / counts.max()) 386 | xx, yy = [list(e) for e in zip(*xy)] 387 | ax.scatter(xx, yy, c='w', s=counts, zorder=2000, linewidths=2, 388 | edgecolor='k', transform=ccrs.Geodetic()) 389 | return ax 390 | 391 | 392 | def plot_piechart(self, column, ax=None): 393 | """Plot circles on a map around positions of the entries in the access log. 394 | 395 | Parameters 396 | ---------- 397 | 398 | column 399 | name of the column to plot 400 | 401 | ax 402 | Matplotlib ax on which to plot the pie chart. If None, one is created 403 | automatically. 404 | """ 405 | count = self[column].value_counts() 406 | if ax is None: 407 | fig, ax = plt.subplots(1) 408 | ax = count.plot(kind='pie', ax=ax) 409 | ax.set_aspect('equal') 410 | ax.set_ylabel('') 411 | return ax, count 412 | 413 | 414 | def plot_timeline(self, bins_per_day=4, ax=None): 415 | """Plot a time profile of access. 416 | 417 | Parameters 418 | ---------- 419 | 420 | bins_per_day 421 | number of time points per day. 422 | 423 | ax 424 | Matplotlib ax on which to plot the profile. If None, one is created 425 | automatically. 
426 | """ 427 | mini, maxi = self['timestamp'].min(), self['timestamp'].max() 428 | bins = int(bins_per_day * (maxi - mini) / durations['day']) 429 | if ax is None: 430 | fig, ax = plt.subplots(1, figsize=(12, 3)) 431 | self['timestamp'].plot(kind='hist', bins=bins, alpha=0.6) 432 | x_ticks = ax.get_xticks() 433 | xlabels = [datetime.fromtimestamp(int(x)).strftime('%Y-%m-%d') 434 | for x in x_ticks] 435 | ax.set_xticklabels(xlabels, rotation=45) 436 | ax.set_xlim(mini, maxi) 437 | ax.set_ylabel('occurences') 438 | return ax 439 | 440 | 441 | 442 | def plot_most_frequent_visitors(self, plot_ips=True, n_visitors='all', 443 | criterion='n_visits'): 444 | visitors_locations = self.visitors_locations() 445 | most_frequent = self.most_frequent_visitors( 446 | criterion=criterion, n_visitors=n_visitors) 447 | label = { 448 | 'n_visits': 'Number of visits', 449 | 'time_spent': 'Time spent (mins)' 450 | }[criterion] 451 | fig, ax = plt.subplots(1) 452 | scores, visitors = zip(*most_frequent) 453 | if visitors_locations is not None: 454 | visitors = [ 455 | v + " - " + visitors_locations[v] 456 | for v in visitors 457 | ] 458 | ticks = list(range(len(scores)))[::-1] 459 | ax.bar(left=1, height=0.5, bottom=ticks, width=scores, 460 | tick_label=visitors if plot_ips else None, 461 | orientation='horizontal', alpha=0.6) 462 | 463 | # Hide the right and top spines 464 | ax.spines['right'].set_visible(False) 465 | ax.spines['top'].set_visible(False) 466 | 467 | # Only show ticks on the left and bottom spines 468 | ax.yaxis.set_ticks_position('left') 469 | ax.xaxis.set_ticks_position('bottom') 470 | ax.set_xlabel(label) 471 | ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 472 | if not plot_ips: 473 | ax.set_ylabel('Visitors') 474 | return ax 475 | 476 | def write_report(self, template_path=None, template_string=None, 477 | target=None, stylesheets=(), **context): 478 | html = pug_to_html(path=template_path, 479 | string=template_string, 480 | weblogs=self, **context) 481 | 
return write_report(html, target=target, extra_stylesheets=stylesheets) 482 | --------------------------------------------------------------------------------