├── lala ├── version.py ├── __init__.py ├── conf.py └── WebLogs.py ├── docs ├── makehtml.sh ├── _static │ ├── images │ │ ├── logo.png │ │ ├── report.jpeg │ │ ├── pw_maze_dark.png │ │ └── dataframe_example.png │ └── css │ │ └── main.css ├── ref │ └── ref.rst ├── README.md ├── examples │ ├── report_example.rst │ └── basic_example.rst ├── index.rst ├── Makefile ├── make.bat └── conf.py ├── MANIFEST.in ├── examples ├── report_example.pdf ├── basic_example_piechart.png ├── basic_example_timeline.png ├── basic_example_worldmap.png ├── basic_example_frequent_visitors.png ├── report_example.py ├── basic_example.py └── data │ └── example_template.pug ├── .gitignore ├── setup.py ├── .travis.yml ├── LICENCE.txt ├── tests ├── test_basics.py └── data │ └── template.pug ├── README.rst └── ez_setup.py /lala/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1.4" 2 | -------------------------------------------------------------------------------- /docs/makehtml.sh: -------------------------------------------------------------------------------- 1 | make html 2 | firefox ../../built_docs/html/index.html 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | recursive-include examples *.txt *.py 3 | include ez_setup.py 4 | -------------------------------------------------------------------------------- /docs/_static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/logo.png -------------------------------------------------------------------------------- /examples/report_example.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/report_example.pdf -------------------------------------------------------------------------------- /docs/_static/images/report.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/report.jpeg -------------------------------------------------------------------------------- /docs/_static/images/pw_maze_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/pw_maze_dark.png -------------------------------------------------------------------------------- /examples/basic_example_piechart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_piechart.png -------------------------------------------------------------------------------- /examples/basic_example_timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_timeline.png -------------------------------------------------------------------------------- /examples/basic_example_worldmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_worldmap.png -------------------------------------------------------------------------------- /docs/_static/images/dataframe_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/docs/_static/images/dataframe_example.png 
-------------------------------------------------------------------------------- /examples/basic_example_frequent_visitors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/HEAD/examples/basic_example_frequent_visitors.png -------------------------------------------------------------------------------- /docs/ref/ref.rst: -------------------------------------------------------------------------------- 1 | .. _reference: 2 | 3 | lala Reference manual 4 | ========================== 5 | 6 | 7 | .. autoclass:: lala.WebLogs 8 | :members: 9 | -------------------------------------------------------------------------------- /lala/__init__.py: -------------------------------------------------------------------------------- 1 | """ dna_sequencing_viewer/__init__.py """ 2 | 3 | # __all__ = [] 4 | 5 | from .conf import conf 6 | from .WebLogs import WebLogs 7 | from .version import __version__ 8 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | This directory contains the sources of the documentation. 
2 | 3 | To be able to compile the source, install the dependencies with 4 | :: 5 | sudo pip install sphinx sphinx_rtd_theme numpydoc sphinxcontrib-mermaid 6 | -------------------------------------------------------------------------------- /lala/conf.py: -------------------------------------------------------------------------------- 1 | import appdirs 2 | import os 3 | 4 | data_dir = appdirs.user_data_dir('lala', 'EGF') 5 | conf = { 6 | 'data_dir': data_dir, 7 | 'geolite_url': "http://geolite.maxmind.com/download/" 8 | "geoip/database/GeoLiteCity.dat.gz", 9 | 'geolite_path': os.path.join(data_dir, 'GeoLiteCity.dat') 10 | } 11 | -------------------------------------------------------------------------------- /docs/examples/report_example.rst: -------------------------------------------------------------------------------- 1 | .. _report_example: 2 | 3 | Report example 4 | --------------- 5 | 6 | An minimal report example: 7 | 8 | .. literalinclude:: ../../examples/report_example.py 9 | 10 | Output (`file link `_): 11 | 12 | .. 
image:: ../_static/images/report.jpeg 13 | :alt: [report] 14 | :align: center 15 | :width: 550px 16 | -------------------------------------------------------------------------------- /examples/report_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lala import WebLogs 3 | 4 | example_logs_path = os.path.join('data', 'example_logs.txt') 5 | template_path = os.path.join('data', 'example_template.pug') 6 | 7 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(example_logs_path) 8 | 9 | print ("Now identifying IP addresses") 10 | weblogs.identify_ips_domains() 11 | 12 | print ("Now writing the report") 13 | weblogs.write_report(template_path=template_path, target="report_example.pdf") 14 | -------------------------------------------------------------------------------- /docs/_static/css/main.css: -------------------------------------------------------------------------------- 1 | body, h1, h2, h3 { 2 | font-family: Raleway; 3 | } 4 | 5 | .wy-nav-content-wrap { 6 | background: none 7 | } 8 | 9 | .wy-nav-side { 10 | background-image: url("../images/pw_maze_dark.png"); 11 | } 12 | 13 | .wy-menu-vertical a { 14 | color: black 15 | } 16 | 17 | .wy-menu-vertical a:hover { 18 | background-color: #e7cfd4 19 | } 20 | 21 | .wy-side-nav-search, .wy-nav-top { 22 | background-color: #d0d0e7; 23 | background-image: url("../images/pw_maze_dark.png"); 24 | } 25 | 26 | .edgeLabel { 27 | background-color: #fcfcfc; 28 | } 29 | 30 | 31 | .section { 32 | opacity: 1.0 !important; 33 | } 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | *.tar.gz 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | __pycache__ 22 | 23 | # 
Sublime 24 | .sublime-project 25 | 26 | # Installer logs 27 | pip-log.txt 28 | 29 | # Unit test / coverage reports 30 | .coverage 31 | .tox 32 | nosetests.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Temp files 43 | 44 | *~ 45 | 46 | # Pipy codes 47 | 48 | .pypirc 49 | 50 | examples/reports 51 | examples/features/downloaded_data 52 | 53 | .cache 54 | -------------------------------------------------------------------------------- /docs/examples/basic_example.rst: -------------------------------------------------------------------------------- 1 | .. _basic_example: 2 | 3 | Basic example 4 | ------------- 5 | 6 | An example showcasing some Lala routines. 7 | 8 | .. literalinclude:: ../../examples/basic_example.py 9 | 10 | **Outputs:** 11 | 12 | .. image:: ../../examples/basic_example_piechart.png 13 | :alt: [piechart] 14 | :align: center 15 | :width: 300px 16 | 17 | .. image:: ../../examples/basic_example_timeline.png 18 | :alt: [piechart] 19 | :align: center 20 | :width: 550px 21 | 22 | .. image:: ../../examples/basic_example_worldmap.png 23 | :alt: [piechart] 24 | :align: center 25 | :width: 550px 26 | 27 | .. 
image:: ../../examples/basic_example_frequent_visitors.png 28 | :alt: [piechart] 29 | :align: center 30 | :width: 350px 31 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import ez_setup 2 | 3 | ez_setup.use_setuptools() 4 | 5 | from setuptools import setup, find_packages 6 | 7 | exec(open("lala/version.py").read()) # loads __version__ 8 | 9 | setup( 10 | name="python-lala", 11 | version=__version__, 12 | author="Zulko", 13 | description="Library of web access log analysis", 14 | long_description=open("README.rst").read(), 15 | long_description_content_type="text/x-rst", 16 | license="MIT", 17 | keywords="access log analysis website webservice stats", 18 | packages=find_packages(exclude="docs"), 19 | install_requires=[ 20 | "appdirs", 21 | "numpy", 22 | "matplotlib", 23 | "Pillow", 24 | "pygeoip", 25 | "pandas", 26 | "scipy", 27 | "proglog", 28 | "pdf_reports", 29 | ], 30 | ) 31 | -------------------------------------------------------------------------------- /examples/basic_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lala import WebLogs 3 | 4 | # LOAD ALL RECORDS TO ANALYSE AND AVAILABLE PRIMERS 5 | logs_path = os.path.join('data', 'example_logs.txt') 6 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(logs_path) 7 | 8 | # PLOT COUNTRIES PIE CHART 9 | ax, country_values = weblogs.plot_piechart('country_name') 10 | ax.figure.set_size_inches((5, 5)) 11 | ax.figure.savefig('basic_example_piechart.png', bbox_inches='tight') 12 | 13 | # PLOT COUNTRIES MAP 14 | ax = weblogs.plot_geo_positions() 15 | ax.figure.savefig('basic_example_worldmap.png', bbox_inches='tight') 16 | 17 | # PLOT UK CONNECTIONS TIMELINE 18 | ag_entries = weblogs[weblogs.country_name == 'Argentina'] 19 | ax = ag_entries.plot_timeline(bins_per_day=2) 20 | 
ax.figure.savefig('basic_example_timeline.png', bbox_inches='tight') 21 | 22 | # PLOT MOST FREQUENT VISITORS 23 | most_frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 24 | ax = weblogs.plot_most_frequent_visitors(n_visitors=5) 25 | ax.figure.savefig('basic_example_frequent_visitors.png', bbox_inches='tight') 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | # command to install dependencies. Credit where credit is due: 5 | # https://laszukdawid.com/2017/06/04/installing-cartopy-on-ubuntu-14-04-or-travis-ci/ 6 | before_install: 7 | - sudo apt-get -qq update 8 | - sudo apt-get install libproj-dev proj-bin proj-data libgeos-dev 9 | - sudo apt-get install -y python-pyproj 10 | - sudo apt-get install python-scipy 11 | - sudo apt-get install -y libc6 12 | - wget http://es.archive.ubuntu.com/ubuntu/pool/universe/p/proj/libproj9_4.9.2-2_amd64.deb 13 | - sudo dpkg -i libproj9_4.9.2-2_amd64.deb 14 | - wget http://es.archive.ubuntu.com/ubuntu/pool/universe/p/proj/libproj-dev_4.9.2-2_amd64.deb 15 | - sudo dpkg -i libproj-dev_4.9.2-2_amd64.deb 16 | install: 17 | - pip install coveralls pytest-cov pytest 18 | - pip install --no-binary shapely shapely 19 | - pip install cython 20 | - pip install cartopy 21 | - pip install -e . 
22 | # command to run tests 23 | script: 24 | - python -m pytest -v --cov lala --cov-report term-missing 25 | 26 | after_success: 27 | - coveralls 28 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | [OSI Approved License] 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2018 Edinburgh Genome Foundry 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | -------------------------------------------------------------------------------- /tests/test_basics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import matplotlib 3 | matplotlib.use("Agg") 4 | from lala import WebLogs 5 | 6 | access_log_path = os.path.join('tests', 'data', "test_logs.txt") 7 | template_path = os.path.join('tests', 'data', "template.pug") 8 | 9 | def test_basics(tmpdir): 10 | 11 | # LOAD ALL RECORDS TO ANALYSE AND AVAILABLE PRIMERS 12 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(access_log_path) 13 | 14 | # PLOT COUNTRIES PIE CHART 15 | ax, country_values = weblogs.plot_piechart('country_name') 16 | 17 | # PLOT COUNTRIES MAP 18 | weblogs.plot_geo_positions() 19 | 20 | # PLOT UK CONNECTIONS TIMELINE 21 | ag_weblogs = weblogs[weblogs.country_name == 'Argentina'] 22 | ax = ag_weblogs.plot_timeline(bins_per_day=2) 23 | 24 | # COMPUTE THE VISITORS/VISITS 25 | visitors = weblogs.visitors_and_visits() 26 | assert len(visitors) == 88 27 | 28 | # visitors_locations = weblogs.visitors_locations() 29 | most_frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 30 | assert len(most_frequent_visitors) == 5 31 | weblogs.plot_most_frequent_visitors() 32 | 33 | sub_weblogs = weblogs[-50:] 34 | sub_weblogs.identify_ips_domains() 35 | filtered_weblogs = sub_weblogs.filter_by_text_search( 36 | terms=['googlebot', 'spider.yandex', 'baidu', 'msnbot'], 37 | not_in='domain' 38 | ) 39 | assert len(filtered_weblogs) == 50 40 | 41 | def test_template(tmpdir): 42 | # LOAD ALL RECORDS TO ANALYSE AND AVAILABLE PRIMERS 43 | weblogs, errored_lines = WebLogs.from_nginx_weblogs(access_log_path) 44 | sub_weblogs = weblogs[-50:] 45 | sub_weblogs.identify_ips_domains() 46 | target_path = os.path.join(str(tmpdir), "output.pdf") 47 | sub_weblogs.write_report(template_path=template_path, target=target_path) 48 | -------------------------------------------------------------------------------- 
/tests/data/template.pug: -------------------------------------------------------------------------------- 1 | #sidebar: p Generated using Lala for Python, on {{ pdf_tools.now() }} 2 | 3 | h1(style="margin-top: 0") 30-day server logs analysis 4 | 5 | .ui.piled.segment(style="margin-top: 0") 6 | :markdown 7 | These are statistics from the logs of the [EGF-CUBA](http://cuba.genomefoundry.org/) 8 | website. While the website doest not use cookies or collect personal data or files, 9 | the Django server logs provide interesting website usage information. 10 | 11 | - var blacklist = ['googlebot', 'spider.yandex', 'baidu', 'msnbot'] 12 | - var weblogs = weblogs.filter_by_text_search(terms=blacklist, not_in='domain') 13 | - var requests = weblogs.filter_by_text_search(terms=['start/'], are_in='request') 14 | 15 | 16 | .ui.grid 17 | .statistics.two.wide.column 18 | .ui.statistic.tiny 19 | .value= weblogs.index.size 20 | .label Visits 21 | .ui.statistic.tiny 22 | .value= weblogs.IP.unique().size 23 | .label Unique visitors 24 | .ui.statistic.tiny 25 | .value= requests.index.size 26 | .label Requests 27 | .twelve.wide.column 28 | - var figure = weblogs.plot_geo_positions() 29 | img(src="{{ pdf_tools.figure_data(figure) }}") 30 | 31 | 32 | h3(style='margin-top: 0') Visitors per day 33 | 34 | - var figure = weblogs.plot_timeline(bins_per_day=1) 35 | img(src="{{ pdf_tools.figure_data(figure, (12, 1.5)) }}") 36 | 37 | 38 | .tables 39 | 40 | h3 Requests 41 | 42 | - var request_counts = requests.url.value_counts().to_frame() 43 | {{ pdf_tools.dataframe_to_html(request_counts, index=1, header=0) }} 44 | 45 | h3 Requests by country 46 | 47 | - var countries_counts = requests.country_name.value_counts().to_frame() 48 | {{ pdf_tools.dataframe_to_html(countries_counts, index=1, header=0) }} 49 | 50 | h3 Requests by city 51 | 52 | - var city_counts = requests.city.value_counts().to_frame() 53 | {{ pdf_tools.dataframe_to_html(city_counts, index=1, header=0) }} 54 | 55 | 56 | style. 
57 | h3 { 58 | margin-top: 0 !important; 59 | } 60 | img { 61 | max-width: 100% !important; 62 | } 63 | .ui.statistic { 64 | font-size: 0.7em !important; 65 | margin-bottom: 1.5em !important; 66 | display: block !important; 67 | margin: 0 auto; 68 | } 69 | .tables { 70 | column-count: 2; 71 | margin-top: 1em; 72 | } 73 | -------------------------------------------------------------------------------- /examples/data/example_template.pug: -------------------------------------------------------------------------------- 1 | #sidebar: p Generated using Lala for Python, on {{ pdf_tools.now() }} 2 | 3 | h1 30-day server logs analysis 4 | 5 | .ui.piled.segment 6 | :markdown 7 | These are anonymized statistics (fake IP addresses) from the logs of the 8 | [EGF-CUBA](http://cuba.genomefoundry.org/) website. 9 | While the website doest not use cookies or collect personal data or files, 10 | the Django server logs provide interesting website usage information. 11 | 12 | - var blacklist = ['googlebot', 'spider.yandex', 'baidu', 'msnbot'] 13 | - var weblogs = weblogs.filter_by_text_search(terms=blacklist, not_in='domain') 14 | - var requests = weblogs.filter_by_text_search(terms=['start/'], are_in='request') 15 | 16 | 17 | .ui.grid 18 | .statistics.two.wide.column 19 | .ui.statistic.tiny 20 | .value= weblogs.index.size 21 | .label Visits 22 | .ui.statistic.tiny 23 | .value= weblogs.IP.unique().size 24 | .label Unique visitors 25 | .ui.statistic.tiny 26 | .value= requests.index.size 27 | .label Requests 28 | .twelve.wide.column 29 | - var figure = weblogs.plot_geo_positions() 30 | img(src="{{ pdf_tools.figure_data(figure) }}") 31 | 32 | 33 | h3(style='margin-top: 0') Visitors per day 34 | 35 | - var figure = weblogs.plot_timeline(bins_per_day=1) 36 | img(src="{{ pdf_tools.figure_data(figure, (12, 1.5)) }}") 37 | 38 | 39 | .tables 40 | 41 | h3 Requests 42 | 43 | - var request_counts = requests.url.value_counts().to_frame() 44 | {{ pdf_tools.dataframe_to_html(request_counts, 
index=1, header=0) }} 45 | 46 | h3 Requests by country 47 | 48 | - var countries_counts = requests.country_name.value_counts().to_frame() 49 | {{ pdf_tools.dataframe_to_html(countries_counts, index=1, header=0) }} 50 | 51 | h3 Requests by provider 52 | 53 | - var city_counts = requests.domain.value_counts().to_frame() 54 | {{ pdf_tools.dataframe_to_html(city_counts, index=1, header=0) }} 55 | 56 | 57 | style. 58 | h3 { 59 | margin-top: 0 !important; 60 | } 61 | img { 62 | max-width: 100% !important; 63 | } 64 | .ui.statistic { 65 | font-size: 0.7em !important; 66 | margin-bottom: 1.5em !important; 67 | display: block !important; 68 | margin: 0 auto; 69 | } 70 | .tables { 71 | column-count: 2; 72 | margin-top: 1em; 73 | } 74 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/docs/_static/images/logo.png 2 | :width: 200 px 3 | :alt: alternate text 4 | :align: center 5 | 6 | | 7 | 8 | .. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/lala.svg?branch=master 9 | :target: https://travis-ci.org/Edinburgh-Genome-Foundry/lala 10 | :alt: Travis CI build status 11 | 12 | .. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/lala/badge.svg?branch=master 13 | :target: https://coveralls.io/github/Edinburgh-Genome-Foundry/lala?branch=master 14 | 15 | 16 | Lala is a Python library for access log analysis. It provides a set of methods to retrieve, parse and analyze access logs (only from NGINX for now), and makes it easy to plot geo-localization or time-series data. Think of it as a simpler, Python-automatable version of Google Analytics, to make reports like this: 17 | 18 | .. 
image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/docs/_static/images/report.jpeg 19 | :width: 550 px 20 | :alt: alternate text 21 | :align: center 22 | 23 | 24 | Usage 25 | ----- 26 | 27 | .. code:: python 28 | 29 | from lala import WebLogs 30 | weblogs, errored_lines = WebLogs.from_nginx_weblogs('access_logs.txt') 31 | 32 | Similarly, to fetch logs on a distant server (for which you have access keys) 33 | you would write: 34 | 35 | .. code:: python 36 | 37 | from lala import get_remote_file_content, WebLogs 38 | 39 | logs= lala.get_remote_file_content( 40 | host="cuba.genomefoundry.org", user='root', 41 | filename='/var/log/nginx_cuba/access.log' 42 | ) 43 | weblogs, errors = WebLogs.from_nginx_weblogs(logs.split('\n')) 44 | 45 | Now ``weblogs`` is a scpecial kind of `Pandas `_ dataframe where each row is one server access, with fields such as ``IP``, ``date``, ``referrer``, ``country_name``, etc. 46 | 47 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/docs/_static/images/dataframe_example.png 48 | :width: 800 px 49 | :alt: alternate text 50 | :align: center 51 | 52 | The web logs can therefore be analyzed using any of Pandas' built-in filtering and plotting functions. The ``WebLogs`` class also provides additional methods which are particularly useful to analyse web logs, for instance to plot pie-charts: 53 | 54 | .. code:: python 55 | 56 | ax, country_values = weblogs.plot_piechart('country_name') 57 | 58 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_piechart.png 59 | :width: 300 px 60 | :alt: alternate text 61 | :align: center 62 | 63 | Next we plot the location (cities) providing the most connexions: 64 | 65 | .. code:: python 66 | 67 | ax = weblogs.plot_geo_positions() 68 | 69 | .. 
image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_worldmap.png 70 | :width: 700 px 71 | :alt: alternate text 72 | :align: center 73 | 74 | We can also restrict the entries to the UK, and plot a timeline of connexions: 75 | 76 | .. code:: python 77 | 78 | uk_entries = weblogs[weblogs.country_name == 'United Kingdom'] 79 | ax = uk_entries.plot_timeline(bins_per_day=2) 80 | 81 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_timeline.png 82 | :width: 700 px 83 | :alt: alternate text 84 | :align: center 85 | 86 | Here is how to get the visitors a list of visitors and visits, sort out the most frequent visitors, find their locations, and plot it all: 87 | 88 | .. code:: python 89 | 90 | visitors = weblogs.visitors_and_visits() 91 | visitors_locations = weblogs.visitors_locations() 92 | frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 93 | ax = weblogs.plot_most_frequent_visitors(n_visitors=5) 94 | 95 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/lala/master/examples/basic_example_frequent_visitors.png 96 | :width: 450 px 97 | :alt: alternate text 98 | :align: center 99 | 100 | Lala can do more, such as identifying the domain name of the visitors, which can be used to filter out the robots of search engines: 101 | 102 | 103 | .. code:: python 104 | 105 | weblogs.identify_ips_domains() 106 | filtered_entries = weblogs.filter_by_text_search( 107 | terms=['googlebot', 'spider.yandex', 'baidu', 'msnbot'], 108 | not_in='domain' 109 | ) 110 | 111 | Lala also plays nicely with the `PDF Reports `_ library to let you define report templates such as `this one `_ (written in Pug), and then generate `this PDF report `_ with the following code: 112 | 113 | .. 
code:: python 114 | 115 | weblogs.write_report(template_path="path/to/template.pug", 116 | target="report_example.pdf") 117 | 118 | Installation 119 | ------------- 120 | 121 | You can install lala through PIP 122 | 123 | .. code:: bash 124 | 125 | sudo pip install python-lala 126 | 127 | Alternatively, you can unzip the sources in a folder and type 128 | 129 | .. code:: bash 130 | 131 | sudo python setup.py install 132 | 133 | For plotting maps you will need Cartopy which is not always easy to install - it may depend on your system. If you are on Ubuntu 16+, first install the dependencies with: 134 | 135 | .. code:: bash 136 | 137 | sudo apt-get install libproj-dev proj-bin proj-data libgeos-dev 138 | sudo pip install cython 139 | 140 | License = MIT 141 | -------------- 142 | 143 | lala is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ under the MIT licence (Copyright 2018 Edinburgh Genome Foundry). 144 | 145 | Everyone is welcome to contribute! 146 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. image:: _static/images/logo.png 3 | :alt: [logo] 4 | :align: center 5 | :width: 200px 6 | 7 | Lala 8 | ---- 9 | 10 | .. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/lala.svg?branch=master 11 | :target: https://travis-ci.org/Edinburgh-Genome-Foundry/lala 12 | :alt: Travis CI build status 13 | 14 | .. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/lala/badge.svg?branch=master 15 | :target: https://coveralls.io/github/Edinburgh-Genome-Foundry/lala?branch=master 16 | 17 | 18 | 19 | Lala is a Python library for access log analysis. It provides a set of methods to retrieve, parse and analyze access logs (only from NGINX for now), and makes it easy to plot geo-localization or time-series data. 
Think of it as a simpler, Python-automatable version of Google Analytics, to make reports like this: 20 | 21 | .. image:: _static/images/report.jpeg 22 | :alt: [screenshot] 23 | :align: center 24 | :width: 550px 25 | 26 | Usage 27 | ----- 28 | 29 | .. code:: python 30 | 31 | from lala import WebLogs 32 | weblogs, errored_lines = WebLogs.from_nginx_weblogs('access_logs.txt') 33 | 34 | Similarly, to fetch logs on a distant server (for which you have access keys) 35 | you would write: 36 | 37 | .. code:: python 38 | 39 | from lala import get_remote_file_content, WebLogs 40 | 41 | logs= lala.get_remote_file_content( 42 | host="cuba.genomefoundry.org", user='root', 43 | filename='/var/log/nginx_cuba/access.log' 44 | ) 45 | weblogs, errors = WebLogs.from_nginx_weblogs(logs.split('\n')) 46 | 47 | Now ``weblogs`` is a scpecial kind of `Pandas `_ dataframe where each row is one server access, with fields such as ``IP``, ``date``, ``referrer``, ``country_name``, etc. 48 | 49 | .. raw:: html 50 | 51 |

52 | 53 |

54 | 55 | The web logs can therefore be analyzed using any of Pandas' built-in filtering and plotting functions. The ``WebLogs`` class also provides additional methods which are particularly useful to analyse web logs, for instance to plot pie-charts: 56 | 57 | .. code:: python 58 | 59 | ax, country_values = weblogs.plot_piechart('country_name') 60 | 61 | .. raw:: html 62 | 63 |

64 | 65 |

66 | 67 | Next we plot the location (cities) providing the most connexions: 68 | 69 | .. code:: python 70 | 71 | ax = weblogs.plot_geo_positions() 72 | 73 | .. raw:: html 74 | 75 |

76 | 77 |

78 | 79 | We can also restrict the entries to the UK, and plot a timeline of connexions: 80 | 81 | .. code:: python 82 | 83 | uk_entries = weblogs[weblogs.country_name == 'United Kingdom'] 84 | ax = uk_entries.plot_timeline(bins_per_day=2) 85 | 86 | .. raw:: html 87 | 88 |

89 | 90 |

91 | 92 | Here is how to get the visitors a list of visitors and visits, sort out the most frequent visitors, find their locations, and plot it all: 93 | 94 | .. code:: python 95 | 96 | visitors = weblogs.visitors_and_visits() 97 | visitors_locations = weblogs.visitors_locations() 98 | frequent_visitors = weblogs.most_frequent_visitors(n_visitors=5) 99 | ax = weblogs.plot_most_frequent_visitors(n_visitors=5) 100 | 101 | .. raw:: html 102 | 103 |

104 | 105 |

106 | 107 | Lala can do more, such as identifying the domain name of the visitors, which can be used to filter out the robots of search engines: 108 | 109 | 110 | .. code:: python 111 | 112 | weblogs.identify_ips_domains() 113 | filtered_entries = weblogs.filter_by_text_search( 114 | terms=['googlebot', 'spider.yandex', 'baidu', 'msnbot'], 115 | not_in='domain' 116 | ) 117 | 118 | Lala also plays nicely with the `PDF Reports `_ library to let you define report templates such as `this one `_ (written in Pug), and then generate `this PDF report `_ with the following code: 119 | 120 | .. code:: python 121 | 122 | weblogs.write_report(template_path="path/to/template.pug", 123 | target="report_example.pdf") 124 | 125 | Installation 126 | ------------- 127 | 128 | You can install lala through PIP 129 | 130 | .. code:: 131 | 132 | sudo pip install python-lala 133 | 134 | Alternatively, you can unzip the sources in a folder and type 135 | 136 | .. code:: 137 | 138 | sudo python setup.py install 139 | 140 | For plotting maps you will need Cartopy which is not always easy to install - it may depend on your system. If you are on Ubuntu 16+, first install the dependencies with : 141 | 142 | .. code:: 143 | 144 | sudo apt-get install libproj-dev proj-bin proj-data libgeos-dev 145 | sudo pip install cython 146 | 147 | License = MIT 148 | -------------- 149 | 150 | lala is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ under the MIT licence (¢ Edinburg Genome Foundry). 151 | 152 | Everyone is welcome to contribute ! 153 | 154 | 155 | .. raw:: html 156 | 157 | 160 | 164 | 166 | 167 | 168 | 169 | 170 | .. toctree:: 171 | :hidden: 172 | :maxdepth: 3 173 | 174 | self 175 | 176 | .. toctree:: 177 | :hidden: 178 | :caption: Reference 179 | :maxdepth: 3 180 | 181 | ref/ref 182 | 183 | .. toctree:: 184 | :caption: Examples 185 | 186 | examples/basic_example 187 | examples/report_example 188 | 189 | .. 
_PYPI: https://pypi.python.org/pypi/lala 190 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = -E -a 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = ../../built_docs 9 | PDFBUILDDIR = /tmp 10 | PDF = ../../manual.pdf 11 | 12 | # User-friendly check for sphinx-build 13 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 14 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 15 | endif 16 | 17 | # Internal variables. 18 | PAPEROPT_a4 = -D latex_paper_size=a4 19 | PAPEROPT_letter = -D latex_paper_size=letter 20 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | # the i18n builder cannot share the environment and doctrees with the others 22 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
23 | 24 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 25 | 26 | help: 27 | @echo "Please use \`make ' where is one of" 28 | @echo " html to make standalone HTML files" 29 | @echo " dirhtml to make HTML files named index.html in directories" 30 | @echo " singlehtml to make a single large HTML file" 31 | @echo " pickle to make pickle files" 32 | @echo " json to make JSON files" 33 | @echo " htmlhelp to make HTML files and a HTML help project" 34 | @echo " qthelp to make HTML files and a qthelp project" 35 | @echo " devhelp to make HTML files and a Devhelp project" 36 | @echo " epub to make an epub" 37 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 38 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 39 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 40 | @echo " text to make text files" 41 | @echo " man to make manual pages" 42 | @echo " texinfo to make Texinfo files" 43 | @echo " info to make Texinfo files and run them through makeinfo" 44 | @echo " gettext to make PO message catalogs" 45 | @echo " changes to make an overview of all changed/added/deprecated items" 46 | @echo " xml to make Docutils-native XML files" 47 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 48 | @echo " linkcheck to check all external links for integrity" 49 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lala.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lala.qhc" 93 | 94 | devhelp: 95 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 96 | @echo 97 | @echo "Build finished." 98 | @echo "To view the help file:" 99 | @echo "# mkdir -p $$HOME/.local/share/devhelp/lala" 100 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lala" 101 | @echo "# devhelp" 102 | 103 | epub: 104 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 105 | @echo 106 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 107 | 108 | latex: 109 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 110 | @echo 111 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 112 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 113 | "(use \`make latexpdf' here to do that automatically)." 
114 | 115 | latexpdf: 116 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(PDFBUILDDIR)/latex 117 | @echo "Running LaTeX files through pdflatex..." 118 | $(MAKE) -C $(PDFBUILDDIR)/latex all-pdf 119 | cp $(PDFBUILDDIR)/latex/*.pdf $(PDF) 120 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 121 | 122 | latexpdfja: 123 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 124 | @echo "Running LaTeX files through platex and dvipdfmx..." 125 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 126 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 127 | 128 | text: 129 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 130 | @echo 131 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 132 | 133 | man: 134 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 135 | @echo 136 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 137 | 138 | texinfo: 139 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 140 | @echo 141 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 142 | @echo "Run \`make' in that directory to run these through makeinfo" \ 143 | "(use \`make info' here to do that automatically)." 144 | 145 | info: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo "Running Texinfo files through makeinfo..." 148 | make -C $(BUILDDIR)/texinfo info 149 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 150 | 151 | gettext: 152 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 153 | @echo 154 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 155 | 156 | changes: 157 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 158 | @echo 159 | @echo "The overview file is in $(BUILDDIR)/changes." 
160 | 161 | linkcheck: 162 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 163 | @echo 164 | @echo "Link check complete; look for any errors in the above output " \ 165 | "or in $(BUILDDIR)/linkcheck/output.txt." 166 | 167 | doctest: 168 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 169 | @echo "Testing of doctests in the sources finished, look at the " \ 170 | "results in $(BUILDDIR)/doctest/output.txt." 171 | 172 | xml: 173 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 174 | @echo 175 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 176 | 177 | pseudoxml: 178 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 179 | @echo 180 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 181 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. 
epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 
84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\lala.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\lala.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /ez_setup.py: -------------------------------------------------------------------------------- 1 | 2 | #!python 3 | """Bootstrap setuptools installation 4 | 5 | If you want to use setuptools in your package's setup.py, just include this 6 | file in the same directory with it, and add this to the top of your setup.py:: 7 | 8 | from ez_setup import use_setuptools 9 | use_setuptools() 10 | 11 | If you want to require a specific version of setuptools, set a download 12 | mirror, or use an alternate download directory, you can do so by supplying 13 | the appropriate options to ``use_setuptools()``. 14 | 15 | This file can also be run as a script to install or upgrade setuptools. 
16 | """ 17 | import os 18 | import shutil 19 | import sys 20 | import tempfile 21 | import tarfile 22 | import optparse 23 | import subprocess 24 | 25 | from distutils import log 26 | 27 | try: 28 | from site import USER_SITE 29 | except ImportError: 30 | USER_SITE = None 31 | 32 | DEFAULT_VERSION = "0.9.6" 33 | DEFAULT_URL = "https://pypi.python.org/packages/source/s/setuptools/" 34 | 35 | def _python_cmd(*args): 36 | args = (sys.executable,) + args 37 | return subprocess.call(args) == 0 38 | 39 | def _install(tarball, install_args=()): 40 | # extracting the tarball 41 | tmpdir = tempfile.mkdtemp() 42 | log.warn('Extracting in %s', tmpdir) 43 | old_wd = os.getcwd() 44 | try: 45 | os.chdir(tmpdir) 46 | tar = tarfile.open(tarball) 47 | _extractall(tar) 48 | tar.close() 49 | 50 | # going in the directory 51 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 52 | os.chdir(subdir) 53 | log.warn('Now working in %s', subdir) 54 | 55 | # installing 56 | log.warn('Installing Setuptools') 57 | if not _python_cmd('setup.py', 'install', *install_args): 58 | log.warn('Something went wrong during the installation.') 59 | log.warn('See the error message above.') 60 | # exitcode will be 2 61 | return 2 62 | finally: 63 | os.chdir(old_wd) 64 | shutil.rmtree(tmpdir) 65 | 66 | 67 | def _build_egg(egg, tarball, to_dir): 68 | # extracting the tarball 69 | tmpdir = tempfile.mkdtemp() 70 | log.warn('Extracting in %s', tmpdir) 71 | old_wd = os.getcwd() 72 | try: 73 | os.chdir(tmpdir) 74 | tar = tarfile.open(tarball) 75 | _extractall(tar) 76 | tar.close() 77 | 78 | # going in the directory 79 | subdir = os.path.join(tmpdir, os.listdir(tmpdir)[0]) 80 | os.chdir(subdir) 81 | log.warn('Now working in %s', subdir) 82 | 83 | # building an egg 84 | log.warn('Building a Setuptools egg in %s', to_dir) 85 | _python_cmd('setup.py', '-q', 'bdist_egg', '--dist-dir', to_dir) 86 | 87 | finally: 88 | os.chdir(old_wd) 89 | shutil.rmtree(tmpdir) 90 | # returning the result 91 | log.warn(egg) 92 | if 
not os.path.exists(egg): 93 | raise IOError('Could not build the egg.') 94 | 95 | 96 | def _do_download(version, download_base, to_dir, download_delay): 97 | egg = os.path.join(to_dir, 'setuptools-%s-py%d.%d.egg' 98 | % (version, sys.version_info[0], sys.version_info[1])) 99 | if not os.path.exists(egg): 100 | tarball = download_setuptools(version, download_base, 101 | to_dir, download_delay) 102 | _build_egg(egg, tarball, to_dir) 103 | sys.path.insert(0, egg) 104 | import setuptools 105 | setuptools.bootstrap_install_from = egg 106 | 107 | 108 | def use_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, 109 | to_dir=os.curdir, download_delay=15): 110 | # making sure we use the absolute path 111 | to_dir = os.path.abspath(to_dir) 112 | was_imported = 'pkg_resources' in sys.modules or \ 113 | 'setuptools' in sys.modules 114 | try: 115 | import pkg_resources 116 | except ImportError: 117 | return _do_download(version, download_base, to_dir, download_delay) 118 | try: 119 | pkg_resources.require("setuptools>=" + version) 120 | return 121 | except pkg_resources.VersionConflict: 122 | e = sys.exc_info()[1] 123 | if was_imported: 124 | sys.stderr.write( 125 | "The required version of setuptools (>=%s) is not available,\n" 126 | "and can't be installed while this script is running. Please\n" 127 | "install a more recent version first, using\n" 128 | "'easy_install -U setuptools'." 
129 | "\n\n(Currently using %r)\n" % (version, e.args[0])) 130 | sys.exit(2) 131 | else: 132 | del pkg_resources, sys.modules['pkg_resources'] # reload ok 133 | return _do_download(version, download_base, to_dir, 134 | download_delay) 135 | except pkg_resources.DistributionNotFound: 136 | return _do_download(version, download_base, to_dir, 137 | download_delay) 138 | 139 | 140 | def download_setuptools(version=DEFAULT_VERSION, download_base=DEFAULT_URL, 141 | to_dir=os.curdir, delay=15): 142 | """Download setuptools from a specified location and return its filename 143 | 144 | `version` should be a valid setuptools version number that is available 145 | as an egg for download under the `download_base` URL (which should end 146 | with a '/'). `to_dir` is the directory where the egg will be downloaded. 147 | `delay` is the number of seconds to pause before an actual download 148 | attempt. 149 | """ 150 | # making sure we use the absolute path 151 | to_dir = os.path.abspath(to_dir) 152 | try: 153 | from urllib.request import urlopen 154 | except ImportError: 155 | from urllib2 import urlopen 156 | tgz_name = "setuptools-%s.tar.gz" % version 157 | url = download_base + tgz_name 158 | saveto = os.path.join(to_dir, tgz_name) 159 | src = dst = None 160 | if not os.path.exists(saveto): # Avoid repeated downloads 161 | try: 162 | log.warn("Downloading %s", url) 163 | src = urlopen(url) 164 | # Read/write all in one block, so we don't create a corrupt file 165 | # if the download is interrupted. 166 | data = src.read() 167 | dst = open(saveto, "wb") 168 | dst.write(data) 169 | finally: 170 | if src: 171 | src.close() 172 | if dst: 173 | dst.close() 174 | return os.path.realpath(saveto) 175 | 176 | 177 | def _extractall(self, path=".", members=None): 178 | """Extract all members from the archive to the current working 179 | directory and set owner, modification time and permissions on 180 | directories afterwards. `path' specifies a different directory 181 | to extract to. 
`members' is optional and must be a subset of the 182 | list returned by getmembers(). 183 | """ 184 | import copy 185 | import operator 186 | from tarfile import ExtractError 187 | directories = [] 188 | 189 | if members is None: 190 | members = self 191 | 192 | for tarinfo in members: 193 | if tarinfo.isdir(): 194 | # Extract directories with a safe mode. 195 | directories.append(tarinfo) 196 | tarinfo = copy.copy(tarinfo) 197 | tarinfo.mode = 448 # decimal for oct 0700 198 | self.extract(tarinfo, path) 199 | 200 | # Reverse sort directories. 201 | if sys.version_info < (2, 4): 202 | def sorter(dir1, dir2): 203 | return cmp(dir1.name, dir2.name) 204 | directories.sort(sorter) 205 | directories.reverse() 206 | else: 207 | directories.sort(key=operator.attrgetter('name'), reverse=True) 208 | 209 | # Set correct owner, mtime and filemode on directories. 210 | for tarinfo in directories: 211 | dirpath = os.path.join(path, tarinfo.name) 212 | try: 213 | self.chown(tarinfo, dirpath) 214 | self.utime(tarinfo, dirpath) 215 | self.chmod(tarinfo, dirpath) 216 | except ExtractError: 217 | e = sys.exc_info()[1] 218 | if self.errorlevel > 1: 219 | raise 220 | else: 221 | self._dbg(1, "tarfile: %s" % e) 222 | 223 | 224 | def _build_install_args(options): 225 | """ 226 | Build the arguments to 'python setup.py install' on the setuptools package 227 | """ 228 | install_args = [] 229 | if options.user_install: 230 | if sys.version_info < (2, 6): 231 | log.warn("--user requires Python 2.6 or later") 232 | raise SystemExit(1) 233 | install_args.append('--user') 234 | return install_args 235 | 236 | def _parse_args(): 237 | """ 238 | Parse the command line for options 239 | """ 240 | parser = optparse.OptionParser() 241 | parser.add_option( 242 | '--user', dest='user_install', action='store_true', default=False, 243 | help='install in user site package (requires Python 2.6 or later)') 244 | parser.add_option( 245 | '--download-base', dest='download_base', metavar="URL", 246 | 
default=DEFAULT_URL, 247 | help='alternative URL from where to download the setuptools package') 248 | options, args = parser.parse_args() 249 | # positional arguments are ignored 250 | return options 251 | 252 | def main(version=DEFAULT_VERSION): 253 | """Install or upgrade setuptools and EasyInstall""" 254 | options = _parse_args() 255 | tarball = download_setuptools(download_base=options.download_base) 256 | return _install(tarball, _build_install_args(options)) 257 | 258 | if __name__ == '__main__': 259 | sys.exit(main()) 260 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # lala documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Jul 13 14:47:48 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
28 | extensions = [ 29 | 'sphinx.ext.autodoc', 30 | 'sphinx.ext.todo', 31 | 'sphinx.ext.viewcode', 32 | 'numpydoc', 33 | 'sphinxcontrib.mermaid' 34 | ] 35 | numpydoc_show_class_members = False 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix of source filenames. 40 | source_suffix = ['.rst'] 41 | 42 | # The encoding of source files. 43 | #source_encoding = 'utf-8-sig' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'lala' 50 | copyright = u'2017, Edinburgh Genome Foundry' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = '0.1.0' 58 | # The full version, including alpha/beta/rc tags. 59 | release = '0.1.0' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all documents. 76 | #default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | #add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 
83 | #add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | #show_authors = False 88 | 89 | # A list of ignored prefixes for module index sorting. 90 | #modindex_common_prefix = [] 91 | 92 | # If true, keep warnings as "system message" paragraphs in the built documents. 93 | #keep_warnings = False 94 | 95 | 96 | # -- Options for HTML output --------------------------------------------------- 97 | 98 | # The theme to use for HTML and HTML Help pages. See the documentation for 99 | # a list of builtin themes. 100 | 101 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 102 | 103 | if not on_rtd: # only import and set the theme if we're building docs locally 104 | import sphinx_rtd_theme 105 | html_theme = 'sphinx_rtd_theme' 106 | html_theme_path = sphinx_rtd_theme.get_html_theme_path() 107 | def setup(app): 108 | app.add_stylesheet('css/main.css') 109 | else: 110 | html_context = { 111 | 'css_files': [ 112 | 'https://media.readthedocs.org/css/sphinx_rtd_theme.css', 113 | 'https://media.readthedocs.org/css/readthedocs-doc-embed.css', 114 | '_static/css/main.css', 115 | ], 116 | } 117 | #sys.path.append(os.path.abspath('_themes')) 118 | # Theme options are theme-specific and customize the look and feel of a theme 119 | # further. For a list of options available for each theme, see the 120 | # documentation. 121 | #html_theme_options = {} 122 | 123 | # Add any paths that contain custom themes here, relative to this directory. 124 | #html_theme_path = [] 125 | 126 | # The name for this set of Sphinx documents. If None, it defaults to 127 | # " v documentation". 128 | #html_title = None 129 | 130 | # A shorter title for the navigation bar. Default is the same as html_title. 131 | #html_short_title = None 132 | 133 | # The name of an image file (relative to this directory) to place at the top 134 | # of the sidebar. 
135 | # html_logo = '_static/images/logo.png' 136 | 137 | # The name of an image file (within the static path) to use as favicon of the 138 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 139 | # pixels large. 140 | #html_favicon = None 141 | 142 | # Add any paths that contain custom static files (such as style sheets) here, 143 | # relative to this directory. They are copied after the builtin static files, 144 | # so a file named "default.css" will overwrite the builtin "default.css". 145 | html_static_path = ['_static'] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 148 | # using the given strftime format. 149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names to 159 | # template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 175 | #html_show_sphinx = True 176 | 177 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 178 | #html_show_copyright = True 179 | 180 | # If true, an OpenSearch description file will be output, and all pages will 181 | # contain a tag referring to it. The value of this option must be the 182 | # base URL from which the finished HTML is served. 
183 | #html_use_opensearch = '' 184 | 185 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 186 | #html_file_suffix = None 187 | 188 | # Output file base name for HTML help builder. 189 | htmlhelp_basename = 'laladoc' 190 | 191 | 192 | # -- Options for LaTeX output -------------------------------------------------- 193 | 194 | latex_elements = { 195 | # The paper size ('letterpaper' or 'a4paper'). 196 | #'papersize': 'letterpaper', 197 | 198 | # The font size ('10pt', '11pt' or '12pt'). 199 | #'pointsize': '10pt', 200 | 201 | # Additional stuff for the LaTeX preamble. 202 | #'preamble': '', 203 | } 204 | 205 | # Grouping the document tree into LaTeX files. List of tuples 206 | # (source start file, target name, title, author, documentclass [howto/manual]). 207 | latex_documents = [ 208 | ('index', 'lala.tex', u'lala Documentation', 209 | u'Zulko', 'manual'), 210 | ] 211 | 212 | # The name of an image file (relative to this directory) to place at the top of 213 | # the title page. 214 | #latex_logo = None 215 | 216 | # For "manual" documents, if this is true, then toplevel headings are parts, 217 | # not chapters. 218 | #latex_use_parts = False 219 | 220 | # If true, show page references after internal links. 221 | #latex_show_pagerefs = False 222 | 223 | # If true, show URL addresses after external links. 224 | #latex_show_urls = False 225 | 226 | # Documents to append as an appendix to all manuals. 227 | #latex_appendices = [] 228 | 229 | # If false, no module index is generated. 230 | #latex_domain_indices = True 231 | 232 | 233 | # -- Options for manual page output -------------------------------------------- 234 | 235 | # One entry per manual page. List of tuples 236 | # (source start file, name, description, authors, manual section). 237 | man_pages = [ 238 | ('index', 'lala', u'PACKAGE_NAME Documentation', 239 | [u'Zulko'], 1) 240 | ] 241 | 242 | # If true, show URL addresses after external links. 
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
# NOTE(review): the description below is still the sphinx-quickstart
# placeholder text — consider filling it in.
texinfo_documents = [
    ('index', 'lala', u'lala Documentation',
     u'Zulko', 'lala', 'One line description of project.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# -- Options for Epub output ---------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = u'lala'
epub_author = u'Zulko'
epub_publisher = u'Zulko'
epub_copyright = u'2016, Zulko'

# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''

# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''

# A unique identification for the text.
#epub_uid = ''

# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()

# A sequence of (type, uri, title) tuples for the guide element of content.opf.
#epub_guide = ()

# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
300 | #epub_pre_files = [] 301 | 302 | # HTML files shat should be inserted after the pages created by sphinx. 303 | # The format is a list of tuples containing the path and title. 304 | #epub_post_files = [] 305 | 306 | # A list of files that should not be packed into the epub file. 307 | #epub_exclude_files = [] 308 | 309 | # The depth of the table of contents in toc.ncx. 310 | #epub_tocdepth = 3 311 | 312 | # Allow duplicate toc entries. 313 | #epub_tocdup = True 314 | 315 | # Fix unsupported image types using the PIL. 316 | #epub_fix_images = False 317 | 318 | # Scale large images. 319 | #epub_max_image_width = 0 320 | 321 | # If 'no', URL addresses will not be shown. 322 | #epub_show_urls = 'inline' 323 | 324 | # If false, no index is generated. 325 | #epub_use_index = True 326 | 327 | #autodoc_member_order = 'bysource' 328 | -------------------------------------------------------------------------------- /lala/WebLogs.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import re 4 | import time 5 | import subprocess as sp 6 | import urllib 7 | import os 8 | import gzip 9 | from io import BytesIO 10 | import socket 11 | 12 | import pygeoip 13 | import pandas 14 | import proglog 15 | from pdf_reports import pug_to_html, write_report 16 | 17 | from .conf import conf 18 | 19 | import numpy as np 20 | from matplotlib import cm 21 | import matplotlib.pyplot as plt 22 | from matplotlib.ticker import MaxNLocator 23 | 24 | try: 25 | import cartopy 26 | import cartopy.io.shapereader as shpreader 27 | import cartopy.crs as ccrs 28 | shpfilename = shpreader.natural_earth(resolution='110m', 29 | category='cultural', 30 | name='admin_0_countries') 31 | reader = shpreader.Reader(shpfilename) 32 | countries = list(reader.records()) 33 | name_to_geometry = { 34 | country.attributes[e]: country.geometry 35 | for country in countries 36 | for e in ('ADM0_A3', 'BRK_NAME') 37 | } 38 | name_to_extent = { 39 
| name: geometry.bounds 40 | for name, geometry in name_to_geometry.items() 41 | } 42 | CARTOPY_INSTALLED = True 43 | 44 | except ImportError: 45 | name_to_geometry = None 46 | name_to_extent = None 47 | cartopy = None 48 | ccrs = None 49 | CARTOPY_INSTALLED = False 50 | 51 | 52 | if not os.path.exists(conf['geolite_path']): 53 | response = urllib.request.urlopen(conf['geolite_url']) 54 | geolite_gz = response.read() 55 | geolite_bites = BytesIO(geolite_gz) 56 | with gzip.open(geolite_bites, 'rb') as f: 57 | geolite_content = f.read() 58 | if not os.path.exists(conf['data_dir']): 59 | os.makedirs(conf['data_dir']) 60 | with open(conf['geolite_path'], 'wb') as f: 61 | f.write(geolite_content) 62 | 63 | geoip = pygeoip.GeoIP(conf['geolite_path']) 64 | 65 | durations = { 66 | 'second': 1, 67 | 'minute': 60, 68 | 'hour': 60 * 60, 69 | 'day': 60 * 60 * 24, 70 | 'week': 60 * 60 * 24 * 7, 71 | 'month': 60 * 60 * 24 * 30, 72 | 'year': 60 * 60 * 24 * 365, 73 | } 74 | def time_of_last(num, duration): 75 | """Returns the EPOCH time (in seconds) of XX ago (relative to the present). 76 | 77 | Examples 78 | -------- 79 | 80 | >>> time_of_last(2, 'week') # => EPOCH time of two weeks ago 81 | >>> time_of_last(5, 'hour') # => EPOCH time of five hours ago 82 | """ 83 | return time.time() - num * durations[duration] 84 | 85 | def get_remote_file_content(filename='/var/log/nginx/access.log', 86 | host='localhost', user='root', decode='utf8', 87 | target=None): 88 | """ 89 | Parameters 90 | ---------- 91 | 92 | filename 93 | path to the file in the host machine 94 | 95 | host 96 | IP address or domain name of the host. 97 | 98 | user 99 | Username on the host. 100 | 101 | decode 102 | If not None, the file content received from the server will be 103 | decoded into a string using this format. 
104 | """ 105 | proc = sp.Popen(['ssh', '%s@%s' % (user, host), 'cat %s' % filename], 106 | stderr=sp.PIPE, stdout=sp.PIPE) 107 | out, err = proc.communicate() 108 | if len(err): 109 | raise IOError(err) 110 | if decode is not None: 111 | out = out.decode(decode) 112 | if target is not None: 113 | with open(target, "w") as f: 114 | f.write(out) 115 | return out 116 | 117 | def init_map(figsize=(12, 8), extent=(-150, 60, -25, 60)): 118 | """Initialize a world map with the given dimensions. 119 | 120 | ``figsize`` is the figure's size in inches. ``extent`` is the boundaries 121 | of the map, in its own PlateCarree coordinates. 122 | """ 123 | if not CARTOPY_INSTALLED: 124 | raise ImportError('This feature requires Cartopy installed.') 125 | ax = plt.axes(projection=cartopy.crs.PlateCarree()) 126 | ax.add_feature(cartopy.feature.LAND) 127 | ax.add_feature(cartopy.feature.OCEAN) 128 | ax.add_feature(cartopy.feature.COASTLINE) 129 | ax.add_feature(cartopy.feature.BORDERS, linestyle='-', alpha=.5) 130 | 131 | ax.set_extent(extent) 132 | ax.figure.set_size_inches(figsize) 133 | return ax 134 | 135 | class WebLogs(pandas.DataFrame): 136 | "Custom Pandas dataframe class for reading web logs." 137 | def __init__(self, *args, **kw): 138 | super(WebLogs, self).__init__(*args, **kw) 139 | 140 | @property 141 | def _constructor(self): 142 | return WebLogs 143 | 144 | @staticmethod 145 | def from_nginx_weblogs(filepath=None, log_lines=None): 146 | """Return a dataframe of access log entries, from lines of NGINX logs. 147 | 148 | The log_lines are a list of strings, each representing one access 149 | logged by NGINX. 
150 | """ 151 | if log_lines is None: 152 | with open(filepath, 'r') as f: 153 | log_lines = f.read().split("\n") 154 | regexpr = r'(.*) -(.*) - \[(.*)\] "(.*)" (\d+) (\d+) "(.*)" "(.*)"' 155 | regexpr = re.compile(regexpr) 156 | errored_lines = [] 157 | records = [] 158 | for i, line in enumerate(log_lines): 159 | match = re.match(regexpr, line) 160 | fields = ('IP', 'stuff', 'date', 'request', 'response', 'status', 161 | 'referrer', 'browser') 162 | if match is None: 163 | errored_lines.append(i) 164 | else: 165 | records.append(dict(zip(fields, match.groups()))) 166 | weblogs = WebLogs.from_records(records) 167 | weblogs['parsed_date'] = [ 168 | datetime.strptime(s, '%d/%b/%Y:%H:%M:%S %z') 169 | for s in weblogs['date'] 170 | ] 171 | weblogs['timestamp'] = [x.timestamp() 172 | for x in weblogs['parsed_date']] 173 | fields = ['country_name', 'city', 'country_code3', 'latitude', 174 | 'longitude'] 175 | d = {f: [] for f in fields} 176 | for ip in weblogs.IP: 177 | rec = geoip.record_by_addr(ip) 178 | if rec is None: 179 | rec = {field: None for field in fields} 180 | for field in fields: 181 | d[field].append(rec[field]) 182 | for field in fields: 183 | weblogs[field] = d[field] 184 | 185 | methods, urls, https = zip(*[ 186 | request.split() 187 | if len(request.split()) == 3 188 | else (None, None, None) 189 | for request in weblogs.request 190 | ]) 191 | for name, data in [('method', methods), 192 | ('url', urls), 193 | ('http', https)]: 194 | weblogs[name] = data 195 | 196 | return weblogs, errored_lines 197 | 198 | @staticmethod 199 | def from_weblogs_spreadsheet(filepath=None): 200 | if filepath.lower().endswith((".csv")): 201 | dataframe = pandas.read_csv(filepath) 202 | else: 203 | dataframe = pandas.read_excel(filepath) 204 | return WebLogs(dataframe) 205 | 206 | 207 | def identify_ips_domains(self, logger='bar', known_ips=None): 208 | """Add a `ip_owner` column to self.""" 209 | if isinstance(known_ips, pandas.DataFrame): 210 | known_ips = { 211 | row.IP: 
row.domain
                for i, row in known_ips.iterrows()
            }
        if known_ips is None:
            known_ips = {}
        if logger == 'bar':
            logger = proglog.TqdmProgressBarLogger()

        ips_domains = {}
        for ip in logger.iter_bar(ip=list(set(self.IP))):
            if ip in known_ips:
                ips_domains[ip] = known_ips[ip]
            else:
                try:
                    # NOTE(review): socket.getfqdn usually returns the input
                    # unchanged on failure rather than raising — the herror
                    # branch below may be dead code; confirm.
                    ips_domains[ip] = known_ips[ip] = socket.getfqdn(ip)
                except socket.herror:
                    ips_domains[ip] = 'Unknown'
        self.loc[:, 'domain'] = [ips_domains[ip] for ip in self.IP]
        return known_ips

    def blacklist_ips(self, ips_blacklist):
        """Return a new version of self minus the blacklisted ips."""
        ips_set = set(self.IP)
        blacklisted_ips = set([
            ip for ip in ips_set
            if ip in ips_blacklist
        ])
        return self[[
            ip not in blacklisted_ips
            for ip in self.IP
        ]]

    def entries_last(self, num, duration):
        """Returns the weblogs of the latest entries up to XX ago.

        ``duration`` must be a key of the module-level ``durations`` dict
        (singular form, e.g. 'hour', 'day', 'week').

        Examples
        --------

        >>> # Filter out all entries more than 1 hour old
        >>> last_hour_weblogs = self.entries_last(1, 'hour')
        >>> # Filter out all entries more than 5 days old
        >>> last_days_weblogs = self.entries_last(5, 'day')
        """
        return self[self.timestamp >= time_of_last(num, duration)]

    def filter_by_text_search(self, terms, are_in=None, not_in=None):
        """Return a filtered version of self based on searched terms.
258 | """ 259 | 260 | if not_in is not None: 261 | field = not_in 262 | def filtr(v): 263 | return (v is not None) and isinstance(v, str) and not any([ 264 | term in v for term in terms 265 | ]) 266 | else: 267 | field = are_in 268 | def filtr(v): 269 | return (v is not None) and isinstance(v, str) and any([ 270 | term in v for term in terms 271 | ]) 272 | field_dict = { 273 | val: filtr(val) 274 | for val in set(self[field]) 275 | } 276 | indices = [field_dict[v] for v in self[field]] 277 | return self[indices] 278 | 279 | def cluster_dates(self, max_interval=60): 280 | dates_intervals = [[self.parsed_date[0], self.parsed_date[0]]] 281 | for date in self.parsed_date[1:]: 282 | interval = (date - dates_intervals[-1][-1]).total_seconds() 283 | if interval < max_interval: 284 | dates_intervals[-1][-1] = date 285 | else: 286 | dates_intervals.append([date, date]) 287 | return dates_intervals 288 | 289 | def visitors_and_visits(self, max_visits_interval=60, per='IP'): 290 | return { 291 | ip: df.cluster_dates(max_interval=max_visits_interval) 292 | for ip, df in self.groupby(per) 293 | if ip is not None 294 | } 295 | 296 | def most_frequent_visitors(self, criterion='n_visits', n_visitors='all', 297 | max_visits_interval=60, per='IP'): 298 | visitors = self.visitors_and_visits( 299 | max_visits_interval=max_visits_interval, per=per) 300 | if n_visitors == 'all': 301 | n_visitors = len(visitors.keys()) 302 | 303 | criterion_function = { 304 | 'n_visits': lambda visits: len(visits), 305 | 'time_spent': lambda visits: sum([(v[1] - v[0]).total_seconds() 306 | for v in visits]) / 60.0 307 | }[criterion] 308 | 309 | return sorted([ 310 | (criterion_function(visits), visitor) 311 | for visitor, visits in visitors.items() 312 | ])[::-1][:n_visitors] 313 | 314 | def visitors_locations(self): 315 | return { 316 | ip: " ".join([ 317 | df.iloc[0].city if df.iloc[0].city else "", 318 | df.iloc[0].country_name if df.iloc[0].country_name else "" 319 | ]) 320 | for ip, df in 
self.groupby('IP')
        }


    def countries_colormap(self, mini='auto', maxi='auto', ax=None):
        """Plot a colormap of different countries, return the Matplotlib ax.

        Countries are shaded according to their number of entries (value
        counts of the ``country_name`` column).

        Parameters
        ----------

        mini, maxi
            Extreme values leading to red or white colors. Leave to auto to
            adjust this range to the observed country counts.

        ax
            A Matplotlib ax with a representation of the world. If None, one is
            created automatically
        """
        if not CARTOPY_INSTALLED:
            raise ImportError('This feature requires Cartopy installed.')
        country_values = self.country_name.value_counts()
        countries = country_values.index
        values = country_values.values
        if mini == 'auto':
            mini = values.min()
        if maxi == 'auto':
            maxi = values.max()
        # NOTE(review): divides by zero when maxi == mini (e.g. all counts
        # equal, or a single country) — confirm acceptable.
        values = (values - mini) / (maxi - mini)
        country_values = zip(countries, values)

        if ax is None:
            ax = init_map(figsize=(12, 8), extent=(-150, 60, -25, 60))
        for (country_name, value) in country_values:
            # Countries absent from the Natural Earth lookup are skipped.
            if country_name not in name_to_geometry:
                continue
            color = cm.YlOrBr(value)
            ax.add_geometries(name_to_geometry[country_name], ccrs.PlateCarree(),
                              facecolor=color)
        return ax


    def plot_geo_positions(self, ax=None, country_colors=True):
        """Plot circles on a map around positions of the entries in the access log.

        Parameters
        ----------

        ax
            Matplotlib ax with a representation of the world.

        country_colors
            If True, countries are first shaded by entry counts using
            countries_colormap.
371 | """ 372 | if not CARTOPY_INSTALLED: 373 | raise ImportError('This feature requires Cartopy installed.') 374 | if ax is None: 375 | ax = init_map(figsize=(12, 8), extent=(-150, 60, -25, 60)) 376 | if country_colors: 377 | self.countries_colormap(mini='auto', maxi='auto', ax=ax) 378 | 379 | counts = [ 380 | (len(dataframe_), ll) 381 | for (ll, dataframe_) in self.groupby(['longitude', 'latitude']) 382 | ] 383 | counts, xy = zip(*(sorted(counts)[::-1])) 384 | counts = 1.0 * np.array(counts) 385 | counts = np.maximum(5, 600 * counts / counts.max()) 386 | xx, yy = [list(e) for e in zip(*xy)] 387 | ax.scatter(xx, yy, c='w', s=counts, zorder=2000, linewidths=2, 388 | edgecolor='k', transform=ccrs.Geodetic()) 389 | return ax 390 | 391 | 392 | def plot_piechart(self, column, ax=None): 393 | """Plot circles on a map around positions of the entries in the access log. 394 | 395 | Parameters 396 | ---------- 397 | 398 | column 399 | name of the column to plot 400 | 401 | ax 402 | Matplotlib ax on which to plot the pie chart. If None, one is created 403 | automatically. 404 | """ 405 | count = self[column].value_counts() 406 | if ax is None: 407 | fig, ax = plt.subplots(1) 408 | ax = count.plot(kind='pie', ax=ax) 409 | ax.set_aspect('equal') 410 | ax.set_ylabel('') 411 | return ax, count 412 | 413 | 414 | def plot_timeline(self, bins_per_day=4, ax=None): 415 | """Plot a time profile of access. 416 | 417 | Parameters 418 | ---------- 419 | 420 | bins_per_day 421 | number of time points per day. 422 | 423 | ax 424 | Matplotlib ax on which to plot the profile. If None, one is created 425 | automatically. 
426 | """ 427 | mini, maxi = self['timestamp'].min(), self['timestamp'].max() 428 | bins = int(bins_per_day * (maxi - mini) / durations['day']) 429 | if ax is None: 430 | fig, ax = plt.subplots(1, figsize=(12, 3)) 431 | self['timestamp'].plot(kind='hist', bins=bins, alpha=0.6) 432 | x_ticks = ax.get_xticks() 433 | xlabels = [datetime.fromtimestamp(int(x)).strftime('%Y-%m-%d') 434 | for x in x_ticks] 435 | ax.set_xticklabels(xlabels, rotation=45) 436 | ax.set_xlim(mini, maxi) 437 | ax.set_ylabel('occurences') 438 | return ax 439 | 440 | 441 | 442 | def plot_most_frequent_visitors(self, plot_ips=True, n_visitors='all', 443 | criterion='n_visits'): 444 | visitors_locations = self.visitors_locations() 445 | most_frequent = self.most_frequent_visitors( 446 | criterion=criterion, n_visitors=n_visitors) 447 | label = { 448 | 'n_visits': 'Number of visits', 449 | 'time_spent': 'Time spent (mins)' 450 | }[criterion] 451 | fig, ax = plt.subplots(1) 452 | scores, visitors = zip(*most_frequent) 453 | if visitors_locations is not None: 454 | visitors = [ 455 | v + " - " + visitors_locations[v] 456 | for v in visitors 457 | ] 458 | ticks = list(range(len(scores)))[::-1] 459 | ax.bar(left=1, height=0.5, bottom=ticks, width=scores, 460 | tick_label=visitors if plot_ips else None, 461 | orientation='horizontal', alpha=0.6) 462 | 463 | # Hide the right and top spines 464 | ax.spines['right'].set_visible(False) 465 | ax.spines['top'].set_visible(False) 466 | 467 | # Only show ticks on the left and bottom spines 468 | ax.yaxis.set_ticks_position('left') 469 | ax.xaxis.set_ticks_position('bottom') 470 | ax.set_xlabel(label) 471 | ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 472 | if not plot_ips: 473 | ax.set_ylabel('Visitors') 474 | return ax 475 | 476 | def write_report(self, template_path=None, template_string=None, 477 | target=None, stylesheets=(), **context): 478 | html = pug_to_html(path=template_path, 479 | string=template_string, 480 | weblogs=self, **context) 481 | 
return write_report(html, target=target, extra_stylesheets=stylesheets) 482 | --------------------------------------------------------------------------------