├── pyClickModels
├── __init__.py
├── __version__.py
├── jsonc.pxd
├── DBN.pxd
└── DBN.pyx
├── MANIFEST.in
├── requirements.txt
├── notebooks
├── dbn.png
├── styles
│ ├── bmh_matplotlibrc.json
│ ├── custom.css
│ └── matplotlibrc
└── DBN.ipynb
├── tests
├── test_DBN.py
├── fixtures
│ ├── eighty_skus
│ │ └── judgments.gz
│ ├── all_clicks_set
│ │ └── judgments.gz
│ └── null_test
│ │ └── judgments_test_null.gz
├── conftest.py
└── test_cy_DBN.pyx
├── setup.cfg
├── .flake8
├── .coveragerc
├── scripts
└── build_wheels.sh
├── .travis.yml
├── Makefile
├── LICENSE
├── .gitignore
├── setup.py
└── README.md
/pyClickModels/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 |
--------------------------------------------------------------------------------
/pyClickModels/__version__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.0.2'
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | wheel
3 | numpy
4 | ujson
5 |
--------------------------------------------------------------------------------
/notebooks/dbn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WillianFuks/pyClickModels/HEAD/notebooks/dbn.png
--------------------------------------------------------------------------------
/tests/test_DBN.py:
--------------------------------------------------------------------------------
def test_DBN():
    """Run the test-suite exposed by the ``test_cy_DBN`` module under pytest."""
    # The import happens at call time, not when this module is collected.
    from test_cy_DBN import run_tests as run_cython_tests

    run_cython_tests()
4 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license_file = LICENSE
3 |
4 | [isort]
5 | known_first_party = pyClickModels
6 | default_section = THIRDPARTY
7 |
--------------------------------------------------------------------------------
/tests/fixtures/eighty_skus/judgments.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WillianFuks/pyClickModels/HEAD/tests/fixtures/eighty_skus/judgments.gz
--------------------------------------------------------------------------------
/tests/fixtures/all_clicks_set/judgments.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WillianFuks/pyClickModels/HEAD/tests/fixtures/all_clicks_set/judgments.gz
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length=90
3 | filename = *.pyx,*.px*
4 | exclude = .eggs,*.egg,build,*.pxd
5 | ignore = E901,E225,E226,E227,E999,W504
6 |
--------------------------------------------------------------------------------
/tests/fixtures/null_test/judgments_test_null.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WillianFuks/pyClickModels/HEAD/tests/fixtures/null_test/judgments_test_null.gz
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | plugins = Cython.Coverage
3 | include =
4 | pyClickModels/*
5 | omit =
6 | tests/*
7 | pyClickModels/__version__.py
8 |
9 | [report]
10 | show_missing = true
11 | exclude_lines =
12 | pragma: no cover
13 |
--------------------------------------------------------------------------------
/notebooks/styles/bmh_matplotlibrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "lines.linewidth": 2.0,
3 | "axes.edgecolor": "#bcbcbc",
4 | "patch.linewidth": 0.5,
5 | "legend.fancybox": true,
6 | "axes.color_cycle": [
7 | "#348ABD",
8 | "#A60628",
9 | "#7A68A6",
10 | "#467821",
11 | "#CF4457",
12 | "#188487",
13 | "#E24A33"
14 | ],
15 | "axes.facecolor": "#eeeeee",
16 | "axes.labelsize": "large",
17 | "axes.grid": true,
18 | "patch.edgecolor": "#eeeeee",
19 | "axes.titlesize": "x-large",
20 | "svg.fonttype": "path",
21 | "examples.directory": ""
22 | }
--------------------------------------------------------------------------------
/scripts/build_wheels.sh:
--------------------------------------------------------------------------------
# Build one wheel per non-2.7 interpreter shipped in the manylinux1 image and
# repair each of them with auditwheel. The repository is mounted at
# /pyClickModels inside the container; artifacts end up in ./dist on the host.
#
# NOTE: the script passed to `sh -c` is single-quoted, so it must not contain
# any single quote character (including inside comments).
docker run -v "$(pwd)":/pyClickModels quay.io/pypa/manylinux1_x86_64 sh -c '''
    # -y is required: there is no terminal attached to answer the prompt.
    yum update -y
    yum install -y json-c-devel

    cd /pyClickModels

    for PYVER in /opt/python/*/bin/; do
        if [[ $PYVER != *"27"* ]]; then
            "${PYVER}/pip" install -U pip
            "${PYVER}/pip" install -U setuptools
            "${PYVER}/pip" install -r requirements.txt
            "${PYVER}/python" setup.py sdist bdist_wheel
        fi
    done

    # Replace every freshly built wheel by its repaired counterpart.
    for whl in dist/*.whl; do
        auditwheel repair "$whl" --plat "manylinux2010_x86_64" -w dist/
        rm "$whl"
    done
'''
21 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | matrix:
4 | include:
5 | - python: 3.6
6 | - python: 3.7
7 | - python: 3.8
8 |
9 | before_install:
10 | - sudo apt-get -y install libjson0 libjson0-dev
11 | addons:
12 | apt:
13 | update: true
14 | sources:
15 | - ubuntu-toolchain-r-test
16 | packages:
17 | - g++-7
18 |
19 | install:
20 | - pip install -U setuptools cython coveralls
21 |
22 | script:
23 | - |
24 | if [[ $TRAVIS_PYTHON_VERSION == 3.8 ]]; then
25 | make isort-check
26 | make flake8
27 | fi
28 | python setup.py test
29 |
30 | after_success:
31 | - |
32 | if [[ $TRAVIS_PYTHON_VERSION == 3.8 ]]; then
33 | travis_wait 30 python setup.py test --coverage=true && coveralls
34 | else echo failed
35 | fi
36 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: flake8 coverage coverage-html test publish
2 |
3 | flake8:
4 | pip install -U flake8
5 | flake8 pyClickModels
6 |
7 | isort:
8 | pip install -U isort
9 | isort -rc pyClickModels
10 | isort -rc tests
11 |
12 | isort-check:
13 | pip install -U isort
14 | isort -ns __init__.py -rc -c -df -p pyClickModels pyClickModels tests
15 |
16 | coverage:
17 | python setup.py test --coverage=true
18 |
19 | coverage-html:
20 | python setup.py test --coverage=true --html=true
21 |
22 | test:
23 | python setup.py test
24 |
25 | publish:
26 | pip install -U setuptools
27 | pip install -U wheel
28 | pip install 'twine>=1.5.0'
29 | pip install auditwheel
30 | sh ./scripts/build_wheels.sh
31 | #twine upload --repository testpypi dist/*
32 | twine upload dist/*
33 | #rm -fr build dist .egg *.egg-info
34 |
--------------------------------------------------------------------------------
/pyClickModels/jsonc.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "/usr/include/json-c/json.h":
2 | struct json_object:
3 | pass
4 |
5 | ctypedef bint json_bool
6 | json_object *json_tokener_parse(const char *str)
7 | json_bool json_object_object_get_ex(const json_object *obj, const char *key, json_object **value)
8 | const char *json_object_get_string(json_object *jso)
9 |
10 | struct lh_entry:
11 | void *k
12 | void *v
13 | lh_entry *next
14 |
15 | struct lh_table:
16 | int size
17 | lh_entry *head
18 |
19 | lh_table *json_object_get_object(const json_object *jso)
20 |
21 | void *lh_entry_k(lh_entry *entry)
22 | size_t json_object_array_length(const json_object *obj)
23 | json_object *json_object_array_get_idx(const json_object *jso, size_t idx)
24 | int json_object_get_int(const json_object *obj)
25 | int json_object_put(json_object *obj)
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Willian Fuks
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/notebooks/styles/custom.css:
--------------------------------------------------------------------------------
1 |
62 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 | *.c
9 | *.cpp
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | pip-wheel-metadata/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .nox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | *.py,cover
53 | .hypothesis/
54 | .pytest_cache/
55 | *.html
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
134 | # Cython annotations
135 | pyClickModels/*.html
136 |
137 | # json-c building folders
138 | json-c-build/
139 | json-c/
140 |
--------------------------------------------------------------------------------
/pyClickModels/DBN.pxd:
--------------------------------------------------------------------------------
1 | from libcpp.string cimport string
2 | from libcpp.unordered_map cimport unordered_map
3 | from libcpp.vector cimport vector
4 |
5 | from pyClickModels.jsonc cimport *
6 |
7 |
8 | cdef class DBNModel:
9 | cpdef void fit(self, str input_folder, int iters=*)
10 | cpdef void export_judgments(self, str output, str format_=*)
11 | cdef:
12 | float gamma_param
13 | unordered_map[string, unordered_map[string, float]] alpha_params
14 | unordered_map[string, unordered_map[string, float]] sigma_params
15 | string get_search_context_string(self, lh_table *tbl)
16 | void compute_cr(self, string *query, json_object *sessions, unordered_map[string, unordered_map[string, float]] *cr_dict)
17 | float *get_param(self, string param, string *query=*, string *doc=*)
18 | vector[float] build_e_r_vector(self, json_object *clickstream, string *query, unordered_map[string, float] *cr_dict)
19 | vector[float] build_X_r_vector(self, json_object *clisktream, string *query)
20 | vector[float] build_e_r_vector_given_CP(self, json_object *clickstream, unsigned int idx, string *query)
21 | float compute_cp_p(self, json_object *clickstream, unsigned int idx, string *query, vector[float] *e_r_array_given_CP, unordered_map[string, float] *cr_dict)
22 | vector[float] build_CP_vector_given_e(self, json_object *session, string *query, unordered_map[string, float] *cr_dict)
23 | int get_last_r(self, json_object *clickstream, const char *event=*)
24 | void update_tmp_alpha(self, int r, string *query, json_object *doc_data, vector[float] *e_r_vector, vector[float] *X_r_vector, int last_r, unordered_map[string, vector[float]] *tmp_alpha_param)
25 | void update_tmp_sigma(self, string *query, int r, json_object *doc_data, vector[float] *X_r_vector, int last_r, unordered_map[string, vector[float]] *tmp_sigma_param)
26 | void update_tmp_gamma(self, int r, int last_r, json_object *doc_data, string *query, vector[float] *cp_vector_given_e, vector[float] *e_r_vector_given_CP, unordered_map[string, float] *cr_dict, vector[float] *tmp_gamma_param)
27 | void update_alpha_param(self, string *query, unordered_map[string, vector[float]] *tmp_alpha_param)
28 | void update_sigma_param(self, string *query, unordered_map[string, vector[float]] *tmp_sigma_param)
29 | void update_gamma_param(self, vector[float] *tmp_gamma_param)
30 | void update_tmp_params(self, json_object *session, unordered_map[string, vector[float]] *tmp_alpha_param, unordered_map[string, vector[float]] *tmp_sigma_param, vector[float] *tmp_gamma_param, string *query, unordered_map[string, float] *cr_dict)
31 | void restart_tmp_params(self, unordered_map[string, vector[float]] *tmp_alpha_param, unordered_map[string, vector[float]] *tmp_sigma_param, vector[float] *tmp_gamma_param)
32 |
33 | cdef class Factor:
34 | cdef:
35 | unsigned int r
36 | unsigned int last_r
37 | bint click
38 | bint purchase
39 | float alpha
40 | float sigma
41 | float gamma
42 | float cr
43 | vector[float] *e_r_vector_given_CP
44 | vector[float] *cp_vector_given_e
45 | float compute_factor(self, bint x, bint y, bint z)
46 | cinit(self, unsigned int r, unsigned int last_r, bint click, bint purchase, float alpha, float sigma, float gamma, float cr, vector[float] *e_r_vector_given_CP, vector[float] *cp_vector_given_e)
47 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import gzip
2 | import json
3 | import tempfile
4 |
5 | import numpy as np
6 | import pytest
7 |
8 |
def build_DBN_test_data(users=10, docs=10, queries=2, persistence=0.7):
    """
    Simulate click/purchase sessions and store them as gzipped judgment files.

    For each query a random attractiveness (alpha), satisfaction (sigma) and
    purchase rate is drawn per document. Each simulated user then walks through
    a random ranking of all documents; once the user stops examining, remaining
    documents are recorded with no click and no purchase.

    Args
    ----
      users: int
          Number of sessions simulated for each query.
      docs: int
          Number of documents presented in each session.
      queries: int
          Number of distinct search contexts to simulate.
      persistence: float
          Threshold for the random "persist" draw; the user stops examining
          when the draw is greater than this value.

    Returns
    -------
      (persistence, params, tmp_folder): tuple
          ``params`` is the ``(queries, docs, 3)`` array of drawn parameters and
          ``tmp_folder`` is the ``tempfile.TemporaryDirectory`` holding the two
          ``judgments_model_test_data_{1,2}.gz`` files. Keep a reference to it
          for as long as the files are needed.
    """
    # first z column is alpha, second is sigma and third is purchase rate.
    params = np.random.random(size=(queries, docs, 3))

    final_result = []
    for q in range(queries):
        inner_result = {
            "search_keys": {
                "search_term": q,
                "region": "north",
                "favorite_size": "L"
            },
            "judgment_keys": []
        }
        for _ in range(users):
            session = []
            tmp_docs = list(range(docs))
            np.random.shuffle(tmp_docs)
            stopped_examining = False
            # Documents are consumed from the tail of the shuffled ranking.
            for doc in reversed(tmp_docs):
                if stopped_examining:
                    session.append({
                        'click': 0,
                        'purchase': 0,
                        'doc': str(doc)
                    })
                    continue
                persist = np.random.random()
                satisfied = np.random.random()
                click_event = np.random.random()
                purchase_event = np.random.random()
                observed_click = 1 if click_event < params[q, doc, 0] else 0
                observed_purchase = (
                    1 if observed_click and purchase_event < params[q, doc, 2] else 0
                )
                session.append({
                    'click': observed_click,
                    'purchase': observed_purchase,
                    'doc': str(doc)
                })
                # A click followed by a purchase or by satisfaction always ends
                # the session; otherwise browsing continues given persistence.
                user_is_done = observed_click and (
                    observed_purchase or satisfied < params[q, doc, 1]
                )
                if user_is_done or persist > persistence:
                    stopped_examining = True
            inner_result['judgment_keys'].append({'session': session})
        final_result.append(inner_result)

    # Write into the directory that was really created: renaming it to '/tmp'
    # would leak files into a shared folder and make ``cleanup()`` dangerous.
    tmp_folder = tempfile.TemporaryDirectory()
    half_results = len(final_result) // 2
    chunks = (final_result[:half_results], final_result[half_results:])
    for file_number, rows in enumerate(chunks, start=1):
        file_path = '{}/judgments_model_test_data_{}.gz'.format(
            tmp_folder.name, file_number
        )
        with gzip.GzipFile(file_path, 'wb') as f:
            for row in rows:
                f.write(json.dumps(row).encode() + '\n'.encode())
    return persistence, params, tmp_folder
85 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import os
4 | import sys
5 | from codecs import open
6 | from setuptools import setup
7 | from setuptools.command.test import test as TestCommand
8 | from distutils.extension import Extension
9 | import Cython.Compiler.Options
10 | from Cython.Distutils import build_ext
11 | from Cython.Build import cythonize
12 |
13 |
# Absolute path of the folder holding this setup.py; every file read below is
# resolved against it so the script does not depend on the working directory.
here = os.path.abspath(os.path.dirname(__file__))
Cython.Compiler.Options.annotate = True

# The version is read by executing ``__version__.py`` instead of importing the
# package, which would require the extensions to be already compiled.
_version = {}
_version_path = os.path.join(here, 'pyClickModels', '__version__.py')

with open(_version_path, 'r', 'utf-8') as f:
    exec(f.read(), _version)

with open(os.path.join(here, 'README.md'), 'r', 'utf-8') as f:
    readme = f.read()


if sys.argv[-1] == 'publish':
    # Deploy to PyPI is still somewhat manual. It runs locally on Docker instead
    # of relying on Travis.
    os.system('./scripts/build_wheels.sh')
    os.system('python setup.py sdist bdist_wheel')
    os.system('twine upload -r pypitest dist/*')
    sys.exit()
36 |
37 |
def build_define_macros():
    """
    Macro CYTHON_TRACE is set to True so coverage report is available. More info in:

    https://stackoverflow.com/questions/50967268/cython-generating-coverage-for-pyx-file

    Returns
    -------
      list of (str, str) tuples
          ``[('CYTHON_TRACE', '1')]`` when the first command-line argument is
          ``test``; an empty list otherwise, including when setup.py is invoked
          without any command.
    """
    args_ = sys.argv
    if len(args_) > 1 and args_[1] == 'test':
        return [('CYTHON_TRACE', '1')]
    return []
49 |
50 |
51 | define_macros = build_define_macros()
52 |
53 |
class PyTest(TestCommand):
    """
    ``python setup.py test`` command that delegates to pytest.

    Accepts ``--coverage=<value>`` and ``--html=<value>``. Option values arrive
    as strings, so explicit negatives such as ``false`` or ``0`` are recognised
    and keep the option disabled; any other non-empty value enables it.
    """

    user_options = [
        ('coverage=', None, 'Runs coverage report.'),
        ('html=', None, 'Saves result to html report.'),
    ]

    # Spellings that explicitly switch an option off.
    _FALSE_VALUES = frozenset(['', '0', 'false', 'no', 'off', 'n', 'f'])

    @classmethod
    def _is_enabled(cls, value):
        """Interpret an option value (string from the CLI or default bool)."""
        if isinstance(value, str):
            return value.strip().lower() not in cls._FALSE_VALUES
        return bool(value)

    def initialize_options(self):
        TestCommand.initialize_options(self)
        self.pytest_args = []
        self.coverage = False
        self.html = False

    def finalize_options(self):
        TestCommand.finalize_options(self)

        # Normalise to real booleans so '--coverage=false' is not truthy.
        self.coverage = self._is_enabled(self.coverage)
        self.html = self._is_enabled(self.html)

        if self.coverage:
            self.pytest_args.extend(['--cov-config', '.coveragerc'])
            self.pytest_args.extend([
                '--cov', 'pyClickModels', '--cov-report', 'term-missing'])

        if self.html:
            self.pytest_args.extend(['--cov-report', 'html'])

        self.pytest_args.extend(['-p', 'no:warnings'])

    def run_tests(self):
        # Imported here because pytest is only required when tests are run.
        import pytest

        errno = pytest.main(self.pytest_args)
        sys.exit(errno)
85 |
86 |
# Two C++ extensions are compiled: the library module and the Cython test
# module. Both use -std=c++11 and link against json-c; only the library
# extension receives ``define_macros``.
ext_modules = [
    Extension(
        'pyClickModels.DBN',
        ['pyClickModels/DBN.pyx'],
        language='c++',
        libraries=['json-c'],
        include_dirs=['pyClickModels'],
        define_macros=define_macros,
        extra_compile_args=["-std=c++11"],
        extra_link_args=["-std=c++11"]
    ),
    Extension(
        'tests.test_cy_DBN',
        ['tests/test_cy_DBN.pyx'],
        language='c++',
        libraries=['json-c'],
        extra_compile_args=["-std=c++11"],
        extra_link_args=["-std=c++11"]
    )
]

# Runtime dependencies.
install_requires = [
    'cython',
    'numpy',
    'ujson'
]

# Dependencies needed only to run the test-suite; also exposed through the
# ``testing`` extra below.
tests_require = [
    'pytest',
    'pytest-cov',
    'mock'
]

# Dependencies requested before setup() itself runs.
setup_requires = [
    'flake8',
    'isort',
    'pytest-runner'
]

extras_require = {
    'testing': tests_require
}

# Cython directives passed to ``cythonize`` for every extension above.
compiler_directives = {
    'language_level': '3',
    'binding': False,
    'boundscheck': False,
    'wraparound': False,
    'cdivision': True,
    'linetrace': True
}

# Only the library package is listed; ``tests`` is not distributed as a package.
packages = ['pyClickModels']

setup(
    name='pyClickModels',
    version=_version['__version__'],
    author='Willian Fuks',
    author_email='willian.fuks@gmail.com',
    description='ClickModels for Search Engines Implemented on top of Cython.',
    packages=packages,
    include_package_data=True,
    # Ship the .pxd declaration files alongside the package.
    package_data={
        'pyClickModels': ['*.pxd']
    },
    long_description=readme,
    long_description_content_type='text/markdown',
    install_requires=install_requires,
    tests_require=tests_require,
    setup_requires=setup_requires,
    license='MIT',
    ext_modules=cythonize(
        ext_modules,
        compiler_directives=compiler_directives
    ),
    # ``test`` is routed to the PyTest command class.
    cmdclass={
        'build_ext': build_ext,
        'test': PyTest
    },
    zip_safe=False,
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Operating System :: POSIX :: Linux',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Cython',
        'Topic :: Scientific/Engineering',
    ],
)
184 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyClickModels [](https://travis-ci.org/WillianFuks/pyClickModels) [](https://coveralls.io/github/WillianFuks/pyClickModels?branch=master) [](https://badge.fury.io/py/pyClickModels) [](https://pypi.python.org/pypi/pyClickModels) [](https://github.com/WillianFuks/pyClickModels/blob/master/LICENSE)
2 |
3 | A Cython implementation of [ClickModels](https://github.com/varepsilon/clickmodels) that uses Probabilistic Graphical Models to infer user behavior when interacting with Search Page Results (Ranking).
4 |
5 | ## How It Works
6 |
7 | ClickModels uses the concept of [Probabilistic Graphical Models](https://en.wikipedia.org/wiki/Graphical_model) to model components that describe the interactions between users and a list of items ranked by a set of retrieval rules.
8 |
9 | These models tend to be useful when it's desired to understand whether a given document is a good match for a given search query or not which is also known in literature as *Judgments* grades. This is possible through evaluating past observed clicks and the positions at which the document appeared on the results pages for each query.
10 |
11 | There are several [proposed approaches](https://clickmodels.weebly.com/uploads/5/2/2/5/52257029/mc2015-clickmodels.pdf) to handle this problem. This repository implements a Dynamic Bayesian Network, similar to [previous works](https://github.com/varepsilon/clickmodels) also done in Python:
12 |
13 | 
14 |
15 | Main differences are:
16 |
17 | 1. **Implemented on top of Cython**: publicly available solutions rely on CPython integrated with PyPy for additional speed-ups. Unfortunately, this still might not be good enough in terms of performance. To work on that, this implementation relies 100% on C/C++ for further optimization in speed. Despite not having an official benchmark, an improvement of **15x** ~ **18x** on top of CPython is expected (the same data led to an increase of ~3x when using PyPy).
18 | 2. **Memory Friendly**: expects input data to follow a JSON format with all sessions of clickstream already expressed for each row. This saves memory and allows for the library to process bigger amounts of data.
19 | 3. **Purchase variable**: as businesses such as eCommerces can greatly benefit from better understanding their search engine, this repository adds the variable Purchase to further describe customers' behavior.
20 |
21 | The file [notebooks/DBN.ipynb](notebooks/DBN.ipynb) has a complete description of how the model has been implemented along with all the mathematics involved.
22 |
23 |
24 |
25 | ## Installation
26 |
27 | As this project relies on binaries compiled by Cython, currently only Linux (manylinux) platform is supported. It can be installed with:
28 |
29 | pip install pyClickModels
30 |
31 | ## Getting Started
32 |
33 | ### Input Data
34 |
35 | pyClickModels expects input data to be stored in a set of compressed `gz` files located in the same folder. They all should start with the string "judgments", for instance, `judgments0.gz`.
36 | Each file should contain line separated JSONs. The following is an example of each JSON line:
37 |
38 | ```json
39 | {
40 | "search_keys": {
41 | "search_term": "blue shoes",
42 | "region": "south",
43 | "favorite_brand": "super brand",
44 | "user_size": "L",
45 | "avg_ticket": 10
46 | },
47 | "judgment_keys": [
48 | {
49 | "session": [
50 | {"click": 0, "purchase": 0, "doc": "doc0"},
51 | {"click": 1, "purchase": 0, "doc": "doc1"},
52 | {"click": 1, "purchase": 1, "doc": "doc2"}
53 | ]
54 | },
55 | {
56 | "session": [
57 | {"click": 1, "purchase": 0, "doc": "doc0"},
58 | {"click": 0, "purchase": 0, "doc": "doc1"},
59 | {"click": 0, "purchase": 0, "doc": "doc2"}
60 | ]
61 | }
62 | ]
63 | }
64 | ```
65 |
66 | The key `search_keys` sets the context for the search. In the above example, a given customer (or cluster of customers with the same context) searched for `blue shoes`. Their region is `south` (it could be any chosen value), favorite brand is `super brand` and so on.
67 |
68 | These keys set the context in which the search happened. When pyClickModels runs its optimization, it will consider all the context at once. This means that the Judgments obtained also apply to the whole context setting.
69 |
70 | If no context is desired, just use `{"search_keys": {"search_term": "user search"}}`.
71 |
72 | There's no required schema here which means the library loops through all keys available in `search_keys` and builds the optimization process considering the whole context as a single query.
73 |
74 | As for the `judgment_keys`, this is a list of sessions. The key `session` is mandatory. Each session contains the clickstream of users (if the variable purchase is not required set it to 0).
75 |
76 | For running DBN from pyClickModels, here's a simple example:
77 |
78 | ```python
79 | from pyClickModels.DBN import DBN
80 |
81 | model = DBN()
82 | model.fit(input_folder="/tmp/clicks_data/", iters=10)
83 | model.export_judgments("/tmp/output.gz")
84 | ```
85 |
86 | The output file will contain newline-delimited JSON with the judgments for each query and each document observed for that query, i.e.:
87 |
88 | ```json
89 | {"search_term:blue shoes|region:south|brand:super brand": {"doc0": 0.2, "doc1": 0.3, "doc2": 0.4}}
90 | {"search_term:query|region:north|brand:other_brand": {"doc0": 0.0, "doc1": 0.0, "doc2": 0.1}}
91 | ```
92 |
93 | Judgments here vary between 0 and 1. Some libraries require them to be integers ranging from 0 to 4. In that case, choose a transformation that best suits your data.
94 |
95 | ## Warnings
96 |
97 | **This library is still alpha!** Use it with caution. It's been fully unit tested, but parts of it still use pure C, whose exceptions might not have been fully considered yet. Before using this library in production environments, it's recommended to fully test it with different datasets and sizes to evaluate how it performs.
98 |
99 | ## Contributing
100 |
101 | Contributions are very welcome! Also, if you find bugs, please report them :).
102 |
--------------------------------------------------------------------------------
/notebooks/styles/matplotlibrc:
--------------------------------------------------------------------------------
1 | ### MATPLOTLIBRC FORMAT
2 |
3 | # This is a sample matplotlib configuration file - you can find a copy
4 | # of it on your system in
5 | # site-packages/matplotlib/mpl-data/matplotlibrc. If you edit it
6 | # there, please note that it will be overwritten in your next install.
7 | # If you want to keep a permanent local copy that will not be
8 | # overwritten, place it in HOME/.matplotlib/matplotlibrc (unix/linux
9 | # like systems) and C:\Documents and Settings\yourname\.matplotlib
10 | # (win32 systems).
11 | #
12 | # This file is best viewed in a editor which supports python mode
13 | # syntax highlighting. Blank lines, or lines starting with a comment
14 | # symbol, are ignored, as are trailing comments. Other lines must
15 | # have the format
16 | # key : val # optional comment
17 | #
18 | # Colors: for the color values below, you can either use - a
19 | # matplotlib color string, such as r, k, or b - an rgb tuple, such as
20 | # (1.0, 0.5, 0.0) - a hex string, such as ff00ff or #ff00ff - a scalar
21 | # grayscale intensity such as 0.75 - a legal html color name, eg red,
22 | # blue, darkslategray
23 |
24 | #### CONFIGURATION BEGINS HERE
25 |
26 | # the default backend; one of GTK GTKAgg GTKCairo GTK3Agg GTK3Cairo
27 | # CocoaAgg FltkAgg MacOSX QtAgg Qt4Agg TkAgg WX WXAgg Agg Cairo GDK PS
28 | # PDF SVG Template
29 | # You can also deploy your own backend outside of matplotlib by
30 | # referring to the module name (which must be in the PYTHONPATH) as
31 | # 'module://my_backend'
32 | backend : TkAgg
33 |
34 | # If you are using the Qt4Agg backend, you can choose here
35 | # to use the PyQt4 bindings or the newer PySide bindings to
36 | # the underlying Qt4 toolkit.
37 | #backend.qt4 : PyQt4 # PyQt4 | PySide
38 |
39 | # Note that this can be overridden by the environment variable
40 | # QT_API used by Enthought Tool Suite (ETS); valid values are
41 | # "pyqt" and "pyside". The "pyqt" setting has the side effect of
42 | # forcing the use of Version 2 API for QString and QVariant.
43 |
44 | # if you are running pyplot inside a GUI and your backend choice
45 | # conflicts, we will automatically try to find a compatible one for
46 | # you if backend_fallback is True
47 | #backend_fallback: True
48 |
49 | #interactive : False
50 | #toolbar : toolbar2 # None | toolbar2 ("classic" is deprecated)
51 | #timezone : UTC # a pytz timezone string, eg US/Central or Europe/Paris
52 |
53 | # Where your matplotlib data lives if you installed to a non-default
54 | # location. This is where the matplotlib fonts, bitmaps, etc reside
55 | #datapath : /home/jdhunter/mpldata
56 |
57 |
58 | ### LINES
59 | # See http://matplotlib.org/api/artist_api.html#module-matplotlib.lines for more
60 | # information on line properties.
61 | lines.linewidth : 2.0 # line width in points
62 | #lines.linestyle : - # solid line
63 | #lines.color : blue # has no effect on plot(); see axes.color_cycle
64 | #lines.marker : None # the default marker
65 | #lines.markeredgewidth : 0.5 # the line width around the marker symbol
66 | #lines.markersize : 6 # markersize, in points
67 | #lines.dash_joinstyle : miter # miter|round|bevel
68 | #lines.dash_capstyle : butt # butt|round|projecting
69 | #lines.solid_joinstyle : miter # miter|round|bevel
70 | #lines.solid_capstyle : projecting # butt|round|projecting
71 | #lines.antialiased : True # render lines in antialiased (no jaggies)
72 |
73 | ### PATCHES
74 | # Patches are graphical objects that fill 2D space, like polygons or
75 | # circles. See
76 | # http://matplotlib.org/api/artist_api.html#module-matplotlib.patches
77 | # for more information on patch properties
78 | patch.linewidth : 0.5 # edge width in points
79 | patch.facecolor : blue
80 | patch.edgecolor : eeeeee
81 | patch.antialiased : True
82 |
83 | ### FONT
84 | #
85 | # font properties used by text.Text. See
86 | # http://matplotlib.org/api/font_manager_api.html for more
87 | # information on font properties. The 6 font properties used for font
88 | # matching are given below with their default values.
89 | #
90 | # The font.family property has five values: 'serif' (e.g. Times),
91 | # 'sans-serif' (e.g. Helvetica), 'cursive' (e.g. Zapf-Chancery),
92 | # 'fantasy' (e.g. Western), and 'monospace' (e.g. Courier). Each of
93 | # these font families has a default list of font names in decreasing
94 | # order of priority associated with them.
95 | #
96 | # The font.style property has three values: normal (or roman), italic
97 | # or oblique. The oblique style will be used for italic, if it is not
98 | # present.
99 | #
100 | # The font.variant property has two values: normal or small-caps. For
101 | # TrueType fonts, which are scalable fonts, small-caps is equivalent
102 | # to using a font size of 'smaller', or about 83% of the current font
103 | # size.
104 | #
105 | # The font.weight property has effectively 13 values: normal, bold,
106 | # bolder, lighter, 100, 200, 300, ..., 900. Normal is the same as
107 | # 400, and bold is 700. bolder and lighter are relative values with
108 | # respect to the current weight.
109 | #
110 | # The font.stretch property has 11 values: ultra-condensed,
111 | # extra-condensed, condensed, semi-condensed, normal, semi-expanded,
112 | # expanded, extra-expanded, ultra-expanded, wider, and narrower. This
113 | # property is not currently implemented.
114 | #
115 | # The font.size property is the default font size for text, given in pts.
116 | # 12pt is the standard value.
117 | #
118 | #font.family : monospace
119 | #font.style : normal
120 | #font.variant : normal
121 | #font.weight : medium
122 | #font.stretch : normal
123 | # note that font.size controls default text sizes. To configure
124 | # special text sizes tick labels, axes, labels, title, etc, see the rc
125 | # settings for axes and ticks. Special text sizes can be defined
126 | # relative to font.size, using the following values: xx-small, x-small,
127 | # small, medium, large, x-large, xx-large, larger, or smaller
128 | #font.size : 12.0
129 | #font.serif : Bitstream Vera Serif, New Century Schoolbook, Century Schoolbook L, Utopia, ITC Bookman, Bookman, Nimbus Roman No9 L, Times New Roman, Times, Palatino, Charter, serif
130 | #font.sans-serif : Bitstream Vera Sans, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
131 | #font.cursive : Apple Chancery, Textile, Zapf Chancery, Sand, cursive
132 | #font.fantasy : Comic Sans MS, Chicago, Charcoal, Impact, Western, fantasy
133 | #font.monospace : Andale Mono, Nimbus Mono L, Courier New, Courier, Fixed, Terminal, monospace
134 |
135 |
136 | ### TEXT
137 | # text properties used by text.Text. See
138 | # http://matplotlib.org/api/artist_api.html#module-matplotlib.text for more
139 | # information on text properties
140 |
141 | #text.color : black
142 |
143 | ### LaTeX customizations. See http://www.scipy.org/Wiki/Cookbook/Matplotlib/UsingTex
144 | #text.usetex : False # use latex for all text handling. The following fonts
145 | # are supported through the usual rc parameter settings:
146 | # new century schoolbook, bookman, times, palatino,
147 | # zapf chancery, charter, serif, sans-serif, helvetica,
148 | # avant garde, courier, monospace, computer modern roman,
149 | # computer modern sans serif, computer modern typewriter
150 | # If another font is desired which can be loaded using the
151 | # LaTeX \usepackage command, please inquire at the
152 | # matplotlib mailing list
153 | #text.latex.unicode : False # use "ucs" and "inputenc" LaTeX packages for handling
154 | # unicode strings.
155 | #text.latex.preamble : # IMPROPER USE OF THIS FEATURE WILL LEAD TO LATEX FAILURES
156 | # AND IS THEREFORE UNSUPPORTED. PLEASE DO NOT ASK FOR HELP
157 | # IF THIS FEATURE DOES NOT DO WHAT YOU EXPECT IT TO.
158 | # preamble is a comma separated list of LaTeX statements
159 | # that are included in the LaTeX document preamble.
160 | # An example:
161 | # text.latex.preamble : \usepackage{bm},\usepackage{euler}
162 | # The following packages are always loaded with usetex, so
163 | # beware of package collisions: color, geometry, graphicx,
164 | # type1cm, textcomp. Adobe Postscript (PSSNFS) font packages
165 | # may also be loaded, depending on your font settings
166 |
167 | #text.dvipnghack : None # some versions of dvipng don't handle alpha
168 | # channel properly. Use True to correct
169 | # and flush ~/.matplotlib/tex.cache
170 | # before testing and False to force
171 | # correction off. None will try and
172 | # guess based on your dvipng version
173 |
174 | #text.hinting : 'auto' # May be one of the following:
175 | # 'none': Perform no hinting
176 | # 'auto': Use freetype's autohinter
177 | # 'native': Use the hinting information in the
178 | # font file, if available, and if your
179 | # freetype library supports it
180 | # 'either': Use the native hinting information,
181 | # or the autohinter if none is available.
182 | # For backward compatibility, this value may also be
183 | # True === 'auto' or False === 'none'.
184 | text.hinting_factor : 8 # Specifies the amount of softness for hinting in the
185 | # horizontal direction. A value of 1 will hint to full
186 | # pixels. A value of 2 will hint to half pixels etc.
187 |
188 | #text.antialiased : True # If True (default), the text will be antialiased.
189 | # This only affects the Agg backend.
190 |
191 | # The following settings allow you to select the fonts in math mode.
192 | # They map from a TeX font name to a fontconfig font pattern.
193 | # These settings are only used if mathtext.fontset is 'custom'.
194 | # Note that this "custom" mode is unsupported and may go away in the
195 | # future.
196 | #mathtext.cal : cursive
197 | #mathtext.rm : serif
198 | #mathtext.tt : monospace
199 | #mathtext.it : serif:italic
200 | #mathtext.bf : serif:bold
201 | #mathtext.sf : sans
202 | mathtext.fontset : cm # Should be 'cm' (Computer Modern), 'stix',
203 | # 'stixsans' or 'custom'
204 | #mathtext.fallback_to_cm : True # When True, use symbols from the Computer Modern
205 | # fonts when a symbol can not be found in one of
206 | # the custom math fonts.
207 |
208 | #mathtext.default : it # The default font to use for math.
209 | # Can be any of the LaTeX font names, including
210 | # the special name "regular" for the same font
211 | # used in regular text.
212 |
213 | ### AXES
214 | # default face and edge color, default tick sizes,
215 | # default fontsizes for ticklabels, and so on. See
216 | # http://matplotlib.org/api/axes_api.html#module-matplotlib.axes
217 | #axes.hold : True # whether to clear the axes by default on
218 | axes.facecolor : eeeeee # axes background color
219 | axes.edgecolor : bcbcbc # axes edge color
220 | #axes.linewidth : 1.0 # edge linewidth
221 | axes.grid : True # display grid or not
222 | axes.titlesize : x-large # fontsize of the axes title
223 | axes.labelsize : large # fontsize of the x and y labels
224 | #axes.labelweight : normal # weight of the x and y labels
225 | #axes.labelcolor : black
226 | #axes.axisbelow : False # whether axis gridlines and ticks are below
227 | # the axes elements (lines, text, etc)
228 | #axes.formatter.limits : -7, 7 # use scientific notation if log10
229 | # of the axis range is smaller than the
230 | # first or larger than the second
231 | #axes.formatter.use_locale : False # When True, format tick labels
232 | # according to the user's locale.
233 | # For example, use ',' as a decimal
234 | # separator in the fr_FR locale.
235 | #axes.formatter.use_mathtext : False # When True, use mathtext for scientific
236 | # notation.
237 | #axes.unicode_minus : True # use unicode for the minus symbol
238 | # rather than hyphen. See
239 | # http://en.wikipedia.org/wiki/Plus_and_minus_signs#Character_codes
240 | axes.color_cycle : 348ABD, A60628, 7A68A6, 467821,D55E00, CC79A7, 56B4E9, 009E73, F0E442, 0072B2 # color cycle for plot lines
241 | # as list of string colorspecs:
242 | # single letter, long name, or
243 | # web-style hex
244 |
245 | #polaraxes.grid : True # display grid on polar axes
246 | #axes3d.grid : True # display grid on 3d axes
247 |
248 | ### TICKS
249 | # see http://matplotlib.org/api/axis_api.html#matplotlib.axis.Tick
250 | #xtick.major.size : 4 # major tick size in points
251 | #xtick.minor.size : 2 # minor tick size in points
252 | #xtick.major.width : 0.5 # major tick width in points
253 | #xtick.minor.width : 0.5 # minor tick width in points
254 | #xtick.major.pad : 4 # distance to major tick label in points
255 | #xtick.minor.pad : 4 # distance to the minor tick label in points
256 | #xtick.color : k # color of the tick labels
257 | #xtick.labelsize : medium # fontsize of the tick labels
258 | #xtick.direction : in # direction: in, out, or inout
259 |
260 | #ytick.major.size : 4 # major tick size in points
261 | #ytick.minor.size : 2 # minor tick size in points
262 | #ytick.major.width : 0.5 # major tick width in points
263 | #ytick.minor.width : 0.5 # minor tick width in points
264 | #ytick.major.pad : 4 # distance to major tick label in points
265 | #ytick.minor.pad : 4 # distance to the minor tick label in points
266 | #ytick.color : k # color of the tick labels
267 | #ytick.labelsize : medium # fontsize of the tick labels
268 | #ytick.direction : in # direction: in, out, or inout
269 |
270 |
271 | ### GRIDS
272 | #grid.color : black # grid color
273 | #grid.linestyle : : # dotted
274 | #grid.linewidth : 0.5 # in points
275 | #grid.alpha : 1.0 # transparency, between 0.0 and 1.0
276 |
277 | ### Legend
278 | legend.fancybox : True # if True, use a rounded box for the
279 | # legend, else a rectangle
280 | #legend.isaxes : True
281 | #legend.numpoints : 2 # the number of points in the legend line
282 | #legend.fontsize : large
283 | #legend.pad : 0.0 # deprecated; the fractional whitespace inside the legend border
284 | #legend.borderpad : 0.5 # border whitespace in fontsize units
285 | #legend.markerscale : 1.0 # the relative size of legend markers vs. original
286 | # the following dimensions are in axes coords
287 | #legend.labelsep : 0.010 # deprecated; the vertical space between the legend entries
288 | #legend.labelspacing : 0.5 # the vertical space between the legend entries in fraction of fontsize
289 | #legend.handlelen : 0.05 # deprecated; the length of the legend lines
290 | #legend.handlelength : 2. # the length of the legend lines in fraction of fontsize
291 | #legend.handleheight : 0.7 # the height of the legend handle in fraction of fontsize
292 | #legend.handletextsep : 0.02 # deprecated; the space between the legend line and legend text
293 | #legend.handletextpad : 0.8 # the space between the legend line and legend text in fraction of fontsize
294 | #legend.axespad : 0.02 # deprecated; the border between the axes and legend edge
295 | #legend.borderaxespad : 0.5 # the border between the axes and legend edge in fraction of fontsize
296 | #legend.columnspacing : 2. # the spacing between columns in fraction of fontsize
297 | #legend.shadow : False
298 | #legend.frameon : True # whether or not to draw a frame around legend
299 |
300 | ### FIGURE
301 | # See http://matplotlib.org/api/figure_api.html#matplotlib.figure.Figure
302 | figure.figsize : 11, 8 # figure size in inches
303 | figure.dpi : 100 # figure dots per inch
304 | #figure.facecolor : 0.75 # figure facecolor; 0.75 is scalar gray
305 | #figure.edgecolor : white # figure edgecolor
306 | #figure.autolayout : False # When True, automatically adjust subplot
307 | # parameters to make the plot fit the figure
308 |
309 | # The figure subplot parameters. All dimensions are a fraction of the
310 | # figure width or height
311 | #figure.subplot.left : 0.125 # the left side of the subplots of the figure
312 | #figure.subplot.right : 0.9 # the right side of the subplots of the figure
313 | #figure.subplot.bottom : 0.1 # the bottom of the subplots of the figure
314 | #figure.subplot.top : 0.9 # the top of the subplots of the figure
315 | #figure.subplot.wspace : 0.2 # the amount of width reserved for blank space between subplots
316 | #figure.subplot.hspace : 0.2 # the amount of height reserved for white space between subplots
317 |
318 | ### IMAGES
319 | #image.aspect : equal # equal | auto | a number
320 | #image.interpolation : bilinear # see help(imshow) for options
321 | #image.cmap : jet # gray | jet etc...
322 | #image.lut : 256 # the size of the colormap lookup table
323 | #image.origin : upper # lower | upper
324 | #image.resample : False
325 |
326 | ### CONTOUR PLOTS
327 | #contour.negative_linestyle : dashed # dashed | solid
328 |
329 | ### Agg rendering
330 | ### Warning: experimental, 2008/10/10
331 | #agg.path.chunksize : 0 # 0 to disable; values in the range
332 | # 10000 to 100000 can improve speed slightly
333 | # and prevent an Agg rendering failure
334 | # when plotting very large data sets,
335 | # especially if they are very gappy.
336 | # It may cause minor artifacts, though.
337 | # A value of 20000 is probably a good
338 | # starting point.
339 | ### SAVING FIGURES
340 | #path.simplify : True # When True, simplify paths by removing "invisible"
341 | # points to reduce file size and increase rendering
342 | # speed
343 | #path.simplify_threshold : 0.1 # The threshold of similarity below which
344 | # vertices will be removed in the simplification
345 | # process
346 | #path.snap : True # When True, rectilinear axis-aligned paths will be snapped to
347 | # the nearest pixel when certain criteria are met. When False,
348 | # paths will never be snapped.
349 |
350 | # the default savefig params can be different from the display params
351 | # Eg, you may want a higher resolution, or to make the figure
352 | # background white
353 | savefig.dpi : 300 # figure dots per inch
354 | #savefig.facecolor : white # figure facecolor when saving
355 | #savefig.edgecolor : white # figure edgecolor when saving
356 | #savefig.format : png # png, ps, pdf, svg
357 | #savefig.bbox : standard # 'tight' or 'standard'.
358 | #savefig.pad_inches : 0.1 # Padding to be used when bbox is set to 'tight'
359 |
360 | # tk backend params
361 | #tk.window_focus : False # Maintain shell focus for TkAgg
362 |
363 | # ps backend params
364 | #ps.papersize : letter # auto, letter, legal, ledger, A0-A10, B0-B10
365 | #ps.useafm : False # use of afm fonts, results in small files
366 | #ps.usedistiller : False # can be: None, ghostscript or xpdf
367 | # Experimental: may produce smaller files.
368 | # xpdf intended for production of publication quality files,
369 | # but requires ghostscript, xpdf and ps2eps
370 | #ps.distiller.res : 6000 # dpi
371 | #ps.fonttype : 3 # Output Type 3 (Type3) or Type 42 (TrueType)
372 |
373 | # pdf backend params
374 | #pdf.compression : 6 # integer from 0 to 9
375 | # 0 disables compression (good for debugging)
376 | #pdf.fonttype : 3 # Output Type 3 (Type3) or Type 42 (TrueType)
377 |
378 | # svg backend params
379 | #svg.image_inline : True # write raster image data directly into the svg file
380 | #svg.image_noscale : False # suppress scaling of raster data embedded in SVG
381 | #svg.fonttype : 'path' # How to handle SVG fonts:
382 | # 'none': Assume fonts are installed on the machine where the SVG will be viewed.
383 | # 'path': Embed characters as paths -- supported by most SVG renderers
384 | # 'svgfont': Embed characters as SVG fonts -- supported only by Chrome,
385 | # Opera and Safari
386 |
387 | # docstring params
388 | #docstring.hardcopy : False # set this when you want to generate hardcopy docstring
389 |
390 | # Set the verbose flags. This controls how much information
391 | # matplotlib gives you at runtime and where it goes. The verbosity
392 | # levels are: silent, helpful, debug, debug-annoying. Any level is
393 | # inclusive of all the levels below it. If your setting is "debug",
394 | # you'll get all the debug and helpful messages. When submitting
395 | # problems to the mailing-list, please set verbose to "helpful" or "debug"
396 | # and paste the output into your report.
397 | #
398 | # The "fileo" gives the destination for any calls to verbose.report.
399 | # These objects can be a filename, or a filehandle like sys.stdout.
400 | #
401 | # You can override the rc default verbosity from the command line by
402 | # giving the flags --verbose-LEVEL where LEVEL is one of the legal
403 | # levels, eg --verbose-helpful.
404 | #
405 | # You can access the verbose instance in your code
406 | # from matplotlib import verbose.
407 | #verbose.level : silent # one of silent, helpful, debug, debug-annoying
408 | #verbose.fileo : sys.stdout # a log filename, sys.stdout or sys.stderr
409 |
410 | # Event keys to interact with figures/plots via keyboard.
411 | # Customize these settings according to your needs.
412 | # Leave the field(s) empty if you don't need a key-map. (i.e., fullscreen : '')
413 |
414 | #keymap.fullscreen : f # toggling
415 | #keymap.home : h, r, home # home or reset mnemonic
416 | #keymap.back : left, c, backspace # forward / backward keys to enable
417 | #keymap.forward : right, v # left handed quick navigation
418 | #keymap.pan : p # pan mnemonic
419 | #keymap.zoom : o # zoom mnemonic
420 | #keymap.save : s # saving current figure
421 | #keymap.quit : ctrl+w # close the current figure
422 | #keymap.grid : g # switching on/off a grid in current axes
423 | #keymap.yscale : l # toggle scaling of y-axes ('log'/'linear')
424 | #keymap.xscale : L, k # toggle scaling of x-axes ('log'/'linear')
425 | #keymap.all_axes : a # enable all axes
426 |
427 | ###ANIMATION settings
428 | #animation.writer : ffmpeg # MovieWriter 'backend' to use
429 | #animation.codec : mp4 # Codec to use for writing movie
430 | #animation.bitrate: -1 # Controls size/quality tradeoff for movie.
431 | # -1 implies let utility auto-determine
432 | #animation.frame_format: 'png' # Controls frame format used by temp files
433 | #animation.ffmpeg_path: 'ffmpeg' # Path to ffmpeg binary. Without full path
434 | # $PATH is searched
435 | #animation.ffmpeg_args: '' # Additional arguments to pass to ffmpeg
436 | #animation.mencoder_path: 'ffmpeg' # Path to mencoder binary. Without full path
437 | # $PATH is searched
438 | #animation.mencoder_args: '' # Additional arguments to pass to mencoder
--------------------------------------------------------------------------------
/pyClickModels/DBN.pyx:
--------------------------------------------------------------------------------
1 | # cython: linetrace=True
2 |
3 | import gzip
4 | import os
5 | import time
6 | from glob import glob
7 |
8 | import ujson
9 |
10 | from cython.operator cimport dereference, postincrement
11 | from libc.stdlib cimport RAND_MAX, rand, srand
12 | from libc.time cimport time as ctime
13 | from libcpp.string cimport string
14 | from libcpp.unordered_map cimport unordered_map
15 | from libcpp.vector cimport vector
16 |
17 | from pyClickModels.jsonc cimport (json_object, json_object_array_get_idx,
18 | json_object_array_length,
19 | json_object_get_int, json_object_get_string,
20 | json_object_object_get_ex, json_object_put,
21 | json_tokener_parse, lh_entry, lh_table)
22 |
23 | # Seed C's `rand()` with the current time. `rand()` provides the random values
24 | # required for initializing the DBN parameters.
25 | SEED = ctime(NULL)
26 | srand(SEED)
27 |
28 |
29 | cdef class Factor:
30 | """
31 | Helper class to implement the Factor component as discussed in:
32 |
33 | https://clickmodels.weebly.com/uploads/5/2/2/5/52257029/mc2015-clickmodels.pdf
34 |
35 | page 37 equation 4.43
36 |
37 | Args
38 | ----
39 | r: int
40 | Rank position in search results.
41 | last_r: int
42 | Last observed click or purchase from search results.
43 | click: bint
44 | purchase: bint
45 | alpha: float
46 | Updated values of alpha.
47 | sigma: float
48 | Updated values of sigma.
49 | gamma: float
50 | Updated value of gamma
51 | cr: float
52 | Conversion Rate of current document in session.
53 | vector[float] e_r_vector_given_CP*
54 | Probability that document at position r was examined (E_r=1) given clicks
55 | and purchases.
56 | vector[float] cp_vector_given_e*
57 | Probability of observing Clicks and Purchases at positions greater than
58 | r given that position r + 1 was examined.
59 | """
60 |     # A plain `cinit` is used instead of `__cinit__` so pointers can be sent as input.
61 |     cdef cinit(
62 |         self,
63 |         unsigned int r, unsigned int last_r,
64 |         bint click, bint purchase,
65 |         float alpha, float sigma, float gamma, float cr,
66 |         vector[float] *e_r_vector_given_CP,
67 |         vector[float] *cp_vector_given_e
68 |     ):
69 |         """Copy every argument, unchanged, onto the attribute of the same name."""
70 |         # Rank being evaluated and rank of the last observed click or purchase.
71 |         self.last_r = last_r
72 |         self.r = r
73 |         # Events observed for the document.
74 |         self.purchase = purchase
75 |         self.click = click
76 |         # Current parameter values and the document's conversion rate.
77 |         self.cr = cr
78 |         self.gamma = gamma
79 |         self.sigma = sigma
80 |         self.alpha = alpha
81 |         # The probability vectors are received, and stored, as pointers.
82 |         self.cp_vector_given_e = cp_vector_given_e
83 |         self.e_r_vector_given_CP = e_r_vector_given_CP
84 |
85 | cdef float compute_factor(self, bint x, bint y, bint z):
86 | """
87 | Responsible for computing the following equation:
88 |
89 | P(E_r = x, S_r = y, E_{r+1} = z, C_{>=r+1}, P_{>=r+1} | C_{r},P_{>r} | E_{r+1})
131 | if not z:
132 | if self.last_r >= self.r + 1:
133 | return 0
134 | else:
135 | if self.r < self.cp_vector_given_e[0].size():
136 | result *= self.cp_vector_given_e[0][self.r]
137 | # P(E_r=x | Crand() / RAND_MAX
173 | return &self.gamma_param
174 | elif param == b'alpha':
175 | tmp = &self.alpha_params
176 | else:
177 | # param = b'sigma':
178 | tmp = &self.sigma_params
179 |
180 | # query not in map
181 | if tmp[0].find(query[0]) == tmp[0].end():
182 | # using c rand function as it's ~ 15 - 30 times faster than Python's random
183 | tmp[0][query[0]][doc[0]] = rand() / RAND_MAX
184 | # query is in map but document is not
185 | elif tmp[0][query[0]].find(doc[0]) == tmp[0][query[0]].end():
186 | tmp[0][query[0]][doc[0]] = rand() / RAND_MAX
187 |
188 | return &tmp[0][query[0]][doc[0]]
189 |
190 |     cdef string get_search_context_string(self, lh_table *tbl):
191 |         """
192 |         In pyClickModels, the input data can contain not only the search the user
193 |         inserted but also more information that describes the context of the search,
194 |         such as the region of user, their favorite brands or average purchasing price
195 |         and so on.
196 |
197 |         The computation of Judgments happens, therefore, not only on top of the search
198 |         term but also on the context at which the search was made.
199 |
200 |         This method combines all those keys together so the optimization happens on
201 |         a single string as the final query.
202 |
203 |         Args
204 |         ----
205 |           tbl: lh_table *
206 |               Context at which search happened, expressed in JSON. Example:
207 |               `{"search_term": "query", "region": "northeast", "avg_ticket": 20}`
208 |
209 |         Returns
210 |         -------
211 |           final_query: string
212 |               `key:value` pairs joined by `|`, in the order the table lists them.
213 |               An empty string is returned when `tbl` is NULL or has no entries.
214 |         """
215 |         cdef:
216 |             string result
217 |             char *k
218 |             json_object *v
219 |             lh_entry *entry
220 |
221 |         # Guard before touching the first entry: reading its key from a NULL or
222 |         # empty table would dereference a NULL pointer.
223 |         if tbl == NULL or tbl.head == NULL:
224 |             return result
225 |
226 |         entry = tbl.head
227 |         while entry:
228 |             k = entry.k
229 |             v = entry.v
230 |             # Keys and values are separated by ":" and pairs by "|", giving a base
231 |             # value for the query as expressed by its complete context (region of
232 |             # the user, favorite brand, average ticket and so on).
233 |             if not result.empty():
234 |                 result = result + string(b'|')
235 |             result = (
236 |                 result + string(k) + string(b':') + string(json_object_get_string(v))
237 |             )
238 |             entry = entry.next
239 |         return result
240 |
241 |     cdef void compute_cr(self, string *query, json_object *sessions,
242 |                          unordered_map[string, unordered_map[string, float]] *cr_dict):
243 |         """
244 |         pyClickModels can also consider data related to purchases events. This method
245 |         computes the conversion rate (cr) that each document had on each observed
246 |         query context.
247 |
248 |         The conversion rate of a document is the number of times it was purchased
249 |         divided by the number of times it appeared, across all sessions of the query.
250 |
251 |         Args
252 |         ----
253 |           query: string *
254 |           sessions: json_object *
255 |               List of sessions where each one holds, under the key `session`, the
256 |               documents a given user interacted with along with clicks and purchases.
257 |           cr_dict: unordered_map[string, unordered_map[string, float]] *
258 |               Map of documents and their respective conversion rates for each specific
259 |               query. Updated in place.
260 |         """
261 |         # If query is already available on cr_dict then it's not required to be
262 |         # processed again.
263 |         if cr_dict[0].find(query[0]) != cr_dict[0].end():
264 |             return
265 |
266 |         cdef:
267 |             size_t nsessions = json_object_array_length(sessions)
268 |             size_t ndocs
269 |             json_object *jso_session
270 |             json_object *clickstream
271 |             json_object *doc_data
272 |             json_object *tmp_jso
273 |             string doc
274 |             bint purchase
275 |             unsigned int i
276 |             unsigned int j
277 |             # doc -> [total purchases, total appearances]
278 |             unordered_map[string, vector[int]] tmp_cr
279 |             unordered_map[string, vector[int]].iterator it
280 |             vector[int] *counts
281 |             float cr
282 |
283 |         # Tally purchases and appearances of every document across all sessions.
284 |         for i in range(nsessions):
285 |             jso_session = json_object_array_get_idx(sessions, i)
286 |             json_object_object_get_ex(jso_session, b'session', &clickstream)
287 |
288 |             ndocs = json_object_array_length(clickstream)
289 |
290 |             for j in range(ndocs):
291 |                 doc_data = json_object_array_get_idx(clickstream, j)
292 |
293 |                 json_object_object_get_ex(doc_data, b'doc', &tmp_jso)
294 |                 doc = json_object_get_string(tmp_jso)
295 |
296 |                 json_object_object_get_ex(doc_data, b'purchase', &tmp_jso)
297 |                 purchase = json_object_get_int(tmp_jso)
298 |
299 |                 # `operator[]` inserts an empty vector the first time a document is
300 |                 # seen; `resize` then zero-initializes both counters.
301 |                 counts = &tmp_cr[doc]
302 |                 if counts[0].empty():
303 |                     counts[0].resize(2)
304 |
305 |                 if purchase:
306 |                     counts[0][0] += 1
307 |                 counts[0][1] += 1
308 |
309 |         it = tmp_cr.begin()
310 |         while it != tmp_cr.end():
311 |             # Cast so the ratio is a floating point division regardless of the
312 |             # division semantics the module is compiled with.
313 |             cr = <double>dereference(it).second[0] / dereference(it).second[1]
314 |             cr_dict[0][query[0]][dereference(it).first] = cr
315 |             postincrement(it)
316 |
317 |     cdef vector[float] build_e_r_vector(
318 |         self,
319 |         json_object *clickstream,
320 |         string *query,
321 |         unordered_map[string, float] *cr_dict,
322 |     ):
323 |         """
324 |         Computes the probability of each document in user session being examined.
325 |
326 |         The equation implemented is:
327 |
328 |         $P(E_{r+1}=1) = \\epsilon_r \\gamma \\left((1 - \\alpha_{uq}) +
329 |         (1 - \\sigma_{uq})(1 - cr_{uq})\\alpha_{uq} \\right)$
330 |
331 |         Args
332 |         ----
333 |           clickstream: json_object *
334 |               JSON object representing the user clickstream. Example:
335 |               [
336 |                   {"doc": "doc0", "click": 0, "purchase": 0},
337 |                   {"doc": "doc1", "click": 1, "purchase": 0}
338 |               ]
339 |           query: string *
340 |           cr_dict: unordered_map[string, float] *
341 |               Conversion rates of each document for a given query. Example:
342 |               {"doc0": 0.2, "doc1": 0.51}
343 |
344 |         Returns
345 |         -------
346 |           e_r_vector: vector[float]
347 |               Examination probabilities; has one more entry than `clickstream`.
348 |         """
349 |         cdef:
350 |             size_t total_docs = json_object_array_length(clickstream)
351 |             string doc
352 |             unsigned int r
353 |             json_object *tmp
354 |             float *alpha
355 |             float *sigma
356 |             float *gamma
357 |             float cr
358 |             float e_r_next
359 |             # Add +1 to total_docs to compute P(E_{r+1})
360 |             vector[float] e_r_vector = vector[float](total_docs + 1)
361 |
362 |         # Probability of Examination at r=0 (first document in search page results)
363 |         # is always 100%
364 |         e_r_vector[0] = 1
365 |         # gamma does not depend on the document, so it is looked up only once.
366 |         gamma = self.get_param(b'gamma')
367 |         # Compute P(E_{r+1}) so add +1 to the total docs
368 |         for r in range(1, total_docs + 1):
369 |             json_object_object_get_ex(
370 |                 json_object_array_get_idx(clickstream, r - 1),
371 |                 b'doc',
372 |                 &tmp
373 |             )
374 |             doc = json_object_get_string(tmp)
375 |             alpha = self.get_param(b'alpha', query, &doc)
376 |             sigma = self.get_param(b'sigma', query, &doc)
377 |             cr = dereference(cr_dict)[doc]
378 |
379 |             e_r_next = (e_r_vector[r - 1] * gamma[0] * ((1 - sigma[0]) * (1 - cr) *
380 |                         alpha[0] + (1 - alpha[0])))
381 |             e_r_vector[r] = e_r_next
382 |         return e_r_vector
383 |
384 |     cdef vector[float] build_X_r_vector(self, json_object *clickstream, string *query):
385 |         """
386 |         X_r is given by P(C_{\\geq r} \\mid E_r=1). It extends for the probability of
387 |         click on any rank starting from current until last one. This vector is also
388 |         used in the EM optimization process.
389 |
390 |         The probability of click after the very last sku is considered zero. This
391 |         allows to build the `X_r` vector recursively.
392 |
393 |         The equation is:
394 |
395 |         X_r = P(C_{\\geq r} \\mid E_r=1) = \\alpha_{uq} + (1 - \\alpha_{uq})\\gamma X_{r+1}
396 |
397 |         Args
398 |         ----
399 |           clickstream: json_object *
400 |               Session clickstream (clicks and purchases)
401 |           query: string *
402 |
403 |         Returns
404 |         -------
405 |           X_r_vector: vector[float]
406 |               One value per document plus a trailing zero for position N + 1.
407 |         """
408 |         cdef:
409 |             size_t total_docs = json_object_array_length(clickstream)
410 |             unsigned int i
411 |             unsigned int r
412 |             string doc
413 |             # Add one to the length because of the zero value added for position
414 |             # N + 1 where N is the amount of documents returned in the search page.
415 |             vector[float] X_r_vector = vector[float](total_docs + 1)
416 |             json_object *tmp
417 |             float *alpha
418 |             float *gamma
419 |
420 |         # Probability of clicks at positions greater than the last document in results
421 |         # page is zero.
422 |         X_r_vector[total_docs] = 0
423 |         gamma = self.get_param(b'gamma')
424 |
425 |         # Visit ranks from last to first using an ascending counter: the values are
426 |         # unsigned, so computing `total_docs - 1` for an empty input would wrap around.
427 |         for i in range(total_docs):
428 |             r = total_docs - 1 - i
429 |             json_object_object_get_ex(
430 |                 json_object_array_get_idx(clickstream, r), b'doc', &tmp
431 |             )
432 |             doc = json_object_get_string(tmp)
433 |             alpha = self.get_param(b'alpha', query, &doc)
434 |             X_r_vector[r] = alpha[0] + (1 - alpha[0]) * gamma[0] * X_r_vector[r + 1]
435 |         return X_r_vector
436 |
437 | cdef vector[float] build_e_r_vector_given_CP(self, json_object *clickstream,
438 | unsigned int idx, string *query):
439 | """
440 | Computes the probability that a given document was examined given the array of
441 | previous clicks and purchases.
442 |
443 | Mathematically: P(E_r = 1 | C_{json_object_get_int(tmp)
495 |
496 | json_object_object_get_ex(
497 | json_object_array_get_idx(clickstream, r),
498 | b'purchase',
499 | &tmp
500 | )
501 | purchase = json_object_get_int(tmp)
502 |
503 | alpha = self.get_param(b'alpha', query, &doc)
504 | sigma = self.get_param(b'sigma', query, &doc)
505 |
506 | if purchase:
507 | return e_r_vector_given_CP
508 | elif click:
509 | e_r_vector_given_CP[r + 1 - idx] = (1 - sigma[0]) * gamma[0]
510 | else:
511 | e_r_vector_given_CP[r + 1 - idx] = (
512 | (gamma[0] * (1 - alpha[0]) * e_r_vector_given_CP[r - idx]) /
513 | (1 - alpha[0] * e_r_vector_given_CP[r - idx])
514 | )
515 | return e_r_vector_given_CP
516 |
cdef float compute_cp_p(
    self,
    json_object *clickstream,
    unsigned int idx,
    string *query,
    vector[float] *e_r_array_given_CP,
    unordered_map[string, float] *cr_dict
):
    """
    Helper that multiplies, rank by rank, the probability of the Clicks and
    Purchases observed from position `idx` until the end of the clickstream.

    Mathematically:

        P(C_{>= r+1}, P_{>= r+1} | E_{r+1})

    Args
    ----
      clickstream: *json_object
          Customer's clickstream.
      idx: unsigned int
          Index from where to start reading the clickstream.
      query: *string
      e_r_array_given_CP: vector[float]*
          Probability of document being examined at position r given Clicks and
          Purchases observed before r. It is indexed from zero, i.e., entry `0`
          refers to the document at position `idx`.
      cr_dict: unordered_map[string, float]*
          Conversion Rate (CR) of documents for current query.

    Returns
    -------
      cp_p: float
          Probability of observing Clicks and Purchases at positions greater than
          r given that r + 1 was examined.
    """
    cdef:
        size_t n_docs = json_object_array_length(clickstream)
        unsigned int pos
        string doc
        json_object *doc_data
        json_object *field
        bint clicked
        bint purchased
        float *alpha
        float examined
        float likelihood = 1

    for pos in range(idx, n_docs):
        doc_data = json_object_array_get_idx(clickstream, pos)

        json_object_object_get_ex(doc_data, b'doc', &field)
        doc = json_object_get_string(field)

        json_object_object_get_ex(doc_data, b'click', &field)
        clicked = json_object_get_int(field)

        json_object_object_get_ex(doc_data, b'purchase', &field)
        purchased = json_object_get_int(field)

        alpha = self.get_param(b'alpha', query, &doc)

        # The examination vector was built for the sliced session, so it is read
        # relative to `idx` and not relative to the full clickstream.
        examined = dereference(e_r_array_given_CP)[pos - idx]

        if not (purchased or clicked):
            # Nothing happened at this rank.
            likelihood *= 1 - alpha[0] * examined
        elif purchased:
            likelihood *= dereference(cr_dict)[doc] * alpha[0] * examined
        else:
            # Click without purchase.
            likelihood *= (
                (1 - dereference(cr_dict)[doc]) * alpha[0] * examined
            )
    return likelihood
599 |
cdef vector[float] build_CP_vector_given_e(
    self,
    json_object *clickstream,
    string *query,
    unordered_map[string, float] *cr_dict
):
    """
    Computes the probability that Clicks and Purchases will be observed at positions
    greater than r given that position at r+1 was examined.

    Mathematically:

        P(C_{>r}, P_{>r} | E_{r+1})

    This is equation (25) from blog post:

    https://towardsdatascience.com/how-to-extract-relevance-from-clickstream-data-2a870df219fb

    Args
    ----
      clickstream: *json_object
          User clickstream
      query: *string
      cr_dict: *unordered_map[string, float]
          Conversion Rate (CR) of documents for current query

    Returns
    -------
      cp_vector_given_e: vector[float]
          Probability of observing Clicks and Purchases at positions greater than
          r given that position r + 1 was examined. Holds `total_docs - 1` entries
          and is empty when the clickstream has fewer than two documents.
    """
    cdef:
        unsigned int r
        size_t total_docs = json_object_array_length(clickstream)
        vector[float] e_r_vector_given_CP
        vector[float] cp_vector_given_e

    # `total_docs` is unsigned: with an empty clickstream `total_docs - 1` would
    # wrap around to a huge value, both for the allocation and for the loop bound.
    # With a single document there is no position r + 1 to evaluate either.
    if total_docs < 2:
        return cp_vector_given_e

    cp_vector_given_e = vector[float](total_docs - 1)

    # One entry per rank that still has a following position r + 1.
    for r in range(total_docs - 1):
        e_r_vector_given_CP = self.build_e_r_vector_given_CP(clickstream, r + 1,
                                                             query)
        cp_vector_given_e[r] = self.compute_cp_p(clickstream, r + 1, query,
                                                 &e_r_vector_given_CP, cr_dict)
    return cp_vector_given_e
645 |
cdef int get_last_r(self, json_object *clickstream, const char *event=b'click'):
    """
    Scans every document of the clickstream and reports the last position at which
    the desired event happened. The event can be either a 'click' or a 'purchase'
    (still, in DBN, if a purchase is observed then it automatically means it is
    the very last r observed).

    Args
    ----
      clickstream: *json_object
          User clickstream
      event: const char*
          Name of desired event to track.

    Returns
    -------
      last_r: int
          Index at which the last desired event was observed. Zero is returned
          when the event is not observed at any position.
    """
    cdef:
        size_t n_docs = json_object_array_length(clickstream)
        unsigned int pos
        unsigned int last_seen = 0
        json_object *field

    for pos in range(n_docs):
        json_object_object_get_ex(
            json_object_array_get_idx(clickstream, pos),
            event,
            &field
        )
        # Later positions overwrite earlier ones, so the final value is the last
        # rank where the event flag is set.
        if json_object_get_int(field):
            last_seen = pos
    return last_seen
682 |
cdef void update_tmp_alpha(
    self,
    int r,
    string *query,
    json_object *doc_data,
    vector[float] *e_r_vector,
    vector[float] *X_r_vector,
    int last_r,
    unordered_map[string, vector[float]] *tmp_alpha_param
):
    """
    Accumulates, for the document at rank `r`, its contribution to the EM update of
    parameter alpha (attractiveness).

    The equation for updating alpha is:

    \\alpha_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S_{uq}}\\left(c_r^{(s)} +
    \\left(1 - c_r^{(s)}\\right)\\left(1 - c_{>r}^{(s)}\\right) \\cdot
    \\frac{\\left(1 - \\epsilon_r^{(t)}\\right)\\alpha_{uq}^{(t)}}{\\left(1 -
    \\epsilon_r^{(t)}X_r^{(t)} \\right)} \\right)}{|S_{uq}|}

    Args
    ----
      r: int
          Rank position.
      query: string*
      doc_data: json_object*
          JSON object describing specific document from the search results page
          in the clickstream of a specific user.
      e_r_vector: vector[float]*
          Probability of Examination at position r.
      X_r_vector: vector[float]*
          Probability of clicks at position greater than r given that position r
          was Examined (E=1).
      last_r: int
          Last position r where click or purchase is observed.
      tmp_alpha_param: unordered_map[string, vector[float]]*
          Holds temporary data for updating the alpha parameter. Each document
          maps to a pair `[numerator, denominator]`.
    """
    cdef:
        json_object *field
        string doc
        bint clicked
        float *alpha
        float e_r

    json_object_object_get_ex(doc_data, b'doc', &field)
    doc = json_object_get_string(field)

    json_object_object_get_ex(doc_data, b'click', &field)
    clicked = json_object_get_int(field)

    # First time this document shows up: seed its accumulator with [1, 2].
    if tmp_alpha_param[0].count(doc) == 0:
        tmp_alpha_param[0][doc] = vector[float](2)
        tmp_alpha_param[0][doc][0] = 1
        tmp_alpha_param[0][doc][1] = 2

    if clicked:
        tmp_alpha_param[0][doc][0] += 1
    elif r > last_r:
        # No click here nor at any later rank: add the posterior term of the
        # equation above.
        alpha = self.get_param(b'alpha', query, &doc)
        e_r = e_r_vector[0][r]

        tmp_alpha_param[0][doc][0] += (
            (1 - e_r) * alpha[0] /
            (1 - e_r * X_r_vector[0][r])
        )
    # Every processed rank counts towards the denominator.
    tmp_alpha_param[0][doc][1] += 1
749 |
cdef void update_tmp_sigma(
    self,
    string *query,
    int r,
    json_object *doc_data,
    vector[float] *X_r_vector,
    int last_r,
    unordered_map[string, vector[float]] *tmp_sigma_param,
):
    """
    Updates parameter sigma (satisfaction) by running the EM Algorithm.

    The equation for updating sigma is:

    \\sigma_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S^{[1, 0]}}\\frac{(1 - c_r^{(t)})
    (1-p_r^{(t)})\\sigma_{uq}^{(t)}}{(1 - X_{r+1}\\cdot (1-\\sigma_{uq}^{(t)})
    \\gamma^{(t)})}}{|S^{[1, 0]}|}

    Args
    ----
      query: string*
      r: int
          Rank position.
      doc_data: json_object*
          Clickstream data at position r.
      X_r_vector: vector[float]*
          Probability of clicks at position greater than r given that position r
          was Examined (E=1).
      last_r: int
          Last position r where click or purchase is observed.
      tmp_sigma_param: unordered_map[string, vector[float]]*
          Holds temporary data for updating the sigma parameter. Each document
          maps to a pair `[numerator, denominator]`.
    """
    cdef:
        float *sigma
        float *gamma
        bint click
        bint purchase
        json_object *tmp
        string doc

    json_object_object_get_ex(doc_data, b'doc', &tmp)
    doc = json_object_get_string(tmp)

    json_object_object_get_ex(doc_data, b'click', &tmp)
    click = json_object_get_int(tmp)

    json_object_object_get_ex(doc_data, b'purchase', &tmp)
    purchase = json_object_get_int(tmp)

    # doc not available yet.
    if tmp_sigma_param[0].find(doc) == tmp_sigma_param[0].end():
        tmp_sigma_param[0][doc] = vector[float](2)
        tmp_sigma_param[0][doc][0] = 1
        tmp_sigma_param[0][doc][1] = 2

    # satisfaction is only defined for ranks where a click and no purchase were
    # observed.
    if not click or purchase:
        return

    # Only the last clicked rank contributes to the numerator.
    if r == last_r:
        sigma = self.get_param(b'sigma', query, &doc)
        gamma = self.get_param(b'gamma')

        tmp_sigma_param[0][doc][0] += (
            sigma[0] / (1 - (X_r_vector[0][r + 1] * (1 - sigma[0]) * gamma[0]))
        )
    # Every clicked and not purchased rank counts towards the denominator.
    tmp_sigma_param[0][doc][1] += 1
815 |
cdef void update_tmp_gamma(
    self,
    int r,
    int last_r,
    json_object *doc_data,
    string *query,
    vector[float] *cp_vector_given_e,
    vector[float] *e_r_vector_given_CP,
    unordered_map[string, float] *cr_dict,
    vector[float] *tmp_gamma_param
):
    """
    Updates the parameter gamma (persistence) by running the EM Algorithm.

    The equations for this parameter are considerably more complex than for
    parameters alpha and sigma. Using the Factor extension method to help out in
    the computation.

    The result is accumulated in place: `tmp_gamma_param[0]` receives `ESS_1` and
    `tmp_gamma_param[1]` receives `ESS_0 + ESS_1`.

    Args
    ----
      r: int
          Rank position.
      last_r: int
          Last rank where either click or purchase was observed.
      doc_data: json_object*
          JSON object with clickstream information of document at position r.
      query: string*
      cp_vector_given_e: vector[float]*
          Probability of observing Clicks and Purchases at positions greater than
          r given that position r + 1 was examined.
      e_r_vector_given_CP: vector[float]*
          Probability that document at position r was examined (E_r=1) given clicks
          and purchases.
      cr_dict: unordered_map[string, float]*
          Conversion Rate of documents for respective query.
      tmp_gamma_param: vector[float]*
          Temporary updates for gamma.
    """
    cdef:
        Factor factor
        bint i = 0
        bint j = 0
        bint k = 0
        float ESS_0 = 0
        float ESS_1 = 0
        float ESS_denominator = 0
        float alpha
        float sigma
        float gamma
        json_object *tmp
        string doc
        bint click
        bint purchase
        float cr

    # Read document id and observed events for this rank.
    json_object_object_get_ex(doc_data, b'doc', &tmp)
    doc = json_object_get_string(tmp)

    json_object_object_get_ex(doc_data, b'click', &tmp)
    click = json_object_get_int(tmp)

    json_object_object_get_ex(doc_data, b'purchase', &tmp)
    purchase = json_object_get_int(tmp)

    # Current parameter values are copied (not referenced) into local floats.
    alpha = self.get_param(b'alpha', query, &doc)[0]
    sigma = self.get_param(b'sigma', query, &doc)[0]
    gamma = self.get_param(b'gamma')[0]

    cr = cr_dict[0][doc]

    factor = Factor()
    factor.cinit(
        r,
        last_r,
        click,
        purchase,
        alpha,
        sigma,
        gamma,
        cr,
        e_r_vector_given_CP,
        cp_vector_given_e
    )

    # Loop through all possible values of x, y and z, where each is an integer
    # boolean. The sum over the 8 combinations is the normalizing constant.
    for i in range(2):
        for j in range(2):
            for k in range(2):
                ESS_denominator += factor.compute_factor(i, j, k)

    # A zero normalizer would make the ratios below undefined; in that case this
    # rank contributes nothing.
    if not ESS_denominator:
        ESS_0, ESS_1 = 0, 0
    else:
        # NOTE(review): presumably ESS stands for "expected sufficient statistics"
        # of the (1, 0, 0) and (1, 0, 1) assignments -- confirm against Factor.
        ESS_0 = factor.compute_factor(1, 0, 0) / ESS_denominator
        ESS_1 = factor.compute_factor(1, 0, 1) / ESS_denominator

    tmp_gamma_param[0][0] += ESS_1
    tmp_gamma_param[0][1] += ESS_0 + ESS_1
916 |
cdef void update_alpha_param(
    self,
    string *query,
    unordered_map[string, vector[float]] *tmp_alpha_param,
):
    """
    Once all sessions of a query have been processed, turns each accumulated
    `[numerator, denominator]` pair of `tmp_alpha_param` into the new alpha value
    and stores it in `alpha_params`, where it is used by the next optimization
    iterations.

    Args
    ----
      query: string*
      tmp_alpha_param: unordered_map[string, vector[float]]*
          Optimized values for updating alpha
    """
    cdef:
        unordered_map[string, vector[float]].iterator entry = (
            tmp_alpha_param[0].begin()
        )
        unordered_map[string, vector[float]].iterator stop = (
            tmp_alpha_param[0].end()
        )
        string doc
        vector[float] stats

    while entry != stop:
        doc = dereference(entry).first
        stats = dereference(entry).second
        # New estimate is the ratio numerator / denominator.
        self.alpha_params[dereference(query)][doc] = stats[0] / stats[1]
        postincrement(entry)
945 |
cdef void update_sigma_param(
    self,
    string *query,
    unordered_map[string, vector[float]] *tmp_sigma_param,
):
    """
    Once all sessions of a query have been processed, turns each accumulated
    `[numerator, denominator]` pair of `tmp_sigma_param` into the new sigma value
    and stores it in `sigma_params`, where it is used by the next optimization
    iterations.

    Args
    ----
      query: string*
      tmp_sigma_param: unordered_map[string, vector[float]]*
          Optimized values for updating sigma
    """
    cdef:
        unordered_map[string, vector[float]].iterator entry = (
            tmp_sigma_param[0].begin()
        )
        unordered_map[string, vector[float]].iterator stop = (
            tmp_sigma_param[0].end()
        )
        string doc
        vector[float] stats

    while entry != stop:
        doc = dereference(entry).first
        stats = dereference(entry).second
        # New estimate is the ratio numerator / denominator.
        self.sigma_params[dereference(query)][doc] = stats[0] / stats[1]
        postincrement(entry)
974 |
cdef void update_gamma_param(
    self,
    vector[float] *tmp_gamma_param
):
    """
    After all sessions for a given query have been analyzed, the new value of
    gamma is computed from `tmp_gamma_param` and stored into `gamma_param`, where
    it'll be used into new optimization iterations.

    Args
    ----
      tmp_gamma_param: vector[float]*
          Accumulated `[numerator, denominator]` for updating gamma
    """
    cdef:
        float numerator = tmp_gamma_param[0][0]
        float denominator = tmp_gamma_param[0][1]

    # Considered that a denominator of zero cannot happen.
    self.gamma_param = numerator / denominator
991 |
cpdef void export_judgments(self, str output, str format_='NEWLINE_JSON'):
    """
    Writes the judgments obtained by the fit optimization process to an external
    file. Judgments are computed as:

    J_{uq} = P(\\alpha_{uq}) \\cdot P(\\sigma_{uq})

    where `u` represents the document and `q` the query.

    Args
    ----
      output: str
          Filepath where to save results. If `.gz` is present in `output` then
          compresses file.
      format_: str
          Sets how to write result file. Options includes:
              - NEWLINE_JSON: writes in JSON format, one query per line, like:
                  {'query0': {'doc0': 0.3, 'doc1': 0.2}}
                  {'query1': {'doc0': 0.76, 'doc1': 0.41}}
          This argument is currently not read by the implementation; the output
          is always newline delimited JSON.
    """
    cdef:
        unordered_map[string, unordered_map[string, float]].iterator query_it
        unordered_map[string, float].iterator doc_it
        string query
        string doc
        float alpha
        float sigma
        dict row
        dict judgments

    opener = gzip.GzipFile if '.gz' in output else open

    with opener(output, 'wb') as out:
        query_it = self.alpha_params.begin()
        while query_it != self.alpha_params.end():
            query = dereference(query_it).first
            judgments = {}

            doc_it = self.alpha_params[query].begin()
            while doc_it != self.alpha_params[query].end():
                doc = dereference(doc_it).first
                alpha = dereference(doc_it).second
                sigma = self.sigma_params[query][doc]
                judgments[doc] = alpha * sigma
                postincrement(doc_it)

            # One JSON document per query, each on its own line.
            row = {query: judgments}
            out.write(ujson.dumps(row).encode() + '\n'.encode())
            postincrement(query_it)
1039 |
cpdef void fit(self, str input_folder, int iters=30):
    """
    Reads through data of queries and customers sessions to find appropriate values
    of `\\alpha_{uq}` (attractiveness), `\\sigma_{uq}` (satisfaction) and `\\gamma`
    (persistence) where `u` represents the document and `q` the input query.

    Args
    ----
      input_folder: str
          Path where gzipped clickstream files are located. Only files whose name
          starts with `jud` are read. Each line of each file is a JSON document;
          here's an example of the expected input data:

          `{
              "search_keys": {
                  "search_term": "query",
                  "key0": "value0"
              },
              "judgment_keys": [
                  {
                      "session": [
                          {"click": 0, "purchase": 0, "doc": "document0"}
                      ]
                  }
              ]
          }`

          `search_keys` contains all keys that describe and are associated to the
          search term as inserted by the user. `key0` for instance could mean any
          further description of context such as the region of user, their
          preferences among many possibilities.
      iters: int
          Total iterations the fitting method should run in the optimization
          process. The implemented algorithm is Expectation-Maximization which means
          the more iterations there are the more guaranteed it is values will
          converge.
    """
    cdef:
        list files = glob(os.path.join(input_folder, 'jud*'))
        # row has to be bytes so Cython can interchange its value between char* and
        # bytes
        bytes row
        json_object *row_json
        json_object *search_keys
        json_object *sessions
        json_object *session
        json_object *clickstream
        lh_table *search_keys_tbl
        unsigned int i = 0
        string query
        unordered_map[string, vector[float]] tmp_alpha_param
        unordered_map[string, vector[float]] tmp_sigma_param
        vector[float] tmp_gamma_param = vector[float](2)
        unordered_map[string, unordered_map[string, float]] cr_dict

    for iteration in range(iters):
        print('running iteration: ', iteration)
        for file_ in files:
            # The context manager guarantees the file handle is released after
            # each pass, even if processing a row fails.
            with gzip.GzipFile(file_, 'rb') as rows:
                for row in rows:
                    # Start by erasing the temporary container of the parameters
                    # as each new query requires a new computation in the EM
                    # algorithm.
                    self.restart_tmp_params(&tmp_alpha_param, &tmp_sigma_param,
                                            &tmp_gamma_param)

                    row_json = json_tokener_parse(row)
                    # json-c returns NULL when the row cannot be parsed (e.g.
                    # blank or truncated line); there is nothing to learn from
                    # it and nothing to release.
                    if row_json == NULL:
                        continue

                    json_object_object_get_ex(row_json, b'search_keys',
                                              &search_keys)
                    search_keys_tbl = json_object_get_object(search_keys)

                    query = self.get_search_context_string(search_keys_tbl)
                    json_object_object_get_ex(row_json, b'judgment_keys',
                                              &sessions)
                    self.compute_cr(&query, sessions, &cr_dict)

                    for i in range(json_object_array_length(sessions)):
                        session = json_object_array_get_idx(sessions, i)
                        json_object_object_get_ex(session, b'session',
                                                  &clickstream)

                        self.update_tmp_params(clickstream, &tmp_alpha_param,
                                               &tmp_sigma_param, &tmp_gamma_param,
                                               &query, &cr_dict[query])

                    # All sessions of this query were seen: commit the new values.
                    self.update_alpha_param(&query, &tmp_alpha_param)
                    self.update_sigma_param(&query, &tmp_sigma_param)
                    self.update_gamma_param(&tmp_gamma_param)
                    # Frees the whole parsed row, including the child objects
                    # obtained from it above.
                    json_object_put(row_json)
1125 |
cdef void update_tmp_params(
    self,
    json_object *clickstream,
    unordered_map[string, vector[float]] *tmp_alpha_param,
    unordered_map[string, vector[float]] *tmp_sigma_param,
    vector[float] *tmp_gamma_param,
    string *query,
    unordered_map[string, float] *cr_dict
):
    """
    Runs one EM pass over a single session: builds the auxiliary probability
    vectors for the clickstream and then, rank by rank, accumulates the updates of
    alpha, sigma and gamma into the tmp input parameters.

    Args
    ----
      clickstream: json_object*
          JSON containing documents users observed on search results page and their
          interaction with each item. Example:

          `[
              {"doc": "doc0", "click": 0, "purchase": 0},
              {"doc": "doc1", "click": 1, "purchase": 1}
          ]`

      tmp_alpha_param: unordered_map[string, vector[float]]*
          Holds temporary values for adapting each variable alpha.
      tmp_sigma_param: unordered_map[string, vector[float]]*
          Holds temporary values for adapting each variable sigma.
      tmp_gamma_param: vector[float]*
          Holds temporary values for adapting gamma.
      query: string*
      cr_dict: unordered_map[string, float]*
          Conversion Rates of each document for the current query.
    """
    cdef:
        size_t total_docs = json_object_array_length(clickstream)
        unsigned int pos
        unsigned int last_r
        json_object *doc_entry
        vector[float] e_r_vector
        vector[float] X_r_vector
        vector[float] e_r_vector_given_CP
        vector[float] cp_vector_given_e

    # Session level vectors, computed once and shared by every rank below.
    e_r_vector = self.build_e_r_vector(clickstream, query, cr_dict)
    X_r_vector = self.build_X_r_vector(clickstream, query)
    e_r_vector_given_CP = self.build_e_r_vector_given_CP(clickstream, 0, query)
    cp_vector_given_e = self.build_CP_vector_given_e(clickstream, query, cr_dict)
    # last clicked position
    last_r = self.get_last_r(clickstream)

    for pos in range(total_docs):
        doc_entry = json_object_array_get_idx(clickstream, pos)
        self.update_tmp_alpha(pos, query, doc_entry, &e_r_vector, &X_r_vector,
                              last_r, tmp_alpha_param)
        self.update_tmp_sigma(query, pos, doc_entry, &X_r_vector, last_r,
                              tmp_sigma_param)
        self.update_tmp_gamma(pos, last_r, doc_entry, query, &cp_vector_given_e,
                              &e_r_vector_given_CP, cr_dict, tmp_gamma_param)
1184 |
cdef void restart_tmp_params(
    self,
    unordered_map[string, vector[float]] *tmp_alpha_param,
    unordered_map[string, vector[float]] *tmp_sigma_param,
    vector[float] *tmp_gamma_param
):
    """
    Resets the temporary parameters so they can be reused in the optimization
    process of the next query and step: both maps are emptied and the gamma
    accumulator goes back to `[1, 2]`.
    """
    # Removing every element is the same as erasing the range [begin, end).
    tmp_alpha_param[0].clear()
    tmp_sigma_param[0].clear()

    tmp_gamma_param[0][0] = 1
    tmp_gamma_param[0][1] = 2
1205 |
--------------------------------------------------------------------------------
/tests/test_cy_DBN.pyx:
--------------------------------------------------------------------------------
1 | import gzip
2 | import tempfile
3 |
4 | import ujson
5 |
6 | from cython.operator cimport dereference, postincrement
7 | from libcpp.string cimport string
8 | from libcpp.unordered_map cimport unordered_map
9 | from libcpp.vector cimport vector
10 |
11 | from pyClickModels.DBN cimport DBNModel, Factor
12 |
13 | from pyClickModels.DBN import DBN
14 |
15 | from pyClickModels.jsonc cimport (json_object, json_object_get_object,
16 | json_object_put, json_tokener_parse,
17 | lh_table)
18 |
19 | from conftest import build_DBN_test_data
20 | from numpy.testing import assert_allclose, assert_almost_equal
21 |
# Alias for the nested map layout {query: {doc: value}} used by the alpha and
# sigma parameters.
ctypedef unordered_map[string, unordered_map[string, float]] dbn_param


# Module-level parameter fixtures with fixed, known values.
# NOTE(review): presumably these are handed by pointer to the `test_build_*`
# functions below by the test runner -- confirm against `run_tests`.
cdef string query = b'query'
cdef dbn_param alpha_params
cdef dbn_param sigma_params
cdef float gamma_param

# Attractiveness of 0.5 for the three documents used by the sessions in the tests.
alpha_params[query][b'doc0'] = 0.5
alpha_params[query][b'doc1'] = 0.5
alpha_params[query][b'doc2'] = 0.5

# Satisfaction of 0.5 for the same three documents.
sigma_params[query][b'doc0'] = 0.5
sigma_params[query][b'doc1'] = 0.5
sigma_params[query][b'doc2'] = 0.5

# Persistence.
gamma_param = 0.7
39 |
40 |
cdef bint test_fit():
    """
    Fits a model on simulated clickstream data and checks that the learned
    parameters are close to the ones used to generate the data.

    Returns
    -------
      bint: True when every checked parameter is within tolerance, False otherwise.
    """
    cdef:
        DBNModel model = DBN()
        unordered_map[string, unordered_map[string, float]].iterator it
        string query
        dict dquery

    gamma, params, tmp_folder = build_DBN_test_data(users=30000, docs=6, queries=2)

    model.fit(tmp_folder.name, iters=10)

    # The iterator must be positioned before the loop; comparing a default
    # constructed iterator against `end()` is undefined.
    it = model.alpha_params.begin()
    while it != model.alpha_params.end():
        query = dereference(it).first
        dquery = extract_keys(query)

        # `extract_keys` only produces string values, so the search term has to be
        # compared as '0' and not as the integer 0.
        if dquery == {'search_term': '0', 'region': 'north', 'favorite_size': 'L'}:
            try:
                assert_allclose(model.gamma_param, gamma, atol=.1)
                assert_allclose(
                    model.alpha_params[query][b'0'], params[0][0][0], atol=.15
                )
                assert_allclose(
                    model.sigma_params[query][b'0'], params[0][0][1], atol=.15
                )
            except AssertionError:
                return False

        postincrement(it)
    return True
89 |
cdef dict extract_keys(string result):
    """
    Parses a search context string such as
    b'search_term:query|key0:value0|key1:value1' into a dict mapping each key to
    its value. Keys and values are returned as `str`.
    """
    # Split on the first ':' only so values that contain ':' remain intact.
    return dict(e.split(':', 1) for e in bytes(result).decode().split('|'))
92 |
cdef bint test_get_search_context_string():
    """
    Checks that `get_search_context_string` encodes every search key, for inputs
    with one key and with several keys.
    """
    cdef:
        DBNModel model = DBNModel()
        json_object *search_keys = json_tokener_parse(b"{'search_term': 'query'}")
        lh_table *tbl = json_object_get_object(search_keys)
        string result = model.get_search_context_string(tbl)
        dict expected = {'search_term': 'query'}
        dict r = extract_keys(result)

    # `r` is already a Python dict, so the parsed object can be released before
    # the check; otherwise it would leak on early return and on re-assignment.
    json_object_put(search_keys)
    if not r == expected:
        return False

    search_keys = json_tokener_parse(
        b"{'search_term': 'query', 'key0': 'value0', 'key1': 'value1'}"
    )

    tbl = json_object_get_object(search_keys)
    # result is something like: b'search_term:query|key0:value0|key1:value1'
    result = model.get_search_context_string(tbl)
    r = extract_keys(result)
    expected = {'search_term': 'query', 'key0': 'value0', 'key1': 'value1'}
    json_object_put(search_keys)

    if not r == expected:
        return False
    return True
119 |
120 |
cdef bint test_compute_cr():
    """
    Checks the conversion rates computed by `compute_cr` for a query and that a
    second call for an already known query leaves the result untouched.
    """
    cdef:
        DBNModel model = DBNModel()
        string query = b'query'
        # cr_dict is like: {'query_term': {'doc0': 0.2, 'doc1: 0}}
        unordered_map[string, unordered_map[string, float]] cr_dict
        unordered_map[string, unordered_map[string, float]] expected
        const char *sessions = b"""
            [
                {
                    'session': [
                        {"doc": "doc0", "click": 0, "purchase": 0},
                        {"doc": "doc1", "click": 1, "purchase": 0},
                        {"doc": "doc2", "click": 1, "purchase": 1}
                    ]
                },
                {
                    'session': [
                        {"doc": "doc0", "click": 0, "purchase": 0},
                        {"doc": "doc1", "click": 1, "purchase": 0},
                    ]
                },
            ]
        """
        json_object *jso_sessions = json_tokener_parse(sessions)

    expected[query][b'doc0'] = 0
    expected[query][b'doc1'] = 0
    expected[query][b'doc2'] = 1

    model.compute_cr(&query, jso_sessions, &cr_dict)
    # `cr_dict` owns its own strings and floats, so the parsed sessions can be
    # released right away. Previously this object was never freed.
    json_object_put(jso_sessions)

    if not expected == cr_dict:
        return False

    # test if query is already available in cr_dict
    jso_sessions = json_tokener_parse('')
    model.compute_cr(&query, jso_sessions, &cr_dict)
    json_object_put(jso_sessions)

    if not expected == cr_dict:
        return False
    return True
164 |
165 |
cdef bint test_get_param():
    """
    Checks that `get_param` returns an alpha value inside the open interval (0, 1)
    and that repeated reads of an existing parameter return the same value.
    """
    cdef:
        string query = b'query'
        string doc = b'doc0'
        DBNModel model = DBNModel()
        float result
        float result2
        float result3

    result = model.get_param(b'alpha', &query, &doc)[0]
    # Parenthesized on purpose: `not a and b` negates only `a`, which let values
    # greater than or equal to 1 go through.
    if not (0 < result < 1):
        return False

    model.alpha_params.erase(query)
    result2 = model.get_param(b'alpha', &query, &doc)[0]
    # NOTE(review): kept as in the original, i.e. it only fails when the value is
    # out of range AND equal to the previous one -- confirm whether `and` was
    # intended instead of `or`.
    if not ((0 < result2 < 1) or result != result2):
        return False

    # A second read must not change the stored value.
    result3 = model.get_param(b'alpha', &query, &doc)[0]
    if not result2 == result3:
        return False
    return True
191 |
192 |
cdef bint test_build_e_r_vector(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """
    Checks `build_e_r_vector` against precomputed values for a three document
    session, using the given parameters and a conversion rate of 0.5 per document.
    """
    cdef:
        const char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc1", "click": 1, "purchase": 0},'
            b'{"doc": "doc2", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        string query = b'query'
        vector[float] expected = [1, 0.4375, 0.1914, 0.0837]
        vector[float] result
        unordered_map[string, float] cr_dict
        DBNModel model = DBNModel()

    cr_dict[b'doc0'] = 0.5
    cr_dict[b'doc1'] = 0.5
    cr_dict[b'doc2'] = 0.5

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    result = model.build_e_r_vector(session, &query, &cr_dict)
    # `result` is returned by value, so the session can be released before the
    # check; this way a failed assertion does not leak it.
    json_object_put(session)

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False
    return True
224 |
225 |
cdef bint test_build_X_r_vector(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """
    Checks `build_X_r_vector` against precomputed values for a three document
    session, using the given parameters.
    """
    cdef:
        const char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc1", "click": 1, "purchase": 0},'
            b'{"doc": "doc2", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        vector[float] expected = [0.73625, 0.675, 0.5, 0]
        vector[float] result
        string query = b'query'

        DBNModel model = DBNModel()

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    result = model.build_X_r_vector(session, &query)
    # `result` is returned by value, so the session can be released before the
    # check; this way a failed assertion does not leak it.
    json_object_put(session)

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False
    return True
253 |
254 |
cdef bint test_build_e_r_vector_given_CP(dbn_param *alpha_params,
                                         dbn_param *sigma_params,
                                         float *gamma_param):
    """
    Checks `build_e_r_vector_given_CP` for two sessions (one with a purchase at the
    second rank, one with only a click there), starting from indexes 0, 1 and 2.
    """
    cdef:
        char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1},'
            b'{"doc": "doc1", "click": 0, "purchase": 0}]'
        )
        json_object *session = json_tokener_parse(s)
        vector[float] expected = [1, 0.7, 0, 0]
        vector[float] result
        string query = b'query'
        DBNModel model = DBNModel()

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    # Session with a purchase at the second rank.
    result = model.build_e_r_vector_given_CP(session, 0, &query)

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 1, &query)
    expected = [1, 0, 0]

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 2, &query)
    expected = [1, 0.7]

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    # Release the first session before its pointer is overwritten; it used to be
    # leaked here.
    json_object_put(session)

    # Same session, but the second rank is only clicked (no purchase).
    s = (
        b'[{"doc": "doc0", "click": 0, "purchase": 0},'
        b'{"doc": "doc0", "click": 1, "purchase": 0},'
        b'{"doc": "doc1", "click": 0, "purchase": 0}]'
    )
    session = json_tokener_parse(s)
    expected = [1, 0.7, 0.35, 0.1484]

    result = model.build_e_r_vector_given_CP(session, 0, &query)

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 1, &query)
    expected = [1, 0.35, 0.148484]

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 2, &query)
    expected = [1, 0.7]

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    json_object_put(session)
    return True
330 |
331 |
cdef bint test_build_cp_p(dbn_param *alpha_params):
    """Check ``DBNModel.compute_cp_p`` for two values of its second argument.

    The method is called with 0 and then 1 on the same parsed session, a
    fixed ``e_r_vector_given_CP`` and a ``cr_dict`` mapping three documents to
    0.5. Each scalar result is compared with a precomputed value to 4
    decimals.

    Args:
      alpha_params: pointer whose pointee is copied into ``model.alpha_params``.

    Returns:
      True when both comparisons pass, False at the first mismatch.
    """
    cdef:
        const char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1},'
            b'{"doc": "doc1", "click": 1, "purchase": 0}]'
        )
        json_object *session = json_tokener_parse(s)
        float expected = 0.005625
        float result
        string query = b'query'
        vector[float] e_r_vector_given_CP = [1, 0.6, 0.3]
        DBNModel model = DBNModel()
        unordered_map[string, float] cr_dict

    # The parsed session is owned by this function; the ``finally`` clause
    # releases it on the failure paths as well as on success.
    try:
        cr_dict[b'doc0'] = 0.5
        cr_dict[b'doc1'] = 0.5
        cr_dict[b'doc2'] = 0.5

        model.alpha_params = alpha_params[0]

        result = model.compute_cp_p(session, 0, &query, &e_r_vector_given_CP,
                                    &cr_dict)

        try:
            assert_almost_equal(result, expected, decimal=4)
        except AssertionError:
            return False

        expected = 0.0375
        result = model.compute_cp_p(session, 1, &query, &e_r_vector_given_CP,
                                    &cr_dict)

        try:
            assert_almost_equal(result, expected, decimal=4)
        except AssertionError:
            return False

        return True
    finally:
        json_object_put(session)
370 |
371 |
cdef bint test_build_CP_vector_given_e(dbn_param *alpha_params, dbn_param *sigma_params,
                                       float *gamma_param):
    """Check ``DBNModel.build_CP_vector_given_e`` for three sessions.

    Each session is parsed from JSON, passed to the method together with a
    ``cr_dict`` mapping three documents to 0.5, and the returned vector is
    compared with precomputed values to 4 decimals.

    Args:
      alpha_params: pointer whose pointee is copied into ``model.alpha_params``.
      sigma_params: pointer whose pointee is copied into ``model.sigma_params``.
      gamma_param: pointer whose pointee is copied into ``model.gamma_param``.

    Returns:
      True when every comparison passes, False at the first mismatch.
    """
    cdef:
        char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        DBNModel model = DBNModel()
        vector[float] result
        vector[float] expected
        unordered_map[string, float] cr_dict

    # NOTE(review): ``query`` is not declared in this function, unlike in the
    # sibling tests; presumably it resolves to a module-level ``string`` --
    # confirm against the top of the file.

    # ``session`` always points at the JSON object currently owned by this
    # function; the ``finally`` clause releases it on every exit path.
    try:
        cr_dict[b'doc0'] = 0.5
        cr_dict[b'doc1'] = 0.5
        cr_dict[b'doc2'] = 0.5

        model.alpha_params = alpha_params[0]
        model.sigma_params = sigma_params[0]
        model.gamma_param = gamma_param[0]

        result = model.build_CP_vector_given_e(session, &query, &cr_dict)
        expected = [0.25]

        try:
            assert_almost_equal(result, expected, decimal=4)
        except AssertionError:
            return False

        s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1}]'
        )
        # Release the previous session before its pointer is overwritten.
        json_object_put(session)
        session = json_tokener_parse(s)

        result = model.build_CP_vector_given_e(session, &query, &cr_dict)
        expected = [0.021875, 0.25]

        try:
            assert_almost_equal(result, expected, decimal=4)
        except AssertionError:
            return False

        s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 0},'
            b'{"doc": "doc0", "click": 0, "purchase": 0}]'
        )
        json_object_put(session)
        session = json_tokener_parse(s)

        result = model.build_CP_vector_given_e(session, &query, &cr_dict)
        expected = [0.2062, 0.5]

        try:
            assert_almost_equal(result, expected, decimal=4)
        except AssertionError:
            return False

        return True
    finally:
        json_object_put(session)
433 |
434 |
cdef bint test_get_last_r():
    """Check ``DBNModel.get_last_r`` on three four-document sessions.

    The expected values (3, 2 and 0) coincide with the index of the last
    entry whose ``click`` is 1 in each session, with 0 for the session that
    has no click at all.

    Returns:
      True when all three results match, False at the first mismatch.
    """
    cdef:
        DBNModel model = DBNModel()
        char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1},'
            b'{"doc": "doc1", "click": 1, "purchase": 0},'
            b'{"doc": "doc2", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        int result

    # ``session`` always points at the JSON object currently owned by this
    # function; the ``finally`` clause releases it on every exit path.
    try:
        result = model.get_last_r(session)
        if not result == 3:
            return False

        s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1},'
            b'{"doc": "doc1", "click": 1, "purchase": 0},'
            b'{"doc": "doc2", "click": 0, "purchase": 1}]'
        )
        # Release the previous session before its pointer is overwritten.
        json_object_put(session)
        session = json_tokener_parse(s)
        result = model.get_last_r(session)
        if not result == 2:
            return False

        s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 0, "purchase": 1},'
            b'{"doc": "doc1", "click": 0, "purchase": 0},'
            b'{"doc": "doc2", "click": 0, "purchase": 1}]'
        )
        json_object_put(session)
        session = json_tokener_parse(s)
        result = model.get_last_r(session)
        if not result == 0:
            return False

        return True
    finally:
        json_object_put(session)
473 |
474 |
cdef bint test_update_tmp_alpha(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """Check ``DBNModel.update_tmp_alpha`` for three (r, last_r, click) setups.

    Before each call ``tmp_alpha_param[b'doc0']`` is reset to ``[0, 0]``;
    after the call its content is compared with the expected pair (exactly for
    the first case, to 4 decimals for the other two).

    Args:
      alpha_params: pointer whose pointee is copied into ``model.alpha_params``.
      sigma_params: pointer whose pointee is copied into ``model.sigma_params``.
      gamma_param: pointer whose pointee is copied into ``model.gamma_param``.

    Returns:
      True when every comparison passes, False at the first mismatch.
    """
    cdef:
        DBNModel model = DBNModel()
        unsigned int r = 0
        unsigned int last_r = 1
        char *s = b'{"doc": "doc0", "click": 1}'
        json_object *doc_data = json_tokener_parse(s)
        vector[float] e_r_vector = [0.5]
        vector[float] X_r_vector = [0.5]
        string query = b'query'
        unordered_map[string, vector[float]] tmp_alpha_param
        vector[float] expected

    # ``doc_data`` always points at the JSON object currently owned by this
    # function; the ``finally`` clause releases it on every exit path.
    try:
        model.alpha_params = alpha_params[0]
        model.sigma_params = sigma_params[0]
        model.gamma_param = gamma_param[0]

        # Case 1: clicked document, r=0, last_r=1.
        tmp_alpha_param[b'doc0'] = [0, 0]

        model.update_tmp_alpha(r, &query, doc_data, &e_r_vector, &X_r_vector, last_r,
                               &tmp_alpha_param)
        if not tmp_alpha_param[b'doc0'] == [1, 1]:
            return False

        # Case 2: document not clicked, r=1, last_r=0.
        r = 1
        last_r = 0
        s = b'{"doc": "doc0", "click": 0}'
        # Release the previous document before its pointer is overwritten.
        json_object_put(doc_data)
        doc_data = json_tokener_parse(s)
        e_r_vector = [0.5, 0.5]
        X_r_vector = [0.5, 0.5]
        tmp_alpha_param[b'doc0'] = [0, 0]
        model.update_tmp_alpha(r, &query, doc_data, &e_r_vector, &X_r_vector, last_r,
                               &tmp_alpha_param)
        expected = [1. / 3, 1]

        try:
            assert_almost_equal(tmp_alpha_param[b'doc0'], expected, decimal=4)
        except AssertionError:
            return False

        # Case 3: document not clicked, r=1, last_r=2.
        r = 1
        last_r = 2
        s = b'{"doc": "doc0", "click": 0}'
        json_object_put(doc_data)
        doc_data = json_tokener_parse(s)
        e_r_vector = [0.5, 0.5]
        X_r_vector = [0.5, 0.5]
        tmp_alpha_param[b'doc0'] = [0, 0]
        model.update_tmp_alpha(r, &query, doc_data, &e_r_vector, &X_r_vector, last_r,
                               &tmp_alpha_param)
        expected = [0.0, 1]

        try:
            assert_almost_equal(tmp_alpha_param[b'doc0'], expected, decimal=4)
        except AssertionError:
            return False

        return True
    finally:
        json_object_put(doc_data)
534 |
535 |
cdef bint test_update_tmp_sigma(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """Check ``DBNModel.update_tmp_sigma`` for three successive calls.

    ``tmp_sigma_param[b'doc0']`` starts at ``[0, 0]`` and is not reset between
    the first and the second call; it is reset before the third. After each
    call its content is compared with the expected pair to 4 decimals.

    Args:
      alpha_params: pointer whose pointee is copied into ``model.alpha_params``.
      sigma_params: pointer whose pointee is copied into ``model.sigma_params``.
      gamma_param: pointer whose pointee is copied into ``model.gamma_param``.

    Returns:
      True when every comparison passes, False at the first mismatch.
    """
    cdef:
        DBNModel model = DBNModel()
        unsigned int r = 0
        unsigned int last_r = 1
        char *s = b'{"doc": "doc0", "click": 0, "purchase": 0}'
        json_object *doc_data = json_tokener_parse(s)
        vector[float] X_r_vector = [0.5, 0.5, 0.5]
        unordered_map[string, vector[float]] tmp_sigma_param
        vector[float] expected
        string query = b'query'

    # ``doc_data`` always points at the JSON object currently owned by this
    # function; the ``finally`` clause releases it on every exit path.
    try:
        model.alpha_params = alpha_params[0]
        model.sigma_params = sigma_params[0]
        model.gamma_param = gamma_param[0]

        tmp_sigma_param[b'doc0'] = [0, 0]

        # Call 1: document without a click, r=0.
        model.update_tmp_sigma(&query, r, doc_data, &X_r_vector, last_r,
                               &tmp_sigma_param)

        expected = [0, 0]

        try:
            assert_almost_equal(tmp_sigma_param[b'doc0'], expected, decimal=4)
        except AssertionError:
            return False

        # Call 2: clicked document, still r=0.
        s = b'{"doc": "doc0", "click": 1, "purchase": 0}'
        # Release the previous document before its pointer is overwritten.
        json_object_put(doc_data)
        doc_data = json_tokener_parse(s)

        model.update_tmp_sigma(&query, r, doc_data, &X_r_vector, last_r,
                               &tmp_sigma_param)
        expected = [0, 1]

        try:
            assert_almost_equal(tmp_sigma_param[b'doc0'], expected, decimal=4)
        except AssertionError:
            return False

        # Call 3: same clicked document with r=1 and a reset accumulator.
        r = 1
        tmp_sigma_param[b'doc0'] = [0, 0]
        model.update_tmp_sigma(&query, r, doc_data, &X_r_vector, last_r,
                               &tmp_sigma_param)
        expected = [0.6060, 1]

        try:
            assert_almost_equal(tmp_sigma_param[b'doc0'], expected, decimal=4)
        except AssertionError:
            return False

        return True
    finally:
        json_object_put(doc_data)
587 |
588 |
cdef bint test_compute_factor_last_click_lower_than_r():
    """Run ``Factor.compute_factor`` over a table of cases with r = last_r = 0.

    Every case creates a fresh ``Factor``, initialises it through ``cinit``
    with the case's click/purchase flags plus fixed parameters (alpha = 0.4,
    sigma = 0.4, gamma = 0.7, cr = 0.1, single-element vectors), calls
    ``compute_factor`` with the case's three arguments and compares the result
    with the expected value using ``assert_almost_equal`` at its default
    precision.

    Returns:
      True when every case matches, False at the first mismatch.
    """
    cdef:
        float result
        int r = 0
        int last_r = 0
        vector[float] cp_vector_given_e = [0.2]
        vector[float] e_r_vector_given_CP = [0.4]
        unordered_map[string, float] cr_dict
        DBNModel model = DBNModel()
        dbn_param alpha_params
        dbn_param sigma_params
        float gamma
        string query = b'query'
        string doc = b'doc0'
        bint click
        bint purchase
        int arg0
        int arg1
        int arg2
        object expected
        list cases
        Factor factor

    cr_dict[doc] = 0.1
    alpha_params[query][doc] = 0.4
    sigma_params[query][doc] = 0.4
    gamma = 0.7

    # One row per scenario, in execution order:
    # (click, purchase, compute_factor arg 0, arg 1, arg 2, expected result)
    cases = [
        # compute_factor(0, 0, 0)
        (False, True, 0, 0, 0, 0.6),
        (True, False, 0, 0, 0, 0.0),
        (True, True, 0, 0, 0, 0.0),
        # compute_factor(0, 0, 1); the first two rows are identical on purpose,
        # mirroring the sequence of checks this table replaces.
        (True, False, 0, 0, 1, 0.0),
        (True, False, 0, 0, 1, 0.0),
        (True, True, 0, 0, 1, 0.0),
        # compute_factor(0, 1, 0)
        (False, False, 0, 1, 0, 0.0),
        (True, False, 0, 1, 0, 0.0),
        (True, True, 0, 1, 0, 0.0),
        # compute_factor(0, 1, 1)
        (False, False, 0, 1, 1, 0.0),
        (True, False, 0, 1, 1, 0.0),
        (True, True, 0, 1, 1, 0.0),
        # compute_factor(1, 0, 0)
        (True, False, 1, 0, 0, 0.02592),
        (True, True, 1, 0, 0, 0.0),
        # compute_factor(1, 0, 1)
        (False, False, 1, 0, 1, 0.02016),
        (True, False, 1, 0, 1, 0.012096),
        (True, True, 1, 0, 1, 0.0),
        # compute_factor(1, 1, 0)
        (False, False, 1, 1, 0, 0.0),
        (True, False, 1, 1, 0, 0.01728),
        (True, True, 1, 1, 0, 0.00192),
        # compute_factor(1, 1, 1)
        (False, False, 1, 1, 1, 0),
        (True, False, 1, 1, 1, 0.008064),
        (True, True, 1, 1, 1, 0),
    ]

    for click, purchase, arg0, arg1, arg2, expected in cases:
        factor = Factor()
        factor.cinit(
            r,
            last_r,
            click,
            purchase,
            alpha_params[query][doc],
            sigma_params[query][doc],
            gamma,
            cr_dict[doc],
            &e_r_vector_given_CP,
            &cp_vector_given_e
        )
        result = factor.compute_factor(arg0, arg1, arg2)

        try:
            assert_almost_equal(result, expected)
        except AssertionError:
            return False

    return True
1140 |
1141 | cdef bint test_compute_factor_last_click_higher_than_r():
1142 | cdef:
1143 | float result
1144 | int r = 0
1145 | int last_r = 1
1146 | vector[float] cp_vector_given_e = [0.2]
1147 | vector[float] e_r_vector_given_CP = [0.4]
1148 | unordered_map[string, float] cr_dict
1149 | DBNModel model = DBNModel()
1150 | dbn_param alpha_params
1151 | dbn_param sigma_params
1152 | float gamma
1153 | string query = b'query'
1154 | bint click = False
1155 | bint purchase = True
1156 | string doc = b'doc0'
1157 | Factor factor
1158 |
1159 | cr_dict[doc] = 0.1
1160 | alpha_params[query][doc] = 0.4
1161 | sigma_params[query][doc] = 0.4
1162 | gamma = 0.7
1163 |
1164 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1165 | click = False
1166 | purchase = False
1167 | factor = Factor()
1168 | factor.cinit(
1169 | r,
1170 | last_r,
1171 | click,
1172 | purchase,
1173 | alpha_params[query][doc],
1174 | sigma_params[query][doc],
1175 | gamma,
1176 | cr_dict[doc],
1177 | &e_r_vector_given_CP,
1178 | &cp_vector_given_e
1179 | )
1180 | result = factor.compute_factor(0, 0, 0)
1181 |
1182 | try:
1183 | assert_almost_equal(result, 0)
1184 | except AssertionError:
1185 | return False
1186 |
1187 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1188 | click = True
1189 | purchase = False
1190 | factor = Factor()
1191 | factor.cinit(
1192 | r,
1193 | last_r,
1194 | click,
1195 | purchase,
1196 | alpha_params[query][doc],
1197 | sigma_params[query][doc],
1198 | gamma,
1199 | cr_dict[doc],
1200 | &e_r_vector_given_CP,
1201 | &cp_vector_given_e
1202 | )
1203 | result = factor.compute_factor(0, 0, 0)
1204 |
1205 | try:
1206 | assert_almost_equal(result, 0)
1207 | except AssertionError:
1208 | return False
1209 |
1210 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1211 | click = True
1212 | purchase = True
1213 | factor = Factor()
1214 | factor.cinit(
1215 | r,
1216 | last_r,
1217 | click,
1218 | purchase,
1219 | alpha_params[query][doc],
1220 | sigma_params[query][doc],
1221 | gamma,
1222 | cr_dict[doc],
1223 | &e_r_vector_given_CP,
1224 | &cp_vector_given_e
1225 | )
1226 | result = factor.compute_factor(0, 0, 0)
1227 |
1228 | try:
1229 | assert_almost_equal(result, 0)
1230 | except AssertionError:
1231 | return False
1232 |
1233 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1234 | click = False
1235 | purchase = False
1236 | factor = Factor()
1237 | factor.cinit(
1238 | r,
1239 | last_r,
1240 | click,
1241 | purchase,
1242 | alpha_params[query][doc],
1243 | sigma_params[query][doc],
1244 | gamma,
1245 | cr_dict[doc],
1246 | &e_r_vector_given_CP,
1247 | &cp_vector_given_e
1248 | )
1249 | result = factor.compute_factor(0, 0, 1)
1250 |
1251 | try:
1252 | assert_almost_equal(result, 0)
1253 | except AssertionError:
1254 | return False
1255 |
1256 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1257 | click = True
1258 | purchase = False
1259 | factor = Factor()
1260 | factor.cinit(
1261 | r,
1262 | last_r,
1263 | click,
1264 | purchase,
1265 | alpha_params[query][doc],
1266 | sigma_params[query][doc],
1267 | gamma,
1268 | cr_dict[doc],
1269 | &e_r_vector_given_CP,
1270 | &cp_vector_given_e
1271 | )
1272 | result = factor.compute_factor(0, 0, 1)
1273 |
1274 | try:
1275 | assert_almost_equal(result, 0)
1276 | except AssertionError:
1277 | return False
1278 |
1279 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1280 | click = True
1281 | purchase = True
1282 | factor = Factor()
1283 | factor.cinit(
1284 | r,
1285 | last_r,
1286 | click,
1287 | purchase,
1288 | alpha_params[query][doc],
1289 | sigma_params[query][doc],
1290 | gamma,
1291 | cr_dict[doc],
1292 | &e_r_vector_given_CP,
1293 | &cp_vector_given_e
1294 | )
1295 | result = factor.compute_factor(0, 0, 1)
1296 |
1297 | try:
1298 | assert_almost_equal(result, 0)
1299 | except AssertionError:
1300 | return False
1301 |
1302 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1303 | click = False
1304 | purchase = False
1305 | factor = Factor()
1306 | factor.cinit(
1307 | r,
1308 | last_r,
1309 | click,
1310 | purchase,
1311 | alpha_params[query][doc],
1312 | sigma_params[query][doc],
1313 | gamma,
1314 | cr_dict[doc],
1315 | &e_r_vector_given_CP,
1316 | &cp_vector_given_e
1317 | )
1318 | result = factor.compute_factor(0, 1, 0)
1319 |
1320 | try:
1321 | assert_almost_equal(result, 0)
1322 | except AssertionError:
1323 | return False
1324 |
1325 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1326 | click = False
1327 | purchase = False
1328 | factor = Factor()
1329 | factor.cinit(
1330 | r,
1331 | last_r,
1332 | click,
1333 | purchase,
1334 | alpha_params[query][doc],
1335 | sigma_params[query][doc],
1336 | gamma,
1337 | cr_dict[doc],
1338 | &e_r_vector_given_CP,
1339 | &cp_vector_given_e
1340 | )
1341 | result = factor.compute_factor(0, 1, 0)
1342 |
1343 | try:
1344 | assert_almost_equal(result, 0)
1345 | except AssertionError:
1346 | return False
1347 |
1348 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1349 | click = True
1350 | purchase = True
1351 | factor = Factor()
1352 | factor.cinit(
1353 | r,
1354 | last_r,
1355 | click,
1356 | purchase,
1357 | alpha_params[query][doc],
1358 | sigma_params[query][doc],
1359 | gamma,
1360 | cr_dict[doc],
1361 | &e_r_vector_given_CP,
1362 | &cp_vector_given_e
1363 | )
1364 | result = factor.compute_factor(0, 1, 0)
1365 |
1366 | try:
1367 | assert_almost_equal(result, 0)
1368 | except AssertionError:
1369 | return False
1370 |
1371 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1372 | click = False
1373 | purchase = False
1374 | factor = Factor()
1375 | factor.cinit(
1376 | r,
1377 | last_r,
1378 | click,
1379 | purchase,
1380 | alpha_params[query][doc],
1381 | sigma_params[query][doc],
1382 | gamma,
1383 | cr_dict[doc],
1384 | &e_r_vector_given_CP,
1385 | &cp_vector_given_e
1386 | )
1387 | result = factor.compute_factor(0, 1, 1)
1388 |
1389 | try:
1390 | assert_almost_equal(result, 0)
1391 | except AssertionError:
1392 | return False
1393 |
1394 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1395 | click = True
1396 | purchase = False
1397 | factor = Factor()
1398 | factor.cinit(
1399 | r,
1400 | last_r,
1401 | click,
1402 | purchase,
1403 | alpha_params[query][doc],
1404 | sigma_params[query][doc],
1405 | gamma,
1406 | cr_dict[doc],
1407 | &e_r_vector_given_CP,
1408 | &cp_vector_given_e
1409 | )
1410 | result = factor.compute_factor(0, 1, 1)
1411 |
1412 | try:
1413 | assert_almost_equal(result, 0)
1414 | except AssertionError:
1415 | return False
1416 |
1417 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1418 | click = True
1419 | purchase = True
1420 | factor = Factor()
1421 | factor.cinit(
1422 | r,
1423 | last_r,
1424 | click,
1425 | purchase,
1426 | alpha_params[query][doc],
1427 | sigma_params[query][doc],
1428 | gamma,
1429 | cr_dict[doc],
1430 | &e_r_vector_given_CP,
1431 | &cp_vector_given_e
1432 | )
1433 | result = factor.compute_factor(0, 1, 1)
1434 |
1435 | try:
1436 | assert_almost_equal(result, 0)
1437 | except AssertionError:
1438 | return False
1439 |
1440 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1441 | click = False
1442 | purchase = False
1443 | factor = Factor()
1444 | factor.cinit(
1445 | r,
1446 | last_r,
1447 | click,
1448 | purchase,
1449 | alpha_params[query][doc],
1450 | sigma_params[query][doc],
1451 | gamma,
1452 | cr_dict[doc],
1453 | &e_r_vector_given_CP,
1454 | &cp_vector_given_e
1455 | )
1456 | result = factor.compute_factor(1, 0, 0)
1457 |
1458 | try:
1459 | assert_almost_equal(result, 0)
1460 | except AssertionError:
1461 | return False
1462 |
1463 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1464 | click = True
1465 | purchase = False
1466 | factor = Factor()
1467 | factor.cinit(
1468 | r,
1469 | last_r,
1470 | click,
1471 | purchase,
1472 | alpha_params[query][doc],
1473 | sigma_params[query][doc],
1474 | gamma,
1475 | cr_dict[doc],
1476 | &e_r_vector_given_CP,
1477 | &cp_vector_given_e
1478 | )
1479 | result = factor.compute_factor(1, 0, 0)
1480 |
1481 | try:
1482 | assert_almost_equal(result, 0)
1483 | except AssertionError:
1484 | return False
1485 |
1486 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1487 | click = True
1488 | purchase = True
1489 | factor = Factor()
1490 | factor.cinit(
1491 | r,
1492 | last_r,
1493 | click,
1494 | purchase,
1495 | alpha_params[query][doc],
1496 | sigma_params[query][doc],
1497 | gamma,
1498 | cr_dict[doc],
1499 | &e_r_vector_given_CP,
1500 | &cp_vector_given_e
1501 | )
1502 | result = factor.compute_factor(1, 0, 0)
1503 |
1504 | try:
1505 | assert_almost_equal(result, 0)
1506 | except AssertionError:
1507 | return False
1508 |
1509 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1510 | click = False
1511 | purchase = False
1512 | factor = Factor()
1513 | factor.cinit(
1514 | r,
1515 | last_r,
1516 | click,
1517 | purchase,
1518 | alpha_params[query][doc],
1519 | sigma_params[query][doc],
1520 | gamma,
1521 | cr_dict[doc],
1522 | &e_r_vector_given_CP,
1523 | &cp_vector_given_e
1524 | )
1525 | result = factor.compute_factor(1, 0, 1)
1526 |
1527 | try:
1528 | assert_almost_equal(result, 0.02016)
1529 | except AssertionError:
1530 | return False
1531 |
1532 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1533 | click = True
1534 | purchase = False
1535 | factor = Factor()
1536 | factor.cinit(
1537 | r,
1538 | last_r,
1539 | click,
1540 | purchase,
1541 | alpha_params[query][doc],
1542 | sigma_params[query][doc],
1543 | gamma,
1544 | cr_dict[doc],
1545 | &e_r_vector_given_CP,
1546 | &cp_vector_given_e
1547 | )
1548 | result = factor.compute_factor(1, 0, 1)
1549 |
1550 | try:
1551 | assert_almost_equal(result, 0.012096)
1552 | except AssertionError:
1553 | return False
1554 |
1555 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1556 | click = True
1557 | purchase = True
1558 | factor = Factor()
1559 | factor.cinit(
1560 | r,
1561 | last_r,
1562 | click,
1563 | purchase,
1564 | alpha_params[query][doc],
1565 | sigma_params[query][doc],
1566 | gamma,
1567 | cr_dict[doc],
1568 | &e_r_vector_given_CP,
1569 | &cp_vector_given_e
1570 | )
1571 | result = factor.compute_factor(1, 0, 1)
1572 |
1573 | try:
1574 | assert_almost_equal(result, 0)
1575 | except AssertionError:
1576 | return False
1577 |
1578 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1579 | click = False
1580 | purchase = False
1581 | factor = Factor()
1582 | factor.cinit(
1583 | r,
1584 | last_r,
1585 | click,
1586 | purchase,
1587 | alpha_params[query][doc],
1588 | sigma_params[query][doc],
1589 | gamma,
1590 | cr_dict[doc],
1591 | &e_r_vector_given_CP,
1592 | &cp_vector_given_e
1593 | )
1594 | result = factor.compute_factor(1, 1, 0)
1595 |
1596 | try:
1597 | assert_almost_equal(result, 0)
1598 | except AssertionError:
1599 | return False
1600 |
1601 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1602 | click = True
1603 | purchase = False
1604 | factor = Factor()
1605 | factor.cinit(
1606 | r,
1607 | last_r,
1608 | click,
1609 | purchase,
1610 | alpha_params[query][doc],
1611 | sigma_params[query][doc],
1612 | gamma,
1613 | cr_dict[doc],
1614 | &e_r_vector_given_CP,
1615 | &cp_vector_given_e
1616 | )
1617 | result = factor.compute_factor(1, 1, 0)
1618 |
1619 | try:
1620 | assert_almost_equal(result, 0)
1621 | except AssertionError:
1622 | return False
1623 |
1624 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1625 | click = True
1626 | purchase = True
1627 | factor = Factor()
1628 | factor.cinit(
1629 | r,
1630 | last_r,
1631 | click,
1632 | purchase,
1633 | alpha_params[query][doc],
1634 | sigma_params[query][doc],
1635 | gamma,
1636 | cr_dict[doc],
1637 | &e_r_vector_given_CP,
1638 | &cp_vector_given_e
1639 | )
1640 | result = factor.compute_factor(1, 1, 0)
1641 |
1642 | try:
1643 | assert_almost_equal(result, 0)
1644 | except AssertionError:
1645 | return False
1646 |
1647 | # doc_data = {'doc': 'doc0', 'click': 0, 'purchase': 0}
1648 | click = False
1649 | purchase = False
1650 | factor = Factor()
1651 | factor.cinit(
1652 | r,
1653 | last_r,
1654 | click,
1655 | purchase,
1656 | alpha_params[query][doc],
1657 | sigma_params[query][doc],
1658 | gamma,
1659 | cr_dict[doc],
1660 | &e_r_vector_given_CP,
1661 | &cp_vector_given_e
1662 | )
1663 | result = factor.compute_factor(1, 1, 1)
1664 |
1665 | try:
1666 | assert_almost_equal(result, 0)
1667 | except AssertionError:
1668 | return False
1669 |
1670 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 0}
1671 | click = True
1672 | purchase = False
1673 | factor = Factor()
1674 | factor.cinit(
1675 | r,
1676 | last_r,
1677 | click,
1678 | purchase,
1679 | alpha_params[query][doc],
1680 | sigma_params[query][doc],
1681 | gamma,
1682 | cr_dict[doc],
1683 | &e_r_vector_given_CP,
1684 | &cp_vector_given_e
1685 | )
1686 | result = factor.compute_factor(1, 1, 1)
1687 |
1688 | try:
1689 | assert_almost_equal(result, 0.008064)
1690 | except AssertionError:
1691 | return False
1692 |
1693 | # doc_data = {'doc': 'doc0', 'click': 1, 'purchase': 1}
1694 | click = True
1695 | purchase = True
1696 | factor = Factor()
1697 | factor.cinit(
1698 | r,
1699 | last_r,
1700 | click,
1701 | purchase,
1702 | alpha_params[query][doc],
1703 | sigma_params[query][doc],
1704 | gamma,
1705 | cr_dict[doc],
1706 | &e_r_vector_given_CP,
1707 | &cp_vector_given_e
1708 | )
1709 | result = factor.compute_factor(1, 1, 1)
1710 |
1711 | try:
1712 | assert_almost_equal(result, 0)
1713 | except AssertionError:
1714 | return False
1715 |
1716 | return True
1717 |
1718 |
cdef bint test_update_tmp_gamma():
    """Check the values ``DBNModel.update_tmp_gamma`` writes to its accumulator.

    A single-document session (``doc0`` clicked, not purchased) is evaluated
    with alpha = sigma = 0.4, gamma = 0.7 and cr = 0.1. The normalising
    constant ``ESS_den`` is rebuilt here by summing ``Factor.compute_factor``
    over every (i, j, k) in {0, 1}^3; the numerators 0.02592 and 0.012096 are
    hard-coded expected values (presumably hand-computed factor values --
    confirm against ``Factor.compute_factor``).

    Returns
    -------
      bint
          True when ``tmp_gamma_param`` ends up as
          ``[ESS_1, ESS_1 + ESS_0]``, False otherwise.
    """
    cdef:
        DBNModel model = DBNModel()
        int r = 0
        int last_r = 0
        char *s = b'{"doc": "doc0", "click": 1, "purchase": 0}'
        json_object *doc_data = json_tokener_parse(s)
        vector[float] cp_vector_given_e = [0.2]
        vector[float] e_r_vector_given_CP = [0.4]
        unordered_map[string, float] cr_dict
        vector[float] tmp_gamma_param
        dbn_param alpha_params
        dbn_param sigma_params
        string query = b'query'
        float ESS_den = 0
        float ESS_0
        float ESS_1
        int i
        int j
        int k
        bint click = True
        bint purchase = False
        float alpha = 0.4
        float sigma = 0.4
        float gamma = 0.7
        float cr = 0.1

    # ``doc_data`` is owned by this function from here on; the ``finally``
    # clause releases it on every exit path, including a failed assertion.
    try:
        alpha_params[query][b'doc0'] = 0.4
        sigma_params[query][b'doc0'] = 0.4

        model.alpha_params = alpha_params
        model.sigma_params = sigma_params
        model.gamma_param = gamma

        cr_dict[b'doc0'] = 0.1
        tmp_gamma_param = [0, 0]

        factor = Factor()
        factor.cinit(
            r,
            last_r,
            click,
            purchase,
            alpha,
            sigma,
            gamma,
            cr,
            &e_r_vector_given_CP,
            &cp_vector_given_e
        )

        # Denominator: the factor summed over all eight (i, j, k) combinations.
        for i in range(2):
            for j in range(2):
                for k in range(2):
                    ESS_den += factor.compute_factor(i, j, k)

        ESS_0 = 0.02592 / ESS_den
        ESS_1 = 0.012096 / ESS_den

        model.update_tmp_gamma(r, last_r, doc_data, &query, &cp_vector_given_e,
                               &e_r_vector_given_CP, &cr_dict, &tmp_gamma_param)

        try:
            assert_almost_equal(tmp_gamma_param[0], ESS_1)
            assert_almost_equal(tmp_gamma_param[1], ESS_1 + ESS_0)
        except AssertionError:
            return False

        return True
    finally:
        json_object_put(doc_data)
1792 |
1793 |
cdef bint test_update_alpha_params():
    """Check ``DBNModel.update_alpha_param`` on a single accumulated entry.

    The accumulator for ``doc0`` is set to ``[1, 2]`` and the stored alpha for
    (``query``, ``doc0``) is expected to equal 0.5 afterwards (presumably
    1 / 2 -- confirm against ``DBNModel.update_alpha_param``).

    Returns
    -------
      bint
          True when the stored alpha equals 0.5, False otherwise.
    """
    cdef:
        DBNModel dbn = DBNModel()
        unordered_map[string, vector[float]] accumulated
        string query_key = b'query'
        string doc_key = b'doc0'

    accumulated[doc_key] = [1, 2]
    dbn.update_alpha_param(&query_key, &accumulated)
    return dbn.alpha_params[query_key][doc_key] == 0.5
1807 |
1808 |
cdef bint test_update_sigma_params():
    """Check ``DBNModel.update_sigma_param`` on a single accumulated entry.

    The accumulator for ``doc0`` is set to ``[1, 2]`` and the stored sigma for
    (``query``, ``doc0``) is expected to equal 0.5 afterwards (presumably
    1 / 2 -- confirm against ``DBNModel.update_sigma_param``).

    Returns
    -------
      bint
          True when the stored sigma equals 0.5, False otherwise.
    """
    cdef:
        DBNModel dbn = DBNModel()
        unordered_map[string, vector[float]] accumulated
        string query_key = b'query'
        string doc_key = b'doc0'

    accumulated[doc_key] = [1, 2]
    dbn.update_sigma_param(&query_key, &accumulated)
    return dbn.sigma_params[query_key][doc_key] == 0.5
1822 |
1823 |
cdef bint test_update_gamma_param():
    """Check ``DBNModel.update_gamma_param`` on a ``[1, 2]`` accumulator.

    Returns
    -------
      bint
          True when ``gamma_param`` equals 0.5 after the update, False
          otherwise.
    """
    cdef:
        DBNModel dbn = DBNModel()
        vector[float] accumulated = [1, 2]

    dbn.update_gamma_param(&accumulated)
    return dbn.gamma_param == 0.5
1835 |
def _exported_rows_match(rows):
    """Return True when every exported row carries the expected judgments.

    Each row is decoded with ``ujson``. A row containing ``query1`` must hold
    0.36 for ``doc0``; any other row must hold 0.09, 0.16 and 0.25 for
    ``doc0``, ``doc1`` and ``doc2`` under ``query0``. At least one ``query1``
    row has to be seen for the result to be True.
    """
    seen_query1 = False
    for row in rows:
        result = ujson.loads(row)
        try:
            if 'query1' in result:
                assert_almost_equal(result['query1']['doc0'], 0.36)
                seen_query1 = True
            else:
                assert_almost_equal(result['query0']['doc0'], 0.09)
                assert_almost_equal(result['query0']['doc1'], 0.16)
                assert_almost_equal(result['query0']['doc2'], 0.25)
        except AssertionError:
            return False
    return seen_query1


cdef bint test_export_judgments():
    """Check ``DBNModel.export_judgments`` for plain and gzip output files.

    Alpha and sigma are set to the same value per (query, doc) pair, and the
    exported judgment is expected to be their product (0.3 * 0.3 = 0.09,
    0.6 * 0.6 = 0.36, ...). The export is verified twice: once to a regular
    temporary file and once to a temporary file whose name ends in ``.gz``.

    Both temporary files and the read handles are managed by ``with`` blocks,
    so nothing is left open or on disk whichever way the test exits.

    Returns
    -------
      bint
          True when both exports contain the expected values, False otherwise.
    """
    cdef:
        DBNModel model = DBNModel()
        dbn_param alpha_params
        dbn_param sigma_params

    alpha_params[b'query0'][b'doc0'] = 0.3
    alpha_params[b'query0'][b'doc1'] = 0.4
    alpha_params[b'query0'][b'doc2'] = 0.5
    alpha_params[b'query1'][b'doc0'] = 0.6

    sigma_params[b'query0'][b'doc0'] = 0.3
    sigma_params[b'query0'][b'doc1'] = 0.4
    sigma_params[b'query0'][b'doc2'] = 0.5
    sigma_params[b'query1'][b'doc0'] = 0.6

    model.alpha_params = alpha_params
    model.sigma_params = sigma_params

    # Plain-text export.
    with tempfile.NamedTemporaryFile() as tmp_file:
        model.export_judgments(tmp_file.name)
        with open(tmp_file.name) as rows:
            if not _exported_rows_match(rows):
                return False

    # Gzip export: the ``.gz`` suffix keeps the file under tempfile's control
    # so it is deleted on exit instead of being left behind on disk.
    with tempfile.NamedTemporaryFile(suffix='.gz') as tmp_file:
        model.export_judgments(tmp_file.name)
        with gzip.GzipFile(tmp_file.name, 'rb') as rows:
            if not _exported_rows_match(rows):
                return False

    return True
1893 |
1894 |
cdef bint test_not_null_converence():
    """Fit a model on the ``null_test`` fixture for 10 iterations.

    No fitted value is inspected here: the test passes as long as ``fit``
    returns.

    Returns
    -------
      bint
          Always True when ``fit`` returns.
    """
    cdef DBNModel dbn = DBN()

    dbn.fit('tests/fixtures/null_test', iters=10)
    return True
1901 |
1902 |
cdef bint test_long_list_null_converence():
    """Fit a model on the ``eighty_skus`` fixture for 10 iterations.

    No fitted value is inspected here: the test passes as long as ``fit``
    returns.

    Returns
    -------
      bint
          Always True when ``fit`` returns.
    """
    cdef DBNModel dbn = DBN()

    dbn.fit('tests/fixtures/eighty_skus', iters=10)
    return True
1909 |
1910 |
cdef bint test_all_clicks_set():
    """Fit a model on the ``all_clicks_set`` fixture for 10 iterations.

    No fitted value is inspected here: the test passes as long as ``fit``
    returns.

    Returns
    -------
      bint
          Always True when ``fit`` returns.
    """
    cdef DBNModel dbn = DBN()

    dbn.fit('tests/fixtures/all_clicks_set', iters=10)
    return True
1917 |
1918 |
cpdef run_tests():
    """Run every Cython-level DBN test in a fixed order.

    Each ``test_*`` function is called inside an ``assert``, so the first one
    that returns a false value raises ``AssertionError`` and the remaining
    tests are not run. ``tests/test_DBN.py`` imports this function and calls
    it.

    ``alpha_params``, ``sigma_params`` and ``gamma_param`` are not defined in
    this function; their addresses are passed to the tests that take them.
    NOTE(review): they look like module-level fixtures shared across those
    tests -- confirm whether any test mutates them, since that would make the
    call order significant.
    """
    assert test_get_search_context_string()
    assert test_compute_cr()
    assert test_get_param()
    assert test_build_e_r_vector(&alpha_params, &sigma_params, &gamma_param)
    assert test_build_X_r_vector(&alpha_params, &sigma_params, &gamma_param)
    assert test_build_e_r_vector_given_CP(&alpha_params, &sigma_params, &gamma_param)
    assert test_build_cp_p(&alpha_params)
    assert test_build_CP_vector_given_e(&alpha_params, &sigma_params, &gamma_param)
    assert test_get_last_r()
    assert test_update_tmp_alpha(&alpha_params, &sigma_params, &gamma_param)
    assert test_update_tmp_sigma(&alpha_params, &sigma_params, &gamma_param)
    assert test_compute_factor_last_click_lower_than_r()
    assert test_compute_factor_last_click_higher_than_r()
    assert test_update_tmp_gamma()
    assert test_update_alpha_params()
    assert test_update_sigma_params()
    assert test_update_gamma_param()
    assert test_fit()
    assert test_export_judgments()
    assert test_not_null_converence()
    assert test_long_list_null_converence()
    assert test_all_clicks_set()
1942 |
1943 |
if __name__ == '__main__':
    # Intentionally a no-op. The test suite is driven through run_tests(),
    # which tests/test_DBN.py imports and calls; to run a single test while
    # debugging, call it from there (or temporarily from run_tests()).
    pass
1966 |
--------------------------------------------------------------------------------
/notebooks/DBN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ClickModels: DBN"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "ClickModels is a field of study in Machine Learning that uses Probabilistic Graphical Models to model the interactions between users and a set of ranked items.\n",
15 | "\n",
16 | "One of the main uses of ClickModels is to train models from past observed data to evaluate how good each document probably is for each query, also known in literature as judgments' values.\n",
17 | "\n",
18 | "In order to compute the judgments for each document for each query, we rely on the [work](https://pdfs.semanticscholar.org/0b19/b37da5e438e6355418c726469f6a00473dc3.pdf) developed by Aleksandr et al., where users' interactions with each query result are modeled through a Dynamic Bayesian Network, as depicted below:\n",
19 | "\n",
20 | "\n",
21 | "
\n",
22 | "
"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "$E_r$ is a random variable that tracks whether a given document $u$ was examined at rank $r$ by the customer or not (this would be equivalent to the impression event from GA's dataset).\n",
30 | "\n",
31 | "$A_r$ is an indicator of whether the customer found the given document attractive or not. When a sku is examined and it is attractive, we have a Click event, represented by the observed variable $C_r$. \n",
32 | "\n",
33 | "Another observed variable is $P_r$, which represents the purchasing event. $P_r$ and $C_r$ both directly influence $S_{ur}$, which indicates whether the customer is already satisfied or not.\n",
34 | "\n",
35 | "If not, it is assumed that the customer continues examining the result set with probability $\\gamma$."
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "Creating the DBN above is done through the following code:"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 4,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAuQAAAEHCAYAAADmnFPUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzdd5hV1dXH8e/vDChNQDSCGhtib4i9l1jQxKhvIBrfmGAjUUyiKWos0VdM1NgVUWIBNdFExZgYDZZYoqgoZWwhSNFYUYygwAAD96z3j30GxnEaM+fec+5lfZ6HR2bm3nvW4Mze6+6z9toyM5xzzjnnnHPZiLIOwDnnnHPOuVWZJ+TOOeecc85lyBNy55xzzjnnMuQJuXPOOeeccxnyhNw555xzzrkMeULunHPOOedchjwhd84555xzLkOekDvnnHPOOZchT8idc84555zLkCfkzjnnnHPOZcgTcuecc8455zLkCblzzjnnnHMZ8oTcOeecc865DHlC7pxzzjnnXIY8IXfOOeeccy5DnpA755xzzjmXoQ5pvZAkS+u1nHPOgZkpzdfzcdo559KXxljtK+TOOeecc85lKLUV8jppr+g459yqptgr2T5OO+dc+6U5VvsKuXPOOeeccxlKfYXcOVc6ktYDdgK2AroCnYCOwGJgEfApMAWoNrOarOJ0zrlVlSQBmxDG6n5AZ8JYHRHG6sXAh8Bk4A0zq80oVJchmaWz2l63bO+3Qp0rDklVwNeAvQgD+06E5HsS8DownzCwLwNWJwz66wADgK2BWcljJwKPmtmbJf4WXCsVazz1cdq54pPUGTgc2JUwTg8Aaghj77Tk74sAIyTmnYANksduArxBGKsnAX8zsw9L/C24VkpzTPWE3Lmck7QOcBLwQ+Aj4HFWDNbvWCt+iSWtDmxLGPB3Bo4EXgVuJAz4y4oTvWsLT8idKz+SNiWM00OAauCfJGO1mX3UytfoCvQnjNW7ERL7x4CRwD9bM9670vGE3LkKl9zi3BM4jTAgjwVuMrNJKb3+6sC3ktffCPgdcIuZzU7j9V37eELuXHlI7lweBgwjLHaMBkaZ2cyUXr8HcDxhrDZCYn6XmX2exuu79vGE3LkKJqkPcBOwHTACuMPM5hbxejsApwKDgIuBEWYWF+t6rmWekDuXf5K2IiTgVcANwH1mtqhI1xKwHyEx3wcYZmYPFONarvU8IXeuAiUD7neAa4BbgYvNbEkJr785YXIpACea2YxSXdt9kSfkzuVXsir+M+AXwK8IK+IlW8SQtBdhrJ4E/MjMPinVtd0XpTmmettD53IgWRV/ADgX+LqZnVfKZBwg2eS5L/Bn4EVJP5bkY4RzziWSVfHxwKHALmZ2U6nvKJrZeEKd+YfAq5KOLuX1XXH4ZOtcxpLVjmrgX8BOZjYxq1jMrGBm1xDq148B/i6pW1bxOOdcXkj6DvAscAdwsJm9nVUsZlZjZj8Fvg1cLunmZOXelSkvWXEuQ5IOBe4CjjezR7OOp75kcL+ZUMt+uJl9mnFIqwwvWXEuXyT9EDgfGGhmr2cdT33JosmDhHMnvut9zEvHS1acqwCSvgHcCRyVt2Qcwmo5MJSwIvS0pF4Zh+SccyUn6QxCvfi+eUvGAcxsAfANYDXgAUmrZRySawNPyJ3LgKT9gNuBI8zs+azjaUrS8/Ys4FHgES9fcc6tSiSdCJwJHGBms7KOpylmthgYTNiUf6eXr5QfL1lxrsQkbQM8BRxjZk9lHU9rJB1gfgdsCBzmbRGLy0tWnMuepMMJHa/2L5eTjSV1Ah4BXjOzn2QdT6XzkhXnypSkjoQNQeeVSzIOy1fKfwh0AU7POBznnCsqSWsRkvFjyiUZh+Ur5UcDR0k6OOt4XOv5CrlzJSTpPEJrwYHleASypM2A54E9vE958fgKuXPZkvQH4GMzOzPrWNpC0iHALcB2fqpn8fjBQM6VIUnbA
U8SWhu+k3U8bZVscPofwm1cL10pAk/IncuOpKOAK4AdzKwm63jaStItQGxmP8g6lkrlJSvOlZmkVGUM8MtyTsYTNxDGjh9lHYhzzqUpKVUZCZxQzsl44mfAQC9dKQ++Qu5cCUg6ExhImZaqNJSUrrwAbG9mH2QdT6XxFXLnspGsKi80szMa+dpBQMPV5kvNbLKky5O/zytFnK2VnHUxCtjMzJZmHU+l8ZIV58pI0n5qBvBtM3u5na+VmwlB0k3Ah2Z2cSmutyrxhNy50pO0NmGs7mdmnzT42n3Apw3LPySNAmYCPzCzTUsW7EqQ9E/gejO7P+tYKo0n5M6VEUlfBy4ys13a+Tq5mhAkbU9or7WJr7ykyxNy50pP0i+AbcxsSIPPjwLmmdnZTTxvLnBvXmu1JR1DmB8OzDqWSpPmmNqh/eE451pwGqEmsc2SCWFWYxOCmf2gbkJozzVWlpm9Kukt4AjggVJe2znn0iQpAk4Fjm3w+YOAoS0kXBOBx4sYXnv9GbhW0lZmNjXrYFzjfFOnc0UkaRNgN+BP7XiNugmh0dWZRFYTwkjCGw7nnCtnhwKfAg3LCg8GnmjhubNa8ZjMmFktoaf6qVnH4prmCblzxfUD4I527tbP84TwALCtpC0zuLZzzqXlNGBkI5vuBwB9W3jupLxt5mzE74DvSuqWdSCucZ6QO1dch9OO1fFEbicEM1tCuB06sNTXds65NCRtaQ8E7mvky5OBvpJGSerZ2PPN7HfFjC8NZvYu8BqwR9axuMZ5DXkDybvHHYGdgZ2AzYHOyZ8qYHHyZw7hF3VS8uetSmhn59IjqTPQD3ilnS81GTgoqSM/u7HEO+MJYSKwf4bXd6ugpF/0TvX+bEwYpzsBIozTi4APWDFOT/I2na4RWwP/MbP5jXztUmAQMBQYKmkyYZHlCTObXMIY0zCR8LuS53r3VdYq32VFkghHmQ8Bdgc2JLyLrBvA/wUsJAzuBWB1woC/HmHlcidC8t4lefzfCCUKeb995YpM0u6EW6AD2vk6PQk/W3Wr5LmaECT1B+42s62zjqVSeJeVL5PUAfgGcBywC9ALmEL43ZhIaFe3KPkDYZzuDGzEFxP32uTx9xM6Yywu3Xfh8kjSicABZnZ8E1/vCfwSOIgw79d5wszK5tAdSccBR5vZ4KxjqRTe9jAFkroDxxPqxkRonP8UMLUtLdwk9SZMEscBhxEG+5vykDC5bEgaBvQ3s1NSeK3cTgjJ7d55QG8zW5BlLJXCE/IVJPUBTibsx3iHsDltPDDDzOKVfC0RFl12JyzC7AyMBm42s1kphu3KiKQbgelmdm0rHtuTMA7/kjAWn21mvy1yiK1Wd1ZFY0m3pC2Av5tZSyWQrpXSHFNXuRpySRtIGgm8TbjNfjqh7+h1ZvZqW/spm9lHZvY3MzsO2BJ4C3hQ0guSBicTgVu17ExYvWs3M5tnZmeb2U7AmsBgVpSynJXGNdoR21LgDaB/lnG4yiJpB0l/BKYCGwBHmNleZjbazN5c2WQcwIL/mNmfzOwwQj1tBLwk6RFJ+6X7Xbgy0eqxOhmL70/G4lnAMUWNrJUkDUgOhxtM03uOpgNrJ+VeLmdWmYRcwcmEJGYesK2ZDTazp9Ku/U6S898QfikuBS4A/ippvTSv43JvR8LPW6pyOiFM4osr9861iaTVJP0foc71RWBjM/uBmVWnfS0zm2FmPyck/PcBv5c00jtRrDqSk5S3A6obfH5QK57+BNDoRs+0JXuImmRmk5PWuE3WhydvYqfgiye5tEok5JI2BMYBPyTUiZ1bio09ZrbMzP5KePc9BaiWdLyvlq8yegEftfXJeZsQWvAx4ft1rs2S/QgvEWq9dzSza83ss2Jf18wWmdloQmLWCXhNkp9quGroDMSNbOhszUJHL3Lcf7wJHwO+Qp5DFZ2QJ6vipxBW7/4J7GFmr5c6DjOrNbNfEVrD/QJfLV9VdGbFBrO2KKcJYRHh+3VupUnqmKyKPwZcQ
yhPeb/UcSR3n04EhgF3SLrJV8srXmdC04aGDmqqzWH9x9B4q8Q8W0R40+lypmIT8uQY3BGEGvEDzezXba0PT0uywXNnwq2xFyVtlWU8rug6AUva8fxymhAW44O8awNJXYC/EjZa7mhmd2TdQtbMHiGslncDnpH0lSzjcUX1pXFa0gDCncdvN/WkpITkXjN7ot7nBkkaWn9fj6SekibV+3iopMuTmu9ByZ9SjuGL8cWTXKrIhDzp+nAnYUDd18xeyzik5ZLV8guA84EnJe2UdUyuaJbSxl7/ZTghdCR8v861mqQewKOEcx2+nsWqeFOS1rXfA/4O/FPSVzMOyRXHUsIZI/UdRLjzOFjS0PpfSMbT+wDM7Af1Pj+AsKdnIqEjUP3XmlXvMfcC/wXuM7P7CfuMPk3zG2qBj9U5VXEHAyUbNO4gdKIY2M4jy4vGzO6U9DnwiKRDzKy9h8e4/GnPqnH9CeELB/8kq+a3AJ82MSHMIqya/7beazWcEIYSJoRNJfUF2ts6sRON3/Z1rlFJKcg4QkLyo7Z0TSm2ZKX+fEmfAU9J2sfMZmcdl0tVYyvGawGDzWyepLMk1d8oOQ8YVX8hpO7zZjYr6XRyf73PH8OKjZbzktfcBbgcIGm3WT+Bp4kFkgGNfP7xNhwK14n2lVK6IqmohDzZLDmCcGjPYWaW6x86M3swWc3/u6T9zGx61jG5VC0E1mjjczOdENqgG1D0zXeuMkjqBDxIOHjt9KxLVFpiZlckMT+WjNVzs47JpaYG6CwpqntTmHQrIfn7b1mxuNGken3shxI2Jdc5CDi7wWMOApo8n6KJHuKj6i/AtEM3wvfscqaiEnLgJ8CuhE4quU7G65jZfclt20ck9TezhVnH5FIzDdgG+PfKPjHrCaENtiUc2OJca1xPeAM3NO/JeD2XEDZR35vc1SyXuF0zzKxW0nvA5rRhrK4vudu4fJyt/7Gkvsl/BxDubmZ1mve2tPP7dMVRMTXkkjYj1GUfY2afZx3PyjCzWwn9dn+TdSwuVRP5YmJcdC1NCMnnUp0QkjtTO5HSIUiuskk6FDgUOMHMClnH01pJAv4LoAfhTa+rHGmN1T1JygMTg5LXhhXnNNSVIxZLk+1nkwOBehEOCHI5UxEJeVI3Phq42MxmZB1PG/0EGCRp36wDcamZRIkTcrKZENYjbIp6N8XXdBUouRt4C3ByuS2cQDhbAjgB+LWkjbKOx6UmlbE66aQ2MdkoXzfOzks2htaNubtQhM5YWnFS59mEevNRDTekEuaAKXncr+Eqp2Tlx0BMqB8vS2b2qaRTgdsl7eClKxVhErCTJJXq9raZTZY0MTlUaB5hEtglGZjvTR62C9DsqW8raSdgkt/Cd61wJTDOzJo8TRCWdxCa1IYNa0VnZm9Iugq41UtXKsYkwona7dZInffgBl//Un14StedTNggfXYzD/M7mTmmtMYSSQZgZiU9hTIpVXkB2L2Yq+OlmiAk3UUoJ/hJMa/jSkPSh4QDqd7OOpZikXQxUGVm52UdS6Uo1nia1TidXPtQ4HfAds2tjielVTOB39bfS5EnkjoAzwO3mVmab25dBiT1At4GeuZ19biuBr2dr3E/8ICZ3Z1SWKu8NMfUSihZuQy4rMjJeF9CzeCmxbpGPT8BjkveaLjy9zzwtayDKLKvEb5P5xqV7DO4Bji1FaUqdQluS4diZaZB6UqXrONx7WNmnwLvE5pC5FIKyfhqwD6E/Wouh8o6IU8OajiAdG+/N6ZkE0QyMNxO+9vQuXy4Dfhh1kEUi6TtgY0Ih7s415T9CWWFf2/uQUnd7eTkw75FjqldzOwNwt3ZY7OOxaXidip4rAaOBt5ob2LviqesE3JC27a7zWx+sS6Q0QQxCvi+JD/etvw9CqyV9P2uRKcCv0tWDJ1rymnAyFbUWw8GLk3+3mS3iBwZCQxL7gC48jYaOErS2lkHUiTDCD+vLqfKNiFPDtQ5BbipyJcq+QSRvIN9iXCgiytjS
Vu3mwkJSUWR1J3wM3pL1rG4/JK0HqGs6fctPG4o4cCrunacuV4hTzxKOBW6Ut9wrzLM7BPgL4RSpIoiaTtCye1fso7FNa1sE3LgKGB6ctuwKDKeIEZSgUncKup2wsrLWlkHkrLjCUc3f5h1IC7XTgH+2MJGzp7ApkmnCAgdgnJbQ14n2QB4Ez5WV4obgVMllXNu1Ji6O5lLsw7ENa2cf+iKujqegwliHPAVSf1LeE1XBMnKy0OEQbEiJBuEfkTx71C5MpaUcrRmrB7KijuRkPTST8bhvKsrdeiedSCu3V4GPiUs+FUESesQ9jn4Sco5V5YJeXIQ0B4UdyNZphNEUurwOLBXKa7niu4i4AxJW2QdSErOJbSmeybrQFyubURor/taUw9IuljNa3By7KfJf3NfR5684X6d0h8C5lKW7HE4C7iuTN4MtsaNwK1m9n7WgbjmlWVCDmwOfGxmc4vx4jmaICYBO5fweq5Ikn0BFwGjkzeUZSu5a3MaMNQPRXEtaM1BJD9o5HyHuk4Q5VBHDj5WVwwze5JwR/OarGNpL0mDgW2BX2Udi2tZuSbkO7PiOPBiyMsEkcXR6654RgK1wBlZB9JWSanKA8CffMXFtcLONJOQSxpAuBPYUN1iSO5WKSUdJKnh0ec+VleWs4D9JR2edSBtJekrwA3ACWa2OOt4XMs6ZB1AGxXt+NecTRCvAf0kdTazRSW8risCM4slnQi8JOlvZjYt65ja4FygI3B60hJ0m7yebOdyYSfg+ma+fjmApIYncvZt8N/MJXPDMYQ5oGFcqR297rJnZgsknQTcIWm7BnfLy8WNwJ1m5gcBlYlyTsgfKtJr52aCMLMlkv4N7ICfrlURzGyWpAuBeyTt34pTC3ND0oGEUpUdCT+TDwMFSf3N7JVMg3O5k2zobHLxRNIg4Ox6G+cbfu0+oGSdiSSNMrMmD2RL4pycxNawPOXfwLqSepjZZ8WM05WGmT0p6a/A7ZIGJ/u6ykLSIW574PtZx+Jar1xLVrYGXk37RetNEAc3/APUJeilbl33KqEGzFWOkYQ+838pl8OfJO0K/BEYbGbvm9kjQN2R4dWSfAe/a6g3EDfTFnOXxpLxRFnVkCfJ2r8Ic5OrHD8F1gBGlcvhT5KOAS4EjvA76+WlXBPybkAxVhbzOEF8DnQt8TVdESUbIYcBs4H7JXXKOKRmJbfq/wqcaGbLu6qY2SIzE6He8iRJlrTYcg7CuNXoOC3pLL7Yxaqh3NaQN8PH6gpjZksIR85vQ+i8kuucSdKRhBKxw8xsetbxuJWT6x+uxiS/EB0Jm+PSfN28ThCLgFwnbG7lJStq3yNM4n+XtEbGITVK0j6EnvinmdnfGnuMmV0BbJB8+JGkium37tqlM2H8+oKki9VazdXlJl2JoJEFEEmDJA1Nxuy6z/WUNKnex0MlXS5pQPL4QY1sxEzbYnysrjhmtgA4jFCmdJukXJb6SjoeGAV83cxSryBwxVd2CTmh7r2QZru1UkwQ7bAUWC2F13E5k5ya9l1gGvB0nnqUK/geoaPKcWb2QHOPN7P3ktXy+4GRkhYmHVncqqsjsKz+J5Lezo8TDmBpjS+Mt8ndmlmELlv1670PYsVZEQOAe4H/AveZ2f3AZFa0ri0WH6srVJIbHAysBzyYpzuBkjpIugD4DXCgmRWzA50ronJMyJcCVWndOirFBNFOqxNWXlwFSlbKTyWc9jdB0tlZ9ymXtB6hROVnwEFm9kRrn2tmgwmHdnUBlkg6oDhRujKwhCRBTRYoHgfmEsbQW5I9O18i6XFJc+t9PFPS5cmH85KywmMIb/7qHMOK7lh1Z0jsQrJJ38xm1d+wKem+hn+Agxr5/NCV+H59rK5gZrYQ+CbhEKipSa12piRtS2j4sDewp5n9K+OQXDvk8tZLc8zMJC0hDH5t3rCQJOL3ERJnCBMEyWpKw8c+Tr1d9ZJmAveb2dmEwX9WMmE0NUG0R2d8kK9oyc/0Z4TNQ/8LfFPSi
aVui5hsWjoeuJJw1Pm3zGylS8PM7EVJHYH/AE9KegwY6IcIrXIWEcav+iuMLUo20Tf1tbpFjqF8se/3QSQb7+s95iDglCZeZ3DDz7XUZaUVOuFjdaWrBWJgTeDXyZvKYWb2cSmDSMpmziacafFL4DYfX8tfOa6QQ6jn/kp7XsDM5iUdVJT8WbOxZDx57MHJ1+seu2mSjDecIEbVe9pBQKtXFpuxNmFVyVUoSacDdwIfEloK/hEYL+k8SSU5GVbh9M26VfFDzezCtiTjdcxsmZmtT2i7dQgQS9oknWhdmZgHrJX2RrikxHD52Fv/47q/J3cuPy1x/+i1WbHXyFWY5Of4JkIC/ASh+9ks4FVJJ5eiY1ZSSnggYVV8P2AnM7vVk/HKUK4JeTUhccmF1kwQ7TCA8P26CpMMrhcQbqvXAsPNrGBmNwC7EVqozZJ0u6TUTwGU1EnSdyW9ROjr/xyh09CUtK5hZncCdW8qZkm6OK3XdvlmZnMJiwmbpvzSPfliOeAgVpzcPCD5b1oLIo350pvkJBnrR2h96CpMsiJ9D+EOYi1wsZktThbmjiR0YnlH0pWS+hXh+j0lnUHod389MIKwcPJO2tdy2SnXhDxvxxS3ZoJYaZJ6EDaR/Lvtobk8SspDrgbOIam3JqySA2BmM83sf4HNgTeBsZJeknSCpA2S57fluqtJ2lHSZcA7hJaFmwE7mNnl7VkVb4qZzU02fF4DXKDQHjGXXWVc6lIfq5Ma8onJZvq6xHteUu9dl4TvQihJTE3SseVyQqnAAEmj6tWYbw9M8yPKK4+k1QkLFt8gjNUzgPF1XzezCWb2dWB3QjnLC5LGSfofhePr23rdLpL2lvQ74C3CXXgRxuoxvipeeZTW/1NJBpBMvEUl6Wjg5OSXIBckjSLUjM8j7Ob/ZfLxvW29bSppf+A3ZrZnWnG6fFA4rfMcVtSdXmVm5zfz+CpC662TCZsmRUh26v68DsxPXmtp8rqdgXUIbwp3IuyD2AZ4m9DK8GbCXoxXCT+nRd+kJGkrVqwiDm6qTGxVV6zxtJTjdHK984CeZvaLUlyvPdpTQy7pNGCAmZ2cclguY5LGAocTxtQFwElmdm8zj+8MDCa0td0F+IwV4/REQletGsIei5gwTncitI7dqd6fTYGpwIPALcD+hFX688zsNyl/m66N0hxTyzUh34Dwg92nkt8lSvoZsLGZ/SjrWFy6kpW9xwkDci2wUWs3BiWr4+sTEuy6wXsrwupNJ0K7ucWEAX8uoeVb3YRQnfTVrXutKsLkAHCsmf253d9c6+J/nrCi9G9gGzOLi33dclJBCfmhhNOPDyzF9dpDUt96e4JW9rm3ARPN7KaUw3IZS95s3Zh8OAdYz8yWNfOU+s+NCIl1w0S7LgmvIozTi4GP+OIiy2sWDiaqe62+wHTC3dTdvdd4PnhCHib0jwgbGt4t9vWyIulu4FEzuyPrWFy6FI6aPwmYAjxlZj/LMJZqYAfCIUWbm9lHJbru4cDDyYf9zeyVUly3HFRQQr424RZ/r0p+05X8Dp1iZq1tn+vKhEILzp6EO3tXmdntGcUhwl3QLoQS2W3qJ+wuG2mOqWVZQ56sij8B/E/WsRSLpK7AocBTWcfi0pV0gDgJOMvMBmSZjCeeBoywavOHttanrywze4QwuQBUJ29SXAUxs08IJVJfyziUopG0OWGvj7+hrDAKpw73BPY2s22ySsZhed5TTShXXBf4dVaxuOIoy4Q8cRNwWqmShwx8Bxjvu6grS1IiMgmWHzmfB+MJKy8dCQdMfKdUFzazRcnKwlnAScmGz9ycgudScRNwWtZBFNEPgduLsSHaZUdSb2Ak4cyR8S09vkSeBAqEhYwfS8pNtznXfuWckD9HqKXKfW3iykreZAxjRd2aqxxvJ/8tes/alfASoZ4RwibPmaUOIHlzskHy4UfJypSrDH8A9k32/lQUSV0Im/duzjoWl7rZ0PghUhl6EairXy8A/80wFpeysk3Ik
9s3I6nMlZfdCKc2pnHSp8sJSUOArwIH5qw92jvAeYQd/ZjZhCyCMLP3ktXy+4GRkhZKWi2LWFx6kk3EfyC0bas0xwIvmNnbWQfi0iPpnuSv62UayJc9A1yS/L2T30GvLGW5qbPeNdcgHM+9vZm9V6rrFpukup6n25nZ61nH49ov2dw2B/i7mR2edTyNkdSJsOP/FDPLtJ5b0u7AC8mHB5rZKrWXolI2dda77laE2+0bVUppR3In8z9AD0q4GdoVl6Q9CF2gfmRmI7KOpzGStiO0q93MzGZkHc+qbJXvstLgutcCq5tZRdziTiau5wkbSep4Yl7msvr9WFmS5gE98hBncjreO4QNTI8BAyu5zWl9lZaQJ9d+DHgoOYm27En6BqFUZf3kU/8ldL7wxLxMSepIaENbY2Zds46nOcnv8hwz8z03GVrlu6w0cBHwDUllX0ueJCBjgHOT/7m7J196Ldnstm1mwbk2q9c9ZKNMA2md3QAkbZN1IGa2zMzWA74PHALEkjbJOCzXdj8CLpS0adaBtJekNQnJ+HeTsfoYYC1gtqRPkg2BrvzUnQWxZqZRtM5pwFe8rK9ylH1CnpyCORS4rQKO4/4p4SSwUbD8SF5PzMtYgxaHua/3M7NpyV9z00/ZzO4EeiUfzpJ0cZbxuLZJfrYuBW5PDkwpZ9cCD5rZ0wBmdq8n5uWtQYvD3JdV1TuE6neZBuJSU/YlK/Wufzuw2MzKcpNnUqryT2CXpjYISdqNsMu6jpey5FjS4nAZ5L9UpT5J3wPuALqY2aKs46lP0jXAGcmH3c1sfpbxFEsllqwk168CngXuzmt9bkuSUpXrCXuXFjTxmG8Df0o+9FKWnJPUB/iQ0OIwT11VmiXpL8A3y2l+qTReQ9749XsCrwFDzOwfWcTQVkmpynPAHa05etkT8/Ig6V1CV5XOOeuq0qLk93msmQ3KOpaGkjev/0o+HGxm92cZTzFUakKexLAFoff9bmZW8hab7ZGUqrxGKFV5uhWP98S8DOTh96ItkgMEFwDHmdk9LT3epc9ryBuRlK6cBPw+GfDLQrJT/yZgHkmpSku8lCX/ctzisLXuAb6VdRCNMbOphLHrReA+SVMroARilZGUrvwf8FdJa2UdT2slXdKszFQAACAASURBVIjGAve2JhkHL2UpBzlucdgiM1sILAXuzjoW134VNYmZ2WPAL4HHJG2YdTwtSZLx3wLbE1b64pV5vifm+ZS0OBwNPFLG7fpOBJB0StaBNMaCPYCvA1sCBUk7ZByWa70RwN+Av5fD3p+k+8afCJv+frGyz/fEPJ+SFofHAqeb2YdZx9NGOwFI6pd1IK59KqZkpUEsPwF+DBxsZrOyjqcxSTJ+KSGh2M/MPk3hNb2UJQfy9LvQHnlqgdgcSZ2BmuTD28zs5CzjSUMll6zUi6Xu7uB2wNeTu5y5I2l1wgpkJ+DoNDb8eSlL9uq1OFxoZt2yjqc9vAVidrxkpQVmdh1wFfDPPLRvayjZ2HQT8DVg/zSScfAV8zyo1+Iw93doWiE3LRCbY2aLkp/7s4CTkp95n5hyLukpfxowEXg6j6vFkroBDwEG/E9a3Td8xTwX6loc9mr2UeXBWyBWgIpMyAHMbCRwNmGgPylZjclc0kf5cWBz4Gtm9t+0r+GJeTYatDh8N+t42iuPLRCbY2ZXABskH36UtDFzOZaU6Z0BPAi8LOnQjENaTtIuwATgXeBYM1uS9jU8Mc9GubU4bIm3QKwMFVmyUp+k7QmH7XxEOBL8vYziiIAfAMMJdeNXm9myEl3bS1mKrFxbHLakXgvErmZW09Lj80LSfcAgQinLmuU26a4KJSsNSToYuJWwYPEzM/ssozhWBy4kvLk+E7jH0pooW762l7IUWbm2OGyJt0DMhpesrAQze5Vw6/15YIqkE0u9Wp6sij8BDAH2MbPflioZB18xL5G3k/92zjKItCWH8gDclWkgKymZaPcAugBLJB2QcUiuBWb2OKGevEAYo
0q+Wi5pZ2ASsBWwg5ndXapkHHzFvEQ+hOVjRCU5DkDSd7IOxLVNxa+Q11dvtXwB4aS1vxYzMVY4IvqHwAmUeFW8OavairmkDQilDN2BrsBi4HNgDjCtvRNu0uJwNKHFYbl2VWmSpLuB77T0u5280d2aUJPZk7AJbj6hpee7ZvZ+sWNtJKYOwDvAusBjwMBSJlhttSqukNdXb7V8InAD8Ewx/79J2gkYRthkfwbwxzz8nKxKK+bJ+LEZsA7Qg/BmegFhDHnPmjgwbyWv8UfCm531yrirSpMk1QIdWzFWdwS2JYzTawAdWTFWzyxGKW2lSnNMXaUSclj+gzgYOBXYhFBzdUtav5xJ6cLhhE0WOxMStZvM7K00Xj9NlZiYJ62fvgUcWFVVtUOhUPgKECV3BKzu5kgcx0p+Vq2qquqzQqHwL8IJgg8CE1o7GSctDucQWhx+vRjfU9aS/suLgKFmdkvyOQF7A0dK2kfSlnEcd0++ZlEU1Y0HmJnMTJIKURR9XCgUpgD/IBw89J8SfQ91pTcAffP4+1jfqp6Qw/INld8njKUCRgJ3pVXKknTn+Xby+n2AmwlzwSdpvH6aKi0xT8aPAcDRkvaJomjbQqGwZvKl5WN1/fEDiKuqqv5bKBReBZ4hjB//au46Da65B+FO+elmdmMxvq+sSdoOeBXYzMxmJJ/rCAwEvh5F0R5AvziOuwBEUWT1xoT6Y/XSKIreLxQKEwklZGM9SW+cJ+QpUehbfCrhHfM/gKcItytfsVYeGZ4MLBsTeoHuTOhpOpswedzX2tfJUrkn5pK+Alwl6dtmtnqnTp0K6623nrbccstol112oX///nTo0OFLz4vjmFmzZvHCCy/w+uuv2zvvvFNYsGBBByCW9IyZnWlmr7Rw7bL7uW8LJS0Qgd0kXW1me0qiW7duhY033rhqm2220Z577skmm2zS6PNra2uZMmUKL7/8MtOmTSt8+OGHLFmypCqKokVxHN9F2Ahb1JphhVMW6zoaDTezXxXzeu3hCfkKyRi7LyFxPgT4M+Gkz0nAG2a2tJWvU0XYTL8ToXzvGMIK/EjCG+pC+tGnq9wTc0mbS7oWOMTMqrp27bpsgw02qNpmm2202267scUWWxBFX66kjeOY1157jQkTJjB16tT4/fffjxctWtRB0lIz+wtwRnN34LSixeECM8t93/v2SH7H5wBDoyj6TRzHW0VRZD169Cj07du3avvtt9eee+5Jnz59Gn1+TU0NEyZMYMqUKUybNm3ZnDlztHTp0qooiubGcTyCMHa26nduVeAJecok9SBsANuNMFhvBcwgDPj/AhYSyhyWEW7DdyLcAt8p+bM4eewk4G9mNqnE30Iqyi0xlzQwiqLfxnG8Xffu3ZcdeeSRHY466ihWW63tnZ/iOOa5557jD3/4Q+GDDz6oiqLogziOLwduaLhqLukW4GRgw0roqtKUJJG5LIqin8dxzIYbblj43ve+V7Xbbru163Vra2u59957efjhhwsLFiyIJE0ys5+b2TPpRN44SdcQyhIAupvZ/GJery08IW+cpHUJdzh3IYy9GwFvEBLrGYQ7OYsIbQo7E8bqjZPH9ie0uptIGKvHmtnM0n4H6SinxDx5Q3ViFEUXxHG80TrrrLPsmGOO6XDQQQc1mny31rJly3j44YcZO3bssrlz53aIoujfcRyfb2ZjG4lhLqE8Y7VKTiYldQX+GkXRgWZmW221lZ100knR5ptv3q7X/fzzz7njjjt4+umnC7W1tZL0RLJg1eo7FJXKE/IiS3bZb0cYxDcnDOydgQ6EwX4x8AlJEm5mszMKtSjynphLOiiKonvjOF5ziy22iE888cRo6623Tv06c+bM4dZbb7UJEyZYHMcFMzvPQmu9uhaHkwgru1ekfvGckHSxpF9WVVVFe++9t0488UStueaaqV+nurqa0aNHF2bNmlUVRdEncRwfaWbPp36hhKStCG+2IZySe3+xrtUWnpC3TlLW0p8wVm/Ci
gWTiBXJ+QeE39XJZjY3o1CLIu+JuaQfSrpW0moDBgzg5JNP1vrrr5/6dWbMmMFtt90Wv/HGG5K0MI7j75vZA0kMpxLuguxtZuNTv3gOJIsmvweO6dy5c3zIIYdUffe736VTp06pX+vJJ5/knnvuKcyePbsqiqIZcRwfnEZ9f7nyhNyVRN4Sc0kdJd1vZt/cfffd4zPPPDPq0qVL0a8bxzF333039957r0maHsfxgcB7ULk/75L6RlH0FLDB9773PR199NHtWs1qrc8//5wrrrgirq6ujiTdbWbfLdbmumTl7nlC+cK/CclMXIxrrSxPyN3KyFtiLmmtKIqeMLP+RxxxBCeccEKjZYNpW7x4MSNHjrSnnnpKkp4ysxOBt6iwFof1JXt4/taxY8duZ555ZrT33nuX5Loffvghw4cPL7z77rsilLFcVJIL54wn5K6k8pCYSzpI0oOrr7565/POOy/q379/KS8PhBXzc889tzB79uyIsMmss5ktLnkgRSbpYuD8DTfcMP71r39d1bNnz5LH8OKLL3LFFVfES5cunW9mhxd5tfxw4OHkw/4t7RsoBU/IXVvkITGXNFTSjb169dIll1xS9dWvfrWUlwdg2rRpXHjhhYWampqo3qbQilJvVfzYnXbaKT733HOj9pRrttWDDz7I6NGjDfhPHMf7l2qjfl54Qu4ykVViHkXRn8zs23vssYedddZZKsVKS3P++Mc/cvfdd5ukN+M43iWPNchtkaxqTQI2PPHEE3XkkUdmGk9tbS3Dhw+vWy2/LY7jk4t1raTjRt3BR7eZWdGu1cp4PCF3bZZFYi5p9SiKnjezAUcffTQnnHBCMS/XojiOue666+zJJ59UFEWPxXFcFi1PW0PSdpKe69ixY7ezzz472nXXXTONZ+7cuZx33nl1q+Vnm9mVmQZUQp6Qu0yVKjGXpCiKXpS0y0UXXaQsVsWbMmfOHM4888zC/Pnz58dxvLmZzck6pvaQtJGkN9Zee+1OV199dSar4k158cUXufTSSw14Io7jQ4s5qUr6BeHMAIDeZvZxsa7VQhyekLt2K1ViLmmNKIqmd+rU6StXXXVVlMWqeFOmTZvGL3/5y7hQKLwZx/G25dBNpzlJicpTW265pS655JJMVsWbMnbsWMaMGQPhzJWfZRxOSXhC7nKhmIm5pKooil6vqqra/Prrr8/VAF+ntraW0047rfDxxx8vNrMtmmu7lWdJK7JXN9hggw7XXXddVdZ3IBozY8YMfv7zn5uZTYrjeNciJ+VfBeq65pxmZjcV61rNxOAJuUtNMRNzSWtGUTSre/fua4waNaqqFPt6VtYnn3zCaaedFi9ZsuT9OI43M7MlWcfUFpIOA/621157cc455+TypPV//vOfXHnllQaMjuP4pKzjKTZPyF2upJ2YJyvjUzp27LjdzTffHK299trtD7JIli1bxo9+9KPC+++/X2NmG5VbFwdJ60ua0a9fv45XXnllVSk2brbV+++/z+mnnx7HcfxcoVDYr9jXk3QfoR1qDbCmmdUW+5r1ru0JuUtd2om5pK5RFP2nV69ePUeNGlWVp9Xahj7//HOGDh1aWLRo0TtJUl5WK+WS9gaeOfjgg/XjH/8417+/kydP5qKLLjIzu97Mzmj5GeUrzTE1v7OvKxtmNiH5Ydw9+dRrCietbduW14ui6KkoirYbMWJErpNxgA4dOnDDDTdUrbXWWl2iKJqe1CKXBUk9JE1df/31c5+MA6y//vpcc801EbBPFEV/Lvb1kq4MexCO8F4i6YBiX9O5YjKze5Ox+hhgLWC2pE8k9V7Z10ruYk7r1q1bz5tuuinXyThA9+7dufnmm6s6duy4UbJXpmxI2kbS03vssQd5T8YBBgwYwDnnnCPgJ5JyewBb3uR7BnZlpTWJuaRm6yEk/cLM9r366qujpk4Sy5sOHTpw0003VXXq1GnNKIr+nnU8rRVF0dM9evTocsMNN+Q+Ga+z8cYbc+mll8rMjpJ0Y
rGvZ2YvAh2BD4EnJT2atEt0rmy1JjFvxVj9pw4dOqw7atSoqmL0uy6Gnj17cuONN0bA9pLKYuNhcsf42c0331znnntueQzUwJ577smwYcMALpK0Y9bxlIOy+Z/rykczifnFwAJJ32/seZLWBy477rjj1NQR7HnVqVMnhg8fHsVxvJ+k3Pe7lfRDM9vh0ksvzWXNeHO23nprvvnNbyJplMIpu0VlZsvMbD3g+4Sj22NJ5fUD6lwjmknMfwf8t6m7QpIOMLNvnXfeeVG3bt1KGXK79e7dm1NPPVXATyVtmXU8rXB7VVVVj0suuaTs8rWBAwey9dZbWxRFj/tCRsvK7n+wKx+NJOYXAKsDIyWNVjgRdbkoip7s06ePHXvssaUONRWbb745X/va10zSXXkuXZG0lqQbjj76aOVxs2xrnHTSSfTo0UNRFD1Rqmua2Z1Ar+TDWckbTOfKXiOJ+SlAd+BhSefWT6YkVUn66y677BIPGDAgo4jbZ+DAgfTt2zeOougfWcfSHEm7AkN++tOfRuVyF6KhCy+8MIqiaE3g5qxjyTtPyF3RmdkE4BvAsuRTXQgD/yRJG8LyUpXNfvOb31RlFGYqfvzjH6tr164d8ly6EkXRE7169VLWfYLbI4oiLrnkkqo4jncuRelKHTObmyQu1wIXJHd+1ijV9Z0rJjO7FziXFWN15+TjRyR1h1Cqstpqq3XJa5eP1ho+fHiVpHXzWrqSlKqM22677QqlOn2zGLp06cKZZ54ZAad46UrzvMuKKwlJawI/B44GNgWWAN0IHSyGAH86/vjjo29/+9uZxZiWN998k5/97GcAg8xsbNbx1CfpFEmjRo4cWbar4/XdcsstPPTQQ8vMrFepD2iStBXwr+TDwWZ2f4qv7V1WXCYk9QV+ChwB9CYk512A9wnJ+Z3/93//R2tWx8ePH8+zzz7L7NmzWWONNZg/fz59+vRhyJAh9OnThwULFjBmzBhOP/30In5HTRs3bhw33nijAVua2ZuZBNEESbd17NhxyN133718dXzEiBG88sorzJ49G4BNN92UNdZYsR4wf34YAvfZZx++9a1vlT7oZpx99tnxv//9708LhcJXso4lTd720JU1Sb2AA4EjgYHAWj179ozvuuuu1FfHs5oQhg8fbhMnTny/UChskOoLt1NVVdV/999//15nnnnmSj83r5PBMcccU6ipqRllZsNKdtFEciv/eUJZ1r8JbeTiFF7XE3KXuWRfz0HA/wAHRFHUdYsttuC3v/1ts6vj1dXV3HjjjWy66aYMGjSIfv36Lf/aggULuPzyyxk2bNjyxwwZMqSo30dzTjnllMJHH330TBzHX8ssiAYkdQQWDRs2rGrgwIFf+voRRxxBnz59uOWWW770tdmzZ3PGGWfQp08frr322hJE2zo1NTUce+yxZmbfNbO7s44nLWmOqeW1m8tVBDP7FLgfuF9B7aBBg1L9WVzZCSFtQ4cO1UsvvfRVSduY2RupX6ANJO0H9DrppLad1VD3pqVuMmhssK+bDJ599tmSTQaHHXZY1Z///OchQMkT8uSAoj0kHQ48DBQk9TezV0odi3NpSw47uwO4Q9J6cRy/P3To0GafM2LECB599FGGDx9OY6crd+vWjeHDh3PssceycOHCzFdyjz/++Korrrhif0mr5+jAoF927NhRhxxyyJe+MGPGDAB22GGHRp/Yp08fBg8ezJgxYxg3bhyNJfRZ6NKlC9tvv7299tpr/wdUTEKeprKuAXMV4UdRFEVHHHFEai84YsQILrjgAoYNG8Y555zzhWQcVkwIZ5xxBtXV1Y1OGu3Vu3dv1l133YKk3CxRRFF0Zd++fQvdu3dv82u0djKYOXMm48aNa/N1VsZxxx2HmXWWdExJLtgIM3uEcFsfoFrSrVnF4lyRXNmzZ89Cw/G0vssuu4xHH32Ua665psVxtS5RLMb4uzL23XdfVlttNQMuyjSQeqIo+sm+++4bNdaO9pVXwnv9HXdsu
hy7a9euQFiYypOTTz45iuO4n6SNso4ljzwhd5mKoujs3XbbTWn1wc7ThPCd73ynyswOTG4/ZkpSjziOdzrhhBPaVRaUx8lgtdVWY9ttt7UoijLtemJmi5LblmcBJyUbPtfJMibn0pDcyRx09NFHNzl+jBkzhvHjxzNs2LAvLYI0ZrPNNivK3cm2OPDAA6uiKDo16zgAJO0Tx3GvpjbdT58+HWh6UQRWLJzk7SyPjTfemLXWWqsAXJN1LHnkCbnLjKQd4jhe7+STT06lnjVvE8IBBxxQt/JyYdEu0nqXd+3aNW7vm4+8TgannHJKFMfx5pIyr9k3syuAujg+kpSLid65djhNUtVRRx3V6BdnzJjB2LFj2XTTTVtdItG1a9fMV8frfP/73yeO4x6SvlwjUmKSrtpkk00KPXo0fsRCdXU1ffr0obn+78899xxAbspV6hs8eHCVpCO8L/mXeULusnRB7969C1/5Svs3Xed1Qth7772rqqqqStaWrylRFB1z8MEHt3vTbF4ng0022YQePXosI/S6z5yZvZeslt9P6Lu/UFK+zxZ3rglRFP14wIABTd7JHDFiBMBKbc7s1q0beWnn161bNzbZZJNY0rlZx2JmO/3v//5vo2P17NmzWbhwYbMLImPHjmXhwoUMHz48dyvkAIcddhiSIuC7WceSN56Qu8xUVVXtuu2226bSWSWvE8I+++xDHMe9W35k8UhSHMc9Dzig0UP3Wi3vk8EWW2zRIYqivUp60RaY2WBgD0J9+ZKmTj50Luc22n333Rtd0ZwxYwYzZ85c6QWOfv36tepOZqnsuOOOURRF22UZg6TtgGiXXXZp9Ot1ZYCNlQwuWLCAESNGMG7cuCY31OZBFEWss846MXBY1rHkjXdZcZmJ43jdnXfeud2v054Jodj69++PmUWS+prZrKJfsHF7S7K+ffu26xZhS5PBmDFjeOWVVzKbDLbffnsmTpyYuyPtzezFZB/BO8CTkh4DBlpaPWedK6Lk4KvV99hjj0a/XndHLC+r3W2122678cADD6wpSRn+bn6rS5cuy6IoajQ3qxuDn332WaZMmfKFr3Xr1o0dd9wxs57uK2OzzTbr8PHHH++adRx54wm5y4Sk3kCHNBLyPE8IHTp0oFOnToXFixcPBi7PKIwju3XrVqCdv+95nwz22msvbr311s45a18GgJktA9aT9D1CG7k4eZP2VsahOdeSb1ZVVcXdu3dv9I563bkEzW30zlJ1dTXjxo3jnHPOafZxW265JYCAAcCkEoTWmP2++tWvNlm5UF1dTdeuXVv8XvKguX/3AQMG8Nxzz2W+3ydvPCF3WRnUsWPHQqdOndpdspL3CWHdddflrbfe2p+MEnJJ+2y00Uap1I/neTJYe+21iaIojuP468ADWcfTGDO7U9JDwKfALEnDzexXWcflXDMOW3vttWOaKHGtG3979259Zd6CBQua3YeShhkzZvDcc8+xYMGC5TE2J4oiunbtumzhwoVHk1FCXlVVtd3WW2/d5L/zwoUL2WuvXFXlfUlr/t133313rrvuutUk9TCzz0ocYm55DbnLykErM4A3p60TQqlstdVWVVVVVZkV9EnacptttmlXuUrdZJDXusQ6a665ZgwcnnUczTGzucmGz2uBC5L2iGu09DznshBF0a79+vVrcvGu/mm9rTVmzJj2hASs2DfUlH79+jFkyJCVWqjZYIMNqiTt097Y2qpQKPTaddfGKzlmzpwJhO5gWUrj371bt2506NAhBo5OObyy5gm5y0QURX3XX3/9VDZ0ZjUhtNZmm22Gma1Zsgs2YGZdk9uxbZaXyaAlffr06QDkZ6dYM8zsTGDr5MPPJQ3KMh7nmtC7b9++TX6xrnXsRx991KoXmzFjBuuuu24qgaVtgw02UBRFmRxaI6kzoG222abRr9eVCTa3qb6cdOvWLQYq45tJiSfkLiudV1999VReKO8TQpcuXTCzov2uSaqS9Jikv0r6maT9JS0/j
tPMora8aamvXCaD1VdfHUlds46jtcxsKmEcngDcJ2lq0hLMubzo0KVLlya/WLd3p+G+kqaMGzeOb33rW6kElrZOnToBpDMxNULSVyS9KOmPkoZJ2j1JxAF6QSidaUzdoWx56kzTHh07dgQom7G6FLyG3GVC0mrJ4Ndue++9N2PHjmXKlCmtqq8bN25cSTcfJpNZB0lvFukSHYC67iKHAIuBLpI+ASYCam5CbY1ymQyShDydH6wSSTo67C7pcOBhoJBxSM7VV9XcWN2vXz8OPfRQHn30UQYOHNjsGHHZZZc1OvaOGzeO2bNns/feey9fWHn22WdLvl+lc+fOAL2KOFZvAHQCdgO+CSwljNXvAf9q6kl19dgrc5Dd+PHjmT9/PgsXLlz+BmjBggWcf/75XHvttUC2/+6ekH+ZJ+QuK6md0pX3CSE5kExAKeo9VickdEuAdYAt68XQJuU0GSTfZ1muMJvZI5K6ADVZx+JcfU2t2tapG1PPP/98hgwZ8qVDwep+108//fQvbeacMWMGe++9N48++iiXX345t9xyC7Nnz271inuaJGFmHSjNWN2ZMFbXAusRkvNG1XUSa+0enhkzZtC7d2969+7N5ZdfvnwMfuWVV5afD5H1v3syVqdStlopPCF3mTCzJbW1tam9Xp4nhEWLFgEsM7OOqbxgA5KqgOnAMuAFYDyhS8DrZrZEUlxTU9PmjLycJoMlS5ZgZova9SIZSmKXJCP8xYDeZvZxtpG5VVghGcOadfrppzNw4EDuv/9+xo0bB4T9PV27dmXQoEFNntzbrVs3unXrxvTp05ePFX369PnCwslll132pefNnDnzS5/v379/u04IXrx4MZLmmFlRTjWT9FVgMvAR8BxhvJ4ITCMk5e/Uf/yYMWMYP3788sYF48aNo7q6miFDhjQ7Hnfr1o0+ffowZsyYL9w1fvbZZ5c/L+t/96VLlwIsXKknVThPyF1WFi9Zkm6r6GJOCO1RU1ODpKKVIZhZAWhy15WkeOHChSu9EpH1ZNAWtbW1mFmlDfIfSTrNzG7KOhC3SlpWU9O6mzb9+vVb6TtcdW/Sq6urm/z9b+w1R4wYkXrp4eLFiyHcXSwKM3uPcOfySyR92vBzQ4YMWanTp+vU/ZuOGzdu+R1JYPn4Xf8xWf27Jwl56dqdlQFPyF0m4jh+b/bs2VuT8i2rYk0I7fHWW28hKbMkUdLiWbNmdV3ZPu1ZTwZt8fHHHy8D3kvtBTNmZpJ0HzBS0pXAmmaW3q0l51ogad67777bveVHtt2MGTNYY401it6bvCUffPCBxXHcuu4A6asBmDVrFs11tWmtuoWUurG2/sezZ8+mT58+mf67L1y4UIQ7uy5RlrWWriI89eGHH2Ydw3LFHJimTp0aFwqF11J/4VYysxmvvvpqSY+CbmkygOL8m//3v/+NgMdSe8EcMLPBwB5AF2CJpAMyDsmtQgqFwstvvvlmUTcav/LKK0Xr4DR//vxWP/Y///lPwcyeLUogLTAzi6Los+effz6V11uwYMHy8RfCvp66trV1bWyz+nevra2ltra2ChhblIuXKU/IXVbGLl68uGrZsmVZxwEUd2B69913Y+CZorx4K5jZ82+99VZJO3dkMRl8/vnnLFu2LAIeTO1Fc8LMXgQ6Ah8CT0p6VO3Zqetc6z3x8cfF3cIwffr05e0T0zJjxgzGjBnD2LFjmTlzJiNGjFheytiU+fPnVwF/TTWQlWBmU19//fVUFk/69etHv379GD9+PNXV1eywww507dqVcePGLR93s/p3f+mll5C0zMzysyqXA16y4jJhZrOiKIqrq6ujnXfeOetwmD59ers2AzUljmNqamo6kO1R7g9/9tlnPyzlBetPBl27dmWHHXZg+vTpjBs3bvkEkPa/+Ysvvoik2jiOK/IoZjNbBqwn6XvAHUAsqa+ZvZVxaK6yjV26dOlNNTU1tLd9alOK0Wavbgxqbdnd22+/TXKC7j9TD6aVzOy5d955Z
2dSys0algM2/HfO6t990qRJRFHkyXgDnpC7zERRNOfll1/unYeEvFh9V6dOnQrhbuQrRblA6zwWFVOLsgAAIABJREFUx7E+/PDDkh6IVOrJYMqUKUh6N9UXzSEzu1PSQ8CnwCxJw83sV1nH5SqTmc2Jomjpyy+/3HG//fbLOpzlBg1K92DbF154gSiKPi8UCiUt72vgLwsWLPhZhtdvURr/7tOmTSsUCoXJKYRTUbxkxWWmUChMef311+Os4yim8ePHE0XRl3bPl5KZLY2iaOHTTz+dZRhFN23atGVxHL+UdRylYGZzk9W8a4ELJJmk9h3H6lwToij6YMKECVmH8QX1S+LSUF1dbWY2LdUXXXnjzUz/+leTZwRlLo1/92Qf0RPtfqEK4wm5y9JV77zzTrRgQeV2PnrqqacKcRxnvnEljuNx48aNq9gTIOfMmcOcOXM6AJdnHUspmdmZwNbJh59LSnfZ0DmgUCjcOWHChIodP2pra5k6dSpmdkOWcSQbO9+86667Knah6vnnn2fp0qURcFvWseSNJ+QuM2b2RBRF8+64446sQymKV155hQULFkTAWVnHApz56aefVr31VmWWG996660WRdGHGZcGZcLMphLG8gnAfZKmSvKx3aVpeG1trZ566qms4yiKe++9F0lLzOyurGOJ4/j8N954Q0lP9Ipz1113FSQ9W84HuBWLD9ouU3Ecj3zyyScrcuVl9OjRhSiKJplZ5psMzezdKIrevPXWWytu5SWOYyZMmGBxHH/5WLlVhAW7A18HtgQKkorTNsitcsxsqaR/3H333RU5Vj/88MOFOI7/kHUcAGZ2n6Sae+65J+tQUvfJJ5/w3nvvVZnZGVnHkkeekLusXVxbW6tnnsmsK2BRfP7558ycObMqjuM8rI4DEMfxr1577TXV1lbWuTIPPfQQcRwXgExvN+eBmT1C6FcOUC3p1izjcZXDzM6YPXt2Vd05ApUiuZNZBfwi61jqxHE8phJLDJM7mbPNbErWseSRJ+Su5BTsIOks4BlJ/P73v6+owWf06NFEUTTXzHJzj9fM/iRp0d133511KKkaO3Zswcz+bGZZdkfIDTNblGz4PAs4Kdnw2ehx3c41R1IHSXtKGg7cG0VRfMstt1TU79no0aPjKIomm9ncrGOp55c1NTXRyy+/nHUcqYnjmBdffNHiOP5t1rHklSfkriQkdZZ0vKSxwDzgOeBSYGczGz579uyoUuoT58yZwz/+8Q+L4/jirGNpKI7ja/785z/bZ59lXkWTir/85S/MnTs3An6adSx5Y2ZXABskH34k6dQs43HlQdJakk6V9DjwOfB34DxgkziOr3jppZf09ttvZxpjWiZPnszMmTOjOI5/knUs9ZnZ/CiKHr/qqqsKcVwZVYZXX321xXG8hNAZyjXCE3JXKocQdlX/D9AdWI3QR3lPM7sIuPn666+Pa2pqsoswJeeee25B0nQzy93AY2bnAx9ccMEFZX9HYt68edx+++0GXGJm72cdTx6Z2XvJavn9wMis43Fl4TRC0nQQ0JlwQuybwLZmdk4URRMuuOCCsk8Ua2tr+c1vfhNLGmtmz2UdT0NxHB9VU1Oz7Lrrriv7OxJvvPEGzzzzjMzsGL+T2TRPyF3RSepJqO/tmHyqBpgEbGVmdX2jh8Vx/OlFF11U1qP8Pffcw+zZsxXH8f5Zx9KUOI4PfOutt6JHHnkk61Da5bzzzisA7/qhOC0zs8HAHnUfSzogw3BcTknqC1xMWDCBMFY/APSvOxE2juODP/vss7IvXbnsssvi2traGjM7JutYGpOUnh335JNPatq0rNujt10cx1x88cUFSY+Z2UNZx5NnnpC7opHUU9KbwFzCrfO6VYgbgX3M7JO6x5qZxXF80NSpU8u2tdacOXO45557DDjbzHJ7LLCZvQlcMWrUqLItXfnLX/7CO++8E8Vx7IllK5nZi/U+fFLSo5KUWUAuNyT1lWTAzORT1cAy4Awz+66ZLe/BZ
2bzzeyUv/3tb2VbujJ58mRefvnlyMy+aWa5vVtoZg9IeurCCy8s2zsSV199tS1atKjWzL6ZdSx55wm5S12DRHwzYGhy23x/YGMzO6uxQTDpIX3z9ddfH8+bN6+kMbdXHMecc845BUlvmtmVWcfTEjM7G/jg/9u79zi75nv/46/P2jNJSBE0dWurpVqkpHKhLi0q8SulVXEpqqGNIDgoKj3iFsJJVasexyVCUJwe9FdytL+eH/nV+TnUZSRCaYMUTVL8hGRMmSQzs9fn98d3jYwx9732XnvteT8fjzzGzOxZ38/E5Lve813fSzLKnCsrV67sOFXllazryanJhGlksZl9NutiJBtdBPEJSV/9ZUJfPberr3P326MoenLGjBnFtra2SpWbiubm5o5TVap+9Mfdv9Hc3Nx29dVX5+6JxLPPPttxqsq6rOupdgrkkprugnh7p+7uRXf/Wy+XOS2O42WnnnpqMU/zyadPnx6vXLmyLU8jtnEc77ds2TKfOXNmboZe3n33XaZNmxYDz2uqysC5+y+BzZJ3XzGzqluALOXTXRB39/8D4O7reluXEcfxxKampjVnnnlmbkZvW1paOOWUU4qtra1vV+tUlc6SqSvffuSRR7jzzjuzLqfPli5dykUXXeRm9m+aqtI3CuRSst6CeH8kU1d2bG5ufueUU04p5mHP7JkzZ8ZLliyJ3X1MNU9V6czdl7r73g0NDfzsZz+r+tGX5uZmkp+J5XEc75Z1PXnn7quT0dBrgAuT7RE3yrouKZ/egnh/JFNXvrh8+fK26dOnx9Ueytva2jj99NOLjY2N78dx/PlqnqrSmbv/Hjjx7rvv9vvvvz/rcnq1YsUKzj33XAcWxHF8XNb15IUCuQxYmkG8I3dfF8fx5999992m73//+1U7Uh7HMTNmzCg2NDS4u3/F3f+cdU39lSyqPejhhx/2K6+8smrvqI2NjSQ/CyvjON4pTzfTaufuZwM7J+82mdkRWdYj6UsziHfk7n9z9zFLliwpnnPOOVU7Ut7S0sLJJ59cfPPNN9e6+47VcHpyf7n77cAPb7nlFq/mUzyXLl3K6aef7u7+ZBzH/yPrevLE0tqBJvnHTjLiIjUs2TXlKUIIhxDESwrh3bQzPIqil4cNG7bFtddeG22xxRZpNzFgLS0tnHfeecVXX3216O67J/Pfc8vM9jGz/xo1ahSXXXZZoa6uLuuSPrBixQrOOuusuLW19W9JGK/5uYjl6k97um6ywPNxYA9gCTDK3aszYUmfJLum/LXDhyaUGsK7aedzZvbcVlttNeTnP/95YcMNN+z9iyqksbGRM844o9jU1NQUx/EO7v5O1jWVwsx+AMw96KCDmDZtWlXlrUWLFnHppZc68FCxWBwUYTzNvlqBXPqsUkG8U5v1URQtdPcvHnPMMXbMMceUs7k+eeqpp5g9e3bc2tr6nrvvVisLC81sFzN7YtiwYUMvvPDCwi677JJ1Sdx6663cd999mNmTcRzvPVhGxrMI5B1eczDwu+TdL+X9l83BqFJBvFObW0VR9GwURZufffbZ0Ve/+tVyNtcn8+fPb18A/locx7u4+/tZ15QGMzvczO7ZfPPNufzyywvbbLNNpvW0tbUxe/bs+IknnojM7I44jr+XaUEVpEAuFZVFEO+ihnOB2VtuuaVfccUVhZEjR1ayeSCMis+aNStetGhRBPw78N1aC4hmVm9m97v7wXvvvbefe+65lsVo+YoVK5gxY0Zx1apV7u5nuPuNFS8iQ1kG8uR1GxD2oAa4xd2npFmHlEcWQbxT+wbcCJy08847+8UXXxxlMVre2NjIBRdcUFy2bFkEXOHhQLSaYmabR1H0sLt/cdKkSTZ58uRM6li8eDGzZs2K161bt8bdD3P3BZkUkhEFcqmIagjinerZKoqiP7j7Fyo9Wt5pVPwQd//vijWeATM7yMx+PWzYsKEXXHBBYfTo0RVpN45jbr/99vZR8cVxHE/I+yPmgcg6kHd4/XnAT
5J3t3D3t9KsR9KRdRDvop7doih6KIqiTc8666xo3333rVjbDzzwADfffLMDy+M43r9WnmB2x8xOM7NfVHq0vKWlhZ/+9Kfx448/HpnZf7j7Ee7eWpHGq4gCeQWY2SbAGOALhOODNwAKwNrkz1vAIuCvtTbPstqCeGdmdq6Z/Ut9fT377bdfYfLkyWy88capt9PW1sZvfvMb5s+f39bU1FRHjY6Kd6fjaPmmm27aNmnSpLpDDz2UKEp/Lfjq1auZN2+eP/roo14sFovu/k+DbVS8o2oJ5MnXfBJYnrw7zd1vSLOmUpnZJ4CxwGcJ/fQwwAj99Brg74STgVd4Wje8KlFtQbyjjqPlw4cPj7/xjW8Ujj76aIYMGdLbl/Zbc3Mzd9xxBwsWLCiuXbvWgCtrcVS8O8lo+R/iON516623Lh5//PGFffbZpyxtLV++nLlz58aLFy82M3s/juNJ7v5gWRrLAQXylJlZBOxFOAxhbPJna+BZ4M/A+4SOvUjo7Iclnx8DjACeIXT4TwMLvMMJlHlS7UG8IzOrBy6IouiMOI4323777YsnnnhiKiO5b7zxBnPnzvWFCxfi7i3u/u/Aee6+suSL55CZbQNcbWaHR1FU2GOPPWzKlCmWxrShhoYGbrvttuKyZcsKURStjOP4auCng+WXnu5UUyDv8LW/BiYRprJs6u4V35PUzIYA+wPjgXGEvvpjhP53aVLbWsAJ/fQGwGeS1xmhj+7YV+dyTnE1B/HOksGt2VEUfc/dh40ePdqnTJkSbbvttiVf+y9/+Qvz5s2LlyxZEkVR1BTH8Y3AJe6+puSL51CyDugad99v6NChfsABBxQmT55MqdOG4jjmwQcf5J577imuXLmyEEXRK3Ecz0x2fhnUFMhTYmabAScApxI68T8QOuuFwJK+hAIz+zjrQ/x4wmmUDwDXA0/mYUQmT0G8K2a2fxRFP4njeGxdXZ2PHDnSd9hhh8Juu+3GXnvt1WNn1NLSwuLFi2loaODFF18svv7666xbt64QRdGyOI5nAvPy8P+wEpIRr9OiKPpxHMdbDxs2rG3rrbeOdtppp2j33Xdn1113paf55k1NTTz++OMsXryYl19+ue2dd96J2traLIqiP8Zx/MNkC0ahOgN58vVfJuzEAvA1r9BJh2b2KWAqMIUQRB9jfV/9Sm//RpOf3W1Y31e3D77cCdzg7kvKV3168hTEu2Jmx0ZRdGkcx58bMmRIccstt2THHXcsjBs3jrFjx/Y4et7c3ExDQwMLFy7kpZdeKr711lu0trYWoij6UxzH5yd7dQtgZkOBS6IoOjWO402GDx/e9qlPfaowatQo23PPPdlhhx16fNL59ttv89hjj/Hss8/yyiuvtK1evbrg7rG7/w44u9anAfWHAnmJzGwscBpwOPBbQnh+PI3gZWabsz7kNyXX/lU1jsTkPYh3lhxqchRwYBRF4919G3cfUldXF9fX18eFQoG6ujqKxSLFYpG2tjZraWkpmFkxiqKVxWLxGeBh4B7v/UTRQS0JBt8B9isUCqOLxeLHgWjIkCHFuro6LxQKFAoF2traKBaLtLS0RMViMTKzdWa2PI7jp4AHgV9X47+NrFVrIE+uUQcsA7Yi/D/8ejl+aU2eXB4ATAP2JYTnGz2l/f7N7NOsD/kvEPrq+e5edWfB5z2Id2ZmWxH6j68VCoUxcRxv4e6F+vr6Yn19/Uf6j9bW1qitrS0ys9Yoit4oFotPAwsIffWgW2PSH0neOQLYp1AojCoWiyMAGzp06Ad9tZl9cF9ct25dIY5ji6JoDfBKHMdPAP+L8G9jUD+57IoC+QAlcw2vA3YHbiCMfpZlkVJyM5lIuJmMJ8y9rIojtmotiPfEzDYFDiFMMRqe/FlLmIa0Gvi9u7+WWYE1xMw+T/iZ3wTYCBhC+Hv+B/A68FvP4YEcWajmQN7hWpOB25J3t3P3V0u9ZodrfwGYR/g5ug64y93fS+v6ndoaShicOQ3YDDjR3Z8sR
1v9VWtBvCfJU5CDCP8PNiJMOVoDvEdYs/XAYJ02mKbkadFYYB/C3/NwoI71ffVSwn2x5s97SIMCeT8lP4BHAb8g3EAucfe1FWz/K4Sby1PAP2X1G/1gCuIieZaHQJ5cb1NgVfLuZe5+UYnXKwBnAT8GLgGur+SieTM7CrgWuB24uJL3iU51DJogLpJnafap6W+XUGWSUfF7gYuBb7n79Ep3sskWeaMJv+X/ycwOq2T7VqYj7kVkcHP31cmN6BrgQjPzZOpYvyWj4o8AhwJ7uPu/VnoHK3e/B9gV2B5YZGZ7VLJ9K9MR9yJS/Wo6kJvZPsBzhEcwY7J8DOnuze5+NmGk/iozuymZi1k2CuIiUglJ37Zz8m6TmR3Rn683s+MICzV/RVgs+tdevqRs3P0tdz+CMEI/38x+VO42FcRFpGanrJjZQcAvgWPd/aGs6+nIzIYDvyHM2Tom7blampoikm95mbLSxfWNsAvLHsCrQD1h96oTulv4aWanAdMJi0NfKEddA2VmWxMWrv4OmJ724lVNTRHJN80h772WbwE3AYe5++O9vT4LyUKiO4GNgW+mEcoVxEVqQ14DeYd2DiaE2JiwMG8ecGbnQGtm5wKnABPTXBSapmTnrN8DDcDpKe3GpSAuUgM0h7wHZnYAMBc4uFrDOEASwL9D2BrxV6VMX9HUFBGpMq8lbyPCLg7fBy7r+AIzm0rY2WTfag3jAMki/AmE3blmlXItTU0Rke7UVCA3sy8R5iAe4e4Ls66nN8ment8FNiTsgdsvCuIiUqVeJITw1wjb1g0Hzmmfj21m3yYstJ/o7n/Pqsi+cvcmwpZ8h5nZWf39egVxEelNzUxZSaaAPA3Mdvc7s6hhoJI55YsJx7P3ule5pqaI1La8T1np0J4RDve5iLDvsQHnA+cAh3rOTmc1s20J95n93f35PrxeU1NEapjmkHfd/uXAF4Fvl+PUuHJL9iq/G9ilu33KFcRFBodaCeSd2t6JsHhzEvCv7j690jWkwcxOAk4G9nT31m5eoyAuMggokH+07XGEBUSj3f3NSrefFjO7Bhjp7sd1+riCuMggUouBPGn/GGAGYRvaXJ4EmIz6/yfwiLvP6vQ5BXGRQUSB/MPtDgUWAle6+12VbDttZrYh8CzJ1BUFcZHBqRYDuZltSejfDnH3hkq3nyYz+zThvrO/uz+vIC4yOCmQf7jdGcA4cjpVpbNk6so9hIVQn0s+rCAuMojUaCD/N+A1d//nbj7/EKEvH5F86BWgsdPLRgDbJf/d6O6blqPWvkimrkwDvtThwwriIoOIAvn6NocAy4D93H1JCterihuCmT1BOFhDQVxkEKq1QG5mnyScmvxpd3+vl9c6PfStyZPDuYRpL9unXmwfmVkErAI2QUFcZFBKs08t69HtFXA48EIaYRzA3SfCh24IXXb2HW8IabTbhUuAK4Cby3R9EZFKmgrc1Ycw3t6nLujuNe7emIxO35tiff3m7rGZzQR2UxgXkVLlfR/yaQxg/+6e9PWGAJxEGEEvhwcJoy67l+n6IiIVkTzJPAm4oQ8vn5C8fainFyV9cOenl1m4DTjEzEZmXYiI5FtuA7mZ7QJsD/xHypfO/Ibg7jHh5jWtHNcXEamgw4Al7v7nPrx2YvK22wGRDso1INJn7r4KuI9wCJKIyIDlNpADpwI3dbcPbAmq5YZwK/BNM/t4GdsQESm3acB1fXztBAB3/1DfamYTkp1MOro7hdrScB1wqpkVsi5ERPIrz4H8EOBXZbhuVdwQksOB/ov1I/YiIrliZhsTFsrP78Nre5oueGTnPtndF5VeYencfSGwDhiVdS0ikl+5DORm9glgI+DllK9bbTeEp4GxZW5DRKRcxgDP9fFJ5rjk7YemC5rZBNbvbFWt1FeLSElyGcgJHd+iMuw7Xm03BHXyIpJnYwn9WF+0Txc82sweSv6sJvTHVTEa3gP11SJSkrxueziWcEpa2jreENr/u31f8p+Uob3eLATGm
JnVwqFHIjLojAX+dx9f2z5d8EPB1szm0Msi+3JIBmJOdvcj+/DyhcBRZS5JRGpYngN5OeePV8UNwd3fNrN3CbvJLK10+yIiJRpLOFOhR8n6nBF0PRLe6O59WWSfimTq4tF8+BC43jwD7GJmde7eVrbiRKRm5XXKyhhSfoRZTTeEThaiR6EikjNmthHwSaAvB7e1L17vqp/tPIVwhJnNLqGuOT193t0Xufv5ndvt5Wv+AawAdhpoXSIyuOU1kI8EXk/5mhW7IfTTG4TvV0Sk6iT9YVfHRm8OrOzjiHH7FMGPhOAuBkOmAj2G6oy8AWibWhEZkNwF8qTjH0bYZipN1XpDWEv4fkVEqoqZ7QmsAhrN7L/N7HIz+6aZbQNsQOi/+mIMdNnXdm5vBDC+845XVUJ9tYgMWB7nkNcDRXcvpnzd/t4QKrXIcw1wlZldVaH2RKRKmFlZFnOX4bobA/sAewJFYAjwD+CdPtTSPle7L9MQ76XDYIiZTSWssbmb9fO9j+7jQsy0rSX8EiIi0m95DORFoJDmziNVfkOoAx4BnkzpeiJS/c5L3qb9i3ja192dEMTbCE8tNwBWAk8Rzon4Vh+u0b47SbeDIclCy9nAOHef2OFj9xCeWN7r7tsna4EmdnedMqsj/D2IiPRb7gK5uxfNrI0wUt6S0mWr+YawAXCfu1+T4jVFpIqZ2XkA7v6jar6umW1B6BcXExagL04WOGJmOwCTevjaOYS1O+0DGVM7HM7WbrPk8yOS98/v8LlGd280s/FJDe0nLJ/coY17u2h6TBcff8jdb+r2G+2bYYQnmiIi/Za7QJ5YAwynxEBeiRtCCoajTl5EqpC7/z/ghG4+3Uzov7r72pL6yQ7zyCcAJ3Xzmo88qTSzOaW23Q311SIyYHkN5C8BOwOPlXKRStwQUrAzcGeZri0iUi5vAMPNbDN3X1WOBpIBlFXu3liO6/ejDiNsefhSlnWISH7lbpeVRNXszV3OG4KZ1QG7Eg6dEBHJDXePCVNZOj91TNMEephqWKLN+vHa7YD33P2tMtUiIjUur4H8acKR9tWgnDeEHYHX3f3dMl1fRKScnqa8gyfjCQvtU2NmY5JzJs4nzDefkyze78lYwkCRiMiA5HXKykLg7KyLSIynfHuSjyXc0ERE8mghfdtpZUDKsb2huy8i7Lh1fm+v7UB9tYiUJK8j5C8AnzWzbhcMVYq7H9nb3uUl0KiLiORZ1Uwv7KAcpyyrrxaRkuQykLt7C2Fu4lezrqVckkVC+6P9x0Ukv14CRpjZZzKu4wNpn/KZDAxphFxESpLLQJ64jXS3F6w2ewFDgT9mXYiIyEAkCzvvIJzVUKuOBR5x95VZFyIi+WUpHXb5wVHM7m6pXLD39oYDy4Dd3H1ZJdqsJDO7C2jQgUAig0+5+tNK99NJm18gnDb8aXdfV6l2KyF5krkION/dH8y6HhGprDT71NyOkLv7+4T9uWtu5MXMPgEcDNyedS0iIqVw9xeB5+jh1M4c+zLwMcq305aIDBK5DeSJG4ApZjYk60JS9gPgf7r76qwLERFJwfXAtKyLKINpwA3J1BwRkQHLdSB39yWEHVeOybqWtJjZMOAUwi8bIiK14AFgWzMbn3UhaTGzbYBDCeuZRERKkutAnrgA+BczG5l1ISm5FHjK3bWFlojUBHdvA2YAc2vhiWYyd3wO8At3X5V1PSKSf7kP5O7+BGEu+XVZ11IqM/syMBk4LetaRERS9ktgBfDPWReSgu8BnwRmZV2IiNSG3O6y0qntDYBngAvdPdVjlCslmaryDHBRXr8HEUlHLe2y0qn9bQhnSEx098VZ1FCq5Ht4Bjgwr9+DiKRDu6x04u5rgBOAa3M8deVS4HmFcRGpVe7+d+A84LY8Tl3pMFXlBoVxEUlTTYyQd6jhKmBn4DB3b82qjv4yswMJj3N3dfe3sq5HRLJVqyPkSQ0G/JZwiucPPa2bUAWY2RnAFGB8cmK0iAxiGiHv3gWAAfPML
Bffm5ntBdwFHKEwLiK1LgngxwMTgR9nXE6fmdlxwI8IAz4K4yKSqlyE1r5KOskjgG2B66o9lJvZWOA+4Hh3fzTrekREKiHZmeRA4AdmdmbW9fTGzA4Hrga+7u6vZl2PiNSeqg6sA+HuzYS9YUcBt5tZfcYldcnM9gV+D0x19//Muh4RkUpy99eBrwGnm9mFyVSWqmNmJxB28TrY3V/IuBwRqVE1F8gB3P1d4OvA5sD91bTQ04LjgXuB77j7/KxrEhHJgrv/DfgKcCRwvZltmHFJHzCzOjObAcwE9nf3RVnXJCK1qyYDOXwwUn4Y4STPP5nZkRmXhJltBdxP2GXgQHf/Q8YliYhkyt3fBPYFNgEWm9neGZeEme0M/JFQ197JqdAiImVTs4Ecwpxyd/8RIZhfZmb3ZDFanoyKH0fYf/c5YJy2zBIRCdx9tbsfC5wP3GtmP8titDwZFZ8O/F/gFsLAyfJK1yEig09NB/J2yWmeuwGvEUbLJ5vZ0Eq0bWY7ERZuTifMQbxQK/RFRD7K3e8DdgW2IoyWH1yJxfnJoMnehFHxAwiDJnPytCWjiORbTe1D3hfJ8fSzCIs+bwHmuPuylNuoJ4zKnwrsRFgQdJW7r0uzHRGpTbW8D3lfmdlhwMXARsCNwK3u/k7KbXwMOBaYBgwHZgO3KIiLSF+k2acOukDezsx2JATm7wKPEk5fe8Td3xvg9SLgc4TOfSrwMnA9cJ9GxEWkPxTIg2TnlT0IgfmbhDU4NwMNAx3gMLM6wij8CYT+/xFCX73A3eMUyhaRQUKBPEVmNpwQok8EvkSY1vI0sDD582fgfaDF3d3MCsAwwiPVscC45O0YoIlwAt317v58Zb8TEakVCuQflaz/ORE4DtgB+Avr++mFhEGQNUBr0lfXE/rqzxD66Pb+eldgBWGnq5vSfkIqIoOHAnmZJB34KNaH7LHA54ENgHqgCBQInf7bfPhmsNDdV2ZQtojUGAXyniULPkezvp8eC3yWEMAjoP3Gtga8Xq/0AAACrUlEQVR4nQ/31c8kW+OKiJREgTwDych4gWT0Jet6RKR2KZAPXDIlxdy9NetaRKS2pdmn1pVezuDg7kXCCLmIiFQpd2/LugYRkf4aFNseioiIiIhUKwVyEREREZEMKZCLiIiIiGRIgVxEREREJEMK5CIiIiIiGVIgFxERERHJkAK5iIhUjJnda2Yjsq5DRKSaaB9yEREpOzM7AtgOmJB1LSIi1UYj5CIiMmBJ0O6Vu//a3X8CrCpzSSIiuaNALiIipRifdQEiInmnQC4iIiIikiEFchGRQc7MRpjZVDObY2ZjzGyCmd2bdV0iIoNF2QO5OnoRkao3zt1vIllw6e4LgIeyLUlEZPCoxC4r49z9JjP7KzDH3ReY2XYVaFdERPog6ZdHAI3uvij52E2dX5e8ZnanD48zszkd3m909/PLV62ISO0peyDva0cvIiKZOgpY0NML3L0ROLnjx8xstgK4iEhpKjWHvNeOXkREMjWRykxT2awCbYiI5EqlAnmlOnoRERmYEcnc8bJI1g/NBkYAc81sarnaEhHJm0qd1FnWjl5ERErj7hPLfP0FhCelmt4iItJJRUbIy93Ri4hIZhqyLkBEJO/M3dO5kJkDuLulckERkUGqXP2p+mkRkfSk2afqYCARERERkQwpkIuIiIiIZEiBXEREREQkQ6nvstI+n0ZERKqT+mkRkeqiEXIRERERkQyltsuKiIiIiIj0n0bIRUREREQypEAuIiIiIpIhBXIRERERkQwpkIuIiIiIZEiBXEREREQkQwrkIiIiIiIZUiAXEREREcmQArmIiIiISIYUyEVEREREMqRALiIiIiKSIQVyEREREZEMKZCLiIiIiGRIgVxEREREJEMK5CIiIiIiGVIgFxERERHJkAK5iIiIiEiGFMhFRERERDL0/wGDFAjM6d36FgAAAABJRU5ErkJggg==\n",
53 | "text/plain": [
54 | ""
55 | ]
56 | },
57 | "metadata": {},
58 | "output_type": "display_data"
59 | }
60 | ],
61 | "source": [
62 | "import daft\n",
63 | "from matplotlib import rc\n",
64 | "\n",
65 | "rc(\"font\", family=\"serif\", size=12)\n",
66 | "rc(\"text\", usetex=True)\n",
67 | "\n",
68 | "\n",
69 | "pgm = daft.PGM(grid_unit=4.0, node_unit=1.4)\n",
70 | "\n",
71 | "# Start with the plates.\n",
72 | "rect_params = {\"lw\": 2}\n",
73 | "edge_params = {\n",
74 | " 'linewidth': 1\n",
75 | "}\n",
76 | "pgm.add_plate(\n",
77 | " [0, 0, 3, 2],\n",
78 | " label=r\"\\Large $r$\",\n",
79 | " rect_params=rect_params,\n",
80 | ")\n",
81 | "\n",
82 | "pgm.add_plate(\n",
83 | " [3 + 0.2, 0, 3, 2],\n",
84 | " label=r\"\\Large $r+1$\",\n",
85 | " rect_params=rect_params,\n",
86 | ")\n",
87 | "\n",
88 | "pgm.add_node(\"e_r\", r\"$E_r$\", 0.5, 0.5, scale=1.5, fontsize=24)\n",
89 | "pgm.add_node(\"e_r_1\", r\"$E_{r+1}$\", 3.5 + 0.2, 0.5, scale=1.5, fontsize=24)\n",
90 | "pgm.add_edge(\"e_r\", \"e_r_1\", plot_params=edge_params)\n",
91 | "\n",
92 | "pgm.add_node(\"c_r\", r\"$C_r$\", 1.5, 1., scale=1.5, fontsize=24, observed=True)\n",
93 | "pgm.add_node(\"c_r_1\", r\"$C_{r+1}$\", 3.5 + 0.2 + 1, 1., scale=1.5, fontsize=24, observed=True)\n",
94 | "pgm.add_edge(\"e_r\", \"c_r\", plot_params=edge_params)\n",
95 | "pgm.add_edge(\"e_r_1\", \"c_r_1\", plot_params=edge_params)\n",
96 | "\n",
97 | "pgm.add_node(\"a_r\", r\"$A_u$\", 0.5, 1.5, scale=1.5, fontsize=24)\n",
98 | "pgm.add_node(\"a_r_1\", r\"$A_{ur+1}$\", 3.7, 1.5, scale=1.5, fontsize=24)\n",
99 | "pgm.add_edge(\"a_r\", \"c_r\", plot_params=edge_params)\n",
100 | "pgm.add_edge(\"a_r_1\", \"c_r_1\", plot_params=edge_params)\n",
101 | "\n",
102 | "pgm.add_node(\"p_r\", r\"$P_{ur}$\", 2.3, 1., scale=1.5, fontsize=24, observed=True)\n",
103 | "pgm.add_node(\"p_r_1\", r\"$P_{ur+1}$\", 3.5 + 2, 1., scale=1.5, fontsize=24, observed=True)\n",
104 | "pgm.add_edge(\"c_r\", \"p_r\", plot_params=edge_params)\n",
105 | "pgm.add_edge(\"c_r_1\", \"p_r_1\", plot_params=edge_params)\n",
106 | "\n",
107 | "pgm.add_node(\"s_r\", r\"$S_{ur}$\", 2., 1.7, scale=1.5, fontsize=24)\n",
108 | "pgm.add_node(\"s_r_1\", r\"$S_{ur+1}$\", 3.7 + 1.5, 1.7, scale=1.5, fontsize=24)\n",
109 | "pgm.add_edge(\"c_r\", \"s_r\", plot_params=edge_params)\n",
110 | "pgm.add_edge(\"c_r_1\", \"s_r_1\", plot_params=edge_params)\n",
111 | "\n",
112 | "pgm.add_edge(\"p_r\", \"s_r\", plot_params=edge_params)\n",
113 | "pgm.add_edge(\"p_r_1\", \"s_r_1\", plot_params=edge_params)\n",
114 | "pgm.add_edge(\"s_r\", \"e_r_1\", plot_params=edge_params)\n",
115 | "\n",
116 | "# Render and save.\n",
117 | "pgm.render()\n",
118 | "pgm.savefig(\"dbn.png\", dpi=150)"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "Here are the equations we'll be using for finding the parameters of the DBN."
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "\\begin{align}\n",
133 | "P\\left(E_r=1\\mid E_{r-1}=0\\right) & = 0 \\label{eq:1} \\tag{1} \\\\\n",
134 | "P\\left(A_u=1\\right) & = \\alpha_{uq} \\label{eq:2} \\tag{2} \\\\\n",
135 | "P\\left(C_r=1\\mid E_r=1, A_u=1\\right) & = 1 \\label{eq:3} \\tag{3} \\\\\n",
136 | "P\\left(S_{r}=1\\mid C_r=0,P_r=0\\right) & = 0 \\label{eq:4} \\tag{4} \\\\\n",
137 | "P\\left(S_{r}=1\\mid C_r=1,P_r=0\\right) & = \\sigma_{uq} \\label{eq:5} \\tag{5} \\\\\n",
138 | "P\\left(S_{r}=1\\mid C_r=1,P_r=1\\right) & = 1 \\label{eq:6} \\tag{6} \\\\\n",
139 | "P\\left(E_{r}=1\\mid S_{r-1}=1\\right) & = 0 \\label{eq:7} \\tag{7} \\\\\n",
140 | "P\\left(E_{r}=1\\mid E_{r-1}=1,S_{r-1}=0\\right) & = \\gamma \\label{eq:8} \\tag{8} \\\\\n",
141 | "P\\left(C_r=1\\right) = P\\left(C_{r}=1\\mid E_r=1\\right)\\cdot P\\left(E_r=1\\right) & = \\alpha_{uq}\\epsilon_{ru}\\label{eq:9} \\tag{9} \\\\\n",
142 | "\\end{align}"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "Each query and each sku carries an attractiveness factor $\\alpha_{uq}$. When the customer interacts with a sku, there's a $\\sigma_{uq}$ chance they'll enjoy it and end their browsing through the query result page. \n",
150 | "\n",
151 | "If they are not satisfied, they continue browsing through with a probability of $\\gamma$.\n",
152 | "\n",
153 | "In this model, only clicks and purchases are observed which means all other variables are hidden; in such case we use EM optimization techniques to find values for each parameter that best describes observed data in terms of log-likelihood.\n",
154 | "\n",
155 | "This being said, the log-likelihood is given by:\n",
156 | "\n",
157 | "$$\\ell\\ell = \\sum_{s \\in S}\\log\\left(\\sum_{\\textbf{X}}P(\\textbf{X}, \\textbf{C}^{(s)}, \\textbf{P}^{(s)} \\mid \\Psi) \\right)$$\n",
158 | "\n",
159 | "Where $X$ represents the hidden variables, $C$ and $P$ are the observed clicks and purchases, and finally $\\Psi$ represents all variables used to model the data.\n",
160 | "\n",
161 | "Finding the derivative of this equation is intractable due to the summation over the hidden variables. We then use the [Expectation-Maximization](https://towardsdatascience.com/inference-using-em-algorithm-d71cccb647bc) algorithm and aim to maximize the following $Q$ function:\n",
162 | "\n",
163 | "$$Q = \\sum_{s \\in S} \\mathbb{E}_{X|C^{(s)}}\\left[logP\\left(X, C^{(s)}, P^{(s)} \\mid \\Psi\\right)\\right]$$\n",
164 | "\n",
165 | "In our case, all variables are Bernoulli (either 0 or 1), each modeled by a parameter $\\theta_c$, which translates the above to:\n",
166 | "\n",
167 | "$$\n",
168 | "Q(\\theta_c) =\\sum_{s \\in S} \\sum_{c_i \\in s} \\left(P\\left(X_{c_i}^{(s)}=1, Par(X_{c_i}^{(s)}) = p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)log(\\theta_c) + P\\left(X_{c_i}^{(s)}=0, Par(X_{c_i}^{(s)}) = p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)log(1-\\theta_c)\\right) + Z \n",
169 | "$$\n",
170 | "\n",
171 | "We'll use this equation in the maximization step: we differentiate it to find the new optimal value of each parameter of our model, and repeat the process until either we reach convergence (usually detected when the log-likelihood stops increasing) or we reach the desired total number of iterations.\n",
172 | "\n",
173 | "The derivative to find new values is given by:\n",
174 | "\n",
175 | "$$\\theta_c^{(t+1)} = \\frac{\\sum_{s\\in S}\\sum_{c_i \\in s}P\\left(X_{c_i}^{(s)}=1, Par(X_{c_i}^{(s)})=p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)}{\\sum_{s\\in S}\\sum_{c_i \\in s}P\\left(Par(X_{c_i}^{(s)})=p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)}\\label{eq:10} \\tag{10}$$"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "metadata": {},
181 | "source": [
182 | "## Attractiveness $\\alpha_{uq}$"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "We have that:\n",
190 | "\n",
191 | "$$P(A_u = 1) = \\alpha_{uq}$$"
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {},
197 | "source": [
198 | "Given equations 1-9, we can also derive that:"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "\\begin{equation}\n",
206 | "\\begin{split}\n",
207 | "\\epsilon_1 & = P(E_1=1) = 1 \\\\\n",
208 | "\\epsilon_{r+1} & = P(E_{r+1} =1) \\\\\n",
209 | " & = P(E_{r+1} = 1 \\mid E_r=1) \\cdot P(E_r=1) \\\\\n",
210 | " & = \\epsilon_r P\\left(E_{r+1}=1 \\mid S_r = 0, E_r=1\\right) \\cdot P(S_r=0 \\mid E_r=1) \\\\\n",
211 | " & = \\epsilon_r\\gamma P(S_r=0 \\mid E_r=1) \\\\\n",
212 | " & = \\epsilon_r\\gamma \\left(P\\left(S_r=0 \\mid C_r = 0, P_r = 0, E_r=1 \\right)P\\left(C_r=0, P_r=0 \\mid E_r=1\\right) + P\\left(S_r=0 \\mid C_r = 0, P_r = 1, E_r=1 \\right)P\\left(C_r=0, P_r=1 \\mid E_r=1\\right) + P\\left(S_r=0 \\mid C_r = 1, P_r = 0, E_r=1 \\right)P\\left(C_r=1, P_r=0 \\mid E_r=1\\right) + P\\left(S_r=0 \\mid C_r = 1, P_r = 1, E_r=1 \\right)P\\left(C_r=1, P_r=1 \\mid E_r=1\\right)\\right) \\\\\n",
213 | " & = \\epsilon_r \\gamma \\left((1 - \\alpha_{uq}) + (1 - \\sigma_{uq})(1 - cr_{uq})\\alpha_{uq} \\right) \n",
214 | "\\end{split}\\label{eq:11} \\tag{11}\n",
215 | "\\end{equation}"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {},
221 | "source": [
222 | "Where $cr$ is the conversion rate of document $u$ for query $q$."
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "metadata": {},
228 | "source": [
229 | "Given equation 10, we derive for the attractiveness parameter the following updating rule:\n",
230 | "\n",
231 | "$$\\alpha_{uq}^{t+1} = \\frac{\\sum_{s \\in S_{uq}} P(A_u = 1 \\mid C, P)}{|S_{uq}|} \\label{eq:12} \\tag{12}$$"
232 | ]
233 | },
234 | {
235 | "cell_type": "markdown",
236 | "metadata": {},
237 | "source": [
238 | "But given the structure of the DBN, we can infer that if $C$ is observed then $A_u$ is independent of $P$, as the click variable lies between the attractiveness variable and the purchase variable in the graph. We can use this to assert a simplified updating rule:"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "$$\\alpha_{uq}^{t+1} = \\frac{\\sum_{s \\in S_{uq}} P(A_u = 1 \\mid C)}{|S_{uq}|} \\label{eq:13} \\tag{13}$$"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "Which can be developed as follows:"
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "$$\n",
260 | "\\begin{equation}\n",
261 | "\\begin{split}\n",
262 | " P(A_u = 1 \\mid C) & = P(A_u = 1 \\mid C_r, C_{>r}) \\\\\n",
263 | " & = \\unicode{x1D7D9}(C_r=1)\\cdot P(A_u=1 \\mid C_r = 1, C_{>r}) + \\unicode{x1D7D9}(C_r=0)\\cdot P(A_u=1 \\mid C_r = 0, C_{>r}) \\\\\n",
264 | " & = c_r + (1 - c_r) \\cdot \\left(\\unicode{x1D7D9}(C_{>r}=1) \\cdot P(A_u=1|C_r=0, C_{>r}=1) + \\unicode{x1D7D9}(C_{>r}=0) \\cdot P(A_u=1 \\mid C_r=0, C_{>r}=0)\\right) \\\\\n",
265 | " & = c_r + (1 - c_r)(1 - c_{>r}) \\cdot \\frac{P(C_r=0, C_{>r}=0 \\mid A_u=1) \\cdot P(A_u=1)}{P(C_r=0, C_{>r} = 0)}\n",
266 | "\\end{split}\\label{eq:14} \\tag{14}\n",
267 | "\\end{equation}\n",
268 | " $$"
269 | ]
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 | "Where $C_r$ is the click on the document at the current rank and $C_{>r}$ is a random variable that is 1 if there's any click at a rank after the current $r$ and 0 otherwise."
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "metadata": {},
281 | "source": [
282 | "Now developing the numerator of (14) we have:"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "$$\n",
290 | "\\begin{equation} \n",
291 | "\\begin{split}\n",
292 | " P(C_r=0, C_{>r}=0 \\mid A_u=1) & = P(C_r=0, C_{>r}=0 \\mid A_u=1, E_r=0) \\cdot P(E_r=0) \\\\\n",
293 | " & = P(E_r=0) = 1 - \\epsilon_r\n",
294 | "\\end{split}\\label{eq:14.1} \\tag{14.1}\n",
295 | "\\end{equation}\n",
296 | " $$"
297 | ]
298 | },
299 | {
300 | "cell_type": "markdown",
301 | "metadata": {},
302 | "source": [
303 | "The equation above is derived from the fact that an attractive document is only not clicked if it's not examined."
304 | ]
305 | },
306 | {
307 | "cell_type": "markdown",
308 | "metadata": {},
309 | "source": [
310 | "The numerator is now solved; we still need to develop the denominator:"
311 | ]
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "metadata": {},
316 | "source": [
317 | "$$\n",
318 | "\\begin{equation} \n",
319 | "\\begin{split}\n",
320 | " P\\left(C_r=0, C_{>r}=0\\right) = P(C_{\\geq r}=0) = 1 - P(C_{\\geq r} = 1)\n",
321 | "\\end{split}\\label{eq:14.2} \\tag{14.2}\n",
322 | "\\end{equation}\n",
323 | " $$"
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | "$$\n",
331 | "\\begin{equation} \n",
332 | "\\begin{split}\n",
333 | "P(C_{\\geq r} = 1) = \\epsilon_r \\cdot X_r\n",
334 | "\\end{split}\\label{eq:14.3} \\tag{14.3}\n",
335 | "\\end{equation}\n",
336 | " $$"
337 | ]
338 | },
339 | {
340 | "cell_type": "markdown",
341 | "metadata": {},
342 | "source": [
343 | "$$\n",
344 | "\\begin{equation} \n",
345 | "\\begin{split}\n",
346 | "X_r & = P(C_{\\geq r}=1 \\mid E_r=1) \\\\\n",
347 | " & = P(C_r = 1 \\mid E_r=1) + P(C_r=0, C_{\\geq r+1}=1 \\mid E_r=1) \\\\\n",
348 | " & = \\alpha_{uq} + P(C_{\\geq r+1}=1 \\mid C_r=0, E_r=1) \\cdot P(C_r=0 \\mid E_r=1) \\\\\n",
349 | " & = \\alpha_{uq} + P(C_{\\geq r+1}=1 \\mid E_{r+1}=1) \\cdot P(E_{r+1}=1 \\mid C_r=0, E_r=1) \\cdot (1 - \\alpha_{uq}) \\\\\n",
350 | " & = \\alpha_{uq} + (1 - \\alpha_{uq})\\gamma X_{r+1}\n",
351 | "\\end{split}\\label{eq:14.4} \\tag{14.4}\n",
352 | "\\end{equation}\n",
353 | " $$"
354 | ]
355 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {},
359 | "source": [
360 | "Finally, we have the updating rule for the attractiveness parameter:"
361 | ]
362 | },
363 | {
364 | "cell_type": "markdown",
365 | "metadata": {},
366 | "source": [
367 | "$$ \\alpha_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S_{uq}}\\left(c_r^{(s)} + \\left(1 - c_r^{(s)}\\right)\\left(1 - c_{>r}^{(s)}\\right) \\cdot \\frac{\\left(1 - \\epsilon_r^{(t)}\\right)\\alpha_{uq}^{(t)}}{\\left(1 - \\epsilon_r^{(t)}X_r^{(t)} \\right)} \\right)}{|S_{uq}|} \\label{eq:15} \\tag{15}$$"
368 | ]
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {},
373 | "source": [
374 | "Where $\\epsilon_r$ is given by equation (11)."
375 | ]
376 | },
377 | {
378 | "cell_type": "markdown",
379 | "metadata": {},
380 | "source": [
381 | "## Satisfaction $\\sigma_{uq}$"
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "In our presented DBN model, the satisfaction factor is only defined for a document that was clicked but not purchased:\n",
389 | "\n",
390 | "$$ \\sigma_{uq} = P(S_u=1 \\mid C_r=1, P_r=0)$$"
391 | ]
392 | },
393 | {
394 | "cell_type": "markdown",
395 | "metadata": {},
396 | "source": [
397 | "This means the updating rule for the satisfaction term is given by:"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "$$\\sigma_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S'_{uq}}P(S_u=1 \\mid C, P)}{|S'_{uq}|} \\label{eq:16} \\tag{16} $$"
405 | ]
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "metadata": {},
410 | "source": [
411 | "Which can be developed as:"
412 | ]
413 | },
414 | {
415 | "cell_type": "markdown",
416 | "metadata": {},
417 | "source": [
418 | "$$\n",
419 | "\\begin{equation} \n",
420 | "\\begin{split}\n",
421 | "P(S_u=1 \\mid C, P) &= P(S_u = 1 \\mid C_r=1, P_r=0, C_{>r}=0, P_{>r}=0) \\\\\n",
422 | "&= (1 - c_{>r})\\cdot P(S_u=1 \\mid C_r=1, P_r=0, C_{>r}=0, P_{>r}=0) \\\\\n",
423 | "&= (1 - c_{>r})\\cdot \\frac{P(C_{>r}=0, P_{>r}=0 \\mid S_u=1, C_r=1, P_r=0) \\cdot P(S_u=1 \\mid C_r=1, P_r=0)}{P(C_{>r}=0, P_{>r}=0 \\mid C_r=1, P_r=0)} \\\\\n",
424 | "&= \\frac{(1 - c_r)(1-p_r)\\sigma_{uq}}{P(P_{>r}=0 \\mid C_{>r}=0, C_r=1, P_r=0) \\cdot P(C_{>r}=0 \\mid C_r=1, P_r=0)} \\\\\n",
425 | "&= \\frac{(1 - c_r)(1-p_r)\\sigma_{uq}}{1 - P(C_{\\geq r+1}=1 \\mid E_{r+1})\\cdot P(E_{r+1}\\mid C_r=1, P_r=0)} \\\\\n",
426 | "&= \\frac{(1 - c_r)(1-p_r)\\sigma_{uq}}{(1 - X_{r+1}\\cdot (1-\\alpha_{uq})\\gamma)}\n",
427 | "\\end{split}\\label{eq:17} \\tag{17}\n",
428 | "\\end{equation}\n",
429 | " $$"
430 | ]
431 | },
432 | {
433 | "cell_type": "markdown",
434 | "metadata": {},
435 | "source": [
436 | "Given equations (16) and (17), we derive that the updating rule is given by:"
437 | ]
438 | },
439 | {
440 | "cell_type": "markdown",
441 | "metadata": {},
442 | "source": [
443 | "$$\\sigma_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S^{[1, 0]}}\\frac{(1 - c_r^{(t)})(1-p_r^{(t)})\\sigma_{uq}^{(t)}}{(1 - X_{r+1}\\cdot (1-\\alpha_{uq}^{(t)})\\gamma^{(t)})}}{|S^{[1, 0]}|} \\label{eq:18} \\tag{18}$$"
444 | ]
445 | },
446 | {
447 | "cell_type": "markdown",
448 | "metadata": {},
449 | "source": [
450 | "Where $S^{[1, 0]}$ is the set of customer sessions in which, at rank $r$, there's an observed click and no purchase for document $u$ and query $q$."
451 | ]
452 | },
453 | {
454 | "cell_type": "markdown",
455 | "metadata": {},
456 | "source": [
457 | "## Persistence $\\gamma$"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "Persistence is defined as:"
465 | ]
466 | },
467 | {
468 | "cell_type": "markdown",
469 | "metadata": {},
470 | "source": [
471 | "$$\\gamma = P(E_{r+1} = 1 \\mid E_r = 1, S_{ur}=0)$$"
472 | ]
473 | },
474 | {
475 | "cell_type": "markdown",
476 | "metadata": {},
477 | "source": [
478 | "The expected sufficient statistics (ESS) for this parameter are defined as:"
479 | ]
480 | },
481 | {
482 | "cell_type": "markdown",
483 | "metadata": {},
484 | "source": [
485 | "$$ESS(z) = \\sum_{s \\in S} \\sum_r P(E_{r+1}=z, E_r=1, S_{ur}=0 \\mid C, P) \\label{eq:19} \\tag{19}$$"
486 | ]
487 | },
488 | {
489 | "cell_type": "markdown",
490 | "metadata": {},
491 | "source": [
492 | "There's no closed form for this equation, so we use some techniques in order to be able to compute it, like so:"
493 | ]
494 | },
495 | {
496 | "cell_type": "markdown",
497 | "metadata": {},
498 | "source": [
499 | "$$\n",
500 | "\\begin{equation} \n",
501 | "\\begin{split}\n",
502 | "ESS(z) &= \\sum_{s \\in S}\\sum_{r}\\frac{P(E_{r+1}=z, E_r=1, S_u=0, C, P)}{P(C, P)} \\\\\n",
503 | "&= \\sum_{s \\in S}\\sum_{r}\\frac{P(E_{r+1}=z, E_r=1, S_u=0, C, P)}{\\sum_x \\sum_y \\sum_z P(E_{r+1}=z,E_r=x, S_u=y, C, P)} \\\\\n",
504 | "&= \\sum_{s \\in S}\\sum_{r}\\frac{P(E_{r+1}=\\, E_r=1, S_u=0, C, P) \\cdot \\frac{1}{P(C_{r}, P{>r} \\mid E_{r+1}=z, E_r=x, S_u=y, C_r=c_r, P_r=p_r) \\cdot P(E_r=x, S_u=y, E_{r+1}=z, C_r=c_r, P_r=p_r \\mid C_{r}, P_{>r} \\mid E_{r+1}=z) \\cdot P(E_{r+1}=z, S_u=y, C_r=c_r, P_r=p_r \\mid E_r=x) \\cdot P(E_r=x \\mid C_{r}, P_{>r} \\mid E_{r+1}=z)$$, second is $$P(E_{r+1}=z, S_u=y, C_r=c_r, P_r=p_r \\mid E_r=x)$$ and finally $$P(E_r=x \\mid C_{r}, P_{>r} \\mid E_{r+1}=z)$ which is derived as:"
626 | ]
627 | },
628 | {
629 | "cell_type": "markdown",
630 | "metadata": {},
631 | "source": [
632 | "$$\n",
633 | "\\begin{equation} \n",
634 | "\\begin{split}\n",
635 | "P(C_{>r}, P_{>r} \\mid E_{r+1}=z) &= P(C_r, P_r\\mid C_{r-1}, P_{r-1}, ..., E_l=1) \\cdot P(C_{r-1}, P_{r-1}, E_l=1) \\\\\n",
636 | " &= (1-\\alpha \\epsilon_{rl})\\left((1-c_r)(1-p_r) + (1-w)(\\alpha \\epsilon_{rl}c_r(1-p_r)) + w \\alpha \\epsilon_{rl}c_r p_r \\right) \\cdot P(C_{r-1},P_{r-1} \\mid C_{\n",
658 | " @font-face {\n",
659 | " font-family: \"Computer Modern\";\n",
660 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunss.otf');\n",
661 | " }\n",
662 | " @font-face {\n",
663 | " font-family: \"Computer Modern\";\n",
664 | " font-weight: bold;\n",
665 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunsx.otf');\n",
666 | " }\n",
667 | " @font-face {\n",
668 | " font-family: \"Computer Modern\";\n",
669 | " font-style: oblique;\n",
670 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunsi.otf');\n",
671 | " }\n",
672 | " @font-face {\n",
673 | " font-family: \"Computer Modern\";\n",
674 | " font-weight: bold;\n",
675 | " font-style: oblique;\n",
676 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunso.otf');\n",
677 | " }\n",
678 | " div.cell{\n",
679 | " width:800px;\n",
680 | " margin-left:16% !important;\n",
681 | " margin-right:auto;\n",
682 | " }\n",
683 | " h1 {\n",
684 | " font-family: Helvetica, serif;\n",
685 | " }\n",
686 | " h4{\n",
687 | " margin-top:12px;\n",
688 | " margin-bottom: 3px;\n",
689 | " }\n",
690 | " div.text_cell_render{\n",
691 | " font-family: Computer Modern, \"Helvetica Neue\", Arial, Helvetica, Geneva, sans-serif;\n",
692 | " line-height: 145%;\n",
693 | " font-size: 130%;\n",
694 | " width:800px;\n",
695 | " margin-left:auto;\n",
696 | " margin-right:auto;\n",
697 | " }\n",
698 | " .CodeMirror{\n",
699 | " font-family: \"Source Code Pro\", source-code-pro,Consolas, monospace;\n",
700 | " }\n",
701 | " .prompt{\n",
702 | " display: None;\n",
703 | " }\n",
704 | " .text_cell_render h5 {\n",
705 | " font-weight: 300;\n",
706 | " font-size: 22pt;\n",
707 | " color: #4057A1;\n",
708 | " font-style: italic;\n",
709 | " margin-bottom: .5em;\n",
710 | " margin-top: 0.5em;\n",
711 | " display: block;\n",
712 | " }\n",
713 | " \n",
714 | " .warning{\n",
715 | " color: rgb( 240, 20, 20 )\n",
716 | " } \n",
717 | "\n",
718 | ""
733 | ],
734 | "text/plain": [
735 | ""
736 | ]
737 | },
738 | "execution_count": 1,
739 | "metadata": {},
740 | "output_type": "execute_result"
741 | }
742 | ],
743 | "source": [
744 | "from IPython.core.display import HTML,display\n",
745 | "\n",
746 | "\n",
747 | "def css_styling():\n",
748 | " styles = open(\"styles/custom.css\", \"r\").read()\n",
749 | " return HTML(styles)\n",
750 | "\n",
751 | "\n",
752 | "css_styling()"
753 | ]
754 | }
755 | ],
756 | "metadata": {
757 | "kernelspec": {
758 | "display_name": "Python 3",
759 | "language": "python",
760 | "name": "python3"
761 | },
762 | "language_info": {
763 | "codemirror_mode": {
764 | "name": "ipython",
765 | "version": 3
766 | },
767 | "file_extension": ".py",
768 | "mimetype": "text/x-python",
769 | "name": "python",
770 | "nbconvert_exporter": "python",
771 | "pygments_lexer": "ipython3",
772 | "version": "3.6.9"
773 | }
774 | },
775 | "nbformat": 4,
776 | "nbformat_minor": 4
777 | }
778 |
--------------------------------------------------------------------------------