├── pyClickModels ├── __init__.py ├── __version__.py ├── jsonc.pxd ├── DBN.pxd └── DBN.pyx ├── MANIFEST.in ├── requirements.txt ├── notebooks ├── dbn.png ├── styles │ ├── bmh_matplotlibrc.json │ ├── custom.css │ └── matplotlibrc └── DBN.ipynb ├── tests ├── test_DBN.py ├── fixtures │ ├── eighty_skus │ │ └── judgments.gz │ ├── all_clicks_set │ │ └── judgments.gz │ └── null_test │ │ └── judgments_test_null.gz ├── conftest.py └── test_cy_DBN.pyx ├── setup.cfg ├── .flake8 ├── .coveragerc ├── scripts └── build_wheels.sh ├── .travis.yml ├── Makefile ├── LICENSE ├── .gitignore ├── setup.py └── README.md /pyClickModels/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /pyClickModels/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.0.2' 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | wheel 3 | numpy 4 | ujson 5 | -------------------------------------------------------------------------------- /notebooks/dbn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/pyClickModels/master/notebooks/dbn.png -------------------------------------------------------------------------------- /tests/test_DBN.py: -------------------------------------------------------------------------------- 1 | def test_DBN(): 2 | from test_cy_DBN import run_tests 3 | run_tests() 4 | -------------------------------------------------------------------------------- 
/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE 3 | 4 | [isort] 5 | known_first_party = pyClickModels 6 | default_section = THIRDPARTY 7 | -------------------------------------------------------------------------------- /tests/fixtures/eighty_skus/judgments.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/pyClickModels/master/tests/fixtures/eighty_skus/judgments.gz -------------------------------------------------------------------------------- /tests/fixtures/all_clicks_set/judgments.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/pyClickModels/master/tests/fixtures/all_clicks_set/judgments.gz -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length=90 3 | filename = *.pyx,*.px* 4 | exclude = .eggs,*.egg,build,*.pxd 5 | ignore = E901,E225,E226,E227,E999,W504 6 | -------------------------------------------------------------------------------- /tests/fixtures/null_test/judgments_test_null.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillianFuks/pyClickModels/master/tests/fixtures/null_test/judgments_test_null.gz -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | plugins = Cython.Coverage 3 | include = 4 | pyClickModels/* 5 | omit = 6 | tests/* 7 | pyClickModels/__version__.py 8 | 9 | [report] 10 | show_missing = true 11 | exclude_lines = 12 | pragma: no cover 13 | -------------------------------------------------------------------------------- 
/notebooks/styles/bmh_matplotlibrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "lines.linewidth": 2.0, 3 | "axes.edgecolor": "#bcbcbc", 4 | "patch.linewidth": 0.5, 5 | "legend.fancybox": true, 6 | "axes.color_cycle": [ 7 | "#348ABD", 8 | "#A60628", 9 | "#7A68A6", 10 | "#467821", 11 | "#CF4457", 12 | "#188487", 13 | "#E24A33" 14 | ], 15 | "axes.facecolor": "#eeeeee", 16 | "axes.labelsize": "large", 17 | "axes.grid": true, 18 | "patch.edgecolor": "#eeeeee", 19 | "axes.titlesize": "x-large", 20 | "svg.fonttype": "path", 21 | "examples.directory": "" 22 | } -------------------------------------------------------------------------------- /scripts/build_wheels.sh: -------------------------------------------------------------------------------- 1 | docker run -v $(pwd):/pyClickModels quay.io/pypa/manylinux1_x86_64 sh -c ''' 2 | yum update 3 | yum install -y json-c-devel 4 | 5 | cd /pyClickModels 6 | 7 | for PYVER in /opt/python/*/bin/; do 8 | if [[ $PYVER != *"27"* ]]; then 9 | "${PYVER}/pip" install -U pip 10 | "${PYVER}/pip" install -U setuptools 11 | "${PYVER}/pip" install -r requirements.txt 12 | "${PYVER}/python" setup.py sdist bdist_wheel 13 | fi 14 | done 15 | 16 | for whl in dist/*.whl; do 17 | auditwheel repair "$whl" --plat "manylinux2010_x86_64" -w dist/ 18 | rm $whl 19 | done 20 | ''' 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | matrix: 4 | include: 5 | - python: 3.6 6 | - python: 3.7 7 | - python: 3.8 8 | 9 | before_install: 10 | - sudo apt-get -y install libjson0 libjson0-dev 11 | addons: 12 | apt: 13 | update: true 14 | sources: 15 | - ubuntu-toolchain-r-test 16 | packages: 17 | - g++-7 18 | 19 | install: 20 | - pip install -U setuptools cython coveralls 21 | 22 | script: 23 | - | 24 | if [[ $TRAVIS_PYTHON_VERSION == 3.8 ]]; then 25 | make 
isort-check 26 | make flake8 27 | fi 28 | python setup.py test 29 | 30 | after_success: 31 | - | 32 | if [[ $TRAVIS_PYTHON_VERSION == 3.8 ]]; then 33 | travis_wait 30 python setup.py test --coverage=true && coveralls 34 | else echo failed 35 | fi 36 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: flake8 coverage coverage-html test publish 2 | 3 | flake8: 4 | pip install -U flake8 5 | flake8 pyClickModels 6 | 7 | isort: 8 | pip install -U isort 9 | isort -rc pyClickModels 10 | isort -rc tests 11 | 12 | isort-check: 13 | pip install -U isort 14 | isort -ns __init__.py -rc -c -df -p pyClickModels pyClickModels tests 15 | 16 | coverage: 17 | python setup.py test --coverage=true 18 | 19 | coverage-html: 20 | python setup.py test --coverage=true --html=true 21 | 22 | test: 23 | python setup.py test 24 | 25 | publish: 26 | pip install -U setuptools 27 | pip install -U wheel 28 | pip install 'twine>=1.5.0' 29 | pip install auditwheel 30 | sh ./scripts/build_wheels.sh 31 | #twine upload --repository testpypi dist/* 32 | twine upload dist/* 33 | #rm -fr build dist .egg *.egg-info 34 | -------------------------------------------------------------------------------- /pyClickModels/jsonc.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "/usr/include/json-c/json.h": 2 | struct json_object: 3 | pass 4 | 5 | ctypedef bint json_bool 6 | json_object *json_tokener_parse(const char *str) 7 | json_bool json_object_object_get_ex(const json_object *obj, const char *key, json_object **value) 8 | const char *json_object_get_string(json_object *jso) 9 | 10 | struct lh_entry: 11 | void *k 12 | void *v 13 | lh_entry *next 14 | 15 | struct lh_table: 16 | int size 17 | lh_entry *head 18 | 19 | lh_table *json_object_get_object(const json_object *jso) 20 | 21 | void *lh_entry_k(lh_entry *entry) 22 | 
size_t json_object_array_length(const json_object *obj) 23 | json_object *json_object_array_get_idx(const json_object *jso, size_t idx) 24 | int json_object_get_int(const json_object *obj) 25 | int json_object_put(json_object *obj) 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Willian Fuks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /notebooks/styles/custom.css: -------------------------------------------------------------------------------- 1 | 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.c 9 | *.cpp 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | *.html 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # Cython annotations 135 | pyClickModels/*.html 136 | 137 | # json-c building folders 138 | json-c-build/ 139 | json-c/ 140 | -------------------------------------------------------------------------------- /pyClickModels/DBN.pxd: -------------------------------------------------------------------------------- 1 | from libcpp.string cimport string 2 | from libcpp.unordered_map cimport unordered_map 3 | from libcpp.vector cimport vector 4 | 5 | from pyClickModels.jsonc cimport * 6 | 7 | 8 | cdef class DBNModel: 9 | cpdef void fit(self, str input_folder, int iters=*) 10 | cpdef void export_judgments(self, str output, str format_=*) 11 | cdef: 12 | float gamma_param 13 | unordered_map[string, unordered_map[string, float]] alpha_params 14 | unordered_map[string, unordered_map[string, float]] sigma_params 15 | string get_search_context_string(self, lh_table *tbl) 16 | void compute_cr(self, string *query, json_object *sessions, unordered_map[string, unordered_map[string, float]] *cr_dict) 17 | float *get_param(self, string param, string *query=*, string *doc=*) 18 | vector[float] 
build_e_r_vector(self, json_object *clickstream, string *query, unordered_map[string, float] *cr_dict) 19 | vector[float] build_X_r_vector(self, json_object *clisktream, string *query) 20 | vector[float] build_e_r_vector_given_CP(self, json_object *clickstream, unsigned int idx, string *query) 21 | float compute_cp_p(self, json_object *clickstream, unsigned int idx, string *query, vector[float] *e_r_array_given_CP, unordered_map[string, float] *cr_dict) 22 | vector[float] build_CP_vector_given_e(self, json_object *session, string *query, unordered_map[string, float] *cr_dict) 23 | int get_last_r(self, json_object *clickstream, const char *event=*) 24 | void update_tmp_alpha(self, int r, string *query, json_object *doc_data, vector[float] *e_r_vector, vector[float] *X_r_vector, int last_r, unordered_map[string, vector[float]] *tmp_alpha_param) 25 | void update_tmp_sigma(self, string *query, int r, json_object *doc_data, vector[float] *X_r_vector, int last_r, unordered_map[string, vector[float]] *tmp_sigma_param) 26 | void update_tmp_gamma(self, int r, int last_r, json_object *doc_data, string *query, vector[float] *cp_vector_given_e, vector[float] *e_r_vector_given_CP, unordered_map[string, float] *cr_dict, vector[float] *tmp_gamma_param) 27 | void update_alpha_param(self, string *query, unordered_map[string, vector[float]] *tmp_alpha_param) 28 | void update_sigma_param(self, string *query, unordered_map[string, vector[float]] *tmp_sigma_param) 29 | void update_gamma_param(self, vector[float] *tmp_gamma_param) 30 | void update_tmp_params(self, json_object *session, unordered_map[string, vector[float]] *tmp_alpha_param, unordered_map[string, vector[float]] *tmp_sigma_param, vector[float] *tmp_gamma_param, string *query, unordered_map[string, float] *cr_dict) 31 | void restart_tmp_params(self, unordered_map[string, vector[float]] *tmp_alpha_param, unordered_map[string, vector[float]] *tmp_sigma_param, vector[float] *tmp_gamma_param) 32 | 33 | cdef class Factor: 34 | 
def build_DBN_test_data(users=10, docs=10, queries=2):
    """Simulate click/purchase sessions and dump them as gzipped JSON lines.

    For every query, ``users`` sessions are simulated. In each session all
    ``docs`` documents are shown in a random order and the simulated user
    walks down the list until they stop examining it; every document shown
    after that point is recorded with ``click=0`` and ``purchase=0``.

    The rows are split in two halves and written to
    ``judgments_model_test_data_1.gz`` and ``judgments_model_test_data_2.gz``
    inside a freshly created temporary directory.

    Args:
        users: Number of simulated sessions per query.
        docs: Number of documents shown in every session.
        queries: Number of distinct queries to generate.

    Returns:
        A tuple ``(persistence, params, tmp_folder)``. ``persistence`` is the
        threshold (0.7) compared against a uniform draw to decide whether the
        user keeps browsing, ``params`` is the random array of shape
        ``(queries, docs, 3)`` used by the simulation and ``tmp_folder`` is the
        ``tempfile.TemporaryDirectory`` that holds the two files. Keep a
        reference to it for as long as the files are needed.
    """
    # Last axis: index 0 is alpha, index 1 is sigma, index 2 is purchase rate.
    params = np.random.random(size=(queries, docs, 3))
    persistence = 0.7

    final_result = []
    for q in range(queries):
        sessions = []
        for _ in range(users):
            shown_docs = list(range(docs))
            np.random.shuffle(shown_docs)
            session = []
            stopped_examining = False
            # Documents are presented starting from the end of the shuffled list.
            for doc in reversed(shown_docs):
                if stopped_examining:
                    session.append({'click': 0, 'purchase': 0, 'doc': str(doc)})
                    continue
                persist = np.random.random()
                satisfied = np.random.random()
                click_event = np.random.random()
                purchase_event = np.random.random()
                alpha, sigma, purchase_rate = params[q, doc]
                observed_click = 1 if click_event < alpha else 0
                observed_purchase = (
                    1 if observed_click and purchase_event < purchase_rate else 0
                )
                session.append({
                    'click': observed_click,
                    'purchase': observed_purchase,
                    'doc': str(doc)
                })
                # A click followed by a purchase or by satisfaction ends the
                # session; in every other case the user only keeps browsing
                # while the persistence draw stays within the threshold.
                stopped_examining = bool(
                    persist > persistence or
                    (observed_click and (observed_purchase or satisfied < sigma))
                )
            sessions.append({'session': session})
        final_result.append({
            "search_keys": {
                "search_term": q,
                "region": "north",
                "favorite_size": "L"
            },
            "judgment_keys": sessions
        })

    # Write into the directory that was really created. Overriding ``name``
    # with '/tmp' would leak the real directory, pollute the shared /tmp and
    # make a later ``cleanup()`` try to remove /tmp itself.
    tmp_folder = tempfile.TemporaryDirectory()
    half_results = len(final_result) // 2
    chunks = (final_result[:half_results], final_result[half_results:])
    for idx, chunk in enumerate(chunks, start=1):
        path = '{}/judgments_model_test_data_{}.gz'.format(tmp_folder.name, idx)
        with gzip.GzipFile(path, 'wb') as f:
            for row in chunk:
                f.write(json.dumps(row).encode() + '\n'.encode())
    return persistence, params, tmp_folder
def build_define_macros():
    """
    Macro CYTHON_TRACE is set to True so coverage report is available. More info in:

    https://stackoverflow.com/questions/50967268/cython-generating-coverage-for-pyx-file

    Returns:
        ``[('CYTHON_TRACE', '1')]`` when the first command line argument is
        ``test``; an empty list for any other command or when no command was
        given at all.
    """
    args_ = sys.argv
    # Guard against a bare ``python setup.py`` call: with no command there is
    # nothing to inspect and no macro should be defined.
    command = args_[1] if len(args_) > 1 else None
    return [('CYTHON_TRACE', '1')] if command == 'test' else []
extra_compile_args=["-std=c++11"], 96 | extra_link_args=["-std=c++11"] 97 | ), 98 | Extension( 99 | 'tests.test_cy_DBN', 100 | ['tests/test_cy_DBN.pyx'], 101 | language='c++', 102 | libraries=['json-c'], 103 | extra_compile_args=["-std=c++11"], 104 | extra_link_args=["-std=c++11"] 105 | ) 106 | ] 107 | 108 | install_requires = [ 109 | 'cython', 110 | 'numpy', 111 | 'ujson' 112 | ] 113 | 114 | tests_require = [ 115 | 'pytest', 116 | 'pytest-cov', 117 | 'mock' 118 | ] 119 | 120 | setup_requires = [ 121 | 'flake8', 122 | 'isort', 123 | 'pytest-runner' 124 | ] 125 | 126 | extras_require = { 127 | 'testing': tests_require 128 | } 129 | 130 | compiler_directives = { 131 | 'language_level': '3', 132 | 'binding': False, 133 | 'boundscheck': False, 134 | 'wraparound': False, 135 | 'cdivision': True, 136 | 'linetrace': True 137 | } 138 | 139 | packages = ['pyClickModels'] 140 | 141 | setup( 142 | name='pyClickModels', 143 | version=_version['__version__'], 144 | author='Willian Fuks', 145 | author_email='willian.fuks@gmail.com', 146 | description='ClickModels for Search Engines Implemented on top of Cython.', 147 | packages=packages, 148 | include_package_data=True, 149 | package_data={ 150 | 'pyClickModels': ['*.pxd'] 151 | }, 152 | long_description=readme, 153 | long_description_content_type='text/markdown', 154 | install_requires=install_requires, 155 | tests_require=tests_require, 156 | setup_requires=setup_requires, 157 | license='MIT', 158 | ext_modules=cythonize( 159 | ext_modules, 160 | compiler_directives=compiler_directives 161 | ), 162 | cmdclass={ 163 | 'build_ext': build_ext, 164 | 'test': PyTest 165 | }, 166 | zip_safe=False, 167 | classifiers=[ 168 | 'Development Status :: 3 - Alpha', 169 | 'Environment :: Console', 170 | 'Intended Audience :: Developers', 171 | 'Intended Audience :: Science/Research', 172 | 'License :: OSI Approved :: MIT License', 173 | 'Natural Language :: English', 174 | 'Operating System :: POSIX :: Linux', 175 | 'Programming Language :: 
Python :: 3.5', 176 | 'Programming Language :: Python :: 3.6', 177 | 'Programming Language :: Python :: 3.7', 178 | 'Programming Language :: Python :: 3.8', 179 | 'Programming Language :: Python :: Implementation :: CPython', 180 | 'Programming Language :: Cython', 181 | 'Topic :: Scientific/Engineering', 182 | ], 183 | ) 184 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyClickModels [![Build Status](https://travis-ci.org/WillianFuks/pyClickModels.svg?branch=master)](https://travis-ci.org/WillianFuks/pyClickModels) [![Coverage Status](https://coveralls.io/repos/github/WillianFuks/pyClickModels/badge.svg?branch=master)](https://coveralls.io/github/WillianFuks/pyClickModels?branch=master) [![PyPI version](https://badge.fury.io/py/pyClickModels.svg)](https://badge.fury.io/py/pyClickModels) [![Pyversions](https://img.shields.io/pypi/pyversions/pyClickModels.svg)](https://pypi.python.org/pypi/pyClickModels) [![GitHub license](https://img.shields.io/github/license/WillianFuks/pyClickModels.svg)](https://github.com/WillianFuks/pyClickModels/blob/master/LICENSE) 2 | 3 | A Cython implementation of [ClickModels](https://github.com/varepsilon/clickmodels) that uses Probabilistic Graphical Models to infer user behavior when interacting with Search Page Results (Ranking). 4 | 5 | ## How It Works 6 | 7 | ClickModels uses the concept of [Probabilistic Graphical Models](https://en.wikipedia.org/wiki/Graphical_model) to model components that describe the interactions between users and a list of items ranked by a set of retrieval rules. 8 | 9 | These models tend to be useful when it's desired to understand whether a given document is a good match for a given search query or not which is also known in literature as *Judgments* grades. 
This is possible through evaluating past observed clicks and the positions at which the document appeared on the results pages for each query. 10 | 11 | There are several [proposed approaches](https://clickmodels.weebly.com/uploads/5/2/2/5/52257029/mc2015-clickmodels.pdf) to handle this problem. This repository implements a Dynamic Bayesian Network, similar to [previous works](https://github.com/varepsilon/clickmodels) also done in Python: 12 | 13 | ![dbn](notebooks/dbn.png) 14 | 15 | Main differences are: 16 | 17 | 1. **Implemented on top of Cython**: solutions already publicly available rely on CPython integrated with PyPy for additional speed ups. Unfortunately this still might not be good enough in terms of performance. To work on that, this implementation relies 100% on C/C++ for further optimization in speed. Despite not having an official benchmark, it's expected an improvement of **15x** ~ **18x** on top of CPython (same data lead to an increase of ~3x when using PyPy). 18 | 2. **Memory Friendly**: expects input data to follow a JSON format with all sessions of clickstream already expressed for each row. This saves memory and allows for the library to process bigger amounts of data. 19 | 3. **Purchase variable**: as businesses such as eCommerces can greatly benefit from better understanding their search engine, this repository added the variable Purchase to further describe customers behaviors. 20 | 21 | The file [notebooks/DBN.ipynb](notebooks/DBN.ipynb) has a complete description of how the model has been implemented along with all the mathematics involved. 22 | 23 | 24 | 25 | ## Installation 26 | 27 | As this project relies on binaries compiled by Cython, currently only Linux (manylinux) platform is supported. It can be installed with: 28 | 29 | pip install pyClickModels 30 | 31 | ## Getting Started 32 | 33 | ### Input Data 34 | 35 | pyClickModels expects input data to be stored in a set of compressed `gz` files located on the same folder. 
They all should start with the string "judgments", for instance, `judgments0.gz`. 36 | Each file should contain line separated JSONs. The following is an example of each JSON line: 37 | 38 | ```json 39 | { 40 | "search_keys": { 41 | "search_term": "blue shoes", 42 | "region": "south", 43 | "favorite_brand": "super brand", 44 | "user_size": "L", 45 | "avg_ticket": 10 46 | }, 47 | "judgment_keys": [ 48 | { 49 | "session": [ 50 | {"click": 0, "purchase": 0, "doc": "doc0"}, 51 | {"click": 1, "purchase": 0, "doc": "doc1"}, 52 | {"click": 1, "purchase": 1, "doc": "doc2"} 53 | ] 54 | }, 55 | { 56 | "session": [ 57 | {"click": 1, "purchase": 0, "doc": "doc0"}, 58 | {"click": 0, "purchase": 0, "doc": "doc1"}, 59 | {"click": 0, "purchase": 0, "doc": "doc2"} 60 | ] 61 | } 62 | ] 63 | } 64 | ``` 65 | 66 | The key `search_keys` sets the context for the search. In the above example, a given customer (or cluster of customers with the same context) searched for `blue shoes`. Their region is `south` (it could be any chosen value), favorite brand is `super brand` and so on. 67 | 68 | These keys set the context in which the search happened. When pyClickModels runs its optimization, it will consider all the context at once. This means that the Judgments obtained are also on the whole context setting. 69 | 70 | If no context is desired, just use `{"search_keys": {"search_term": "user search"}}`. 71 | 72 | There's no required schema here which means the library loops through all keys available in `search_keys` and builds the optimization process considering the whole context as a single query. 73 | 74 | As for the `judgment_keys`, this is a list of sessions. The key `session` is mandatory. Each session contains the clickstream of users (if the variable purchase is not required set it to 0). 
75 | 76 | For running DBN from pyClickModels, here's a simple example: 77 | 78 | ```python 79 | from pyClickModels.DBN import DBN 80 | 81 | model = DBN() 82 | model.fit(input_folder="/tmp/clicks_data/", iters=10) 83 | model.export_judgments("/tmp/output.gz") 84 | ``` 85 | 86 | The output file will contain newline-separated JSON with the judgments for each query and each document observed for that query, e.g.: 87 | 88 | ```json 89 | {"search_term:blue shoes|region:south|brand:super brand": {"doc0": 0.2, "doc1": 0.3, "doc2": 0.4}} 90 | {"search_term:query|region:north|brand:other_brand": {"doc0": 0.0, "doc1": 0.0, "doc2": 0.1}} 91 | ``` 92 | 93 | Judgments here vary between 0 and 1. Some libraries require them to be integers ranging from 0 to 4. In that case, choose a transformation that best suits your data. 94 | 95 | ## Warnings 96 | 97 | **This library is still alpha!** Use it with caution. It's been fully unit tested, but parts of it still use pure C whose exceptions might not have been fully considered yet. Before using this library in production environments, it's recommended to fully test it with different datasets and sizes to evaluate how it performs. 98 | 99 | ## Contributing 100 | 101 | Contributions are very welcome! Also, if you find bugs, please report them :). 102 | -------------------------------------------------------------------------------- /notebooks/styles/matplotlibrc: -------------------------------------------------------------------------------- 1 | ### MATPLOTLIBRC FORMAT 2 | 3 | # This is a sample matplotlib configuration file - you can find a copy 4 | # of it on your system in 5 | # site-packages/matplotlib/mpl-data/matplotlibrc. If you edit it 6 | # there, please note that it will be overwritten in your next install. 
7 | # If you want to keep a permanent local copy that will not be 8 | # overwritten, place it in HOME/.matplotlib/matplotlibrc (unix/linux 9 | # like systems) and C:\Documents and Settings\yourname\.matplotlib 10 | # (win32 systems). 11 | # 12 | # This file is best viewed in a editor which supports python mode 13 | # syntax highlighting. Blank lines, or lines starting with a comment 14 | # symbol, are ignored, as are trailing comments. Other lines must 15 | # have the format 16 | # key : val # optional comment 17 | # 18 | # Colors: for the color values below, you can either use - a 19 | # matplotlib color string, such as r, k, or b - an rgb tuple, such as 20 | # (1.0, 0.5, 0.0) - a hex string, such as ff00ff or #ff00ff - a scalar 21 | # grayscale intensity such as 0.75 - a legal html color name, eg red, 22 | # blue, darkslategray 23 | 24 | #### CONFIGURATION BEGINS HERE 25 | 26 | # the default backend; one of GTK GTKAgg GTKCairo GTK3Agg GTK3Cairo 27 | # CocoaAgg FltkAgg MacOSX QtAgg Qt4Agg TkAgg WX WXAgg Agg Cairo GDK PS 28 | # PDF SVG Template 29 | # You can also deploy your own backend outside of matplotlib by 30 | # referring to the module name (which must be in the PYTHONPATH) as 31 | # 'module://my_backend' 32 | backend : TkAgg 33 | 34 | # If you are using the Qt4Agg backend, you can choose here 35 | # to use the PyQt4 bindings or the newer PySide bindings to 36 | # the underlying Qt4 toolkit. 37 | #backend.qt4 : PyQt4 # PyQt4 | PySide 38 | 39 | # Note that this can be overridden by the environment variable 40 | # QT_API used by Enthought Tool Suite (ETS); valid values are 41 | # "pyqt" and "pyside". The "pyqt" setting has the side effect of 42 | # forcing the use of Version 2 API for QString and QVariant. 
43 | 44 | # if you are running pyplot inside a GUI and your backend choice 45 | # conflicts, we will automatically try to find a compatible one for 46 | # you if backend_fallback is True 47 | #backend_fallback: True 48 | 49 | #interactive : False 50 | #toolbar : toolbar2 # None | toolbar2 ("classic" is deprecated) 51 | #timezone : UTC # a pytz timezone string, eg US/Central or Europe/Paris 52 | 53 | # Where your matplotlib data lives if you installed to a non-default 54 | # location. This is where the matplotlib fonts, bitmaps, etc reside 55 | #datapath : /home/jdhunter/mpldata 56 | 57 | 58 | ### LINES 59 | # See http://matplotlib.org/api/artist_api.html#module-matplotlib.lines for more 60 | # information on line properties. 61 | lines.linewidth : 2.0 # line width in points 62 | #lines.linestyle : - # solid line 63 | #lines.color : blue # has no effect on plot(); see axes.color_cycle 64 | #lines.marker : None # the default marker 65 | #lines.markeredgewidth : 0.5 # the line width around the marker symbol 66 | #lines.markersize : 6 # markersize, in points 67 | #lines.dash_joinstyle : miter # miter|round|bevel 68 | #lines.dash_capstyle : butt # butt|round|projecting 69 | #lines.solid_joinstyle : miter # miter|round|bevel 70 | #lines.solid_capstyle : projecting # butt|round|projecting 71 | #lines.antialiased : True # render lines in antialiased (no jaggies) 72 | 73 | ### PATCHES 74 | # Patches are graphical objects that fill 2D space, like polygons or 75 | # circles. See 76 | # http://matplotlib.org/api/artist_api.html#module-matplotlib.patches 77 | # for more information on patch properties 78 | patch.linewidth : 0.5 # edge width in points 79 | patch.facecolor : blue 80 | patch.edgecolor : eeeeee 81 | patch.antialiased : True 82 | 83 | ### FONT 84 | # 85 | # font properties used by text.Text. See 86 | # http://matplotlib.org/api/font_manager_api.html for more 87 | # information on font properties.
The 6 font properties used for font 88 | # matching are given below with their default values. 89 | # 90 | # The font.family property has five values: 'serif' (e.g. Times), 91 | # 'sans-serif' (e.g. Helvetica), 'cursive' (e.g. Zapf-Chancery), 92 | # 'fantasy' (e.g. Western), and 'monospace' (e.g. Courier). Each of 93 | # these font families has a default list of font names in decreasing 94 | # order of priority associated with them. 95 | # 96 | # The font.style property has three values: normal (or roman), italic 97 | # or oblique. The oblique style will be used for italic, if it is not 98 | # present. 99 | # 100 | # The font.variant property has two values: normal or small-caps. For 101 | # TrueType fonts, which are scalable fonts, small-caps is equivalent 102 | # to using a font size of 'smaller', or about 83% of the current font 103 | # size. 104 | # 105 | # The font.weight property has effectively 13 values: normal, bold, 106 | # bolder, lighter, 100, 200, 300, ..., 900. Normal is the same as 107 | # 400, and bold is 700. bolder and lighter are relative values with 108 | # respect to the current weight. 109 | # 110 | # The font.stretch property has 11 values: ultra-condensed, 111 | # extra-condensed, condensed, semi-condensed, normal, semi-expanded, 112 | # expanded, extra-expanded, ultra-expanded, wider, and narrower. This 113 | # property is not currently implemented. 114 | # 115 | # The font.size property is the default font size for text, given in pts. 116 | # 12pt is the standard value. 117 | # 118 | #font.family : monospace 119 | #font.style : normal 120 | #font.variant : normal 121 | #font.weight : medium 122 | #font.stretch : normal 123 | # note that font.size controls default text sizes. To configure 124 | # special text sizes tick labels, axes, labels, title, etc, see the rc 125 | # settings for axes and ticks. 
Special text sizes can be defined 126 | # relative to font.size, using the following values: xx-small, x-small, 127 | # small, medium, large, x-large, xx-large, larger, or smaller 128 | #font.size : 12.0 129 | #font.serif : Bitstream Vera Serif, New Century Schoolbook, Century Schoolbook L, Utopia, ITC Bookman, Bookman, Nimbus Roman No9 L, Times New Roman, Times, Palatino, Charter, serif 130 | #font.sans-serif : Bitstream Vera Sans, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif 131 | #font.cursive : Apple Chancery, Textile, Zapf Chancery, Sand, cursive 132 | #font.fantasy : Comic Sans MS, Chicago, Charcoal, Impact, Western, fantasy 133 | #font.monospace : Andale Mono, Nimbus Mono L, Courier New, Courier, Fixed, Terminal, monospace 134 | 135 | 136 | ### TEXT 137 | # text properties used by text.Text. See 138 | # http://matplotlib.org/api/artist_api.html#module-matplotlib.text for more 139 | # information on text properties 140 | 141 | #text.color : black 142 | 143 | ### LaTeX customizations. See http://www.scipy.org/Wiki/Cookbook/Matplotlib/UsingTex 144 | #text.usetex : False # use latex for all text handling. The following fonts 145 | # are supported through the usual rc parameter settings: 146 | # new century schoolbook, bookman, times, palatino, 147 | # zapf chancery, charter, serif, sans-serif, helvetica, 148 | # avant garde, courier, monospace, computer modern roman, 149 | # computer modern sans serif, computer modern typewriter 150 | # If another font is desired which can loaded using the 151 | # LaTeX \usepackage command, please inquire at the 152 | # matplotlib mailing list 153 | #text.latex.unicode : False # use "ucs" and "inputenc" LaTeX packages for handling 154 | # unicode strings. 155 | #text.latex.preamble : # IMPROPER USE OF THIS FEATURE WILL LEAD TO LATEX FAILURES 156 | # AND IS THEREFORE UNSUPPORTED. PLEASE DO NOT ASK FOR HELP 157 | # IF THIS FEATURE DOES NOT DO WHAT YOU EXPECT IT TO. 
158 | # preamble is a comma separated list of LaTeX statements 159 | # that are included in the LaTeX document preamble. 160 | # An example: 161 | # text.latex.preamble : \usepackage{bm},\usepackage{euler} 162 | # The following packages are always loaded with usetex, so 163 | # beware of package collisions: color, geometry, graphicx, 164 | # type1cm, textcomp. Adobe Postscript (PSSNFS) font packages 165 | # may also be loaded, depending on your font settings 166 | 167 | #text.dvipnghack : None # some versions of dvipng don't handle alpha 168 | # channel properly. Use True to correct 169 | # and flush ~/.matplotlib/tex.cache 170 | # before testing and False to force 171 | # correction off. None will try and 172 | # guess based on your dvipng version 173 | 174 | #text.hinting : 'auto' # May be one of the following: 175 | # 'none': Perform no hinting 176 | # 'auto': Use freetype's autohinter 177 | # 'native': Use the hinting information in the 178 | # font file, if available, and if your 179 | # freetype library supports it 180 | # 'either': Use the native hinting information, 181 | # or the autohinter if none is available. 182 | # For backward compatibility, this value may also be 183 | # True === 'auto' or False === 'none'. 184 | text.hinting_factor : 8 # Specifies the amount of softness for hinting in the 185 | # horizontal direction. A value of 1 will hint to full 186 | # pixels. A value of 2 will hint to half pixels etc. 187 | 188 | #text.antialiased : True # If True (default), the text will be antialiased. 189 | # This only affects the Agg backend. 190 | 191 | # The following settings allow you to select the fonts in math mode. 192 | # They map from a TeX font name to a fontconfig font pattern. 193 | # These settings are only used if mathtext.fontset is 'custom'. 194 | # Note that this "custom" mode is unsupported and may go away in the 195 | # future. 
196 | #mathtext.cal : cursive 197 | #mathtext.rm : serif 198 | #mathtext.tt : monospace 199 | #mathtext.it : serif:italic 200 | #mathtext.bf : serif:bold 201 | #mathtext.sf : sans 202 | mathtext.fontset : cm # Should be 'cm' (Computer Modern), 'stix', 203 | # 'stixsans' or 'custom' 204 | #mathtext.fallback_to_cm : True # When True, use symbols from the Computer Modern 205 | # fonts when a symbol can not be found in one of 206 | # the custom math fonts. 207 | 208 | #mathtext.default : it # The default font to use for math. 209 | # Can be any of the LaTeX font names, including 210 | # the special name "regular" for the same font 211 | # used in regular text. 212 | 213 | ### AXES 214 | # default face and edge color, default tick sizes, 215 | # default fontsizes for ticklabels, and so on. See 216 | # http://matplotlib.org/api/axes_api.html#module-matplotlib.axes 217 | #axes.hold : True # whether to clear the axes by default on 218 | axes.facecolor : eeeeee # axes background color 219 | axes.edgecolor : bcbcbc # axes edge color 220 | #axes.linewidth : 1.0 # edge linewidth 221 | axes.grid : True # display grid or not 222 | axes.titlesize : x-large # fontsize of the axes title 223 | axes.labelsize : large # fontsize of the x any y labels 224 | #axes.labelweight : normal # weight of the x and y labels 225 | #axes.labelcolor : black 226 | #axes.axisbelow : False # whether axis gridlines and ticks are below 227 | # the axes elements (lines, text, etc) 228 | #axes.formatter.limits : -7, 7 # use scientific notation if log10 229 | # of the axis range is smaller than the 230 | # first or larger than the second 231 | #axes.formatter.use_locale : False # When True, format tick labels 232 | # according to the user's locale. 233 | # For example, use ',' as a decimal 234 | # separator in the fr_FR locale. 235 | #axes.formatter.use_mathtext : False # When True, use mathtext for scientific 236 | # notation. 
237 | #axes.unicode_minus : True # use unicode for the minus symbol 238 | # rather than hyphen. See 239 | # http://en.wikipedia.org/wiki/Plus_and_minus_signs#Character_codes 240 | axes.color_cycle : 348ABD, A60628, 7A68A6, 467821,D55E00, CC79A7, 56B4E9, 009E73, F0E442, 0072B2 # color cycle for plot lines 241 | # as list of string colorspecs: 242 | # single letter, long name, or 243 | # web-style hex 244 | 245 | #polaraxes.grid : True # display grid on polar axes 246 | #axes3d.grid : True # display grid on 3d axes 247 | 248 | ### TICKS 249 | # see http://matplotlib.org/api/axis_api.html#matplotlib.axis.Tick 250 | #xtick.major.size : 4 # major tick size in points 251 | #xtick.minor.size : 2 # minor tick size in points 252 | #xtick.major.width : 0.5 # major tick width in points 253 | #xtick.minor.width : 0.5 # minor tick width in points 254 | #xtick.major.pad : 4 # distance to major tick label in points 255 | #xtick.minor.pad : 4 # distance to the minor tick label in points 256 | #xtick.color : k # color of the tick labels 257 | #xtick.labelsize : medium # fontsize of the tick labels 258 | #xtick.direction : in # direction: in, out, or inout 259 | 260 | #ytick.major.size : 4 # major tick size in points 261 | #ytick.minor.size : 2 # minor tick size in points 262 | #ytick.major.width : 0.5 # major tick width in points 263 | #ytick.minor.width : 0.5 # minor tick width in points 264 | #ytick.major.pad : 4 # distance to major tick label in points 265 | #ytick.minor.pad : 4 # distance to the minor tick label in points 266 | #ytick.color : k # color of the tick labels 267 | #ytick.labelsize : medium # fontsize of the tick labels 268 | #ytick.direction : in # direction: in, out, or inout 269 | 270 | 271 | ### GRIDS 272 | #grid.color : black # grid color 273 | #grid.linestyle : : # dotted 274 | #grid.linewidth : 0.5 # in points 275 | #grid.alpha : 1.0 # transparency, between 0.0 and 1.0 276 | 277 | ### Legend 278 | legend.fancybox : True # if True, use a rounded box for the 
279 | # legend, else a rectangle 280 | #legend.isaxes : True 281 | #legend.numpoints : 2 # the number of points in the legend line 282 | #legend.fontsize : large 283 | #legend.pad : 0.0 # deprecated; the fractional whitespace inside the legend border 284 | #legend.borderpad : 0.5 # border whitespace in fontsize units 285 | #legend.markerscale : 1.0 # the relative size of legend markers vs. original 286 | # the following dimensions are in axes coords 287 | #legend.labelsep : 0.010 # deprecated; the vertical space between the legend entries 288 | #legend.labelspacing : 0.5 # the vertical space between the legend entries in fraction of fontsize 289 | #legend.handlelen : 0.05 # deprecated; the length of the legend lines 290 | #legend.handlelength : 2. # the length of the legend lines in fraction of fontsize 291 | #legend.handleheight : 0.7 # the height of the legend handle in fraction of fontsize 292 | #legend.handletextsep : 0.02 # deprecated; the space between the legend line and legend text 293 | #legend.handletextpad : 0.8 # the space between the legend line and legend text in fraction of fontsize 294 | #legend.axespad : 0.02 # deprecated; the border between the axes and legend edge 295 | #legend.borderaxespad : 0.5 # the border between the axes and legend edge in fraction of fontsize 296 | #legend.columnspacing : 2. 
# the border between the axes and legend edge in fraction of fontsize 297 | #legend.shadow : False 298 | #legend.frameon : True # whether or not to draw a frame around legend 299 | 300 | ### FIGURE 301 | # See http://matplotlib.org/api/figure_api.html#matplotlib.figure.Figure 302 | figure.figsize : 11, 8 # figure size in inches 303 | figure.dpi : 100 # figure dots per inch 304 | #figure.facecolor : 0.75 # figure facecolor; 0.75 is scalar gray 305 | #figure.edgecolor : white # figure edgecolor 306 | #figure.autolayout : False # When True, automatically adjust subplot 307 | # parameters to make the plot fit the figure 308 | 309 | # The figure subplot parameters. All dimensions are a fraction of the 310 | # figure width or height 311 | #figure.subplot.left : 0.125 # the left side of the subplots of the figure 312 | #figure.subplot.right : 0.9 # the right side of the subplots of the figure 313 | #figure.subplot.bottom : 0.1 # the bottom of the subplots of the figure 314 | #figure.subplot.top : 0.9 # the top of the subplots of the figure 315 | #figure.subplot.wspace : 0.2 # the amount of width reserved for blank space between subplots 316 | #figure.subplot.hspace : 0.2 # the amount of height reserved for white space between subplots 317 | 318 | ### IMAGES 319 | #image.aspect : equal # equal | auto | a number 320 | #image.interpolation : bilinear # see help(imshow) for options 321 | #image.cmap : jet # gray | jet etc... 322 | #image.lut : 256 # the size of the colormap lookup table 323 | #image.origin : upper # lower | upper 324 | #image.resample : False 325 | 326 | ### CONTOUR PLOTS 327 | #contour.negative_linestyle : dashed # dashed | solid 328 | 329 | ### Agg rendering 330 | ### Warning: experimental, 2008/10/10 331 | #agg.path.chunksize : 0 # 0 to disable; values in the range 332 | # 10000 to 100000 can improve speed slightly 333 | # and prevent an Agg rendering failure 334 | # when plotting very large data sets, 335 | # especially if they are very gappy. 
336 | # It may cause minor artifacts, though. 337 | # A value of 20000 is probably a good 338 | # starting point. 339 | ### SAVING FIGURES 340 | #path.simplify : True # When True, simplify paths by removing "invisible" 341 | # points to reduce file size and increase rendering 342 | # speed 343 | #path.simplify_threshold : 0.1 # The threshold of similarity below which 344 | # vertices will be removed in the simplification 345 | # process 346 | #path.snap : True # When True, rectilinear axis-aligned paths will be snapped to 347 | # the nearest pixel when certain criteria are met. When False, 348 | # paths will never be snapped. 349 | 350 | # the default savefig params can be different from the display params 351 | # Eg, you may want a higher resolution, or to make the figure 352 | # background white 353 | savefig.dpi : 300 # figure dots per inch 354 | #savefig.facecolor : white # figure facecolor when saving 355 | #savefig.edgecolor : white # figure edgecolor when saving 356 | #savefig.format : png # png, ps, pdf, svg 357 | #savefig.bbox : standard # 'tight' or 'standard'. 358 | #savefig.pad_inches : 0.1 # Padding to be used when bbox is set to 'tight' 359 | 360 | # tk backend params 361 | #tk.window_focus : False # Maintain shell focus for TkAgg 362 | 363 | # ps backend params 364 | #ps.papersize : letter # auto, letter, legal, ledger, A0-A10, B0-B10 365 | #ps.useafm : False # use of afm fonts, results in small files 366 | #ps.usedistiller : False # can be: None, ghostscript or xpdf 367 | # Experimental: may produce smaller files. 
368 | # xpdf intended for production of publication quality files, 369 | # but requires ghostscript, xpdf and ps2eps 370 | #ps.distiller.res : 6000 # dpi 371 | #ps.fonttype : 3 # Output Type 3 (Type3) or Type 42 (TrueType) 372 | 373 | # pdf backend params 374 | #pdf.compression : 6 # integer from 0 to 9 375 | # 0 disables compression (good for debugging) 376 | #pdf.fonttype : 3 # Output Type 3 (Type3) or Type 42 (TrueType) 377 | 378 | # svg backend params 379 | #svg.image_inline : True # write raster image data directly into the svg file 380 | #svg.image_noscale : False # suppress scaling of raster data embedded in SVG 381 | #svg.fonttype : 'path' # How to handle SVG fonts: 382 | # 'none': Assume fonts are installed on the machine where the SVG will be viewed. 383 | # 'path': Embed characters as paths -- supported by most SVG renderers 384 | # 'svgfont': Embed characters as SVG fonts -- supported only by Chrome, 385 | # Opera and Safari 386 | 387 | # docstring params 388 | #docstring.hardcopy = False # set this when you want to generate hardcopy docstring 389 | 390 | # Set the verbose flags. This controls how much information 391 | # matplotlib gives you at runtime and where it goes. The verbosity 392 | # levels are: silent, helpful, debug, debug-annoying. Any level is 393 | # inclusive of all the levels below it. If your setting is "debug", 394 | # you'll get all the debug and helpful messages. When submitting 395 | # problems to the mailing-list, please set verbose to "helpful" or "debug" 396 | # and paste the output into your report. 397 | # 398 | # The "fileo" gives the destination for any calls to verbose.report. 399 | # These objects can a filename, or a filehandle like sys.stdout. 400 | # 401 | # You can override the rc default verbosity from the command line by 402 | # giving the flags --verbose-LEVEL where LEVEL is one of the legal 403 | # levels, eg --verbose-helpful. 
404 | # 405 | # You can access the verbose instance in your code 406 | # from matplotlib import verbose. 407 | #verbose.level : silent # one of silent, helpful, debug, debug-annoying 408 | #verbose.fileo : sys.stdout # a log filename, sys.stdout or sys.stderr 409 | 410 | # Event keys to interact with figures/plots via keyboard. 411 | # Customize these settings according to your needs. 412 | # Leave the field(s) empty if you don't need a key-map. (e.g., fullscreen : '') 413 | 414 | #keymap.fullscreen : f # toggling 415 | #keymap.home : h, r, home # home or reset mnemonic 416 | #keymap.back : left, c, backspace # forward / backward keys to enable 417 | #keymap.forward : right, v # left handed quick navigation 418 | #keymap.pan : p # pan mnemonic 419 | #keymap.zoom : o # zoom mnemonic 420 | #keymap.save : s # saving current figure 421 | #keymap.quit : ctrl+w # close the current figure 422 | #keymap.grid : g # switching on/off a grid in current axes 423 | #keymap.yscale : l # toggle scaling of y-axes ('log'/'linear') 424 | #keymap.xscale : L, k # toggle scaling of x-axes ('log'/'linear') 425 | #keymap.all_axes : a # enable all axes 426 | 427 | ###ANIMATION settings 428 | #animation.writer : ffmpeg # MovieWriter 'backend' to use 429 | #animation.codec : mp4 # Codec to use for writing movie 430 | #animation.bitrate: -1 # Controls size/quality tradeoff for movie. 431 | # -1 implies let utility auto-determine 432 | #animation.frame_format: 'png' # Controls frame format used by temp files 433 | #animation.ffmpeg_path: 'ffmpeg' # Path to ffmpeg binary. Without full path 434 | # $PATH is searched 435 | #animation.ffmpeg_args: '' # Additional arguments to pass to ffmpeg 436 | #animation.mencoder_path: 'ffmpeg' # Path to mencoder binary.
Without full path 437 | # $PATH is searched 438 | #animation.mencoder_args: '' # Additional arugments to pass to mencoder -------------------------------------------------------------------------------- /pyClickModels/DBN.pyx: -------------------------------------------------------------------------------- 1 | # cython: linetrace=True 2 | 3 | import gzip 4 | import os 5 | import time 6 | from glob import glob 7 | 8 | import ujson 9 | 10 | from cython.operator cimport dereference, postincrement 11 | from libc.stdlib cimport RAND_MAX, rand, srand 12 | from libc.time cimport time as ctime 13 | from libcpp.string cimport string 14 | from libcpp.unordered_map cimport unordered_map 15 | from libcpp.vector cimport vector 16 | 17 | from pyClickModels.jsonc cimport (json_object, json_object_array_get_idx, 18 | json_object_array_length, 19 | json_object_get_int, json_object_get_string, 20 | json_object_object_get_ex, json_object_put, 21 | json_tokener_parse, lh_entry, lh_table) 22 | 23 | # Start by setting the seed for the random values required for initalizing the DBN 24 | # parameters. 25 | SEED = ctime(NULL) 26 | srand(SEED) 27 | 28 | 29 | cdef class Factor: 30 | """ 31 | Helper class to implement the Factor component as discussed in: 32 | 33 | https://clickmodels.weebly.com/uploads/5/2/2/5/52257029/mc2015-clickmodels.pdf 34 | 35 | page 37 equation 4.43 36 | 37 | Args 38 | ---- 39 | r: int 40 | Rank position in search results. 41 | last_r: int 42 | Last observed click or purchase from search results. 43 | click: bint 44 | purchase: bint 45 | alpha: float 46 | Updated values of alpha. 47 | sigma: float 48 | Updated values of sigma. 49 | gamma: float 50 | Updated value of gamma 51 | cr: float 52 | Conversion Rate of current document in session. 53 | vector[float] e_r_vector_given_CP* 54 | Probability that document at position r was examined (E_r=1) given clicks 55 | and purchases. 
56 | vector[float] cp_vector_given_e* 57 | Probability of observing Clicks and Purchases at positions greater than 58 | r given that position r + 1 was examined. 59 | """ 60 | # Use cinit instead of __cinit__ so to send pointers as input. 61 | cdef cinit( 62 | self, 63 | unsigned int r, 64 | unsigned int last_r, 65 | bint click, 66 | bint purchase, 67 | float alpha, 68 | float sigma, 69 | float gamma, 70 | float cr, 71 | vector[float] *e_r_vector_given_CP, 72 | vector[float] *cp_vector_given_e 73 | ): 74 | self.r = r 75 | self.last_r = last_r 76 | self.alpha = alpha 77 | self.sigma = sigma 78 | self.gamma = gamma 79 | self.click = click 80 | self.purchase = purchase 81 | self.cr = cr 82 | self.e_r_vector_given_CP = e_r_vector_given_CP 83 | self.cp_vector_given_e = cp_vector_given_e 84 | 85 | cdef float compute_factor(self, bint x, bint y, bint z): 86 | """ 87 | Responsible for computing the following equation: 88 | 89 | P(E_r = x, S_r = y, E_{r+1} = z, C_{>=r+1}, P_{>=r+1} | C_{r},P_{>r} | E_{r+1}) 131 | if not z: 132 | if self.last_r >= self.r + 1: 133 | return 0 134 | else: 135 | if self.r < self.cp_vector_given_e[0].size(): 136 | result *= self.cp_vector_given_e[0][self.r] 137 | # P(E_r=x | Crand() / RAND_MAX 173 | return &self.gamma_param 174 | elif param == b'alpha': 175 | tmp = &self.alpha_params 176 | else: 177 | # param = b'sigma': 178 | tmp = &self.sigma_params 179 | 180 | # query not in map 181 | if tmp[0].find(query[0]) == tmp[0].end(): 182 | # using c rand function as it's ~ 15 - 30 times faster than Python's random 183 | tmp[0][query[0]][doc[0]] = rand() / RAND_MAX 184 | # query is in map but document is not 185 | elif tmp[0][query[0]].find(doc[0]) == tmp[0][query[0]].end(): 186 | tmp[0][query[0]][doc[0]] = rand() / RAND_MAX 187 | 188 | return &tmp[0][query[0]][doc[0]] 189 | 190 | cdef string get_search_context_string(self, lh_table *tbl): 191 | """ 192 | In pyClickModels, the input data can contain not only the search the user 193 | inserted but also 
more information that describes the context of the search, 194 | such as the region of user, their favorite brands or average purchasing price 195 | and so on. 196 | 197 | The computation of Judgments happens, therefore, not only on top of the search 198 | term but also on the context at which the search was made. 199 | 200 | This method combines all those keys together so the optimization happens on 201 | a single string as the final query. 202 | 203 | Args 204 | ---- 205 | search_keys: lh_table 206 | Context at which search happened, expressed in JSON. Example: 207 | `{"search_term": "query", "region": "northeast", "avg_ticket": 20}` 208 | 209 | Returns 210 | ------- 211 | final_query: str 212 | string with sorted values joined by the `_` character. 213 | """ 214 | cdef: 215 | string result 216 | char *k 217 | json_object *v 218 | lh_entry *entry = tbl.head 219 | 220 | k = entry.k 221 | v = entry.v 222 | # CPython now optimizes `+` operations. It's expected Cython will have the same 223 | # compilation rules. 224 | result = string(k) + string(b':') + string(json_object_get_string(v)) 225 | 226 | entry = entry.next 227 | while entry: 228 | k = entry.k 229 | v = entry.v 230 | # Stores keys and values separated by ":" and then by "|". This is done so 231 | # there's a base vale for the input query as expressed by its complete 232 | # context (context here means possible keys that discriminate the search 233 | # such as the region of user, favorite brand, average ticket and so on. 234 | result = ( 235 | result + string(b'|') + string(k) + string(b':') + 236 | string(json_object_get_string(v)) 237 | ) 238 | entry = entry.next 239 | return result 240 | 241 | cdef void compute_cr(self, string *query, json_object *sessions, 242 | unordered_map[string, unordered_map[string, float]] *cr_dict): 243 | """ 244 | pyClickModels can also consider data related to purchases events. 
This method 245 | computes the conversion rate (cr) that each document had on each observed 246 | query context. 247 | 248 | Args 249 | ---- 250 | query: *string 251 | sessions: *json_object 252 | List of session ids where each session contains all documents a given user 253 | interacted with along with clicks and purchases 254 | cr_dict: unordered_map[string, float]] 255 | Map of documents and their respective conversion rates for each specific 256 | query. 257 | """ 258 | # If query is already available on cr_dict then it's not required to be 259 | # processed again. 260 | if cr_dict[0].find(query[0]) != cr_dict[0].end(): 261 | return 262 | 263 | cdef: 264 | size_t nsessions = json_object_array_length(sessions) 265 | size_t nclicks 266 | json_object *jso_session 267 | json_object *clickstream 268 | json_object *doc_data 269 | json_object *tmp_jso 270 | string doc 271 | bint click 272 | bint purchase 273 | unsigned int i 274 | unsigned int j 275 | vector[int] vec 276 | unordered_map[string, vector[int]] tmp_cr 277 | unordered_map[string, vector[int]].iterator it 278 | float cr 279 | 280 | for i in range(nsessions): 281 | jso_session = json_object_array_get_idx(sessions, i) 282 | json_object_object_get_ex(jso_session, b'session', &clickstream) 283 | 284 | nclicks = json_object_array_length(clickstream) 285 | 286 | for j in range(nclicks): 287 | doc_data = json_object_array_get_idx(clickstream, j) 288 | 289 | json_object_object_get_ex(doc_data, b'doc', &tmp_jso) 290 | doc = json_object_get_string(tmp_jso) 291 | 292 | json_object_object_get_ex(doc_data, b'click', &tmp_jso) 293 | click = json_object_get_int(tmp_jso) 294 | 295 | json_object_object_get_ex(doc_data, b'purchase', &tmp_jso) 296 | purchase = json_object_get_int(tmp_jso) 297 | 298 | # First time seeing the document. Prepare a mapping to store total 299 | # purchases and total times the document appeared on a given query 300 | # across all sessions. 
301 | if tmp_cr.find(doc) == tmp_cr.end(): 302 | tmp_cr[doc] = vector[int](2) 303 | tmp_cr[doc][0] = 0 304 | tmp_cr[doc][1] = 0 305 | 306 | if purchase: 307 | tmp_cr[doc][0] += 1 308 | 309 | tmp_cr[doc][1] += 1 310 | 311 | it = tmp_cr.begin() 312 | while(it != tmp_cr.end()): 313 | cr = dereference(it).second[0] / dereference(it).second[1] 314 | cr_dict[0][query[0]][dereference(it).first] = cr 315 | postincrement(it) 316 | 317 | cdef vector[float] build_e_r_vector( 318 | self, 319 | json_object *clickstream, 320 | string *query, 321 | unordered_map[string, float] *cr_dict, 322 | ): 323 | """ 324 | Computes the probability of each document in user session being examined. 325 | 326 | The equation implemented is: 327 | 328 | $P(E_{r+1}=1) = \\epsilon_r \\gamma \\left((1 - \\alpha_{uq}) + 329 | (1 - \\sigma_{uq})(1 - cr_{uq})\\alpha_{uq} \\right)$ 330 | 331 | Args 332 | ---- 333 | clickstream: json_object * 334 | JSON obect representing the user clickstream. Example: 335 | [ 336 | {"doc": "doc0", "click": 0, "purchase": 0}, 337 | {"doc": "doc1", "click": 1, "purchase": 0} 338 | ] 339 | query: string 340 | cr_dict: unordered_map[string, float] * 341 | Conversion rates of each document for a given query. 
Example: 342 | {"doc0": 0.2, "doc1": 0.51} 343 | 344 | Returns 345 | ------- 346 | e_r_vector: vector[float] 347 | vector to receive final probabilities 348 | """ 349 | cdef: 350 | size_t total_docs = json_object_array_length(clickstream) 351 | string doc 352 | unsigned int r 353 | json_object *tmp 354 | float *alpha 355 | float *beta 356 | float *gamma 357 | float cr 358 | float e_r_next 359 | # Add +1 to total_docs to compute P(E_{r+1}) 360 | vector[float] e_r_vector = vector[float](total_docs + 1) 361 | 362 | # Probability of Examination at r=0 (first document in search page results) 363 | # is always 100% 364 | e_r_vector[0] = 1 365 | 366 | # Compute P(E_{r+1}) so add +1 to the total docs 367 | for r in range(1, total_docs + 1): 368 | json_object_object_get_ex( 369 | json_object_array_get_idx(clickstream, r - 1), 370 | b'doc', 371 | &tmp 372 | ) 373 | doc = json_object_get_string(tmp) 374 | alpha = self.get_param(b'alpha', query, &doc) 375 | sigma = self.get_param(b'sigma', query, &doc) 376 | gamma = self.get_param(b'gamma') 377 | cr = dereference(cr_dict)[doc] 378 | 379 | e_r_next = (e_r_vector[r - 1] * gamma[0] * ((1 - sigma[0]) * (1 - cr) * 380 | alpha[0] + (1 - alpha[0]))) 381 | e_r_vector[r] = e_r_next 382 | return e_r_vector 383 | 384 | cdef vector[float] build_X_r_vector(self, json_object *clickstream, string *query): 385 | """ 386 | X_r is given by P(C_{\\geq r} \\mid E_r=1). It extends for the probability of 387 | click on any rank starting from current until last one. This vector is also 388 | used in the EM optimization process. 389 | 390 | The probability of click after the very last sku is considered zero. This 391 | allows to build the `X_r` vector recursively. 
392 | 393 | The equation is: 394 | 395 | X{_r} = P(C_{\\geq r} \\mid E_r=1) &= 396 | &= \\alpha_{uq} + (1 - \\alpha_{uq})\\gamma X_{r+1} 397 | 398 | Args 399 | ---- 400 | clickstream: *json_object 401 | Session clickstream (clicks and purchases) 402 | query: *string 403 | """ 404 | cdef: 405 | size_t total_docs = json_object_array_length(clickstream) 406 | unsigned int r 407 | string doc 408 | # Add one to the length because of the zero value added for position 409 | # N + 1 where N is the amount of documents returned in the search page. 410 | vector[float] X_r_vector = vector[float](total_docs + 1) 411 | json_object *tmp 412 | float X_r_1 413 | float X_r 414 | float *alpha 415 | float *beta 416 | float *gamma 417 | 418 | # Probability of clicks at positions greater than the last document in results 419 | # page is zero. 420 | X_r_vector[total_docs] = 0 421 | gamma = self.get_param(b'gamma') 422 | 423 | for r in range(total_docs - 1, -1, -1): 424 | json_object_object_get_ex( 425 | json_object_array_get_idx(clickstream, r), 426 | b'doc', 427 | &tmp 428 | ) 429 | doc = json_object_get_string(tmp) 430 | alpha = self.get_param(b'alpha', query, &doc) 431 | 432 | X_r_1 = X_r_vector[r + 1] 433 | X_r = alpha[0] + (1 - alpha[0]) * gamma[0] * X_r_1 434 | X_r_vector[r] = X_r 435 | return X_r_vector 436 | 437 | cdef vector[float] build_e_r_vector_given_CP(self, json_object *clickstream, 438 | unsigned int idx, string *query): 439 | """ 440 | Computes the probability that a given document was examined given the array of 441 | previous clicks and purchases. 
442 | 443 | Mathematically: P(E_r = 1 | C_{json_object_get_int(tmp) 495 | 496 | json_object_object_get_ex( 497 | json_object_array_get_idx(clickstream, r), 498 | b'purchase', 499 | &tmp 500 | ) 501 | purchase = json_object_get_int(tmp) 502 | 503 | alpha = self.get_param(b'alpha', query, &doc) 504 | sigma = self.get_param(b'sigma', query, &doc) 505 | 506 | if purchase: 507 | return e_r_vector_given_CP 508 | elif click: 509 | e_r_vector_given_CP[r + 1 - idx] = (1 - sigma[0]) * gamma[0] 510 | else: 511 | e_r_vector_given_CP[r + 1 - idx] = ( 512 | (gamma[0] * (1 - alpha[0]) * e_r_vector_given_CP[r - idx]) / 513 | (1 - alpha[0] * e_r_vector_given_CP[r - idx]) 514 | ) 515 | return e_r_vector_given_CP 516 | 517 | cdef float compute_cp_p( 518 | self, 519 | json_object *clickstream, 520 | unsigned int idx, 521 | string *query, 522 | vector[float] *e_r_array_given_CP, 523 | unordered_map[string, float] *cr_dict 524 | ): 525 | """ 526 | Helper function that computes the probability of observing Clicks and Purchases 527 | at positions greater than r given that position r + 1 was examined. 528 | 529 | Mathematically: 530 | 531 | P(C_{>= r+1}, P_{>= r+1} | E_{r+1}) 532 | 533 | Args 534 | ---- 535 | session: *json_object 536 | Customer's clickstream. 537 | idx: unsigned int 538 | Index from where to start slicing json session 539 | query: *string 540 | cr_dict: unordered_map[string, float] *cr_dict 541 | Conversion Rate (CR) of documents for current query 542 | e_r_array_given_CP: vector[float] 543 | Probability of document being examined at position r given Clicks and 544 | Purchases observed before r. 545 | 546 | Returns 547 | ------- 548 | cp_p: float 549 | Computes the probability of observing Clicks and Purchases at positions 550 | greater than r given that r + 1 was examined. 
551 | """ 552 | cdef: 553 | size_t total_docs = json_object_array_length(clickstream) 554 | unsigned int r 555 | string doc 556 | float *alpha 557 | bint click 558 | bint purchase 559 | json_object *tmp 560 | float cp_p = 1 561 | 562 | for r in range(idx, total_docs): 563 | json_object_object_get_ex( 564 | json_object_array_get_idx(clickstream, r), 565 | b'doc', 566 | &tmp 567 | ) 568 | doc = json_object_get_string(tmp) 569 | 570 | json_object_object_get_ex( 571 | json_object_array_get_idx(clickstream, r), 572 | b'click', 573 | &tmp 574 | ) 575 | click = json_object_get_int(tmp) 576 | 577 | json_object_object_get_ex( 578 | json_object_array_get_idx(clickstream, r), 579 | b'purchase', 580 | &tmp 581 | ) 582 | purchase = json_object_get_int(tmp) 583 | 584 | alpha = self.get_param(b'alpha', query, &doc) 585 | 586 | # Subtract `idx` from `r` because the input `e_r_array_given_CP` 587 | # should always be counted from the beginning (despite the slicing in 588 | # sessions, this variable should still be counted as if the new session 589 | # is not a slice of any sort). 590 | if purchase: 591 | cp_p *= cr_dict[0][doc] * alpha[0] * e_r_array_given_CP[0][r - idx] 592 | elif click: 593 | cp_p *= ( 594 | (1 - cr_dict[0][doc]) * alpha[0] * e_r_array_given_CP[0][r - idx] 595 | ) 596 | else: 597 | cp_p *= 1 - alpha[0] * e_r_array_given_CP[0][r - idx] 598 | return cp_p 599 | 600 | cdef vector[float] build_CP_vector_given_e( 601 | self, 602 | json_object *clickstream, 603 | string *query, 604 | unordered_map[string, float] *cr_dict 605 | ): 606 | """ 607 | Computes the probability that Clicks and Purchases will be observed at positions 608 | greater than r given that position at r+1 was examined. 
609 | 610 | Mathematically: 611 | 612 | P(C_{>r}, P_{>r} | E_{r+1}) 613 | 614 | This is equation (25) from blog post: 615 | 616 | https://towardsdatascience.com/how-to-extract-relevance-from-clickstream-data-2a870df219fb 617 | 618 | Args 619 | ---- 620 | clickstream: *json_object 621 | User clickstream 622 | query: *string 623 | cr_dict: *unordered_map[string, float] 624 | Conversion Rate (CR) of documents for current query 625 | 626 | Returns 627 | ------- 628 | cp_vector_given_e: vector[float] 629 | Probability of observing Clicks and Purchases at positions greater than 630 | r given that position r + 1 was examined. 631 | """ 632 | cdef: 633 | unsigned int r 634 | size_t total_docs = json_object_array_length(clickstream) 635 | vector[float] e_r_vector_given_CP 636 | vector[float] cp_vector_given_e = vector[float](total_docs - 1) 637 | 638 | # Subtract 1 as E_{r+1} is defined up to r - 1 documents 639 | for r in range(total_docs - 1): 640 | e_r_vector_given_CP = self.build_e_r_vector_given_CP(clickstream, r + 1, 641 | query) 642 | cp_vector_given_e[r] = self.compute_cp_p(clickstream, r + 1, query, 643 | &e_r_vector_given_CP, cr_dict) 644 | return cp_vector_given_e 645 | 646 | cdef int get_last_r(self, json_object *clickstream, const char *event=b'click'): 647 | """ 648 | Loops through all documents in session and find at which position the desired 649 | event happend. It can be either a 'click' or a 'purchase' (still, in DBN, if 650 | a purchase is observed then it automatically means it is the very last r 651 | observed). 652 | 653 | Args 654 | ---- 655 | session: *json_object 656 | User clickstream 657 | event: const char* 658 | Name of desired event to track. 659 | 660 | Returns 661 | ------- 662 | last_r: int 663 | Index at which the last desired event was observed. 
664 | """ 665 | cdef: 666 | unsigned int r 667 | size_t total_docs = json_object_array_length(clickstream) 668 | unsigned int idx = 0 669 | json_object *tmp 670 | bint value 671 | 672 | for r in range(total_docs): 673 | json_object_object_get_ex( 674 | json_object_array_get_idx(clickstream, r), 675 | event, 676 | &tmp 677 | ) 678 | value = json_object_get_int(tmp) 679 | if value: 680 | idx = r 681 | return idx 682 | 683 | cdef void update_tmp_alpha( 684 | self, 685 | int r, 686 | string *query, 687 | json_object *doc_data, 688 | vector[float] *e_r_vector, 689 | vector[float] *X_r_vector, 690 | int last_r, 691 | unordered_map[string, vector[float]] *tmp_alpha_param 692 | ): 693 | """ 694 | Updates the parameter alpha (attractiveness) by running the EM Algorithm. 695 | 696 | The equation for updating alpha is: 697 | 698 | \\alpha_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S_{uq}}\\left(c_r^{(s)} + 699 | \\left(1 - c_r^{(s)}\\right)\\left(1 - c_{>r}^{(s)}\\right) \\cdot 700 | \\frac{\\left(1 - \\epsilon_r^{(t)}\\right)\\alpha_{uq}^{(t)}}{\\left(1 - 701 | \\epsilon_r^{(t)}X_r^{(t)} \\right)} \\right)}{|S_{uq}|} 702 | 703 | Args 704 | ---- 705 | r: int 706 | Rank position. 707 | query: string* 708 | doc_data: json_object* 709 | JSON object describing specific document from the search results page 710 | in the clickstream of a specific user. 711 | e_r_vector: vector[float] 712 | Probability of Examination at position r. 713 | X_r_vector: vector[float] 714 | Probability of clicks at position greater than r given that position r 715 | was Examined (E=1). 716 | last_r: int 717 | Last position r where click or purchase is observed. 718 | tmp_alpha_param: unordered_map[string, vector[int]] 719 | Holds temporary data for updating the alpha parameter. 
720 | """ 721 | cdef: 722 | float *alpha 723 | string doc 724 | bint click 725 | json_object *tmp 726 | 727 | json_object_object_get_ex(doc_data, b'doc', &tmp) 728 | doc = json_object_get_string(tmp) 729 | 730 | json_object_object_get_ex(doc_data, b'click', &tmp) 731 | click = json_object_get_int(tmp) 732 | 733 | # doc not available yet. 734 | if tmp_alpha_param[0].find(doc) == tmp_alpha_param[0].end(): 735 | tmp_alpha_param[0][doc] = vector[float](2) 736 | tmp_alpha_param[0][doc][0] = 1 737 | tmp_alpha_param[0][doc][1] = 2 738 | 739 | if click: 740 | tmp_alpha_param[0][doc][0] += 1 741 | elif r > last_r: 742 | alpha = self.get_param(b'alpha', query, &doc) 743 | 744 | tmp_alpha_param[0][doc][0] += ( 745 | (1 - e_r_vector[0][r]) * alpha[0] / 746 | (1 - e_r_vector[0][r] * X_r_vector[0][r]) 747 | ) 748 | tmp_alpha_param[0][doc][1] += 1 749 | 750 | cdef void update_tmp_sigma( 751 | self, 752 | string *query, 753 | int r, 754 | json_object *doc_data, 755 | vector[float] *X_r_vector, 756 | int last_r, 757 | unordered_map[string, vector[float]] *tmp_sigma_param, 758 | ): 759 | """ 760 | Updates parameter sigma (satisfaction) by running the EM Algorithm. 761 | 762 | The equation for updating sigma is: 763 | 764 | \\sigma_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S^{[1, 0]}}\\frac{(1 - c_r^{(t)}) 765 | (1-p_r^{(t)})\\sigma_{uq}^{(t)}}{(1 - X_{r+1}\\cdot (1-\\sigma_{uq}^{(t)}) 766 | \\gamma^{(t)})}}{|S^{[1, 0]}|} 767 | 768 | Args 769 | ---- 770 | query: string* 771 | r: int 772 | Rank position. 773 | doc_data: json_object* 774 | Clickstream data at position r. 775 | X_r_vector: vector[float] 776 | Probability of clicks at position greater than r given that position r 777 | was Examined (E=1). 778 | last_r: int 779 | Last position r where click or purchase is observed. 
780 | """ 781 | cdef: 782 | float *sigma 783 | bint click 784 | json_object *tmp 785 | string doc 786 | 787 | json_object_object_get_ex(doc_data, b'doc', &tmp) 788 | doc = json_object_get_string(tmp) 789 | 790 | json_object_object_get_ex(doc_data, b'click', &tmp) 791 | click = json_object_get_int(tmp) 792 | 793 | json_object_object_get_ex(doc_data, b'purchase', &tmp) 794 | purchase = json_object_get_int(tmp) 795 | 796 | # doc not available yet. 797 | if tmp_sigma_param[0].find(doc) == tmp_sigma_param[0].end(): 798 | tmp_sigma_param[0][doc] = vector[float](2) 799 | tmp_sigma_param[0][doc][0] = 1 800 | tmp_sigma_param[0][doc][1] = 2 801 | 802 | # satisfaction is only defined for ranks where click or no purchase were 803 | # observed. 804 | if not click or purchase: 805 | return 806 | 807 | if r == last_r: 808 | sigma = self.get_param(b'sigma', query, &doc) 809 | gamma = self.get_param(b'gamma') 810 | 811 | tmp_sigma_param[0][doc][0] += ( 812 | sigma[0] / (1 - (X_r_vector[0][r + 1] * (1 - sigma[0]) * gamma[0])) 813 | ) 814 | tmp_sigma_param[0][doc][1] += 1 815 | 816 | cdef void update_tmp_gamma( 817 | self, 818 | int r, 819 | int last_r, 820 | json_object *doc_data, 821 | string *query, 822 | vector[float] *cp_vector_given_e, 823 | vector[float] *e_r_vector_given_CP, 824 | unordered_map[string, float] *cr_dict, 825 | vector[float] *tmp_gamma_param 826 | ): 827 | """ 828 | Updates the parameter gamma (persistence) by running the EM Algorithm. 829 | 830 | The equations for this parameter are considerably more complex than for 831 | parameters alpha and sigma. Using the Factor extension method to help out in 832 | the computation. 833 | 834 | 835 | Args 836 | ---- 837 | r: int 838 | Rank position. 839 | last_r: int 840 | Last rank where either click or purchase was observed. 841 | doc_data: json_object* 842 | JSON object with clickstream information of document at position r. 
843 | query: string* 844 | cp_vector_given_e: vector[float]* 845 | Probability of observing Clicks and Purchases at positions greater than 846 | r given that position r + 1 was examined. 847 | e_r_vector_given_CP: vector[float]* 848 | Probability that document at position r was examined (E_r=1) given clicks 849 | and purchases. 850 | cr_dict: unordered_map[string, float]* 851 | Conversion Rate of documents for respective query. 852 | tmp_gamma_param: vector[float]* 853 | Temporary updates for gamma. 854 | """ 855 | cdef: 856 | Factor factor 857 | bint i = 0 858 | bint j = 0 859 | bint k = 0 860 | float ESS_0 = 0 861 | float ESS_1 = 0 862 | float ESS_denominator = 0 863 | float alpha 864 | float sigma 865 | float gamma 866 | json_object *tmp 867 | string doc 868 | bint click 869 | bint purchase 870 | float cr 871 | 872 | json_object_object_get_ex(doc_data, b'doc', &tmp) 873 | doc = json_object_get_string(tmp) 874 | 875 | json_object_object_get_ex(doc_data, b'click', &tmp) 876 | click = json_object_get_int(tmp) 877 | 878 | json_object_object_get_ex(doc_data, b'purchase', &tmp) 879 | purchase = json_object_get_int(tmp) 880 | 881 | alpha = self.get_param(b'alpha', query, &doc)[0] 882 | sigma = self.get_param(b'sigma', query, &doc)[0] 883 | gamma = self.get_param(b'gamma')[0] 884 | 885 | cr = cr_dict[0][doc] 886 | 887 | factor = Factor() 888 | factor.cinit( 889 | r, 890 | last_r, 891 | click, 892 | purchase, 893 | alpha, 894 | sigma, 895 | gamma, 896 | cr, 897 | e_r_vector_given_CP, 898 | cp_vector_given_e 899 | ) 900 | 901 | # Loop through all possible values of x, y and z, where each is an integer 902 | # boolean. 
903 | for i in range(2): 904 | for j in range(2): 905 | for k in range(2): 906 | ESS_denominator += factor.compute_factor(i, j, k) 907 | 908 | if not ESS_denominator: 909 | ESS_0, ESS_1 = 0, 0 910 | else: 911 | ESS_0 = factor.compute_factor(1, 0, 0) / ESS_denominator 912 | ESS_1 = factor.compute_factor(1, 0, 1) / ESS_denominator 913 | 914 | tmp_gamma_param[0][0] += ESS_1 915 | tmp_gamma_param[0][1] += ESS_0 + ESS_1 916 | 917 | cdef void update_alpha_param( 918 | self, 919 | string *query, 920 | unordered_map[string, vector[float]] *tmp_alpha_param, 921 | ): 922 | """ 923 | After all sessions for a given query have been analyzed, the new values of 924 | alpha in `tmp_alpha_param` are copied into `alpha_params` where they'll 925 | be used into new optimization iterations. 926 | 927 | Args 928 | ---- 929 | query: string* 930 | tmp_alpha_param: unordered_map[string, vector[float]] 931 | Optimized values for updating alpha 932 | """ 933 | cdef: 934 | unordered_map[string, vector[float]].iterator it = ( 935 | tmp_alpha_param[0].begin() 936 | ) 937 | string doc 938 | vector[float] value 939 | 940 | while(it != tmp_alpha_param[0].end()): 941 | doc = dereference(it).first 942 | value = dereference(it).second 943 | self.alpha_params[query[0]][doc] = value[0] / value[1] 944 | postincrement(it) 945 | 946 | cdef void update_sigma_param( 947 | self, 948 | string *query, 949 | unordered_map[string, vector[float]] *tmp_sigma_param, 950 | ): 951 | """ 952 | After all sessions for a given query have been analyzed, the new values of 953 | sigma in `tmp_sigma_param` are copied into `sigma_params` where they'll 954 | be used into new optimization iterations. 
955 | 956 | Args 957 | ---- 958 | query: string* 959 | tmp_sigma_param: unordered_map[string, vector[float]] 960 | Optimized values for updating sigma 961 | """ 962 | cdef: 963 | unordered_map[string, vector[float]].iterator it = ( 964 | tmp_sigma_param[0].begin() 965 | ) 966 | string doc 967 | vector[float] value 968 | 969 | while(it != tmp_sigma_param[0].end()): 970 | doc = dereference(it).first 971 | value = dereference(it).second 972 | self.sigma_params[query[0]][doc] = value[0] / value[1] 973 | postincrement(it) 974 | 975 | cdef void update_gamma_param( 976 | self, 977 | vector[float] *tmp_gamma_param 978 | ): 979 | """ 980 | After all sessions for a given query have been analyzed, the new value of 981 | gamma in `tmp_sigma_param` is copied into `gamma_param` where they'll 982 | be used into new optimization iterations. 983 | 984 | Args 985 | ---- 986 | tmp_gamma_param: vector[float]* 987 | Optimized values for updating sigma 988 | """ 989 | # Considered that a denominator of zero cannot happen. 990 | self.gamma_param = tmp_gamma_param[0][0] / tmp_gamma_param[0][1] 991 | 992 | cpdef void export_judgments(self, str output, str format_='NEWLINE_JSON'): 993 | """ 994 | After running the fit optimization process, exports judgment results to an 995 | external file in accordance to the selected input `format_`. Judgments are 996 | computed as: 997 | 998 | J_{uq} = P(\\alpha_{uq}) \\cdot P(\\sigma_{uq}) 999 | 1000 | where `u` represents the document and `q` the query. 1001 | 1002 | Args 1003 | ---- 1004 | output: str 1005 | Filepath where to save results. If `gz` is present in `output` then 1006 | compresses file. 1007 | format_: str 1008 | Sets how to write result file. 
Options includes: 1009 | - NEWLINE_JSON: writes in JSON format, like: 1010 | {'query0': {'doc0': 0.3, 'doc1': 0.2}} 1011 | {'query1': {'doc0': 0.76, 'doc1': 0.41}} 1012 | """ 1013 | cdef: 1014 | unordered_map[string, unordered_map[string, float]].iterator it 1015 | unordered_map[string, float].iterator doc_it 1016 | string query 1017 | string doc 1018 | float alpha 1019 | float sigma 1020 | dict tmp 1021 | 1022 | file_manager = gzip.GzipFile if '.gz' in output else open 1023 | 1024 | with file_manager(output, 'wb') as f: 1025 | it = self.alpha_params.begin() 1026 | while(it != self.alpha_params.end()): 1027 | query = dereference(it).first 1028 | tmp = {} 1029 | tmp[query] = {} 1030 | doc_it = self.alpha_params[query].begin() 1031 | while(doc_it != self.alpha_params[query].end()): 1032 | doc = dereference(doc_it).first 1033 | alpha = dereference(doc_it).second 1034 | sigma = self.sigma_params[query][doc] 1035 | tmp[query][doc] = alpha * sigma 1036 | postincrement(doc_it) 1037 | f.write(ujson.dumps(tmp).encode() + '\n'.encode()) 1038 | postincrement(it) 1039 | 1040 | cpdef void fit(self, str input_folder, int iters=30): 1041 | """ 1042 | Reads through data of queries and customers sessions to find appropriate values 1043 | of `\\alpha_{uq}` (attractiveness), `\\sigma_{uq}` (satisfaction) and `\\gama` 1044 | (persistence) where `u` represents the document and `q` the input query. 1045 | 1046 | Args 1047 | ---- 1048 | input_folder: str 1049 | Path where gzipped clickstream files are located. Each file. Here's an 1050 | example of the expected input data on each compressed file: 1051 | 1052 | `{ 1053 | "search_keys": { 1054 | "search_term": "query", 1055 | "key0": "value0" 1056 | }, 1057 | "judgment_keys": [ 1058 | { 1059 | "session": [ 1060 | {"click": 0, "purchase": 0, "doc": "document0"} 1061 | ] 1062 | } 1063 | ] 1064 | }` 1065 | 1066 | `search_keys` contains all keys that describe and are associated to the 1067 | search term as inserted by the user. 
`key0` for instance could mean any 1068 | further description of context such as the region of user, their 1069 | preferences among many possibilities. 1070 | iters: int 1071 | Total iterations the fitting method should run in the optimization 1072 | process. The implemented algorithm is Expectation-Maximization which means 1073 | the more iterations there are the more guaranteed it is values will 1074 | converge. 1075 | """ 1076 | cdef: 1077 | list files = glob(os.path.join(input_folder, 'jud*')) 1078 | # row has to be bytes so Cython can interchange its value between char* and 1079 | # bytes 1080 | bytes row 1081 | json_object *row_json 1082 | json_object *search_keys 1083 | json_object *sessions 1084 | json_object *session 1085 | json_object *clickstream 1086 | lh_table *search_keys_tbl 1087 | int c = 0 1088 | unsigned int i = 0 1089 | string query 1090 | unordered_map[string, vector[float]] tmp_alpha_param 1091 | unordered_map[string, vector[float]] tmp_sigma_param 1092 | vector[float] tmp_gamma_param = vector[float](2) 1093 | unordered_map[string, unordered_map[string, float]] cr_dict 1094 | 1095 | for _ in range(iters): 1096 | print('running iteration: ', _) 1097 | for file_ in files: 1098 | for row in gzip.GzipFile(file_, 'rb'): 1099 | # Start by erasing the temporary container of the parameters as 1100 | # each new query requires a new computation in the EM algorithm. 
1101 | self.restart_tmp_params(&tmp_alpha_param, &tmp_sigma_param, 1102 | &tmp_gamma_param) 1103 | 1104 | row_json = json_tokener_parse(row) 1105 | 1106 | json_object_object_get_ex(row_json, b'search_keys', &search_keys) 1107 | search_keys_tbl = json_object_get_object(search_keys) 1108 | 1109 | query = self.get_search_context_string(search_keys_tbl) 1110 | json_object_object_get_ex(row_json, b'judgment_keys', &sessions) 1111 | self.compute_cr(&query, sessions, &cr_dict) 1112 | 1113 | for i in range(json_object_array_length(sessions)): 1114 | session = json_object_array_get_idx(sessions, i) 1115 | json_object_object_get_ex(session, b'session', &clickstream) 1116 | 1117 | self.update_tmp_params(clickstream, &tmp_alpha_param, 1118 | &tmp_sigma_param, &tmp_gamma_param, 1119 | &query, &cr_dict[query]) 1120 | 1121 | self.update_alpha_param(&query, &tmp_alpha_param) 1122 | self.update_sigma_param(&query, &tmp_sigma_param) 1123 | self.update_gamma_param(&tmp_gamma_param) 1124 | json_object_put(row_json) 1125 | 1126 | cdef void update_tmp_params( 1127 | self, 1128 | json_object *clickstream, 1129 | unordered_map[string, vector[float]] *tmp_alpha_param, 1130 | unordered_map[string, vector[float]] *tmp_sigma_param, 1131 | vector[float] *tmp_gamma_param, 1132 | string *query, 1133 | unordered_map[string, float] *cr_dict 1134 | ): 1135 | """ 1136 | For each session, applies the EM algorithm and save temporary results into 1137 | the tmp input parameters. 1138 | 1139 | Args 1140 | ---- 1141 | clickstream: json_object* 1142 | JSON containing documents users observed on search results page and their 1143 | interaction with each item. Example: 1144 | 1145 | `[ 1146 | {"doc": "doc0", "click": 0, "purchase": 0}, 1147 | {"doc": "doc1", "click": 1, "purchase": 1} 1148 | ]` 1149 | 1150 | tmp_alpha_param: vector[float]* 1151 | Holds temporary values for adapting each variable alpha. 1152 | tmp_sigma_param: vector[float]* 1153 | Holds temporary values for adapting each variable sigma. 
1154 | tmp_gamma_param: vector[float]* 1155 | Holds temporary values for adapting gamma. 1156 | query: string* 1157 | cr_dict: unordered_map[string, float]* 1158 | Conversion Rates of each document for the current query. 1159 | """ 1160 | cdef: 1161 | json_object *doc_data 1162 | vector[float] e_r_vector 1163 | vector[float] X_r_vector 1164 | vector[float] e_r_vector_given_CP 1165 | vector[float] cp_vector_given_e 1166 | unsigned int last_r 1167 | unsigned int r 1168 | 1169 | e_r_vector = self.build_e_r_vector(clickstream, query, cr_dict) 1170 | X_r_vector = self.build_X_r_vector(clickstream, query) 1171 | e_r_vector_given_CP = self.build_e_r_vector_given_CP(clickstream, 0, query) 1172 | cp_vector_given_e = self.build_CP_vector_given_e(clickstream, query, cr_dict) 1173 | # last clicked position 1174 | last_r = self.get_last_r(clickstream) 1175 | 1176 | for r in range(json_object_array_length(clickstream)): 1177 | doc_data = json_object_array_get_idx(clickstream, r) 1178 | self.update_tmp_alpha(r, query, doc_data, &e_r_vector, &X_r_vector, last_r, 1179 | tmp_alpha_param) 1180 | self.update_tmp_sigma(query, r, doc_data, &X_r_vector, last_r, 1181 | tmp_sigma_param) 1182 | self.update_tmp_gamma(r, last_r, doc_data, query, &cp_vector_given_e, 1183 | &e_r_vector_given_CP, cr_dict, tmp_gamma_param) 1184 | 1185 | cdef void restart_tmp_params( 1186 | self, 1187 | unordered_map[string, vector[float]] *tmp_alpha_param, 1188 | unordered_map[string, vector[float]] *tmp_sigma_param, 1189 | vector[float] *tmp_gamma_param 1190 | ): 1191 | """ 1192 | Re-creates temporary parameters to be used in the optimization process for each 1193 | query and step. 
1194 | """ 1195 | tmp_alpha_param[0].erase( 1196 | tmp_alpha_param[0].begin(), 1197 | tmp_alpha_param[0].end() 1198 | ) 1199 | tmp_sigma_param[0].erase( 1200 | tmp_sigma_param[0].begin(), 1201 | tmp_sigma_param[0].end() 1202 | ) 1203 | tmp_gamma_param[0][0] = 1 1204 | tmp_gamma_param[0][1] = 2 1205 | -------------------------------------------------------------------------------- /tests/test_cy_DBN.pyx: -------------------------------------------------------------------------------- 1 | import gzip 2 | import tempfile 3 | 4 | import ujson 5 | 6 | from cython.operator cimport dereference, postincrement 7 | from libcpp.string cimport string 8 | from libcpp.unordered_map cimport unordered_map 9 | from libcpp.vector cimport vector 10 | 11 | from pyClickModels.DBN cimport DBNModel, Factor 12 | 13 | from pyClickModels.DBN import DBN 14 | 15 | from pyClickModels.jsonc cimport (json_object, json_object_get_object, 16 | json_object_put, json_tokener_parse, 17 | lh_table) 18 | 19 | from conftest import build_DBN_test_data 20 | from numpy.testing import assert_allclose, assert_almost_equal 21 | 22 | ctypedef unordered_map[string, unordered_map[string, float]] dbn_param 23 | 24 | 25 | cdef string query = b'query' 26 | cdef dbn_param alpha_params 27 | cdef dbn_param sigma_params 28 | cdef float gamma_param 29 | 30 | alpha_params[query][b'doc0'] = 0.5 31 | alpha_params[query][b'doc1'] = 0.5 32 | alpha_params[query][b'doc2'] = 0.5 33 | 34 | sigma_params[query][b'doc0'] = 0.5 35 | sigma_params[query][b'doc1'] = 0.5 36 | sigma_params[query][b'doc2'] = 0.5 37 | 38 | gamma_param = 0.7 39 | 40 | 41 | cdef bint test_fit(): 42 | cdef: 43 | DBNModel model = DBN() 44 | unordered_map[string, unordered_map[string, float]].iterator it 45 | string query 46 | dict dquery 47 | string doc 48 | 49 | gamma, params, tmp_folder = build_DBN_test_data(users=30000, docs=6, queries=2) 50 | 51 | # print('expected value of sigma: ', params[0][0][1]) 52 | 53 | model.fit(tmp_folder.name, iters=10) 54 | # 
# print('model gamma ', model.gamma_param)
    # print('real gamma: ', gamma)

    # NOTE(review): with the assignment below commented out `it` is never
    # initialised before the loop reads it, and `extract_keys` yields string values
    # so the comparison against `'search_term': 0` presumably never matches.
    # Confirm whether this test still asserts anything.
    # it = model.alpha_params.begin()
    while(it != model.alpha_params.end()):
        # print(dereference(it).first)
        query = (dereference(it).first)
        dquery = extract_keys(query)

        if dquery == {'search_term': 0, 'region': 'north', 'favorite_size': 'L'}:
            # print(
            #    'model.alpha_params doc 0', model.alpha_params[
            #    b'search_term:0|region:north|favorite_size:L'][b'0']
            # )
            # print('params alpha ', params[0][0][0])

            # print(
            #    'model.sigma_params doc 0', model.sigma_params[
            #    b'search_term:0|region:north|favorite_size:L'][b'0']
            # )
            # print('params sigma ', params[0][0][1])

            try:
                assert_allclose(model.gamma_param, gamma, atol=.1)
                assert_allclose(
                    model.alpha_params[query][b'0'], params[0][0][0], atol=.15
                )
                assert_allclose(
                    model.sigma_params[query][b'0'], params[0][0][1], atol=.15
                )
            except AssertionError:
                return False

        postincrement(it)
    return True

cdef dict extract_keys(string result):
    """
    Turns a search context string such as b'search_term:query|key0:value0' into
    the dict {'search_term': 'query', 'key0': 'value0'}. Keys and values are str.
    """
    return dict(e.split(':') for e in str(bytes(result).decode()).split('|'))

cdef bint test_get_search_context_string():
    """
    Checks that `get_search_context_string` renders every search key, for both a
    single key and several keys.
    """
    cdef:
        DBNModel model = DBNModel()
        json_object *search_keys = json_tokener_parse(b"{'search_term': 'query'}")
        lh_table *tbl = json_object_get_object(search_keys)
        string result = model.get_search_context_string(tbl)
        dict expected = {'search_term': 'query'}
        dict r = extract_keys(result)

    # `result` owns a copy of the rendered string, so the first JSON tree can be
    # released before `search_keys` is pointed at the second one.
    json_object_put(search_keys)
    if not r == expected:
        return False

    search_keys = json_tokener_parse(
        b"{'search_term': 'query', 'key0': 'value0', 'key1': 'value1'}"
    )

    tbl = json_object_get_object(search_keys)
    # result is something like: b'search_term:query|key0:value0|key1:value1'
    result = model.get_search_context_string(tbl)
    json_object_put(search_keys)
    r = extract_keys(result)
    expected = {'search_term': 'query', 'key0': 'value0', 'key1': 'value1'}

    if not r == expected:
        return False
    return True


cdef bint test_compute_cr():
    """
    Checks the conversion rates computed from two sessions and that a second call
    for a query already present in `cr_dict` leaves the stored values untouched.
    """
    cdef:
        DBNModel model = DBNModel()
        string query = b'query'
        # cr_dict is like: {'query_term': {'doc0': 0.2, 'doc1: 0}}
        unordered_map[string, unordered_map[string, float]] cr_dict
        unordered_map[string, unordered_map[string, float]] expected
        const char *sessions = b"""
        [
            {
                'session': [
                    {"doc": "doc0", "click": 0, "purchase": 0},
                    {"doc": "doc1", "click": 1, "purchase": 0},
                    {"doc": "doc2", "click": 1, "purchase": 1}
                ]
            },
            {
                'session': [
                    {"doc": "doc0", "click": 0, "purchase": 0},
                    {"doc": "doc1", "click": 1, "purchase": 0},
                ]
            },
        ]
        """
        json_object *jso_sessions = json_tokener_parse(sessions)

    expected[query][b'doc0'] = 0
    expected[query][b'doc1'] = 0
    expected[query][b'doc2'] = 1

    model.compute_cr(&query, jso_sessions, &cr_dict)
    # Release the first tree before the pointer is reused below.
    json_object_put(jso_sessions)

    if not expected == cr_dict:
        return False

    # test if query is already available in cr_dict
    jso_sessions = json_tokener_parse('')
    model.compute_cr(&query, jso_sessions, &cr_dict)
    json_object_put(jso_sessions)
    if not expected == cr_dict:
        return False
    return True


cdef bint test_get_param():
    """
    Checks that `get_param` returns a value strictly between 0 and 1 for an unseen
    (query, doc) pair and that asking again returns the very same stored value.
    """
    cdef:
        string query = b'query'
        string doc = b'doc0'
        DBNModel model = DBNModel()
        float result
        float result2
        float result3

    result = model.get_param(b'alpha', &query, &doc)[0]
    # `not` binds tighter than `and`: without the parentheses only `result > 0`
    # was negated and the upper bound was never enforced.
    if not (0 < result < 1):
        return False

    model.alpha_params.erase(query)
    result2 = model.get_param(b'alpha', &query, &doc)[0]
    # NOTE(review): passes when either condition holds; presumably both were meant
    # to be required. Kept as written.
    if not ((0 < result2 < 1) or result != result2):
        return False

    result3 = model.get_param(b'alpha', &query, &doc)[0]
    if not result2 == result3:
        return False
    return True


cdef bint test_build_e_r_vector(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """
    Checks `build_e_r_vector` against precomputed values for a three document
    session.
    """
    cdef:
        const char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc1", "click": 1, "purchase": 0},'
            b'{"doc": "doc2", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        string query = b'query'
        vector[float] expected = [1, 0.4375, 0.1914, 0.0837]
        vector[float] result
        unordered_map[string, float] cr_dict
        DBNModel model = DBNModel()

    cr_dict[b'doc0'] = 0.5
    cr_dict[b'doc1'] = 0.5
    cr_dict[b'doc2'] = 0.5

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    result = model.build_e_r_vector(session, &query, &cr_dict)
    # `result` is a plain vector, so the session is released up front and is not
    # leaked when the comparison below fails.
    json_object_put(session)
    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False
    return True


cdef bint test_build_X_r_vector(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """
    Checks `build_X_r_vector` against precomputed values for a three document
    session.
    """
    cdef:
        const char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc1", "click": 1, "purchase": 0},'
            b'{"doc": "doc2", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        vector[float] expected = [0.73625, 0.675, 0.5, 0]
        vector[float] result
        string query = b'query'

        DBNModel model = DBNModel()

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    result = model.build_X_r_vector(session, &query)
    # Released before asserting so the failure path does not leak the session.
    json_object_put(session)
    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False
    return True


cdef bint test_build_e_r_vector_given_CP(dbn_param *alpha_params,
                                         dbn_param *sigma_params,
                                         float *gamma_param):
    cdef:
        char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1},'
            b'{"doc": "doc1", "click": 0, "purchase": 0}]'
        )
        json_object *session = json_tokener_parse(s)
        vector[float] expected = [1, 0.7, 0, 0]
        vector[float] result
        string query = b'query'
        DBNModel model = DBNModel()

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    result = model.build_e_r_vector_given_CP(session, 0, &query)

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 1, &query)
    expected = [1, 0, 0]

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 2, &query)
    expected = [1, 0.7]

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    s = (
        b'[{"doc": "doc0", "click": 0, "purchase": 0},'
        b'{"doc": "doc0", "click": 1, "purchase": 0},'
        b'{"doc": "doc1", "click": 0, "purchase": 0}]'
    )
    session = json_tokener_parse(s)
    expected = [1, 0.7, 0.35, 0.1484]

    result = model.build_e_r_vector_given_CP(session, 0, &query)

    try:
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False

    result = model.build_e_r_vector_given_CP(session, 1, &query)
    expected = [1, 0.35, 0.148484]

    try:
        assert_almost_equal(result, expected, decimal=4)
cdef bint test_build_cp_p(dbn_param *alpha_params):
    """Check ``DBNModel.compute_cp_p`` for second argument 0 and 1.

    A three-result session is parsed, every document gets a 0.5 entry in the
    ``cr_dict`` map and a fixed ``e_r_vector_given_CP`` is supplied. Returns True
    when both results match their expected value to four decimals, False on the
    first mismatch.
    """
    cdef:
        const char *raw_session = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1},'
            b'{"doc": "doc1", "click": 1, "purchase": 0}]'
        )
        json_object *session = json_tokener_parse(raw_session)
        vector[float] e_r_vector_given_CP = [1, 0.6, 0.3]
        unordered_map[string, float] cr_dict
        DBNModel model = DBNModel()
        string query = b'query'
        float expected
        float result
        int r

    cr_dict[b'doc0'] = 0.5
    cr_dict[b'doc1'] = 0.5
    cr_dict[b'doc2'] = 0.5

    model.alpha_params = alpha_params[0]

    # (second argument passed to compute_cp_p, expected result)
    for r, expected in ((0, 0.005625), (1, 0.0375)):
        result = model.compute_cp_p(
            session, r, &query, &e_r_vector_given_CP, &cr_dict
        )
        try:
            assert_almost_equal(result, expected, decimal=4)
        except AssertionError:
            return False

    json_object_put(session)
    return True
cdef bint test_build_CP_vector_given_e(dbn_param *alpha_params, dbn_param *sigma_params,
                                       float *gamma_param):
    """Check ``DBNModel.build_CP_vector_given_e`` on three parsed sessions.

    The model is loaded with the caller's parameters and every document gets a
    0.5 entry in ``cr_dict``. Returns True when each produced vector matches its
    expected values to four decimals, False on the first mismatch. Every parsed
    session is released with ``json_object_put`` on every exit path.
    """
    cdef:
        char *s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1}]'
        )
        json_object *session = json_tokener_parse(s)
        DBNModel model = DBNModel()
        vector[float] result
        vector[float] expected
        unordered_map[string, float] cr_dict
        # NOTE(review): the original body took `&query` without a visible local
        # declaration. It is declared here exactly like the sibling tests do;
        # confirm no module-level `query` with another value was intended.
        string query = b'query'

    cr_dict[b'doc0'] = 0.5
    cr_dict[b'doc1'] = 0.5
    cr_dict[b'doc2'] = 0.5

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    try:
        result = model.build_CP_vector_given_e(session, &query, &cr_dict)
        expected = [0.25]
        assert_almost_equal(result, expected, decimal=4)

        s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 1}]'
        )
        # Release the previous session right before re-binding the pointer.
        json_object_put(session)
        session = json_tokener_parse(s)

        result = model.build_CP_vector_given_e(session, &query, &cr_dict)
        expected = [0.021875, 0.25]
        assert_almost_equal(result, expected, decimal=4)

        s = (
            b'[{"doc": "doc0", "click": 0, "purchase": 0},'
            b'{"doc": "doc0", "click": 1, "purchase": 0},'
            b'{"doc": "doc0", "click": 0, "purchase": 0}]'
        )
        json_object_put(session)
        session = json_tokener_parse(s)

        result = model.build_CP_vector_given_e(session, &query, &cr_dict)
        expected = [0.2062, 0.5]
        assert_almost_equal(result, expected, decimal=4)
    except AssertionError:
        return False
    finally:
        # Covers both the success path and the early failure return.
        json_object_put(session)

    return True


cdef bint test_get_last_r():
    """Check ``DBNModel.get_last_r`` on three four-result sessions.

    The expected return values are 3, 2 and 0 for the three click patterns
    below. Returns True when all match, False on the first mismatch. Each parsed
    session is released as soon as its result has been read.
    """
    cdef:
        DBNModel model = DBNModel()
        char *s
        json_object *session
        int result

    # Clicks on the 2nd, 3rd and 4th entries.
    s = (
        b'[{"doc": "doc0", "click": 0, "purchase": 0},'
        b'{"doc": "doc0", "click": 1, "purchase": 1},'
        b'{"doc": "doc1", "click": 1, "purchase": 0},'
        b'{"doc": "doc2", "click": 1, "purchase": 1}]'
    )
    session = json_tokener_parse(s)
    result = model.get_last_r(session)
    json_object_put(session)
    if result != 3:
        return False

    # Clicks on the 2nd and 3rd entries only.
    s = (
        b'[{"doc": "doc0", "click": 0, "purchase": 0},'
        b'{"doc": "doc0", "click": 1, "purchase": 1},'
        b'{"doc": "doc1", "click": 1, "purchase": 0},'
        b'{"doc": "doc2", "click": 0, "purchase": 1}]'
    )
    session = json_tokener_parse(s)
    result = model.get_last_r(session)
    json_object_put(session)
    if result != 2:
        return False

    # No clicks at all.
    s = (
        b'[{"doc": "doc0", "click": 0, "purchase": 0},'
        b'{"doc": "doc0", "click": 0, "purchase": 1},'
        b'{"doc": "doc1", "click": 0, "purchase": 0},'
        b'{"doc": "doc2", "click": 0, "purchase": 1}]'
    )
    session = json_tokener_parse(s)
    result = model.get_last_r(session)
    json_object_put(session)
    if result != 0:
        return False

    return True
cdef bint test_update_tmp_alpha(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """Check ``DBNModel.update_tmp_alpha`` for three (r, last_r, click) setups.

    Before each call the ``doc0`` accumulator in ``tmp_alpha_param`` is reset to
    ``[0, 0]``; after the call it is compared with the expected pair. Returns
    True when all three match, False on the first mismatch. Every parsed
    ``doc_data`` object is released with ``json_object_put`` on every exit path.
    """
    cdef:
        DBNModel model = DBNModel()
        unsigned int r = 0
        unsigned int last_r = 1
        char *s = b'{"doc": "doc0", "click": 1}'
        json_object *doc_data = json_tokener_parse(s)
        vector[float] e_r_vector = [0.5]
        vector[float] X_r_vector = [0.5]
        string query = b'query'
        unordered_map[string, vector[float]] tmp_alpha_param
        vector[float] expected

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    try:
        # Clicked document, r=0, last_r=1.
        tmp_alpha_param[b'doc0'] = [0, 0]
        model.update_tmp_alpha(r, &query, doc_data, &e_r_vector, &X_r_vector, last_r,
                               &tmp_alpha_param)
        if not tmp_alpha_param[b'doc0'] == [1, 1]:
            return False

        # Non-clicked document, r=1, last_r=0.
        r = 1
        last_r = 0
        e_r_vector = [0.5, 0.5]
        X_r_vector = [0.5, 0.5]
        tmp_alpha_param[b'doc0'] = [0, 0]
        s = b'{"doc": "doc0", "click": 0}'
        # Release the previous object right before re-binding the pointer so the
        # `finally` clause below always sees exactly one live object.
        json_object_put(doc_data)
        doc_data = json_tokener_parse(s)
        model.update_tmp_alpha(r, &query, doc_data, &e_r_vector, &X_r_vector, last_r,
                               &tmp_alpha_param)
        expected = [1. / 3, 1]
        assert_almost_equal(tmp_alpha_param[b'doc0'], expected, decimal=4)

        # Non-clicked document, r=1, last_r=2.
        r = 1
        last_r = 2
        e_r_vector = [0.5, 0.5]
        X_r_vector = [0.5, 0.5]
        tmp_alpha_param[b'doc0'] = [0, 0]
        s = b'{"doc": "doc0", "click": 0}'
        json_object_put(doc_data)
        doc_data = json_tokener_parse(s)
        model.update_tmp_alpha(r, &query, doc_data, &e_r_vector, &X_r_vector, last_r,
                               &tmp_alpha_param)
        expected = [0.0, 1]
        assert_almost_equal(tmp_alpha_param[b'doc0'], expected, decimal=4)
    except AssertionError:
        return False
    finally:
        # Executed for the early `return False` paths as well.
        json_object_put(doc_data)

    return True
cdef bint test_update_tmp_sigma(dbn_param *alpha_params, dbn_param *sigma_params,
                                float *gamma_param):
    """Check ``DBNModel.update_tmp_sigma`` for three consecutive calls.

    The ``doc0`` accumulator in ``tmp_sigma_param`` starts at ``[0, 0]``. It is
    deliberately NOT reset between the first and second call, and is reset
    before the third. Returns True when the accumulator matches the expected
    pair after every call (four decimals), False on the first mismatch. Every
    parsed ``doc_data`` object is released on every exit path.
    """
    cdef:
        DBNModel model = DBNModel()
        unsigned int r = 0
        unsigned int last_r = 1
        char *s = b'{"doc": "doc0", "click": 0, "purchase": 0}'
        json_object *doc_data = json_tokener_parse(s)
        vector[float] X_r_vector = [0.5, 0.5, 0.5]
        unordered_map[string, vector[float]] tmp_sigma_param
        vector[float] expected
        string query = b'query'

    model.alpha_params = alpha_params[0]
    model.sigma_params = sigma_params[0]
    model.gamma_param = gamma_param[0]

    tmp_sigma_param[b'doc0'] = [0, 0]

    try:
        # Non-clicked document at r=0: accumulator is expected to stay untouched.
        model.update_tmp_sigma(&query, r, doc_data, &X_r_vector, last_r,
                               &tmp_sigma_param)
        expected = [0, 0]
        assert_almost_equal(tmp_sigma_param[b'doc0'], expected, decimal=4)

        # Clicked, non-purchased document at r=0 (accumulator carried over).
        s = b'{"doc": "doc0", "click": 1, "purchase": 0}'
        # Release the first object right before the pointer is re-bound.
        json_object_put(doc_data)
        doc_data = json_tokener_parse(s)
        model.update_tmp_sigma(&query, r, doc_data, &X_r_vector, last_r,
                               &tmp_sigma_param)
        expected = [0, 1]
        assert_almost_equal(tmp_sigma_param[b'doc0'], expected, decimal=4)

        # Same document at r=1 with a fresh accumulator.
        r = 1
        tmp_sigma_param[b'doc0'] = [0, 0]
        model.update_tmp_sigma(&query, r, doc_data, &X_r_vector, last_r,
                               &tmp_sigma_param)
        expected = [0.6060, 1]
        assert_almost_equal(tmp_sigma_param[b'doc0'], expected, decimal=4)
    except AssertionError:
        return False
    finally:
        # Executed on success and on the failure return alike.
        json_object_put(doc_data)

    return True
cdef bint test_compute_factor_last_click_lower_than_r():
    """Check ``Factor.compute_factor`` when ``r == last_r == 0``.

    A fresh ``Factor`` is initialised for every scenario with the same alpha
    (0.4), sigma (0.4), gamma (0.7), cr (0.1) and one-element vectors; only the
    click / purchase flags and the three ``compute_factor`` arguments change.
    Returns True when every result matches its expected value under the default
    ``assert_almost_equal`` tolerance, False on the first mismatch.
    """
    cdef:
        DBNModel model = DBNModel()
        Factor factor
        dbn_param alpha_params
        dbn_param sigma_params
        unordered_map[string, float] cr_dict
        vector[float] e_r_vector_given_CP = [0.4]
        vector[float] cp_vector_given_e = [0.2]
        string query = b'query'
        string doc = b'doc0'
        float gamma = 0.7
        float result
        int r = 0
        int last_r = 0
        bint click
        bint purchase
        bint x
        bint y
        bint z
        object expected
        list scenarios

    cr_dict[doc] = 0.1
    alpha_params[query][doc] = 0.4
    sigma_params[query][doc] = 0.4

    # Each row: (click, purchase, x, y, z, expected), where x, y, z are the
    # arguments handed to compute_factor, in that order.
    scenarios = [
        # compute_factor(0, 0, 0)
        # NOTE(review): the first row runs with purchase=True although no click
        # is set; kept exactly as the test has always executed it.
        (False, True, 0, 0, 0, 0.6),
        (True, False, 0, 0, 0, 0.0),
        (True, True, 0, 0, 0, 0.0),
        # compute_factor(0, 0, 1)
        # NOTE(review): the first two rows are identical, so the no-click
        # variant is not covered for this argument triple -- confirm intent.
        (True, False, 0, 0, 1, 0.0),
        (True, False, 0, 0, 1, 0.0),
        (True, True, 0, 0, 1, 0.0),
        # compute_factor(0, 1, 0)
        (False, False, 0, 1, 0, 0.0),
        (True, False, 0, 1, 0, 0.0),
        (True, True, 0, 1, 0, 0.0),
        # compute_factor(0, 1, 1)
        (False, False, 0, 1, 1, 0.0),
        (True, False, 0, 1, 1, 0.0),
        (True, True, 0, 1, 1, 0.0),
        # compute_factor(1, 0, 0)
        (True, False, 1, 0, 0, 0.02592),
        (True, True, 1, 0, 0, 0.0),
        # compute_factor(1, 0, 1)
        (False, False, 1, 0, 1, 0.02016),
        (True, False, 1, 0, 1, 0.012096),
        (True, True, 1, 0, 1, 0.0),
        # compute_factor(1, 1, 0)
        (False, False, 1, 1, 0, 0.0),
        (True, False, 1, 1, 0, 0.01728),
        (True, True, 1, 1, 0, 0.00192),
        # compute_factor(1, 1, 1)
        (False, False, 1, 1, 1, 0),
        (True, False, 1, 1, 1, 0.008064),
        (True, True, 1, 1, 1, 0),
    ]

    for click, purchase, x, y, z, expected in scenarios:
        factor = Factor()
        factor.cinit(
            r,
            last_r,
            click,
            purchase,
            alpha_params[query][doc],
            sigma_params[query][doc],
            gamma,
            cr_dict[doc],
            &e_r_vector_given_CP,
            &cp_vector_given_e
        )
        result = factor.compute_factor(x, y, z)

        try:
            assert_almost_equal(result, expected)
        except AssertionError:
            return False

    return True
cdef bint test_compute_factor_last_click_higher_than_r():
    """Check ``Factor.compute_factor`` when ``r = 0`` and ``last_r = 1``.

    A fresh ``Factor`` is initialised for every scenario with the same alpha
    (0.4), sigma (0.4), gamma (0.7), cr (0.1) and one-element vectors; only the
    click / purchase flags and the three ``compute_factor`` arguments change.
    Returns True when every result matches its expected value under the default
    ``assert_almost_equal`` tolerance, False on the first mismatch.
    """
    cdef:
        Factor factor
        dbn_param alpha_params
        dbn_param sigma_params
        unordered_map[string, float] cr_dict
        vector[float] e_r_vector_given_CP = [0.4]
        vector[float] cp_vector_given_e = [0.2]
        string query = b'query'
        string doc = b'doc0'
        float gamma = 0.7
        float result
        int r = 0
        int last_r = 1
        bint click
        bint purchase
        bint x
        bint y
        bint z
        object expected
        list scenarios

    cr_dict[doc] = 0.1
    alpha_params[query][doc] = 0.4
    sigma_params[query][doc] = 0.4

    # Each row: (click, purchase, x, y, z, expected), where x, y, z are the
    # arguments handed to compute_factor, in that order. Every argument triple
    # is run for no-click, click-only and click+purchase.
    scenarios = [
        # compute_factor(0, 0, 0)
        (False, False, 0, 0, 0, 0),
        (True, False, 0, 0, 0, 0),
        (True, True, 0, 0, 0, 0),
        # compute_factor(0, 0, 1)
        (False, False, 0, 0, 1, 0),
        (True, False, 0, 0, 1, 0),
        (True, True, 0, 0, 1, 0),
        # compute_factor(0, 1, 0)
        # The click-only row used to run with click=False, silently repeating
        # the no-click row; it now sets click=True as its comment documented.
        # Every other row with x=0 or z=0 in this table expects 0, so the
        # expectation is unchanged.
        (False, False, 0, 1, 0, 0),
        (True, False, 0, 1, 0, 0),
        (True, True, 0, 1, 0, 0),
        # compute_factor(0, 1, 1)
        (False, False, 0, 1, 1, 0),
        (True, False, 0, 1, 1, 0),
        (True, True, 0, 1, 1, 0),
        # compute_factor(1, 0, 0)
        (False, False, 1, 0, 0, 0),
        (True, False, 1, 0, 0, 0),
        (True, True, 1, 0, 0, 0),
        # compute_factor(1, 0, 1)
        (False, False, 1, 0, 1, 0.02016),
        (True, False, 1, 0, 1, 0.012096),
        (True, True, 1, 0, 1, 0),
        # compute_factor(1, 1, 0)
        (False, False, 1, 1, 0, 0),
        (True, False, 1, 1, 0, 0),
        (True, True, 1, 1, 0, 0),
        # compute_factor(1, 1, 1)
        (False, False, 1, 1, 1, 0),
        (True, False, 1, 1, 1, 0.008064),
        (True, True, 1, 1, 1, 0),
    ]

    for click, purchase, x, y, z, expected in scenarios:
        factor = Factor()
        factor.cinit(
            r,
            last_r,
            click,
            purchase,
            alpha_params[query][doc],
            sigma_params[query][doc],
            gamma,
            cr_dict[doc],
            &e_r_vector_given_CP,
            &cp_vector_given_e
        )
        result = factor.compute_factor(x, y, z)

        try:
            assert_almost_equal(result, expected)
        except AssertionError:
            return False

    return True
= 0 1773 | for i in range(2): 1774 | for j in range(2): 1775 | for k in range(2): 1776 | ESS_den += factor.compute_factor(i, j, k) 1777 | 1778 | ESS_0 = 0.02592 / ESS_den 1779 | ESS_1 = 0.012096 / ESS_den 1780 | 1781 | model.update_tmp_gamma(r, last_r, doc_data, &query, &cp_vector_given_e, 1782 | &e_r_vector_given_CP, &cr_dict, &tmp_gamma_param) 1783 | 1784 | try: 1785 | assert_almost_equal(tmp_gamma_param[0], ESS_1) 1786 | assert_almost_equal(tmp_gamma_param[1], ESS_1 + ESS_0) 1787 | except AssertionError: 1788 | return False 1789 | 1790 | json_object_put(doc_data) 1791 | return True 1792 | 1793 | 1794 | cdef bint test_update_alpha_params(): 1795 | cdef: 1796 | DBNModel model = DBNModel() 1797 | unordered_map[string, vector[float]] tmp_alpha_param 1798 | string doc = b'doc0' 1799 | string query = b'query' 1800 | 1801 | tmp_alpha_param[doc] = [1, 2] 1802 | model.update_alpha_param(&query, &tmp_alpha_param) 1803 | if not model.alpha_params[query][doc] == 0.5: 1804 | return False 1805 | 1806 | return True 1807 | 1808 | 1809 | cdef bint test_update_sigma_params(): 1810 | cdef: 1811 | DBNModel model = DBNModel() 1812 | unordered_map[string, vector[float]] tmp_sigma_param 1813 | string doc = b'doc0' 1814 | string query = b'query' 1815 | 1816 | tmp_sigma_param[doc] = [1, 2] 1817 | model.update_sigma_param(&query, &tmp_sigma_param) 1818 | if not model.sigma_params[query][doc] == 0.5: 1819 | return False 1820 | 1821 | return True 1822 | 1823 | 1824 | cdef bint test_update_gamma_param(): 1825 | cdef: 1826 | DBNModel model = DBNModel() 1827 | vector[float] tmp_gamma_param 1828 | 1829 | tmp_gamma_param = [1, 2] 1830 | model.update_gamma_param(&tmp_gamma_param) 1831 | if not model.gamma_param == 0.5: 1832 | return False 1833 | 1834 | return True 1835 | 1836 | cdef bint test_export_judgments(): 1837 | cdef: 1838 | DBNModel model = DBNModel() 1839 | dbn_param alpha_params 1840 | dbn_param sigma_params 1841 | 1842 | alpha_params[b'query0'][b'doc0'] = 0.3 1843 | 
alpha_params[b'query0'][b'doc1'] = 0.4 1844 | alpha_params[b'query0'][b'doc2'] = 0.5 1845 | alpha_params[b'query1'][b'doc0'] = 0.6 1846 | 1847 | sigma_params[b'query0'][b'doc0'] = 0.3 1848 | sigma_params[b'query0'][b'doc1'] = 0.4 1849 | sigma_params[b'query0'][b'doc2'] = 0.5 1850 | sigma_params[b'query1'][b'doc0'] = 0.6 1851 | 1852 | model.alpha_params = alpha_params 1853 | model.sigma_params = sigma_params 1854 | 1855 | tmp_file = tempfile.NamedTemporaryFile() 1856 | model.export_judgments(tmp_file.name) 1857 | flag = False 1858 | for row in open(tmp_file.name): 1859 | result = ujson.loads(row) 1860 | try: 1861 | if 'query1' in result: 1862 | assert_almost_equal(result['query1']['doc0'], 0.36) 1863 | flag = True 1864 | else: 1865 | assert_almost_equal(result['query0']['doc0'], 0.09) 1866 | assert_almost_equal(result['query0']['doc1'], 0.16) 1867 | assert_almost_equal(result['query0']['doc2'], 0.25) 1868 | except AssertionError: 1869 | return False 1870 | if not flag: 1871 | return False 1872 | 1873 | tmp_file = tempfile.NamedTemporaryFile() 1874 | filename = tmp_file.name + '.gz' 1875 | model.export_judgments(filename) 1876 | flag = False 1877 | for row in gzip.GzipFile(filename, 'rb'): 1878 | result = ujson.loads(row) 1879 | try: 1880 | if 'query1' in result: 1881 | assert_almost_equal(result['query1']['doc0'], 0.36) 1882 | flag = True 1883 | else: 1884 | assert_almost_equal(result['query0']['doc0'], 0.09) 1885 | assert_almost_equal(result['query0']['doc1'], 0.16) 1886 | assert_almost_equal(result['query0']['doc2'], 0.25) 1887 | except AssertionError: 1888 | return False 1889 | if not flag: 1890 | return False 1891 | 1892 | return True 1893 | 1894 | 1895 | cdef bint test_not_null_converence(): 1896 | cdef: 1897 | DBNModel model = DBN() 1898 | 1899 | model.fit('tests/fixtures/null_test', iters=10) 1900 | return True 1901 | 1902 | 1903 | cdef bint test_long_list_null_converence(): 1904 | cdef: 1905 | DBNModel model = DBN() 1906 | 1907 | 
model.fit('tests/fixtures/eighty_skus', iters=10) 1908 | return True 1909 | 1910 | 1911 | cdef bint test_all_clicks_set(): 1912 | cdef: 1913 | DBNModel model = DBN() 1914 | 1915 | model.fit('tests/fixtures/all_clicks_set', iters=10) 1916 | return True 1917 | 1918 | 1919 | cpdef run_tests(): 1920 | assert test_get_search_context_string() 1921 | assert test_compute_cr() 1922 | assert test_get_param() 1923 | assert test_build_e_r_vector(&alpha_params, &sigma_params, &gamma_param) 1924 | assert test_build_X_r_vector(&alpha_params, &sigma_params, &gamma_param) 1925 | assert test_build_e_r_vector_given_CP(&alpha_params, &sigma_params, &gamma_param) 1926 | assert test_build_cp_p(&alpha_params) 1927 | assert test_build_CP_vector_given_e(&alpha_params, &sigma_params, &gamma_param) 1928 | assert test_get_last_r() 1929 | assert test_update_tmp_alpha(&alpha_params, &sigma_params, &gamma_param) 1930 | assert test_update_tmp_sigma(&alpha_params, &sigma_params, &gamma_param) 1931 | assert test_compute_factor_last_click_lower_than_r() 1932 | assert test_compute_factor_last_click_higher_than_r() 1933 | assert test_update_tmp_gamma() 1934 | assert test_update_alpha_params() 1935 | assert test_update_sigma_params() 1936 | assert test_update_gamma_param() 1937 | assert test_fit() 1938 | assert test_export_judgments() 1939 | assert test_not_null_converence() 1940 | assert test_long_list_null_converence() 1941 | assert test_all_clicks_set() 1942 | 1943 | 1944 | if __name__ == '__main__': 1945 | #assert test_get_search_context_string() 1946 | #assert test_compute_cr() 1947 | #assert test_get_param() 1948 | #assert test_build_e_r_vector(&alpha_params, &sigma_params, &gamma_param) 1949 | #assert test_build_X_r_vector(&alpha_params, &sigma_params, &gamma_param) 1950 | #assert test_build_e_r_vector_given_CP(&alpha_params, &sigma_params, &gamma_param) 1951 | #assert test_build_cp_p(&alpha_params) 1952 | #assert test_build_CP_vector_given_e(&alpha_params, &sigma_params, &gamma_param) 1953 | 
#assert test_get_last_r() 1954 | #assert test_update_tmp_alpha(&alpha_params, &sigma_params, &gamma_param) 1955 | #assert test_update_tmp_sigma(&alpha_params, &sigma_params, &gamma_param) 1956 | #assert test_compute_factor_last_click_lower_than_r() 1957 | #assert test_compute_factor_last_click_higher_than_r() 1958 | #assert test_update_tmp_gamma() 1959 | #assert test_update_alpha_params() 1960 | #assert test_update_sigma_params() 1961 | #assert test_update_gamma_param() 1962 | #assert test_fit() 1963 | #assert test_export_judgments() 1964 | #assert test_not_null_converence() 1965 | pass 1966 | -------------------------------------------------------------------------------- /notebooks/DBN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ClickModels: DBN" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "ClickModels is a field of study in Machine Learning that uses Probabilistic Graphical Models to model the interactions between users and a set of ranked items.\n", 15 | "\n", 16 | "One of the main uses of ClickModels is to train models from past observed data to evaluate how good each document probably is for each query, also known in the literature as judgments' values.\n", 17 | "\n", 18 | "In order to compute the judgments for each document for each query, we rely on the [work](https://pdfs.semanticscholar.org/0b19/b37da5e438e6355418c726469f6a00473dc3.pdf) developed by Aleksandr et al., where users' interactions with each query result are modeled through a Dynamic Bayesian Network, as depicted below:\n", 19 | "\n", 20 | "

\n", 21 | " \n", 22 | "

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "$E_r$ is a random variable that tracks whether a given document $u$ was examined at rank $r$ by the customer or not (this would be equivalent to the impression event from GA's dataset).\n", 30 | "\n", 31 | "$A_r$ is an indicator as to whether the customer found that given document attractive or not. When a sku is examined and it's attractive, then we have a Click event, represented by the observed variable $C_r$. \n", 32 | "\n", 33 | "Another observed variable is $P_r$ which represents the purchasing event. $P_r$ and $C_r$ both directly influence $S_{ur}$ which indicates whether the customer is already satisfied or not.\n", 34 | "\n", 35 | "If not, then it's considered that the customer can continue examining the result set with a $\\gamma$ probability factor." 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Creating the DBN above is done through the following code:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAuQAAAEHCAYAAADmnFPUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzdd5hV1dXH8e/vDChNQDSCGhtib4i9l1jQxKhvIBrfmGAjUUyiKWos0VdM1NgVUWIBNdFExZgYDZZYoqgoZWwhSNFYUYygwAAD96z3j30GxnEaM+fec+5lfZ6HR2bm3nvW4Mze6+6z9toyM5xzzjnnnHPZiLIOwDnnnHPOuVWZJ+TOOeecc85lyBNy55xzzjnnMuQJuXPOOeeccxnyhNw555xzzrkMeULunHPOOedchjwhd84555xzLkOekDvnnHPOOZchT8idc84555zLkCfkzjnnnHPOZcgTcuecc8455zLkCblzzjnnnHMZ8oTcOeecc865DHlC7pxzzjnnXIY8IXfOOeeccy5DnpA755xzzjmXoQ5pvZAkS+u1nHPOgZkpzdfzcdo559KXxljtK+TOOeecc85lKLUV8jppr+g459yqptgr2T5OO+dc+6U5VvsKuXPOOeeccxlKfYXcOVc6ktYDdgK2AroCnYCOwGJgEfApMAWoNrOarOJ0zrlVlSQBmxDG6n5AZ8JYHRHG6sXAh8Bk4A0zq80oVJchmaWz2l63bO+3Qp0rDklVwNeAvQgD+06E5HsS8DownzCwLwNWJwz66wADgK2BWcljJwKPmtmbJf4WXCsVazz1cdq54pPUGTgc2JUwTg8Aaghj77Tk74sAIyTmnYANksduArxBGKsnAX8zsw9L/C24VkpzTPWE3Lmck7QOcBLwQ+Aj4HFWDNbvWCt+iSWtDmxLGPB3Bo4EXgVuJAz4y4oTvWsLT8idKz+SNiWM00OAauCfJGO1mX3UytfoCvQnjNW7ERL7x4CRwD9bM9670vGE3LkKl9zi3BM4jTAgjwVuMrNJKb3+6sC3ktffCPgdcIuZzU7j9V37eELuXHlI7lweBgwjLHaMBkaZ2cyUXr8HcDxhrDZCYn6XmX2exuu79vGE3LkKJqkPcBOwHTACuMPM5hbxejsApwKDgIuBEWYWF+t6rmWekDuXf5K2IiTgVcANwH1mtqhI1xKwHyEx3wcYZmYPFONarvU8IXeuAiUD7neAa4BbgYvNbEkJr785YXIpACea2YxSXdt9kSfkzuVXsir+M+AXwK8IK+IlW8SQtBdhrJ4E/MjMPinVtd0XpTmmettD53IgWRV/ADgX+LqZnVfKZBwg2eS5L/Bn4EVJP5bkY4RzziWSVfHxwKHALmZ2U6nvKJrZeEKd+YfAq5KOLuX1XXH4ZOtcxpLVjmrgX8BOZjYxq1jMrGBm1xDq148B/i6pW1bxOOdcXkj6DvAscAdwsJm9nVUsZlZjZj8Fvg1cLunmZOXelSkvWXEuQ5IOBe4CjjezR7OOp75kcL+ZUMt+uJl9mnFIqwwvWXEuXyT9EDgfGGhmr2cdT33JosmDhHMnvut9zEvHS1acqwCSvgHcCRyVt2Qcwmo5MJSwIvS0pF4Zh+SccyUn6QxCvfi+eUvGAcxsAfANYDXgAUmrZRySawNPyJ3LgKT9gNuBI8zs+azjaUrS8/Ys4FHgES9fcc6tSiSdCJwJHGBms7KOpylmthgYTNiUf6eXr5QfL1lxrsQkbQM8BRxjZk9lHU9rJB1gfgdsCBzmbRGLy0tWnMuepMMJHa/2L5eTjSV1Ah4BXjOzn2QdT6XzkhXnypSkjoQNQeeVSzIOy1fKfwh0AU7POBznnCsqSWsRkvFjyiUZh+Ur5UcDR0k6OOt4XOv5CrlzJSTpPEJrwYHleASypM2A54E9vE958fgKuXPZkvQH4GMzOzPrWNpC0iHALcB2fqpn8fjBQM6VIUnbAU8SWhu+k3U8bZVscPo
fwm1cL10pAk/IncuOpKOAK4AdzKwm63jaStItQGxmP8g6lkrlJSvOlZmkVGUM8MtyTsYTNxDGjh9lHYhzzqUpKVUZCZxQzsl44mfAQC9dKQ++Qu5cCUg6ExhImZaqNJSUrrwAbG9mH2QdT6XxFXLnspGsKi80szMa+dpBQMPV5kvNbLKky5O/zytFnK2VnHUxCtjMzJZmHU+l8ZIV58pI0n5qBvBtM3u5na+VmwlB0k3Ah2Z2cSmutyrxhNy50pO0NmGs7mdmnzT42n3Apw3LPySNAmYCPzCzTUsW7EqQ9E/gejO7P+tYKo0n5M6VEUlfBy4ys13a+Tq5mhAkbU9or7WJr7ykyxNy50pP0i+AbcxsSIPPjwLmmdnZTTxvLnBvXmu1JR1DmB8OzDqWSpPmmNqh/eE451pwGqEmsc2SCWFWYxOCmf2gbkJozzVWlpm9Kukt4AjggVJe2znn0iQpAk4Fjm3w+YOAoS0kXBOBx4sYXnv9GbhW0lZmNjXrYFzjfFOnc0UkaRNgN+BP7XiNugmh0dWZRFYTwkjCGw7nnCtnhwKfAg3LCg8GnmjhubNa8ZjMmFktoaf6qVnH4prmCblzxfUD4I527tbP84TwALCtpC0zuLZzzqXlNGBkI5vuBwB9W3jupLxt5mzE74DvSuqWdSCucZ6QO1dch9OO1fFEbicEM1tCuB06sNTXds65NCRtaQ8E7mvky5OBvpJGSerZ2PPN7HfFjC8NZvYu8BqwR9axuMZ5DXkDybvHHYGdgZ2AzYHOyZ8qYHHyZw7hF3VS8uetSmhn59IjqTPQD3ilnS81GTgoqSM/u7HEO+MJYSKwf4bXd6ugpF/0TvX+bEwYpzsBIozTi4APWDFOT/I2na4RWwP/MbP5jXztUmAQMBQYKmkyYZHlCTObXMIY0zCR8LuS53r3VdYq32VFkghHmQ8Bdgc2JLyLrBvA/wUsJAzuBWB1woC/HmHlcidC8t4lefzfCCUKeb995YpM0u6EW6AD2vk6PQk/W3Wr5LmaECT1B+42s62zjqVSeJeVL5PUAfgGcBywC9ALmEL43ZhIaFe3KPkDYZzuDGzEFxP32uTx9xM6Yywu3Xfh8kjSicABZnZ8E1/vCfwSOIgw79d5wszK5tAdSccBR5vZ4KxjqRTe9jAFkroDxxPqxkRonP8UMLUtLdwk9SZMEscBhxEG+5vykDC5bEgaBvQ3s1NSeK3cTgjJ7d55QG8zW5BlLJXCE/IVJPUBTibsx3iHsDltPDDDzOKVfC0RFl12JyzC7AyMBm42s1kphu3KiKQbgelmdm0rHtuTMA7/kjAWn21mvy1yiK1Wd1ZFY0m3pC2Av5tZSyWQrpXSHFNXuRpySRtIGgm8TbjNfjqh7+h1ZvZqW/spm9lHZvY3MzsO2BJ4C3hQ0guSBicTgVu17ExYvWs3M5tnZmeb2U7AmsBgVpSynJXGNdoR21LgDaB/lnG4yiJpB0l/BKYCGwBHmNleZjbazN5c2WQcwIL/mNmfzOwwQj1tBLwk6RFJ+6X7Xbgy0eqxOhmL70/G4lnAMUWNrJUkDUgOhxtM03uOpgNrJ+VeLmdWmYRcwcmEJGYesK2ZDTazp9Ku/U6S898QfikuBS4A/ippvTSv43JvR8LPW6pyOiFM4osr9861iaTVJP0foc71RWBjM/uBmVWnfS0zm2FmPyck/PcBv5c00jtRrDqSk5S3A6obfH5QK57+BNDoRs+0JXuImmRmk5PWuE3WhydvYqfgiye5tEok5JI2BMYBPyTUiZ1bio09ZrbMzP5KePc9BaiWdLyvlq8yegEftfXJeZsQWvAx4ft1rs2S/QgvEWq9dzSza83ss2Jf18wWmdloQmLWCXhNkp9quGroDMSNbOhszUJHL3Lcf7wJHwO+Qp5DFZ2QJ6vipxBW7/4J7GFmr5c6DjOrNbNfEVrD/QJfLV9VdGbFBrO2KKcJYRHh+3VupUnqmKyKPwZcQyhPeb/UcSR3n04EhgF
3SLrJV8srXmdC04aGDmqqzWH9x9B4q8Q8W0R40+lypmIT8uQY3BGEGvEDzezXba0PT0uywXNnwq2xFyVtlWU8rug6AUva8fxymhAW44O8awNJXYC/EjZa7mhmd2TdQtbMHiGslncDnpH0lSzjcUX1pXFa0gDCncdvN/WkpITkXjN7ot7nBkkaWn9fj6SekibV+3iopMuTmu9ByZ9SjuGL8cWTXKrIhDzp+nAnYUDd18xeyzik5ZLV8guA84EnJe2UdUyuaJbSxl7/ZTghdCR8v861mqQewKOEcx2+nsWqeFOS1rXfA/4O/FPSVzMOyRXHUsIZI/UdRLjzOFjS0PpfSMbT+wDM7Af1Pj+AsKdnIqEjUP3XmlXvMfcC/wXuM7P7CfuMPk3zG2qBj9U5VXEHAyUbNO4gdKIY2M4jy4vGzO6U9DnwiKRDzKy9h8e4/GnPqnH9CeELB/8kq+a3AJ82MSHMIqya/7beazWcEIYSJoRNJfUF2ts6sRON3/Z1rlFJKcg4QkLyo7Z0TSm2ZKX+fEmfAU9J2sfMZmcdl0tVYyvGawGDzWyepLMk1d8oOQ8YVX8hpO7zZjYr6XRyf73PH8OKjZbzktfcBbgcIGm3WT+Bp4kFkgGNfP7xNhwK14n2lVK6IqmohDzZLDmCcGjPYWaW6x86M3swWc3/u6T9zGx61jG5VC0E1mjjczOdENqgG1D0zXeuMkjqBDxIOHjt9KxLVFpiZlckMT+WjNVzs47JpaYG6CwpqntTmHQrIfn7b1mxuNGken3shxI2Jdc5CDi7wWMOApo8n6KJHuKj6i/AtEM3wvfscqaiEnLgJ8CuhE4quU7G65jZfclt20ck9TezhVnH5FIzDdgG+PfKPjHrCaENtiUc2OJca1xPeAM3NO/JeD2XEDZR35vc1SyXuF0zzKxW0nvA5rRhrK4vudu4fJyt/7Gkvsl/BxDubmZ1mve2tPP7dMVRMTXkkjYj1GUfY2afZx3PyjCzWwn9dn+TdSwuVRP5YmJcdC1NCMnnUp0QkjtTO5HSIUiuskk6FDgUOMHMClnH01pJAv4LoAfhTa+rHGmN1T1JygMTg5LXhhXnNNSVIxZLk+1nkwOBehEOCHI5UxEJeVI3Phq42MxmZB1PG/0EGCRp36wDcamZRIkTcrKZENYjbIp6N8XXdBUouRt4C3ByuS2cQDhbAjgB+LWkjbKOx6UmlbE66aQ2MdkoXzfOzks2htaNubtQhM5YWnFS59mEevNRDTekEuaAKXncr+Eqp2Tlx0BMqB8vS2b2qaRTgdsl7eClKxVhErCTJJXq9raZTZY0MTlUaB5hEtglGZjvTR62C9DsqW8raSdgkt/Cd61wJTDOzJo8TRCWdxCa1IYNa0VnZm9Iugq41UtXKsYkwona7dZInffgBl//Un14StedTNggfXYzD/M7mTmmtMYSSQZgZiU9hTIpVXkB2L2Yq+OlmiAk3UUoJ/hJMa/jSkPSh4QDqd7OOpZikXQxUGVm52UdS6Uo1nia1TidXPtQ4HfAds2tjielVTOB39bfS5EnkjoAzwO3mVmab25dBiT1At4GeuZ19biuBr2dr3E/8ICZ3Z1SWKu8NMfUSihZuQy4rMjJeF9CzeCmxbpGPT8BjkveaLjy9zzwtayDKLKvEb5P5xqV7DO4Bji1FaUqdQluS4diZaZB6UqXrONx7WNmnwLvE5pC5FIKyfhqwD6E/Wouh8o6IU8OajiAdG+/N6ZkE0QyMNxO+9vQuXy4Dfhh1kEUi6TtgY0Ih7s415T9CWWFf2/uQUnd7eTkw75FjqldzOwNwt3ZY7OOxaXidip4rAaOBt5ob2LviqesE3JC27a7zWx+sS6Q0QQxCvi+JD/etvw9CqyV9P2uRKcCv0tWDJ1rymnAyFbUWw8GLk3+3mS3iBwZCQxL7gC48jYaOErS2lkHUiTDCD+vLqfKNiFPDtQ5BbipyJcq+QSRvIN9iXCgiytjSVu3mwkJSUWR1J3wM3p
L1rG4/JK0HqGs6fctPG4o4cCrunacuV4hTzxKOBW6Ut9wrzLM7BPgL4RSpIoiaTtCye1fso7FNa1sE3LgKGB6ctuwKDKeIEZSgUncKup2wsrLWlkHkrLjCUc3f5h1IC7XTgH+2MJGzp7ApkmnCAgdgnJbQ14n2QB4Ez5WV4obgVMllXNu1Ji6O5lLsw7ENa2cf+iKujqegwliHPAVSf1LeE1XBMnKy0OEQbEiJBuEfkTx71C5MpaUcrRmrB7KijuRkPTST8bhvKsrdeiedSCu3V4GPiUs+FUESesQ9jn4Sco5V5YJeXIQ0B4UdyNZphNEUurwOLBXKa7niu4i4AxJW2QdSErOJbSmeybrQFyubURor/taUw9IuljNa3By7KfJf3NfR5684X6d0h8C5lKW7HE4C7iuTN4MtsaNwK1m9n7WgbjmlWVCDmwOfGxmc4vx4jmaICYBO5fweq5Ikn0BFwGjkzeUZSu5a3MaMNQPRXEtaM1BJD9o5HyHuk4Q5VBHDj5WVwwze5JwR/OarGNpL0mDgW2BX2Udi2tZuSbkO7PiOPBiyMsEkcXR6654RgK1wBlZB9JWSanKA8CffMXFtcLONJOQSxpAuBPYUN1iSO5WKSUdJKnh0ec+VleWs4D9JR2edSBtJekrwA3ACWa2OOt4XMs6ZB1AGxXt+NecTRCvAf0kdTazRSW8risCM4slnQi8JOlvZjYt65ja4FygI3B60hJ0m7yebOdyYSfg+ma+fjmApIYncvZt8N/MJXPDMYQ5oGFcqR297rJnZgsknQTcIWm7BnfLy8WNwJ1m5gcBlYlyTsgfKtJr52aCMLMlkv4N7ICfrlURzGyWpAuBeyTt34pTC3ND0oGEUpUdCT+TDwMFSf3N7JVMg3O5k2zobHLxRNIg4Ox6G+cbfu0+oGSdiSSNMrMmD2RL4pycxNawPOXfwLqSepjZZ8WM05WGmT0p6a/A7ZIGJ/u6ykLSIW574PtZx+Jar1xLVrYGXk37RetNEAc3/APUJeilbl33KqEGzFWOkYQ+838pl8OfJO0K/BEYbGbvm9kjQN2R4dWSfAe/a6g3EDfTFnOXxpLxRFnVkCfJ2r8Ic5OrHD8F1gBGlcvhT5KOAS4EjvA76+WlXBPybkAxVhbzOEF8DnQt8TVdESUbIYcBs4H7JXXKOKRmJbfq/wqcaGbLu6qY2SIzE6He8iRJlrTYcg7CuNXoOC3pLL7Yxaqh3NaQN8PH6gpjZksIR85vQ+i8kuucSdKRhBKxw8xsetbxuJWT6x+uxiS/EB0Jm+PSfN28ThCLgFwnbG7lJStq3yNM4n+XtEbGITVK0j6EnvinmdnfGnuMmV0BbJB8+JGkium37tqlM2H8+oKki9VazdXlJl2JoJEFEEmDJA1Nxuy6z/WUNKnex0MlXS5pQPL4QY1sxEzbYnysrjhmtgA4jFCmdJukXJb6SjoeGAV83cxSryBwxVd2CTmh7r2QZru1UkwQ7bAUWC2F13E5k5ya9l1gGvB0nnqUK/geoaPKcWb2QHOPN7P3ktXy+4GRkhYmHVncqqsjsKz+J5Lezo8TDmBpjS+Mt8ndmlmELlv1670PYsVZEQOAe4H/AveZ2f3AZFa0ri0WH6srVJIbHAysBzyYpzuBkjpIugD4DXCgmRWzA50ronJMyJcCVWndOirFBNFOqxNWXlwFSlbKTyWc9jdB0tlZ9ymXtB6hROVnwEFm9kRrn2tmgwmHdnUBlkg6oDhRujKwhCRBTRYoHgfmEsbQW5I9O18i6XFJc+t9PFPS5cmH85KywmMIb/7qHMOK7lh1Z0jsQrJJ38xm1d+wKem+hn+Agxr5/NCV+H59rK5gZrYQ+CbhEKipSa12piRtS2j4sDewp5n9K+OQXDvk8tZLc8zMJC0hDH5t3rCQJOL3ERJnCBMEyWpKw8c+Tr1d9ZJmAveb2dmEwX9WMmE0NUG0R2d8kK9oyc/0Z4TNQ/8LfFPSiaVui5hsWjoeuJJw1Pm
3zGylS8PM7EVJHYH/AE9KegwY6IcIrXIWEcav+iuMLUo20Tf1tbpFjqF8se/3QSQb7+s95iDglCZeZ3DDz7XUZaUVOuFjdaWrBWJgTeDXyZvKYWb2cSmDSMpmziacafFL4DYfX8tfOa6QQ6jn/kp7XsDM5iUdVJT8WbOxZDx57MHJ1+seu2mSjDecIEbVe9pBQKtXFpuxNmFVyVUoSacDdwIfEloK/hEYL+k8SSU5GVbh9M26VfFDzezCtiTjdcxsmZmtT2i7dQgQS9oknWhdmZgHrJX2RrikxHD52Fv/47q/J3cuPy1x/+i1WbHXyFWY5Of4JkIC/ASh+9ks4FVJJ5eiY1ZSSnggYVV8P2AnM7vVk/HKUK4JeTUhccmF1kwQ7TCA8P26CpMMrhcQbqvXAsPNrGBmNwC7EVqozZJ0u6TUTwGU1EnSdyW9ROjr/xyh09CUtK5hZncCdW8qZkm6OK3XdvlmZnMJiwmbpvzSPfliOeAgVpzcPCD5b1oLIo350pvkJBnrR2h96CpMsiJ9D+EOYi1wsZktThbmjiR0YnlH0pWS+hXh+j0lnUHod389MIKwcPJO2tdy2SnXhDxvxxS3ZoJYaZJ6EDaR/Lvtobk8SspDrgbOIam3JqySA2BmM83sf4HNgTeBsZJeknSCpA2S57fluqtJ2lHSZcA7hJaFmwE7mNnl7VkVb4qZzU02fF4DXKDQHjGXXWVc6lIfq5Ma8onJZvq6xHteUu9dl4TvQihJTE3SseVyQqnAAEmj6tWYbw9M8yPKK4+k1QkLFt8gjNUzgPF1XzezCWb2dWB3QjnLC5LGSfofhePr23rdLpL2lvQ74C3CXXgRxuoxvipeeZTW/1NJBpBMvEUl6Wjg5OSXIBckjSLUjM8j7Ob/ZfLxvW29bSppf+A3ZrZnWnG6fFA4rfMcVtSdXmVm5zfz+CpC662TCZsmRUh26v68DsxPXmtp8rqdgXUIbwp3IuyD2AZ4m9DK8GbCXoxXCT+nRd+kJGkrVqwiDm6qTGxVV6zxtJTjdHK984CeZvaLUlyvPdpTQy7pNGCAmZ2cclguY5LGAocTxtQFwElmdm8zj+8MDCa0td0F+IwV4/REQletGsIei5gwTncitI7dqd6fTYGpwIPALcD+hFX688zsNyl/m66N0hxTyzUh34Dwg92nkt8lSvoZsLGZ/SjrWFy6kpW9xwkDci2wUWs3BiWr4+sTEuy6wXsrwupNJ0K7ucWEAX8uoeVb3YRQnfTVrXutKsLkAHCsmf253d9c6+J/nrCi9G9gGzOLi33dclJBCfmhhNOPDyzF9dpDUt96e4JW9rm3ARPN7KaUw3IZS95s3Zh8OAdYz8yWNfOU+s+NCIl1w0S7LgmvIozTi4GP+OIiy2sWDiaqe62+wHTC3dTdvdd4PnhCHib0jwgbGt4t9vWyIulu4FEzuyPrWFy6FI6aPwmYAjxlZj/LMJZqYAfCIUWbm9lHJbru4cDDyYf9zeyVUly3HFRQQr424RZ/r0p+05X8Dp1iZq1tn+vKhEILzp6EO3tXmdntGcUhwl3QLoQS2W3qJ+wuG2mOqWVZQ56sij8B/E/WsRSLpK7AocBTWcfi0pV0gDgJOMvMBmSZjCeeBoywavOHttanrywze4QwuQBUJ29SXAUxs08IJVJfyziUopG0OWGvj7+hrDAKpw73BPY2s22ySsZhed5TTShXXBf4dVaxuOIoy4Q8cRNwWqmShwx8Bxjvu6grS1IiMgmWHzmfB+MJKy8dCQdMfKdUFzazRcnKwlnAScmGz9ycgudScRNwWtZBFNEPgduLsSHaZUdSb2Ak4cyR8S09vkSeBAqEhYwfS8pNtznXfuWckD9HqKXKfW3iykreZAxjRd2aqxxvJ/8tes/alfASoZ4RwibPmaUOIHlzskHy4UfJypSrDH8A9k32/lQUSV0Im/duzjoWl7rZ0PghUhl6EairXy8A/80wFpeysk3Ik9s3I6nMlZfdCKc2pnH
Sp8sJSUOArwIH5qw92jvAeYQd/ZjZhCyCMLP3ktXy+4GRkhZKWi2LWFx6kk3EfyC0bas0xwIvmNnbWQfi0iPpnuSv62UayJc9A1yS/L2T30GvLGW5qbPeNdcgHM+9vZm9V6rrFpukup6n25nZ61nH49ov2dw2B/i7mR2edTyNkdSJsOP/FDPLtJ5b0u7AC8mHB5rZKrWXolI2dda77laE2+0bVUppR3In8z9AD0q4GdoVl6Q9CF2gfmRmI7KOpzGStiO0q93MzGZkHc+qbJXvstLgutcCq5tZRdziTiau5wkbSep4Yl7msvr9WFmS5gE98hBncjreO4QNTI8BAyu5zWl9lZaQJ9d+DHgoOYm27En6BqFUZf3kU/8ldL7wxLxMSepIaENbY2Zds46nOcnv8hwz8z03GVrlu6w0cBHwDUllX0ueJCBjgHOT/7m7J196Ldnstm1mwbk2q9c9ZKNMA2md3QAkbZN1IGa2zMzWA74PHALEkjbJOCzXdj8CLpS0adaBtJekNQnJ+HeTsfoYYC1gtqRPkg2BrvzUnQWxZqZRtM5pwFe8rK9ylH1CnpyCORS4rQKO4/4p4SSwUbD8SF5PzMtYgxaHua/3M7NpyV9z00/ZzO4EeiUfzpJ0cZbxuLZJfrYuBW5PDkwpZ9cCD5rZ0wBmdq8n5uWtQYvD3JdV1TuE6neZBuJSU/YlK/Wufzuw2MzKcpNnUqryT2CXpjYISdqNsMu6jpey5FjS4nAZ5L9UpT5J3wPuALqY2aKs46lP0jXAGcmH3c1sfpbxFEsllqwk168CngXuzmt9bkuSUpXrCXuXFjTxmG8Df0o+9FKWnJPUB/iQ0OIwT11VmiXpL8A3y2l+qTReQ9749XsCrwFDzOwfWcTQVkmpynPAHa05etkT8/Ig6V1CV5XOOeuq0qLk93msmQ3KOpaGkjev/0o+HGxm92cZTzFUakKexLAFoff9bmZW8hab7ZGUqrxGKFV5uhWP98S8DOTh96ItkgMEFwDHmdk9LT3epc9ryBuRlK6cBPw+GfDLQrJT/yZgHkmpSku8lCX/ctzisLXuAb6VdRCNMbOphLHrReA+SVMroARilZGUrvwf8FdJa2UdT2slXdKszFQAACAASURBVIjGAve2JhkHL2UpBzlucdgiM1sILAXuzjoW134VNYmZ2WPAL4HHJG2YdTwtSZLx3wLbE1b64pV5vifm+ZS0OBwNPFLG7fpOBJB0StaBNMaCPYCvA1sCBUk7ZByWa70RwN+Av5fD3p+k+8afCJv+frGyz/fEPJ+SFofHAqeb2YdZx9NGOwFI6pd1IK59KqZkpUEsPwF+DBxsZrOyjqcxSTJ+KSGh2M/MPk3hNb2UJQfy9LvQHnlqgdgcSZ2BmuTD28zs5CzjSUMll6zUi6Xu7uB2wNeTu5y5I2l1wgpkJ+DoNDb8eSlL9uq1OFxoZt2yjqc9vAVidrxkpQVmdh1wFfDPPLRvayjZ2HQT8DVg/zSScfAV8zyo1+Iw93doWiE3LRCbY2aLkp/7s4CTkp95n5hyLukpfxowEXg6j6vFkroBDwEG/E9a3Td8xTwX6loc9mr2UeXBWyBWgIpMyAHMbCRwNmGgPylZjclc0kf5cWBz4Gtm9t+0r+GJeTYatDh8N+t42iuPLRCbY2ZXABskH36UtDFzOZaU6Z0BPAi8LOnQjENaTtIuwATgXeBYM1uS9jU8Mc9GubU4bIm3QKwMFVmyUp+k7QmH7XxEOBL8vYziiIAfAMMJdeNXm9myEl3bS1mKrFxbHLakXgvErmZW09Lj80LSfcAgQinLmuU26a4KJSsNSToYuJWwYPEzM/ssozhWBy4kvLk+E7jH0pooW762l7IUWbm2OGyJt0DMhpesrAQze5Vw6/15YIqkE0u9Wp6sij8BDAH2MbPflioZB18xL5G3k/92zjKItCWH8gDclWkgKymZaPcAugBLJB2QcUiuBWb2OKGevEAYo0q+Wi5pZ2ASsBWwg5n
dXapkHHzFvEQ+hOVjRCU5DkDSd7IOxLVNxa+Q11dvtXwB4aS1vxYzMVY4IvqHwAmUeFW8OavairmkDQilDN2BrsBi4HNgDjCtvRNu0uJwNKHFYbl2VWmSpLuB77T0u5280d2aUJPZk7AJbj6hpee7ZvZ+sWNtJKYOwDvAusBjwMBSJlhttSqukNdXb7V8InAD8Ewx/79J2gkYRthkfwbwxzz8nKxKK+bJ+LEZsA7Qg/BmegFhDHnPmjgwbyWv8UfCm531yrirSpMk1QIdWzFWdwS2JYzTawAdWTFWzyxGKW2lSnNMXaUSclj+gzgYOBXYhFBzdUtav5xJ6cLhhE0WOxMStZvM7K00Xj9NlZiYJ62fvgUcWFVVtUOhUPgKECV3BKzu5kgcx0p+Vq2qquqzQqHwL8IJgg8CE1o7GSctDucQWhx+vRjfU9aS/suLgKFmdkvyOQF7A0dK2kfSlnEcd0++ZlEU1Y0HmJnMTJIKURR9XCgUpgD/IBw89J8SfQ91pTcAffP4+1jfqp6Qw/INld8njKUCRgJ3pVXKknTn+Xby+n2AmwlzwSdpvH6aKi0xT8aPAcDRkvaJomjbQqGwZvKl5WN1/fEDiKuqqv5bKBReBZ4hjB//au46Da65B+FO+elmdmMxvq+sSdoOeBXYzMxmJJ/rCAwEvh5F0R5AvziOuwBEUWT1xoT6Y/XSKIreLxQKEwklZGM9SW+cJ+QpUehbfCrhHfM/gKcItytfsVYeGZ4MLBsTeoHuTOhpOpswedzX2tfJUrkn5pK+Alwl6dtmtnqnTp0K6623nrbccstol112oX///nTo0OFLz4vjmFmzZvHCCy/w+uuv2zvvvFNYsGBBByCW9IyZnWlmr7Rw7bL7uW8LJS0Qgd0kXW1me0qiW7duhY033rhqm2220Z577skmm2zS6PNra2uZMmUKL7/8MtOmTSt8+OGHLFmypCqKokVxHN9F2Ahb1JphhVMW6zoaDTezXxXzeu3hCfkKyRi7LyFxPgT4M+Gkz0nAG2a2tJWvU0XYTL8ToXzvGMIK/EjCG+pC+tGnq9wTc0mbS7oWOMTMqrp27bpsgw02qNpmm2202267scUWWxBFX66kjeOY1157jQkTJjB16tT4/fffjxctWtRB0lIz+wtwRnN34LSixeECM8t93/v2SH7H5wBDoyj6TRzHW0VRZD169Cj07du3avvtt9eee+5Jnz59Gn1+TU0NEyZMYMqUKUybNm3ZnDlztHTp0qooiubGcTyCMHa26nduVeAJecok9SBsANuNMFhvBcwgDPj/AhYSyhyWEW7DdyLcAt8p+bM4eewk4G9mNqnE30Iqyi0xlzQwiqLfxnG8Xffu3ZcdeeSRHY466ihWW63tnZ/iOOa5557jD3/4Q+GDDz6oiqLogziOLwduaLhqLukW4GRgw0roqtKUJJG5LIqin8dxzIYbblj43ve+V7Xbbru163Vra2u59957efjhhwsLFiyIJE0ys5+b2TPpRN44SdcQyhIAupvZ/GJery08IW+cpHUJdzh3IYy9GwFvEBLrGYQ7OYsIbQo7E8bqjZPH9ie0uptIGKvHmtnM0n4H6SinxDx5Q3ViFEUXxHG80TrrrLPsmGOO6XDQQQc1mny31rJly3j44YcZO3bssrlz53aIoujfcRyfb2ZjG4lhLqE8Y7VKTiYldQX+GkXRgWZmW221lZ100knR5ptv3q7X/fzzz7njjjt4+umnC7W1tZL0RLJg1eo7FJXKE/IiS3bZb0cYxDcnDOydgQ6EwX4x8AlJEm5mszMKtSjynphLOiiKonvjOF5ziy22iE888cRo6623Tv06c+bM4dZbb7UJEyZYHMcFMzvPQmu9uhaHkwgru1ekfvGckHSxpF9WVVVFe++9t0488UStueaaqV+nurqa0aNHF2bNmlUVRdEncRwfaWbPp36hhKStCG+2IZySe3+xrtUWnpC3TlLW0p8wVm/CigWTiBXJ+QeE39XJZjY
3o1CLIu+JuaQfSrpW0moDBgzg5JNP1vrrr5/6dWbMmMFtt90Wv/HGG5K0MI7j75vZA0kMpxLuguxtZuNTv3gOJIsmvweO6dy5c3zIIYdUffe736VTp06pX+vJJ5/knnvuKcyePbsqiqIZcRwfnEZ9f7nyhNyVRN4Sc0kdJd1vZt/cfffd4zPPPDPq0qVL0a8bxzF333039957r0maHsfxgcB7ULk/75L6RlH0FLDB9773PR199NHtWs1qrc8//5wrrrgirq6ujiTdbWbfLdbmumTl7nlC+cK/CclMXIxrrSxPyN3KyFtiLmmtKIqeMLP+RxxxBCeccEKjZYNpW7x4MSNHjrSnnnpKkp4ysxOBt6iwFof1JXt4/taxY8duZ555ZrT33nuX5Loffvghw4cPL7z77rsilLFcVJIL54wn5K6k8pCYSzpI0oOrr7565/POOy/q379/KS8PhBXzc889tzB79uyIsMmss5ktLnkgRSbpYuD8DTfcMP71r39d1bNnz5LH8OKLL3LFFVfES5cunW9mhxd5tfxw4OHkw/4t7RsoBU/IXVvkITGXNFTSjb169dIll1xS9dWvfrWUlwdg2rRpXHjhhYWampqo3qbQilJvVfzYnXbaKT733HOj9pRrttWDDz7I6NGjDfhPHMf7l2qjfl54Qu4ykVViHkXRn8zs23vssYedddZZKsVKS3P++Mc/cvfdd5ukN+M43iWPNchtkaxqTQI2PPHEE3XkkUdmGk9tbS3Dhw+vWy2/LY7jk4t1raTjRt3BR7eZWdGu1cp4PCF3bZZFYi5p9SiKnjezAUcffTQnnHBCMS/XojiOue666+zJJ59UFEWPxXFcFi1PW0PSdpKe69ixY7ezzz472nXXXTONZ+7cuZx33nl1q+Vnm9mVmQZUQp6Qu0yVKjGXpCiKXpS0y0UXXaQsVsWbMmfOHM4888zC/Pnz58dxvLmZzck6pvaQtJGkN9Zee+1OV199dSar4k158cUXufTSSw14Io7jQ4s5qUr6BeHMAIDeZvZxsa7VQhyekLt2K1ViLmmNKIqmd+rU6StXXXVVlMWqeFOmTZvGL3/5y7hQKLwZx/G25dBNpzlJicpTW265pS655JJMVsWbMnbsWMaMGQPhzJWfZRxOSXhC7nKhmIm5pKooil6vqqra/Prrr8/VAF+ntraW0047rfDxxx8vNrMtmmu7lWdJK7JXN9hggw7XXXddVdZ3IBozY8YMfv7zn5uZTYrjeNciJ+VfBeq65pxmZjcV61rNxOAJuUtNMRNzSWtGUTSre/fua4waNaqqFPt6VtYnn3zCaaedFi9ZsuT9OI43M7MlWcfUFpIOA/621157cc455+TypPV//vOfXHnllQaMjuP4pKzjKTZPyF2upJ2YJyvjUzp27LjdzTffHK299trtD7JIli1bxo9+9KPC+++/X2NmG5VbFwdJ60ua0a9fv45XXnllVSk2brbV+++/z+mnnx7HcfxcoVDYr9jXk3QfoR1qDbCmmdUW+5r1ru0JuUtd2om5pK5RFP2nV69ePUeNGlWVp9Xahj7//HOGDh1aWLRo0TtJUl5WK+WS9gaeOfjgg/XjH/8417+/kydP5qKLLjIzu97Mzmj5GeUrzTE1v7OvKxtmNiH5Ydw9+dRrCietbduW14ui6KkoirYbMWJErpNxgA4dOnDDDTdUrbXWWl2iKJqe1CKXBUk9JE1df/31c5+MA6y//vpcc801EbBPFEV/Lvb1kq4MexCO8F4i6YBiX9O5YjKze5Ox+hhgLWC2pE8k9V7Z10ruYk7r1q1bz5tuuinXyThA9+7dufnmm6s6duy4UbJXpmxI2kbS03vssQd5T8YBBgwYwDnnnCPgJ5JyewBb3uR7BnZlpTWJuaRm6yEk/cLM9r366qujpk4Sy5sOHTpw0003VXXq1GnNKIr+nnU8rRVF0dM9evTocsMNN+Q+Ga+z8cYbc+mll8rMjpJ0YrGvZ2YvAh2BD4EnJT2
atEt0rmy1JjFvxVj9pw4dOqw7atSoqmL0uy6Gnj17cuONN0bA9pLKYuNhcsf42c0331znnntueQzUwJ577smwYcMALpK0Y9bxlIOy+Z/rykczifnFwAJJ32/seZLWBy477rjj1NQR7HnVqVMnhg8fHsVxvJ+k3Pe7lfRDM9vh0ksvzWXNeHO23nprvvnNbyJplMIpu0VlZsvMbD3g+4Sj22NJ5fUD6lwjmknMfwf8t6m7QpIOMLNvnXfeeVG3bt1KGXK79e7dm1NPPVXATyVtmXU8rXB7VVVVj0suuaTs8rWBAwey9dZbWxRFj/tCRsvK7n+wKx+NJOYXAKsDIyWNVjgRdbkoip7s06ePHXvssaUONRWbb745X/va10zSXXkuXZG0lqQbjj76aOVxs2xrnHTSSfTo0UNRFD1Rqmua2Z1Ar+TDWckbTOfKXiOJ+SlAd+BhSefWT6YkVUn66y677BIPGDAgo4jbZ+DAgfTt2zeOougfWcfSHEm7AkN++tOfRuVyF6KhCy+8MIqiaE3g5qxjyTtPyF3RmdkE4BvAsuRTXQgD/yRJG8LyUpXNfvOb31RlFGYqfvzjH6tr164d8ly6EkXRE7169VLWfYLbI4oiLrnkkqo4jncuRelKHTObmyQu1wIXJHd+1ijV9Z0rJjO7FziXFWN15+TjRyR1h1Cqstpqq3XJa5eP1ho+fHiVpHXzWrqSlKqM22677QqlOn2zGLp06cKZZ54ZAad46UrzvMuKKwlJawI/B44GNgWWAN0IHSyGAH86/vjjo29/+9uZxZiWN998k5/97GcAg8xsbNbx1CfpFEmjRo4cWbar4/XdcsstPPTQQ8vMrFepD2iStBXwr+TDwWZ2f4qv7V1WXCYk9QV+ChwB9CYk512A9wnJ+Z3/93//R2tWx8ePH8+zzz7L7NmzWWONNZg/fz59+vRhyJAh9OnThwULFjBmzBhOP/30In5HTRs3bhw33nijAVua2ZuZBNEESbd17NhxyN133718dXzEiBG88sorzJ49G4BNN92UNdZYsR4wf34YAvfZZx++9a1vlT7oZpx99tnxv//9708LhcJXso4lTd720JU1Sb2AA4EjgYHAWj179ozvuuuu1FfHs5oQhg8fbhMnTny/UChskOoLt1NVVdV/999//15nnnnmSj83r5PBMcccU6ipqRllZsNKdtFEciv/eUJZ1r8JbeTiFF7XE3KXuWRfz0HA/wAHRFHUdYsttuC3v/1ts6vj1dXV3HjjjWy66aYMGjSIfv36Lf/aggULuPzyyxk2bNjyxwwZMqSo30dzTjnllMJHH330TBzHX8ssiAYkdQQWDRs2rGrgwIFf+voRRxxBnz59uOWWW770tdmzZ3PGGWfQp08frr322hJE2zo1NTUce+yxZmbfNbO7s44nLWmOqeW1m8tVBDP7FLgfuF9B7aBBg1L9WVzZCSFtQ4cO1UsvvfRVSduY2RupX6ANJO0H9DrppLad1VD3pqVuMmhssK+bDJ599tmSTQaHHXZY1Z///OchQMkT8uSAoj0kHQ48DBQk9TezV0odi3NpSw47uwO4Q9J6cRy/P3To0GafM2LECB599FGGDx9OY6crd+vWjeHDh3PssceycOHCzFdyjz/++Korrrhif0mr5+jAoF927NhRhxxyyJe+MGPGDAB22GGHRp/Yp08fBg8ezJgxYxg3bhyNJfRZ6NKlC9tvv7299tpr/wdUTEKeprKuAXMV4UdRFEVHHHFEai84YsQILrjgAoYNG8Y555zzhWQcVkwIZ5xxBtXV1Y1OGu3Vu3dv1l133YKk3CxRRFF0Zd++fQvdu3dv82u0djKYOXMm48aNa/N1VsZxxx2HmXWWdExJLtgIM3uEcFsfoFrSrVnF4lyRXNmzZ89Cw/G0vssuu4xHH32Ua665psVxtS5RLMb4uzL23XdfVlttNQMuyjSQeqIo+sm+++4bNdaO9pVXwnv9HXdsuhy7a9euQFiYypOTTz4
5iuO4n6SNso4ljzwhd5mKoujs3XbbTWn1wc7ThPCd73ynyswOTG4/ZkpSjziOdzrhhBPaVRaUx8lgtdVWY9ttt7UoijLtemJmi5LblmcBJyUbPtfJMibn0pDcyRx09NFHNzl+jBkzhvHjxzNs2LAvLYI0ZrPNNivK3cm2OPDAA6uiKDo16zgAJO0Tx3GvpjbdT58+HWh6UQRWLJzk7SyPjTfemLXWWqsAXJN1LHnkCbnLjKQd4jhe7+STT06lnjVvE8IBBxxQt/JyYdEu0nqXd+3aNW7vm4+8TgannHJKFMfx5pIyr9k3syuAujg+kpSLid65djhNUtVRRx3V6BdnzJjB2LFj2XTTTVtdItG1a9fMV8frfP/73yeO4x6SvlwjUmKSrtpkk00KPXo0fsRCdXU1ffr0obn+78899xxAbspV6hs8eHCVpCO8L/mXeULusnRB7969C1/5Svs3Xed1Qth7772rqqqqStaWrylRFB1z8MEHt3vTbF4ng0022YQePXosI/S6z5yZvZeslt9P6Lu/UFK+zxZ3rglRFP14wIABTd7JHDFiBMBKbc7s1q0beWnn161bNzbZZJNY0rlZx2JmO/3v//5vo2P17NmzWbhwYbMLImPHjmXhwoUMHz48dyvkAIcddhiSIuC7WceSN56Qu8xUVVXtuu2226bSWSWvE8I+++xDHMe9W35k8UhSHMc9Dzig0UP3Wi3vk8EWW2zRIYqivUp60RaY2WBgD0J9+ZKmTj50Luc22n333Rtd0ZwxYwYzZ85c6QWOfv36tepOZqnsuOOOURRF22UZg6TtgGiXXXZp9Ot1ZYCNlQwuWLCAESNGMG7cuCY31OZBFEWss846MXBY1rHkjXdZcZmJ43jdnXfeud2v054Jodj69++PmUWS+prZrKJfsHF7S7K+ffu26xZhS5PBmDFjeOWVVzKbDLbffnsmTpyYuyPtzezFZB/BO8CTkh4DBlpaPWedK6Lk4KvV99hjj0a/XndHLC+r3W2122678cADD6wpSRn+bn6rS5cuy6IoajQ3qxuDn332WaZMmfKFr3Xr1o0dd9wxs57uK2OzzTbr8PHHH++adRx54wm5y4Sk3kCHNBLyPE8IHTp0oFOnToXFixcPBi7PKIwju3XrVqCdv+95nwz22msvbr311s45a18GgJktA9aT9D1CG7k4eZP2VsahOdeSb1ZVVcXdu3dv9I563bkEzW30zlJ1dTXjxo3jnHPOafZxW265JYCAAcCkEoTWmP2++tWvNlm5UF1dTdeuXVv8XvKguX/3AQMG8Nxzz2W+3ydvPCF3WRnUsWPHQqdOndpdspL3CWHdddflrbfe2p+MEnJJ+2y00Uap1I/neTJYe+21iaIojuP468ADWcfTGDO7U9JDwKfALEnDzexXWcflXDMOW3vttWOaKHGtG3979259Zd6CBQua3YeShhkzZvDcc8+xYMGC5TE2J4oiunbtumzhwoVHk1FCXlVVtd3WW2/d5L/zwoUL2WuvXFXlfUlr/t133313rrvuutUk9TCzz0ocYm55DbnLykErM4A3p60TQqlstdVWVVVVVZkV9EnacptttmlXuUrdZJDXusQ6a665ZgwcnnUczTGzucmGz2uBC5L2iGu09DznshBF0a79+vVrcvGu/mm9rTVmzJj2hASs2DfUlH79+jFkyJCVWqjZYIMNqiTt097Y2qpQKPTaddfGKzlmzpwJhO5gWUrj371bt2506NAhBo5OObyy5gm5y0QURX3XX3/9VDZ0ZjUhtNZmm22Gma1Zsgs2YGZdk9uxbZaXyaAlffr06QDkZ6dYM8zsTGDr5MPPJQ3KMh7nmtC7b9++TX6xrnXsRx991KoXmzFjBuuuu24qgaVtgw02UBRFmRxaI6kzoG222abRr9eVCTa3qb6cdOvWLQYq45tJiSfkLiudV1999VReKO8TQpcuXTCzov2uSaqS9Jikv0r6maT9JS0/jtPMora8aamvXCaD1Vd
fHUlds46jtcxsKmEcngDcJ2lq0hLMubzo0KVLlya/WLd3p+G+kqaMGzeOb33rW6kElrZOnToBpDMxNULSVyS9KOmPkoZJ2j1JxAF6QSidaUzdoWx56kzTHh07dgQom7G6FLyG3GVC0mrJ4Ndue++9N2PHjmXKlCmtqq8bN25cSTcfJpNZB0lvFukSHYC67iKHAIuBLpI+ASYCam5CbY1ymQyShDydH6wSSTo67C7pcOBhoJBxSM7VV9XcWN2vXz8OPfRQHn30UQYOHNjsGHHZZZc1OvaOGzeO2bNns/feey9fWHn22WdLvl+lc+fOAL2KOFZvAHQCdgO+CSwljNXvAf9q6kl19dgrc5Dd+PHjmT9/PgsXLlz+BmjBggWcf/75XHvttUC2/+6ekH+ZJ+QuK6md0pX3CSE5kExAKeo9VickdEuAdYAt68XQJuU0GSTfZ1muMJvZI5K6ADVZx+JcfU2t2tapG1PPP/98hgwZ8qVDwep+108//fQvbeacMWMGe++9N48++iiXX345t9xyC7Nnz271inuaJGFmHSjNWN2ZMFbXAusRkvNG1XUSa+0enhkzZtC7d2969+7N5ZdfvnwMfuWVV5afD5H1v3syVqdStlopPCF3mTCzJbW1tam9Xp4nhEWLFgEsM7OOqbxgA5KqgOnAMuAFYDyhS8DrZrZEUlxTU9PmjLycJoMlS5ZgZova9SIZSmKXJCP8xYDeZvZxtpG5VVghGcOadfrppzNw4EDuv/9+xo0bB4T9PV27dmXQoEFNntzbrVs3unXrxvTp05ePFX369PnCwslll132pefNnDnzS5/v379/u04IXrx4MZLmmFlRTjWT9FVgMvAR8BxhvJ4ITCMk5e/Uf/yYMWMYP3788sYF48aNo7q6miFDhjQ7Hnfr1o0+ffowZsyYL9w1fvbZZ5c/L+t/96VLlwIsXKknVThPyF1WFi9Zkm6r6GJOCO1RU1ODpKKVIZhZAWhy15WkeOHChSu9EpH1ZNAWtbW1mFmlDfIfSTrNzG7KOhC3SlpWU9O6mzb9+vVb6TtcdW/Sq6urm/z9b+w1R4wYkXrp4eLFiyHcXSwKM3uPcOfySyR92vBzQ4YMWanTp+vU/ZuOGzdu+R1JYPn4Xf8xWf27Jwl56dqdlQFPyF0m4jh+b/bs2VuT8i2rYk0I7fHWW28hKbMkUdLiWbNmdV3ZPu1ZTwZt8fHHHy8D3kvtBTNmZpJ0HzBS0pXAmmaW3q0l51ogad67777bveVHtt2MGTNYY401it6bvCUffPCBxXHcuu4A6asBmDVrFs11tWmtuoWUurG2/sezZ8+mT58+mf67L1y4UIQ7uy5RlrWWriI89eGHH2Ydw3LFHJimTp0aFwqF11J/4VYysxmvvvpqSY+CbmkygOL8m//3v/+NgMdSe8EcMLPBwB5AF2CJpAMyDsmtQgqFwstvvvlmUTcav/LKK0Xr4DR//vxWP/Y///lPwcyeLUogLTAzi6Los+effz6V11uwYMHy8RfCvp66trV1bWyz+nevra2ltra2ChhblIuXKU/IXVbGLl68uGrZsmVZxwEUd2B69913Y+CZorx4K5jZ82+99VZJO3dkMRl8/vnnLFu2LAIeTO1Fc8LMXgQ6Ah8CT0p6VO3Zqetc6z3x8cfF3cIwffr05e0T0zJjxgzGjBnD2LFjmTlzJiNGjFheytiU+fPnVwF/TTWQlWBmU19//fVUFk/69etHv379GD9+PNXV1eywww507dqVcePGLR93s/p3f+mll5C0zMzysyqXA16y4jJhZrOiKIqrq6ujnXfeOetwmD59ers2AzUljmNqamo6kO1R7g9/9tlnPyzlBetPBl27dmWHHXZg+vTpjBs3bvkEkPa/+Ysvvoik2jiOK/IoZjNbBqwn6XvAHUAsqa+ZvZVxaK6yjV26dOlNNTU1tLd9alOK0Wavbgxqbdnd22+/TXKC7j9TD6aVzOy5d955Z2dSys0algM2/HfO6t9
90qRJRFHkyXgDnpC7zERRNOfll1/unYeEvFh9V6dOnQrhbuQrRblA6zwWFVOLsgAAIABJREFUx7E+/PDDkh6IVOrJYMqUKUh6N9UXzSEzu1PSQ8CnwCxJw83sV1nH5SqTmc2Jomjpyy+/3HG//fbLOpzlBg1K92DbF154gSiKPi8UCiUt72vgLwsWLPhZhtdvURr/7tOmTSsUCoXJKYRTUbxkxWWmUChMef311+Os4yim8ePHE0XRl3bPl5KZLY2iaOHTTz+dZRhFN23atGVxHL+UdRylYGZzk9W8a4ELJJmk9h3H6lwToij6YMKECVmH8QX1S+LSUF1dbWY2LdUXXXnjzUz/+leTZwRlLo1/92Qf0RPtfqEK4wm5y9JV77zzTrRgQeV2PnrqqacKcRxnvnEljuNx48aNq9gTIOfMmcOcOXM6AJdnHUspmdmZwNbJh59LSnfZ0DmgUCjcOWHChIodP2pra5k6dSpmdkOWcSQbO9+86667Knah6vnnn2fp0qURcFvWseSNJ+QuM2b2RBRF8+64446sQymKV155hQULFkTAWVnHApz56aefVr31VmWWG996660WRdGHGZcGZcLMphLG8gnAfZKmSvKx3aVpeG1trZ566qms4yiKe++9F0lLzOyurGOJ4/j8N954Q0lP9Ipz1113FSQ9W84HuBWLD9ouU3Ecj3zyyScrcuVl9OjRhSiKJplZ5psMzezdKIrevPXWWytu5SWOYyZMmGBxHH/5WLlVhAW7A18HtgQKkorTNsitcsxsqaR/3H333RU5Vj/88MOFOI7/kHUcAGZ2n6Sae+65J+tQUvfJJ5/w3nvvVZnZGVnHkkeekLusXVxbW6tnnsmsK2BRfP7558ycObMqjuM8rI4DEMfxr1577TXV1lbWuTIPPfQQcRwXgExvN+eBmT1C6FcOUC3p1izjcZXDzM6YPXt2Vd05ApUiuZNZBfwi61jqxHE8phJLDJM7mbPNbErWseSRJ+Su5BTsIOks4BlJ/P73v6+owWf06NFEUTTXzHJzj9fM/iRp0d133511KKkaO3Zswcz+bGZZdkfIDTNblGz4PAs4Kdnw2ehx3c41R1IHSXtKGg7cG0VRfMstt1TU79no0aPjKIomm9ncrGOp55c1NTXRyy+/nHUcqYnjmBdffNHiOP5t1rHklSfkriQkdZZ0vKSxwDzgOeBSYGczGz579uyoUuoT58yZwz/+8Q+L4/jirGNpKI7ja/785z/bZ59lXkWTir/85S/MnTs3An6adSx5Y2ZXABskH34k6dQs43HlQdJakk6V9DjwOfB34DxgkziOr3jppZf09ttvZxpjWiZPnszMmTOjOI5/knUs9ZnZ/CiKHr/qqqsKcVwZVYZXX321xXG8hNAZyjXCE3JXKocQdlX/D9AdWI3QR3lPM7sIuPn666+Pa2pqsoswJeeee25B0nQzy93AY2bnAx9ccMEFZX9HYt68edx+++0GXGJm72cdTx6Z2XvJavn9wMis43Fl4TRC0nQQ0JlwQuybwLZmdk4URRMuuOCCsk8Ua2tr+c1vfhNLGmtmz2UdT0NxHB9VU1Oz7Lrrriv7OxJvvPEGzzzzjMzsGL+T2TRPyF3RSepJqO/tmHyqBpgEbGVmdX2jh8Vx/OlFF11U1qP8Pffcw+zZsxXH8f5Zx9KUOI4PfOutt6JHHnkk61Da5bzzzisA7/qhOC0zs8HAHnUfSzogw3BcTknqC1xMWDCBMFY/APSvOxE2juODP/vss7IvXbnsssvi2traGjM7JutYGpOUnh335JNPatq0rNujt10cx1x88cUFSY+Z2UNZx5NnnpC7opHUU9KbwFzCrfO6VYgbgX3M7JO6x5qZxXF80NSpU8u2tdacOXO45557DDjbzHJ7LLCZvQlcMWrUqLItXfnLX/7CO++8E8Vx7IllK5nZi/U+fFLSo5KUWUAuNyT1lWTAzORT1cAy4Awz+66ZLe/BZ2bzzeyUv/3tb2VbujJ
58mRefvnlyMy+aWa5vVtoZg9IeurCCy8s2zsSV199tS1atKjWzL6ZdSx55wm5S12DRHwzYGhy23x/YGMzO6uxQTDpIX3z9ddfH8+bN6+kMbdXHMecc845BUlvmtmVWcfTEjM7G/jg/9u79zi75nv/46/P2jNJSBE0dWurpVqkpHKhLi0q8SulVXEpqqGNIDgoKj3iFsJJVasexyVCUJwe9FdytL+eH/nV+TnUZSRCaYMUTVL8hGRMmSQzs9fn98d3jYwx9732XnvteT8fjzzGzOxZ38/E5Lve813fSzLKnCsrV67sOFXllazryanJhGlksZl9NutiJBtdBPEJSV/9ZUJfPberr3P326MoenLGjBnFtra2SpWbiubm5o5TVap+9Mfdv9Hc3Nx29dVX5+6JxLPPPttxqsq6rOupdgrkkprugnh7p+7uRXf/Wy+XOS2O42WnnnpqMU/zyadPnx6vXLmyLU8jtnEc77ds2TKfOXNmboZe3n33XaZNmxYDz2uqysC5+y+BzZJ3XzGzqluALOXTXRB39/8D4O7reluXEcfxxKampjVnnnlmbkZvW1paOOWUU4qtra1vV+tUlc6SqSvffuSRR7jzzjuzLqfPli5dykUXXeRm9m+aqtI3CuRSst6CeH8kU1d2bG5ufueUU04p5mHP7JkzZ8ZLliyJ3X1MNU9V6czdl7r73g0NDfzsZz+r+tGX5uZmkp+J5XEc75Z1PXnn7quT0dBrgAuT7RE3yrouKZ/egnh/JFNXvrh8+fK26dOnx9Ueytva2jj99NOLjY2N78dx/PlqnqrSmbv/Hjjx7rvv9vvvvz/rcnq1YsUKzj33XAcWxHF8XNb15IUCuQxYmkG8I3dfF8fx5999992m73//+1U7Uh7HMTNmzCg2NDS4u3/F3f+cdU39lSyqPejhhx/2K6+8smrvqI2NjSQ/CyvjON4pTzfTaufuZwM7J+82mdkRWdYj6UsziHfk7n9z9zFLliwpnnPOOVU7Ut7S0sLJJ59cfPPNN9e6+47VcHpyf7n77cAPb7nlFq/mUzyXLl3K6aef7u7+ZBzH/yPrevLE0tqBJvnHTjLiIjUs2TXlKUIIhxDESwrh3bQzPIqil4cNG7bFtddeG22xxRZpNzFgLS0tnHfeecVXX3216O67J/Pfc8vM9jGz/xo1ahSXXXZZoa6uLuuSPrBixQrOOuusuLW19W9JGK/5uYjl6k97um6ywPNxYA9gCTDK3aszYUmfJLum/LXDhyaUGsK7aedzZvbcVlttNeTnP/95YcMNN+z9iyqksbGRM844o9jU1NQUx/EO7v5O1jWVwsx+AMw96KCDmDZtWlXlrUWLFnHppZc68FCxWBwUYTzNvlqBXPqsUkG8U5v1URQtdPcvHnPMMXbMMceUs7k+eeqpp5g9e3bc2tr6nrvvVisLC81sFzN7YtiwYUMvvPDCwi677JJ1Sdx6663cd999mNmTcRzvPVhGxrMI5B1eczDwu+TdL+X9l83BqFJBvFObW0VR9GwURZufffbZ0Ve/+tVyNtcn8+fPb18A/locx7u4+/tZ15QGMzvczO7ZfPPNufzyywvbbLNNpvW0tbUxe/bs+IknnojM7I44jr+XaUEVpEAuFZVFEO+ihnOB2VtuuaVfccUVhZEjR1ayeSCMis+aNStetGhRBPw78N1aC4hmVm9m97v7wXvvvbefe+65lsVo+YoVK5gxY0Zx1apV7u5nuPuNFS8iQ1kG8uR1GxD2oAa4xd2npFmHlEcWQbxT+wbcCJy08847+8UXXxxlMVre2NjIBRdcUFy2bFkEXOHhQLSaYmabR1H0sLt/cdKkSTZ58uRM6li8eDGzZs2K161bt8bdD3P3BZkUkhEFcqmIagjinerZKoqiP7j7Fyo9Wt5pVPwQd//vijWeATM7yMx+PWzYsKEXXHBBYfTo0RVpN45jbr/99vZR8cVxHE/I+yPmgcg6kHd4/XnAT5J3t3D3t9KsR9KRdRD
vop7doih6KIqiTc8666xo3333rVjbDzzwADfffLMDy+M43r9WnmB2x8xOM7NfVHq0vKWlhZ/+9Kfx448/HpnZf7j7Ee7eWpHGq4gCeQWY2SbAGOALhOODNwAKwNrkz1vAIuCvtTbPstqCeGdmdq6Z/Ut9fT377bdfYfLkyWy88capt9PW1sZvfvMb5s+f39bU1FRHjY6Kd6fjaPmmm27aNmnSpLpDDz2UKEp/Lfjq1auZN2+eP/roo14sFovu/k+DbVS8o2oJ5MnXfBJYnrw7zd1vSLOmUpnZJ4CxwGcJ/fQwwAj99Brg74STgVd4Wje8KlFtQbyjjqPlw4cPj7/xjW8Ujj76aIYMGdLbl/Zbc3Mzd9xxBwsWLCiuXbvWgCtrcVS8O8lo+R/iON516623Lh5//PGFffbZpyxtLV++nLlz58aLFy82M3s/juNJ7v5gWRrLAQXylJlZBOxFOAxhbPJna+BZ4M/A+4SOvUjo7Iclnx8DjACeIXT4TwMLvMMJlHlS7UG8IzOrBy6IouiMOI4323777YsnnnhiKiO5b7zxBnPnzvWFCxfi7i3u/u/Aee6+suSL55CZbQNcbWaHR1FU2GOPPWzKlCmWxrShhoYGbrvttuKyZcsKURStjOP4auCng+WXnu5UUyDv8LW/BiYRprJs6u4V35PUzIYA+wPjgXGEvvpjhP53aVLbWsAJ/fQGwGeS1xmhj+7YV+dyTnE1B/HOksGt2VEUfc/dh40ePdqnTJkSbbvttiVf+y9/+Qvz5s2LlyxZEkVR1BTH8Y3AJe6+puSL51CyDugad99v6NChfsABBxQmT55MqdOG4jjmwQcf5J577imuXLmyEEXRK3Ecz0x2fhnUFMhTYmabAScApxI68T8QOuuFwJK+hAIz+zjrQ/x4wmmUDwDXA0/mYUQmT0G8K2a2fxRFP4njeGxdXZ2PHDnSd9hhh8Juu+3GXnvt1WNn1NLSwuLFi2loaODFF18svv7666xbt64QRdGyOI5nAvPy8P+wEpIRr9OiKPpxHMdbDxs2rG3rrbeOdtppp2j33Xdn1113paf55k1NTTz++OMsXryYl19+ue2dd96J2traLIqiP8Zx/MNkC0ahOgN58vVfJuzEAvA1r9BJh2b2KWAqMIUQRB9jfV/9Sm//RpOf3W1Y31e3D77cCdzg7kvKV3168hTEu2Jmx0ZRdGkcx58bMmRIccstt2THHXcsjBs3jrFjx/Y4et7c3ExDQwMLFy7kpZdeKr711lu0trYWoij6UxzH5yd7dQtgZkOBS6IoOjWO402GDx/e9qlPfaowatQo23PPPdlhhx16fNL59ttv89hjj/Hss8/yyiuvtK1evbrg7rG7/w44u9anAfWHAnmJzGwscBpwOPBbQnh+PI3gZWabsz7kNyXX/lU1jsTkPYh3lhxqchRwYBRF4919G3cfUldXF9fX18eFQoG6ujqKxSLFYpG2tjZraWkpmFkxiqKVxWLxGeBh4B7v/UTRQS0JBt8B9isUCqOLxeLHgWjIkCHFuro6LxQKFAoF2traKBaLtLS0RMViMTKzdWa2PI7jp4AHgV9X47+NrFVrIE+uUQcsA7Yi/D/8ejl+aU2eXB4ATAP2JYTnGz2l/f7N7NOsD/kvEPrq+e5edWfB5z2Id2ZmWxH6j68VCoUxcRxv4e6F+vr6Yn19/Uf6j9bW1qitrS0ys9Yoit4oFotPAwsIffWgW2PSH0neOQLYp1AojCoWiyMAGzp06Ad9tZl9cF9ct25dIY5ji6JoDfBKHMdPAP+L8G9jUD+57IoC+QAlcw2vA3YHbiCMfpZlkVJyM5lIuJmMJ8y9rIojtmotiPfEzDYFDiFMMRqe/FlLmIa0Gvi9u7+WWYE1xMw+T/iZ3wTYCBhC+Hv+B/A68FvP4YEcWajmQN7hWpOB25J3t3P3V0u9ZodrfwGYR/g5ug64y93fS+v6ndoaShicOQ3YDDjR3Z8sR1v9VWtBvCfJU5CDCP8
PNiJMOVoDvEdYs/XAYJ02mKbkadFYYB/C3/NwoI71ffVSwn2x5s97SIMCeT8lP4BHAb8g3EAucfe1FWz/K4Sby1PAP2X1G/1gCuIieZaHQJ5cb1NgVfLuZe5+UYnXKwBnAT8GLgGur+SieTM7CrgWuB24uJL3iU51DJogLpJnafap6W+XUGWSUfF7gYuBb7n79Ep3sskWeaMJv+X/ycwOq2T7VqYj7kVkcHP31cmN6BrgQjPzZOpYvyWj4o8AhwJ7uPu/VnoHK3e/B9gV2B5YZGZ7VLJ9K9MR9yJS/Wo6kJvZPsBzhEcwY7J8DOnuze5+NmGk/iozuymZi1k2CuIiUglJ37Zz8m6TmR3Rn683s+MICzV/RVgs+tdevqRs3P0tdz+CMEI/38x+VO42FcRFpGanrJjZQcAvgWPd/aGs6+nIzIYDvyHM2Tom7blampoikm95mbLSxfWNsAvLHsCrQD1h96oTulv4aWanAdMJi0NfKEddA2VmWxMWrv4OmJ724lVNTRHJN80h772WbwE3AYe5++O9vT4LyUKiO4GNgW+mEcoVxEVqQ14DeYd2DiaE2JiwMG8ecGbnQGtm5wKnABPTXBSapmTnrN8DDcDpKe3GpSAuUgM0h7wHZnYAMBc4uFrDOEASwL9D2BrxV6VMX9HUFBGpMq8lbyPCLg7fBy7r+AIzm0rY2WTfag3jAMki/AmE3blmlXItTU0Rke7UVCA3sy8R5iAe4e4Ls66nN8ment8FNiTsgdsvCuIiUqVeJITw1wjb1g0Hzmmfj21m3yYstJ/o7n/Pqsi+cvcmwpZ8h5nZWf39egVxEelNzUxZSaaAPA3Mdvc7s6hhoJI55YsJx7P3ule5pqaI1La8T1np0J4RDve5iLDvsQHnA+cAh3rOTmc1s20J95n93f35PrxeU1NEapjmkHfd/uXAF4Fvl+PUuHJL9iq/G9ilu33KFcRFBodaCeSd2t6JsHhzEvCv7j690jWkwcxOAk4G9nT31m5eoyAuMggokH+07XGEBUSj3f3NSrefFjO7Bhjp7sd1+riCuMggUouBPGn/GGAGYRvaXJ4EmIz6/yfwiLvP6vQ5BXGRQUSB/MPtDgUWAle6+12VbDttZrYh8CzJ1BUFcZHBqRYDuZltSejfDnH3hkq3nyYz+zThvrO/uz+vIC4yOCmQf7jdGcA4cjpVpbNk6so9hIVQn0s+rCAuMojUaCD/N+A1d//nbj7/EKEvH5F86BWgsdPLRgDbJf/d6O6blqPWvkimrkwDvtThwwriIoOIAvn6NocAy4D93H1JCterihuCmT1BOFhDQVxkEKq1QG5mnyScmvxpd3+vl9c6PfStyZPDuYRpL9unXmwfmVkErAI2QUFcZFBKs08t69HtFXA48EIaYRzA3SfCh24IXXb2HW8IabTbhUuAK4Cby3R9EZFKmgrc1Ycw3t6nLujuNe7emIxO35tiff3m7rGZzQR2UxgXkVLlfR/yaQxg/+6e9PWGAJxEGEEvhwcJoy67l+n6IiIVkTzJPAm4oQ8vn5C8fainFyV9cOenl1m4DTjEzEZmXYiI5FtuA7mZ7QJsD/xHypfO/Ibg7jHh5jWtHNcXEamgw4Al7v7nPrx2YvK22wGRDso1INJn7r4KuI9wCJKIyIDlNpADpwI3dbcPbAmq5YZwK/BNM/t4GdsQESm3acB1fXztBAB3/1DfamYTkp1MOro7hdrScB1wqpkVsi5ERPIrz4H8EOBXZbhuVdwQksOB/ov1I/YiIrliZhsTFsrP78Nre5oueGTnPtndF5VeYencfSGwDhiVdS0ikl+5DORm9glgI+DllK9bbTeEp4GxZW5DRKRcxgDP9fFJ5rjk7YemC5rZBNbvbFWt1FeLSElyGcgJHd+iMuw7Xm03BHXyIpJnYwn9WF+0Txc82sweSv6sJvTHVTEa3gP11SJSkrxueziWcEpa2jreENr/u31f8p+Uob3eLATGmJnVwqFHIjLojAX+dx9
f2z5d8EPB1szm0Msi+3JIBmJOdvcj+/DyhcBRZS5JRGpYngN5OeePV8UNwd3fNrN3CbvJLK10+yIiJRpLOFOhR8n6nBF0PRLe6O59WWSfimTq4tF8+BC43jwD7GJmde7eVrbiRKRm5XXKyhhSfoRZTTeEThaiR6EikjNmthHwSaAvB7e1L17vqp/tPIVwhJnNLqGuOT193t0Xufv5ndvt5Wv+AawAdhpoXSIyuOU1kI8EXk/5mhW7IfTTG4TvV0Sk6iT9YVfHRm8OrOzjiHH7FMGPhOAuBkOmAj2G6oy8AWibWhEZkNwF8qTjH0bYZipN1XpDWEv4fkVEqoqZ7QmsAhrN7L/N7HIz+6aZbQNsQOi/+mIMdNnXdm5vBDC+845XVUJ9tYgMWB7nkNcDRXcvpnzd/t4QKrXIcw1wlZldVaH2RKRKmFlZFnOX4bobA/sAewJFYAjwD+CdPtTSPle7L9MQ76XDYIiZTSWssbmb9fO9j+7jQsy0rSX8EiIi0m95DORFoJDmziNVfkOoAx4BnkzpeiJS/c5L3qb9i3ja192dEMTbCE8tNwBWAk8Rzon4Vh+u0b47SbeDIclCy9nAOHef2OFj9xCeWN7r7tsna4EmdnedMqsj/D2IiPRb7gK5uxfNrI0wUt6S0mWr+YawAXCfu1+T4jVFpIqZ2XkA7v6jar6umW1B6BcXExagL04WOGJmOwCTevjaOYS1O+0DGVM7HM7WbrPk8yOS98/v8LlGd280s/FJDe0nLJ/coY17u2h6TBcff8jdb+r2G+2bYYQnmiIi/Za7QJ5YAwynxEBeiRtCCoajTl5EqpC7/z/ghG4+3Uzov7r72pL6yQ7zyCcAJ3Xzmo88qTSzOaW23Q311SIyYHkN5C8BOwOPlXKRStwQUrAzcGeZri0iUi5vAMPNbDN3X1WOBpIBlFXu3liO6/ejDiNsefhSlnWISH7lbpeVRNXszV3OG4KZ1QG7Eg6dEBHJDXePCVNZOj91TNMEephqWKLN+vHa7YD33P2tMtUiIjUur4H8acKR9tWgnDeEHYHX3f3dMl1fRKScnqa8gyfjCQvtU2NmY5JzJs4nzDefkyze78lYwkCRiMiA5HXKykLg7KyLSIynfHuSjyXc0ERE8mghfdtpZUDKsb2huy8i7Lh1fm+v7UB9tYiUJK8j5C8AnzWzbhcMVYq7H9nb3uUl0KiLiORZ1Uwv7KAcpyyrrxaRkuQykLt7C2Fu4lezrqVckkVC+6P9x0Ukv14CRpjZZzKu4wNpn/KZDAxphFxESpLLQJ64jXS3F6w2ewFDgT9mXYiIyEAkCzvvIJzVUKuOBR5x95VZFyIi+WUpHXb5wVHM7m6pXLD39oYDy4Dd3H1ZJdqsJDO7C2jQgUAig0+5+tNK99NJm18gnDb8aXdfV6l2KyF5krkION/dH8y6HhGprDT71NyOkLv7+4T9uWtu5MXMPgEcDNyedS0iIqVw9xeB5+jh1M4c+zLwMcq305aIDBK5DeSJG4ApZjYk60JS9gPgf7r76qwLERFJwfXAtKyLKINpwA3J1BwRkQHLdSB39yWEHVeOybqWtJjZMOAUwi8bIiK14AFgWzMbn3UhaTGzbYBDCeuZRERKkutAnrgA+BczG5l1ISm5FHjK3bWFlojUBHdvA2YAc2vhiWYyd3wO8At3X5V1PSKSf7kP5O7+BGEu+XVZ11IqM/syMBk4LetaRERS9ktgBfDPWReSgu8BnwRmZV2IiNSG3O6y0qntDYBngAvdPdVjlCslmaryDHBRXr8HEUlHLe2y0qn9bQhnSEx098VZ1FCq5Ht4Bjgwr9+DiKRDu6x04u5rgBOAa3M8deVS4HmFcRGpVe7+d+A84LY8Tl3pMFXlBoVxEUlTTYyQd6jhKmBn4DB3b82qjv4yswMJj3N3dfe3sq5HRLJVqyPkSQ0G/JZwiucPPa2bUAWY2RnAFGB8cmK0iAxiGiHv3gWAAfPMLBffm5ntBdwFHKEwLiK
1LgngxwMTgR9nXE6fmdlxwI8IAz4K4yKSqlyE1r5KOskjgG2B66o9lJvZWOA+4Hh3fzTrekREKiHZmeRA4AdmdmbW9fTGzA4Hrga+7u6vZl2PiNSeqg6sA+HuzYS9YUcBt5tZfcYldcnM9gV+D0x19//Muh4RkUpy99eBrwGnm9mFyVSWqmNmJxB28TrY3V/IuBwRqVE1F8gB3P1d4OvA5sD91bTQ04LjgXuB77j7/KxrEhHJgrv/DfgKcCRwvZltmHFJHzCzOjObAcwE9nf3RVnXJCK1qyYDOXwwUn4Y4STPP5nZkRmXhJltBdxP2GXgQHf/Q8YliYhkyt3fBPYFNgEWm9neGZeEme0M/JFQ197JqdAiImVTs4Ecwpxyd/8RIZhfZmb3ZDFanoyKH0fYf/c5YJy2zBIRCdx9tbsfC5wP3GtmP8titDwZFZ8O/F/gFsLAyfJK1yEig09NB/J2yWmeuwGvEUbLJ5vZ0Eq0bWY7ERZuTifMQbxQK/RFRD7K3e8DdgW2IoyWH1yJxfnJoMnehFHxAwiDJnPytCWjiORbTe1D3hfJ8fSzCIs+bwHmuPuylNuoJ4zKnwrsRFgQdJW7r0uzHRGpTbW8D3lfmdlhwMXARsCNwK3u/k7KbXwMOBaYBgwHZgO3KIiLSF+k2acOukDezsx2JATm7wKPEk5fe8Td3xvg9SLgc4TOfSrwMnA9cJ9GxEWkPxTIg2TnlT0IgfmbhDU4NwMNAx3gMLM6wij8CYT+/xFCX73A3eMUyhaRQUKBPEVmNpwQok8EvkSY1vI0sDD582fgfaDF3d3MCsAwwiPVscC45O0YoIlwAt317v58Zb8TEakVCuQflaz/ORE4DtgB+Avr++mFhEGQNUBr0lfXE/rqzxD66Pb+eldgBWGnq5vSfkIqIoOHAnmZJB34KNaH7LHA54ENgHqgCBQInf7bfPhmsNDdV2ZQtojUGAXyniULPkezvp8eC3yWEMAjoP3Gtga8Xq/0AAACrUlEQVR4nQ/31c8kW+OKiJREgTwDych4gWT0Jet6RKR2KZAPXDIlxdy9NetaRKS2pdmn1pVezuDg7kXCCLmIiFQpd2/LugYRkf4aFNseioiIiIhUKwVyEREREZEMKZCLiIiIiGRIgVxEREREJEMK5CIiIiIiGVIgFxERERHJkAK5iIhUjJnda2Yjsq5DRKSaaB9yEREpOzM7AtgOmJB1LSIi1UYj5CIiMmBJ0O6Vu//a3X8CrCpzSSIiuaNALiIipRifdQEiInmnQC4iIiIikiEFchGRQc7MRpjZVDObY2ZjzGyCmd2bdV0iIoNF2QO5OnoRkao3zt1vIllw6e4LgIeyLUlEZPCoxC4r49z9JjP7KzDH3ReY2XYVaFdERPog6ZdHAI3uvij52E2dX5e8ZnanD48zszkd3m909/PLV62ISO0peyDva0cvIiKZOgpY0NML3L0ROLnjx8xstgK4iEhpKjWHvNeOXkREMjWRykxT2awCbYiI5EqlAnmlOnoRERmYEcnc8bJI1g/NBkYAc81sarnaEhHJm0qd1FnWjl5ERErj7hPLfP0FhCelmt4iItJJRUbIy93Ri4hIZhqyLkBEJO/M3dO5kJkDuLulckERkUGqXP2p+mkRkfSk2afqYCARERERkQwpkIuIiIiIZEiBXEREREQkQ6nvstI+n0ZERKqT+mkRkeqiEXIRERERkQyltsuKiIiIiIj0n0bIRUREREQypEAuIiIiIpIhBXIRERERkQwpkIuIiIiIZEiBXEREREQkQwrkIiIiIiIZUiAXEREREcmQArmIiIiISIYUyEVEREREMqRALiIiIiKSIQVyEREREZEMKZCLiIiIiGRIgVxEREREJEMK5CIiIiIiGVIgFxERERHJkAK5iIiIiEiGFMhFRERERDL0/wGDFAjM6d36FgAAAABJRU5ErkJggg==\n", 53 | "text/plain": [ 54 | "
" 55 | ] 56 | }, 57 | "metadata": {}, 58 | "output_type": "display_data" 59 | } 60 | ], 61 | "source": [ 62 | "import daft\n", 63 | "from matplotlib import rc\n", 64 | "\n", 65 | "rc(\"font\", family=\"serif\", size=12)\n", 66 | "rc(\"text\", usetex=True)\n", 67 | "\n", 68 | "\n", 69 | "pgm = daft.PGM(grid_unit=4.0, node_unit=1.4)\n", 70 | "\n", 71 | "# Start with the plates.\n", 72 | "rect_params = {\"lw\": 2}\n", 73 | "edge_params = {\n", 74 | " 'linewidth': 1\n", 75 | "}\n", 76 | "pgm.add_plate(\n", 77 | " [0, 0, 3, 2],\n", 78 | " label=r\"\\Large $r$\",\n", 79 | " rect_params=rect_params,\n", 80 | ")\n", 81 | "\n", 82 | "pgm.add_plate(\n", 83 | " [3 + 0.2, 0, 3, 2],\n", 84 | " label=r\"\\Large $r+1$\",\n", 85 | " rect_params=rect_params,\n", 86 | ")\n", 87 | "\n", 88 | "pgm.add_node(\"e_r\", r\"$E_r$\", 0.5, 0.5, scale=1.5, fontsize=24)\n", 89 | "pgm.add_node(\"e_r_1\", r\"$E_{r+1}$\", 3.5 + 0.2, 0.5, scale=1.5, fontsize=24)\n", 90 | "pgm.add_edge(\"e_r\", \"e_r_1\", plot_params=edge_params)\n", 91 | "\n", 92 | "pgm.add_node(\"c_r\", r\"$C_r$\", 1.5, 1., scale=1.5, fontsize=24, observed=True)\n", 93 | "pgm.add_node(\"c_r_1\", r\"$C_{r+1}$\", 3.5 + 0.2 + 1, 1., scale=1.5, fontsize=24, observed=True)\n", 94 | "pgm.add_edge(\"e_r\", \"c_r\", plot_params=edge_params)\n", 95 | "pgm.add_edge(\"e_r_1\", \"c_r_1\", plot_params=edge_params)\n", 96 | "\n", 97 | "pgm.add_node(\"a_r\", r\"$A_u$\", 0.5, 1.5, scale=1.5, fontsize=24)\n", 98 | "pgm.add_node(\"a_r_1\", r\"$A_{ur+1}$\", 3.7, 1.5, scale=1.5, fontsize=24)\n", 99 | "pgm.add_edge(\"a_r\", \"c_r\", plot_params=edge_params)\n", 100 | "pgm.add_edge(\"a_r_1\", \"c_r_1\", plot_params=edge_params)\n", 101 | "\n", 102 | "pgm.add_node(\"p_r\", r\"$P_{ur}$\", 2.3, 1., scale=1.5, fontsize=24, observed=True)\n", 103 | "pgm.add_node(\"p_r_1\", r\"$P_{ur+1}$\", 3.5 + 2, 1., scale=1.5, fontsize=24, observed=True)\n", 104 | "pgm.add_edge(\"c_r\", \"p_r\", plot_params=edge_params)\n", 105 | "pgm.add_edge(\"c_r_1\", \"p_r_1\", 
plot_params=edge_params)\n", 106 | "\n", 107 | "pgm.add_node(\"s_r\", r\"$S_{ur}$\", 2., 1.7, scale=1.5, fontsize=24)\n", 108 | "pgm.add_node(\"s_r_1\", r\"$S_{ur+1}$\", 3.7 + 1.5, 1.7, scale=1.5, fontsize=24)\n", 109 | "pgm.add_edge(\"c_r\", \"s_r\", plot_params=edge_params)\n", 110 | "pgm.add_edge(\"c_r_1\", \"s_r_1\", plot_params=edge_params)\n", 111 | "\n", 112 | "pgm.add_edge(\"p_r\", \"s_r\", plot_params=edge_params)\n", 113 | "pgm.add_edge(\"p_r_1\", \"s_r_1\", plot_params=edge_params)\n", 114 | "pgm.add_edge(\"s_r\", \"e_r_1\", plot_params=edge_params)\n", 115 | "\n", 116 | "# Render and save.\n", 117 | "pgm.render()\n", 118 | "pgm.savefig(\"dbn.png\", dpi=150)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Here are the equations we'll be using for finding the parameters of the DBN." 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "\\begin{align}\n", 133 | "P\\left(E_r=1\\mid E_{r-1}=0\\right) & = 0 \\label{eq:1} \\tag{1} \\\\\n", 134 | "P\\left(A_u=1\\right) & = \\alpha_{uq} \\label{eq:2} \\tag{2} \\\\\n", 135 | "P\\left(C_r=1\\mid E_r=1, A_u=1\\right) & = 1 \\label{eq:3} \\tag{3} \\\\\n", 136 | "P\\left(S_{r}=1\\mid C_r=0,P_r=0\\right) & = 0 \\label{eq:4} \\tag{4} \\\\\n", 137 | "P\\left(S_{r}=1\\mid C_r=1,P_r=0\\right) & = \\sigma_{uq} \\label{eq:5} \\tag{5} \\\\\n", 138 | "P\\left(S_{r}=1\\mid C_r=1,P_r=1\\right) & = 1 \\label{eq:6} \\tag{6} \\\\\n", 139 | "P\\left(E_{r}=1\\mid S_{r-1}=1\\right) & = 0 \\label{eq:7} \\tag{7} \\\\\n", 140 | "P\\left(E_{r}=1\\mid E_{r-1}=1,S_{r-1}=0\\right) & = \\gamma \\label{eq:8} \\tag{8} \\\\\n", 141 | "P\\left(C_r=1\\right) = P\\left(C_{r}=1\\mid E_r=1\\right)\\cdot P\\left(E_r=1\\right) & = \\alpha_{uq}\\epsilon_{ru}\\label{eq:9} \\tag{9} \\\\\n", 142 | "\\end{align}" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "Each query and each sku carries an 
attractiveness factor $\\alpha_{uq}$. When the customer interacts with a sku, there's a $\\sigma_{uq}$ chance they'll enjoy it and end their browsing through the query result page. \n", 150 | "\n", 151 | "If they are not satisfied, they continue browsing through with a probability of $\\gamma$.\n", 152 | "\n", 153 | "In this model, only clicks and purchases are observed which means all other variables are hidden; in such a case we use EM optimization techniques to find values for each parameter that best describe the observed data in terms of log-likelihood.\n", 154 | "\n", 155 | "This being said, the log-likelihood is given by:\n", 156 | "\n", 157 | "$$\\ell\\ell = \\sum_{s \\in S}log\\left(\\sum_{\\textbf{X}}P(\\textbf{X}, \\textbf{C}^{(s)}, \\textbf{P}^{(s)} \\mid \\Psi) \\right)$$\n", 158 | "\n", 159 | "Where $X$ represents the hidden variables, $C$ and $P$ are the observed data clicks and purchases and finally $\\Psi$ represents all variables used to model the data.\n", 160 | "\n", 161 | "Finding the derivative of this equation is intractable thanks to the summation of the hidden variables. 
We then use the [Expectation-Maximization](https://towardsdatascience.com/inference-using-em-algorithm-d71cccb647bc) algorithm and aim to maximize the following $Q$ function:\n", 162 | "\n", 163 | "$$Q = \\sum_{s \\in S} \\mathbb{E}_{X|C^{(s)}}\\left[logP\\left(X, C^{(s)}, P^{(s)} \\mid \\Psi\\right)\\right]$$\n", 164 | "\n", 165 | "In our case, all variables are Bernoulli (either 0 or 1), each modeled by a parameter $\\theta_c$, which translates the above to:\n", 166 | "\n", 167 | "$$\n", 168 | "Q(\\theta_c) =\\sum_{s \\in S} \\sum_{c_i \\in s} \\left(P\\left(X_{c_i}^{(s)}=1, Par(X_{c_i}^{(s)}) = p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)log(\\theta_c) + P\\left(X_{c_i}^{(s)}=0, Par(X_{c_i}^{(s)}) = p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)log(1-\\theta_c)\\right) + Z \n", 169 | "$$\n", 170 | "\n", 171 | "We'll be using this equation in the maximization step: we differentiate it to find new optimum values for each parameter of our model and repeat the process until either we reach convergence (usually detected by no further increase in the log-likelihood) or hit the desired total number of iterations.\n", 172 | "\n", 173 | "Setting the derivative to zero, the new values are given by:\n", 174 | "\n", 175 | "$$\\theta_c^{(t+1)} = \\frac{\\sum_{s\\in S}\\sum_{c_i \\in s}P\\left(X_{c_i}^{(s)}=1, Par(X_{c_i}^{(s)})=p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)}{\\sum_{s\\in S}\\sum_{c_i \\in s}P\\left(Par(X_{c_i}^{(s)})=p \\mid C^{(s)}, P^{(s)}, \\Psi\\right)}\\label{eq:10} \\tag{10}$$" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "## Attractiveness $\\alpha_{uq}$" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "We have that:\n", 190 | "\n", 191 | "$$P(A_u = 1) = \\alpha_{uq}$$" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "Given equations 1-9, we can also derive that:" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 
204 | "source": [ 205 | "\\begin{equation}\n", 206 | "\\begin{split}\n", 207 | "\\epsilon_1 & = P(E_1=1) = 1 \\\\\n", 208 | "\\epsilon_{r+1} & = P(E_{r+1} =1) \\\\\n", 209 | " & = P(E_{r+1} = 1 \\mid E_r=1) \\cdot P(E_r=1) \\\\\n", 210 | " & = \\epsilon_r P\\left(E_{r+1}=1 \\mid S_r = 0, E_r=1\\right) \\cdot P(S_r=0 \\mid E_r=1) \\\\\n", 211 | " & = \\epsilon_r\\gamma P(S_r=0 \\mid E_r=1) \\\\\n", 212 | " & = \\epsilon_r\\gamma \\left(P\\left(S_r=0 \\mid C_r = 0, P_r = 0, E_r=1 \\right)P\\left(C_r=0, P_r=0 \\mid E_r=1\\right) + P\\left(S_r=0 \\mid C_r = 0, P_r = 1, E_r=1 \\right)P\\left(C_r=0, P_r=1 \\mid E_r=1\\right) + P\\left(S_r=0 \\mid C_r = 1, P_r = 0, E_r=1 \\right)P\\left(C_r=1, P_r=0 \\mid E_r=1\\right) + P\\left(S_r=0 \\mid C_r = 1, P_r = 1, E_r=1 \\right)P\\left(C_r=1, P_r=1 \\mid E_r=1\\right)\\right) \\\\\n", 213 | " & = \\epsilon_r \\gamma \\left((1 - \\alpha_{uq}) + (1 - \\sigma_{uq})(1 - cr_{uq})\\alpha_{uq} \\right) \n", 214 | "\\end{split}\\label{eq:11} \\tag{11}\n", 215 | "\\end{equation}" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Where $cr$ is the conversion rate of document $u$ for query $q$." 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "Given equation 10, we derive for the attractiveness parameter the following updating rule:\n", 230 | "\n", 231 | "$$\\alpha_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S_{uq}} P(A_u = 1 \\mid C, P)}{|S_{uq}|} \\label{eq:12} \\tag{12}$$" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "But given the structure of the DBN, we can infer that if $C$ is observed then $A_u$ is independent of $P$, since $C$ sits between the attractiveness variable and $P$ in the graph ($A_u \\rightarrow C \\rightarrow P$). 
We can use this to assert a simplified updating rule:" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "$$\\alpha_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S_{uq}} P(A_u = 1 \\mid C)}{|S_{uq}|} \\label{eq:13} \\tag{13}$$" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "Which can be developed as follows:" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "$$\n", 260 | "\\begin{equation}\n", 261 | "\\begin{split}\n", 262 | " P(A_u = 1 \\mid C) & = P(A_u = 1 \\mid C_r, C_{>r}) \\\\\n", 263 | " & = \\unicode{x1D7D9}(C_r=1)\\cdot P(A_u=1 \\mid C_r = 1, C_{>r}) + \\unicode{x1D7D9}(C_r=0)\\cdot P(A_u=1 \\mid C_r = 0, C_{>r}) \\\\\n", 264 | " & = c_r + (1 - c_r) \\cdot \\left(\\unicode{x1D7D9}(C_{>r}=1) \\cdot P(A_u=1|C_r=0, C_{>r}=1) + \\unicode{x1D7D9}(C_{>r}=0) \\cdot P(A_u=1 \\mid C_r=0, C_{>r}=0)\\right) \\\\\n", 265 | " & = c_r + (1 - c_r)(1 - c_{>r}) \\cdot \\frac{P(C_r=0, C_{>r}=0 \\mid A_u=1) \\cdot P(A_u=1)}{P(C_r=0, C_{>r} = 0)}\n", 266 | "\\end{split}\\label{eq:14} \\tag{14}\n", 267 | "\\end{equation}\n", 268 | " $$" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "Where $C_r$ is the click on the document at the current rank and $C_{>r}$ is a random variable that is 1 if there's any click at a rank greater than the current $r$ and 0 otherwise." 
276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "Now developing the numerator of (14) we have:" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "$$\n", 290 | "\\begin{equation} \n", 291 | "\\begin{split}\n", 292 | " P(C_r=0, C_{>r}=0 \\mid A_u=1) & = P(C_r=0, C_{>r}=0 \\mid A_u=1, E_r=0) \\cdot P(E_r=0) \\\\\n", 293 | " & = P(E_r=0) = 1 - \\epsilon_r\n", 294 | "\\end{split}\\label{eq:14.1} \\tag{14.1}\n", 295 | "\\end{equation}\n", 296 | " $$" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "The equation above is derived from the fact that an attractive document goes unclicked only if it is not examined." 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "metadata": {}, 309 | "source": [ 310 | "The numerator is already solved; we still need to develop the denominator:" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": {}, 316 | "source": [ 317 | "$$\n", 318 | "\\begin{equation} \n", 319 | "\\begin{split}\n", 320 | " P\\left(C_r=0, C_{>r}=0\\right) = P(C_{\\geq r}=0) = 1 - P(C_{\\geq r} = 1)\n", 321 | "\\end{split}\\label{eq:14.2} \\tag{14.2}\n", 322 | "\\end{equation}\n", 323 | " $$" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "metadata": {}, 329 | "source": [ 330 | "$$\n", 331 | "\\begin{equation} \n", 332 | "\\begin{split}\n", 333 | "P(C_{\\geq r} = 1) = \\epsilon_r \\cdot X_r\n", 334 | "\\end{split}\\label{eq:14.3} \\tag{14.3}\n", 335 | "\\end{equation}\n", 336 | " $$" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "$$\n", 344 | "\\begin{equation} \n", 345 | "\\begin{split}\n", 346 | "X_r & = P(C_{\\geq r} \\mid E_r=1) \\\\\n", 347 | " & = P(C_r = 1 \\mid E_r=1) + P(C_r=0, C_{\\geq r+1} \\mid E_r=1) \\\\\n", 348 | " & = \\alpha_{uq} + P(C_{\\geq r+1} \\mid C_r=0, E_r=1) \\cdot P(C_r=0|E_r=1) 
\\\\\n", 349 | " & = \\alpha_{uq} + P(C_{\\geq r+1} \\mid E_{r+1}) \\cdot P(E_{r+1}=1 \\mid C_r=0, E_r=1) \\cdot (1 - \\alpha_{uq}) \\\\\n", 350 | " & = \\alpha_{uq} + (1 - \\alpha_{uq})\\gamma X_{r+1}\n", 351 | "\\end{split}\\label{eq:14.4} \\tag{14.4}\n", 352 | "\\end{equation}\n", 353 | " $$" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "Finally, we have the updating rule for the attractiveness parameter:" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "$$ \\alpha_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S_{uq}}\\left(c_r^{(s)} + \\left(1 - c_r^{(s)}\\right)\\left(1 - c_{>r}^{(s)}\\right) \\cdot \\frac{\\left(1 - \\epsilon_r^{(t)}\\right)\\alpha_{uq}^{(t)}}{\\left(1 - \\epsilon_r^{(t)}X_r^{(t)} \\right)} \\right)}{|S_{uq}|} \\label{eq:15} \\tag{15}$$" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "Where $\\epsilon_r$ is given by equation (11)." 
375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "## Satisfaction $\\sigma_{uq}$" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "In our presented DBN model, the satisfaction factor is only defined when:\n", 389 | "\n", 390 | "$$ \\sigma_{uq} = P(S_u=1 \\mid C_r=1, P_r=0)$$" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "This means the updating rule for the satisfaction term is given by:" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "$$\\sigma_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S'_{uq}}P(S_u=1 \\mid C, P)}{|S'_{uq}|} \\label{eq:16} \\tag{16} $$" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "Which can be developed as:" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "metadata": {}, 417 | "source": [ 418 | "$$\n", 419 | "\\begin{equation} \n", 420 | "\\begin{split}\n", 421 | "P(S_u=1 \\mid C, P) &= P(S_u = 1 \\mid C_r=1, P_r=0, C_{>r}=0, P_{>r}=0) \\\\\n", 422 | "&= (1 - c_{>r})\\cdot P(S_u=1 \\mid C_r=1, P_r=0, C_{>r}=0, P_{>r}=0) \\\\\n", 423 | "&= (1 - c_{>r})\\cdot \\frac{P(C_{>r}=0, P_{>r}=0 \\mid S_u=1, C_r=1, P_r=0) \\cdot P(S_u=1 \\mid C_r=1, P_r=0)}{P(C_{>r}=0, P_{>r}=0 \\mid C_r=1, P_r=0)} \\\\\n", 424 | "&= \\frac{(1 - c_r)(1-p_r)\\sigma_{uq}}{P(P_{>r}=0 \\mid C_{>r}=0, C_r=1, P_r=0) \\cdot P(C_{>r}=0 \\mid C_r=1, P_r=0)} \\\\\n", 425 | "&= \\frac{(1 - c_r)(1-p_r)\\sigma_{uq}}{1 - P(C_{\\geq r+1}=1 \\mid E_{r+1})\\cdot P(E_{r+1}\\mid C_r=1, P_r=0)} \\\\\n", 426 | "&= \\frac{(1 - c_r)(1-p_r)\\sigma_{uq}}{(1 - X_{r+1}\\cdot (1-\\alpha_{uq})\\gamma)}\n", 427 | "\\end{split}\\label{eq:17} \\tag{17}\n", 428 | "\\end{equation}\n", 429 | " $$" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "Given equations (16) and (17), 
we derive that the updating rule is given by:" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "$$\\sigma_{uq}^{(t+1)} = \\frac{\\sum_{s \\in S^{[1, 0]}}\\frac{(1 - c_r^{(t)})(1-p_r^{(t)})\\sigma_{uq}^{(t)}}{(1 - X_{r+1}\\cdot (1-\\alpha_{uq}^{(t)})\\gamma^{(t)})}}{|S^{[1, 0]}|} \\label{eq:18} \\tag{18}$$" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "Where $S^{[1, 0]}$ is the set of customer sessions where at rank $r$ there's an observed click and no purchase for document $u$ and query $q$." 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "## Persistence $\\gamma$" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "Persistence is defined as:" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "$$\\gamma = P(E_{r+1} = 1 \\mid E_r = 1, S_{ur}=0)$$" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "The expected sufficient statistics (ESS) for this parameter are defined as:" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "$$ESS(z) = \\sum_{s \\in S} \\sum_r P(E_{r+1}=z, E_r=1, S_{ur}=0 \\mid C, P) \\label{eq:19} \\tag{19}$$" 486 | ] 487 | }, 488 | { 489 | "cell_type": "markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "There's no closed form for this equation, so we use some techniques in order to be able to compute it, like so:" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "$$\n", 500 | "\\begin{equation} \n", 501 | "\\begin{split}\n", 502 | "ESS(z) &= \\sum_{s \\in S}\\sum_{r}\\frac{P(E_{r+1}=z, E_r=1, S_u=0, C, P)}{P(C, P)} \\\\\n", 503 | "&= \\sum_{s \\in S}\\sum_{r}\\frac{P(E_{r+1}=z, E_r=1, S_u=0, C, P)}{\\sum_x \\sum_y \\sum_z 
P(E_{r+1}=z,E_r=x, S_u=y, C, P)} \\\\\n", 504 | "&= \\sum_{s \\in S}\\sum_{r}\\frac{P(E_{r+1}=\\, E_r=1, S_u=0, C, P) \\cdot \\frac{1}{P(C_{r}, P{>r} \\mid E_{r+1}=z, E_r=x, S_u=y, C_r=c_r, P_r=p_r) \\cdot P(E_r=x, S_u=y, E_{r+1}=z, C_r=c_r, P_r=p_r \\mid C_{r}, P_{>r} \\mid E_{r+1}=z) \\cdot P(E_{r+1}=z, S_u=y, C_r=c_r, P_r=p_r \\mid E_r=x) \\cdot P(E_r=x \\mid C_{r}, P_{>r} \\mid E_{r+1}=z)$$, second is $$P(E_{r+1}=z, S_u=y, C_r=c_r, P_r=p_r \\mid E_r=x)$$ and finally $$P(E_r=x \\mid C_{r}, P_{>r} \\mid E_{r+1}=z)$ which is derived as:" 626 | ] 627 | }, 628 | { 629 | "cell_type": "markdown", 630 | "metadata": {}, 631 | "source": [ 632 | "$$\n", 633 | "\\begin{equation} \n", 634 | "\\begin{split}\n", 635 | "P(C_{>r}, P_{>r} \\mid E_{r+1}=z) &= P(C_r, P_r\\mid C_{r-1}, P_{r-1}, ..., E_l=1) \\cdot P(C_{r-1}, P_{r-1}, E_l=1) \\\\\n", 636 | " &= (1-\\alpha \\epsilon_{rl})\\left((1-c_r)(1-p_r) + (1-w)(\\alpha \\epsilon_{rl}c_r(1-p_r)) + w \\alpha \\epsilon_{rl}c_r p_r \\right) \\cdot P(C_{r-1},P_{r-1} \\mid C_{\n", 658 | " @font-face {\n", 659 | " font-family: \"Computer Modern\";\n", 660 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunss.otf');\n", 661 | " }\n", 662 | " @font-face {\n", 663 | " font-family: \"Computer Modern\";\n", 664 | " font-weight: bold;\n", 665 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunsx.otf');\n", 666 | " }\n", 667 | " @font-face {\n", 668 | " font-family: \"Computer Modern\";\n", 669 | " font-style: oblique;\n", 670 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunsi.otf');\n", 671 | " }\n", 672 | " @font-face {\n", 673 | " font-family: \"Computer Modern\";\n", 674 | " font-weight: bold;\n", 675 | " font-style: oblique;\n", 676 | " src: url('http://9dbb143991406a7c655e-aa5fcb0a5a4ec34cff238a2d56ca4144.r56.cf5.rackcdn.com/cmunso.otf');\n", 677 | " }\n", 678 | " div.cell{\n", 679 | " 
width:800px;\n", 680 | " margin-left:16% !important;\n", 681 | " margin-right:auto;\n", 682 | " }\n", 683 | " h1 {\n", 684 | " font-family: Helvetica, serif;\n", 685 | " }\n", 686 | " h4{\n", 687 | " margin-top:12px;\n", 688 | " margin-bottom: 3px;\n", 689 | " }\n", 690 | " div.text_cell_render{\n", 691 | " font-family: Computer Modern, \"Helvetica Neue\", Arial, Helvetica, Geneva, sans-serif;\n", 692 | " line-height: 145%;\n", 693 | " font-size: 130%;\n", 694 | " width:800px;\n", 695 | " margin-left:auto;\n", 696 | " margin-right:auto;\n", 697 | " }\n", 698 | " .CodeMirror{\n", 699 | " font-family: \"Source Code Pro\", source-code-pro,Consolas, monospace;\n", 700 | " }\n", 701 | " .prompt{\n", 702 | " display: None;\n", 703 | " }\n", 704 | " .text_cell_render h5 {\n", 705 | " font-weight: 300;\n", 706 | " font-size: 22pt;\n", 707 | " color: #4057A1;\n", 708 | " font-style: italic;\n", 709 | " margin-bottom: .5em;\n", 710 | " margin-top: 0.5em;\n", 711 | " display: block;\n", 712 | " }\n", 713 | " \n", 714 | " .warning{\n", 715 | " color: rgb( 240, 20, 20 )\n", 716 | " } \n", 717 | "\n", 718 | "" 733 | ], 734 | "text/plain": [ 735 | "" 736 | ] 737 | }, 738 | "execution_count": 1, 739 | "metadata": {}, 740 | "output_type": "execute_result" 741 | } 742 | ], 743 | "source": [ 744 | "from IPython.core.display import HTML,display\n", 745 | "\n", 746 | "\n", 747 | "def css_styling():\n", 748 | " styles = open(\"styles/custom.css\", \"r\").read()\n", 749 | " return HTML(styles)\n", 750 | "\n", 751 | "\n", 752 | "css_styling()" 753 | ] 754 | } 755 | ], 756 | "metadata": { 757 | "kernelspec": { 758 | "display_name": "Python 3", 759 | "language": "python", 760 | "name": "python3" 761 | }, 762 | "language_info": { 763 | "codemirror_mode": { 764 | "name": "ipython", 765 | "version": 3 766 | }, 767 | "file_extension": ".py", 768 | "mimetype": "text/x-python", 769 | "name": "python", 770 | "nbconvert_exporter": "python", 771 | "pygments_lexer": "ipython3", 772 | "version": 
"3.6.9" 773 | } 774 | }, 775 | "nbformat": 4, 776 | "nbformat_minor": 4 777 | } 778 | --------------------------------------------------------------------------------