├── .gitignore ├── .gitmodules ├── .travis.yml ├── .travis └── build-wheels.sh ├── COPYING ├── MANIFEST.in ├── Makefile ├── README.md ├── doc ├── Makefile ├── api.rst ├── conf.py ├── dev_notes.txt ├── guide.rst ├── index.rst ├── make.bat └── tutorial.rst ├── examples ├── warm_start_als.py └── warm_start_mcmc.py ├── fastFM ├── __init__.py ├── als.py ├── base.py ├── bpr.py ├── cffm.pxd ├── datasets.py ├── ffm.pyx ├── mcmc.py ├── sgd.py ├── tests │ ├── test_als.py │ ├── test_base.py │ ├── test_datasets.py │ ├── test_ffm.py │ ├── test_mcmc.py │ ├── test_ranking.py │ ├── test_sgd.py │ └── test_utils.py ├── utils.py └── validation.py ├── requirements.txt ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # C 2 | *.swp 3 | *.o 4 | *.a 5 | *.so 6 | *.zip 7 | # latex 8 | *.aux 9 | *.bbl 10 | *.blg 11 | *.dvi 12 | *.log 13 | *.toc 14 | # python 15 | *.pyc 16 | fastFM/ffm.c 17 | fastFM.egg-info/ 18 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "fastFM-core"] 2 | path = fastFM-core 3 | url = https://github.com/ibayer/fastFM-core.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | matrix: 4 | include: 5 | - os: osx 6 | env: 7 | - TRAVIS_PYTHON_VERSION="2.7" 8 | - DEPLOYABLE="true" 9 | - os: osx 10 | env: 11 | - TRAVIS_PYTHON_VERSION="3.5" 12 | - DEPLOYABLE="true" 13 | - os: osx 14 | env: 15 | - TRAVIS_PYTHON_VERSION="3.6" 16 | - DEPLOYABLE="true" 17 | - os: linux 18 | env: 19 | - TRAVIS_PYTHON_VERSION="2.7" 20 | - os: linux 21 | env: 22 | - TRAVIS_PYTHON_VERSION="3.5" 23 | - os: linux 24 | env: 25 | - TRAVIS_PYTHON_VERSION="3.6" 26 | #- services: docker 27 | # sudo: required 28 | # env: 29 | # - 
DEPLOY_TARGET="manylinux1" 30 | # - DEPLOYABLE="true" 31 | 32 | dist: trusty 33 | 34 | before_install: 35 | - | 36 | # Skip if manylinux1 37 | if [ "$DEPLOY_TARGET" = "manylinux1" ]; then 38 | echo "Skip before_install step..." 39 | else 40 | # fastFM-core depends on cblas 41 | if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo apt-get update -qq; sudo apt-get install -y libopenblas-dev; fi 42 | if [[ "$TRAVIS_PYTHON_VERSION" =~ ^2 ]]; then 43 | if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then 44 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 45 | else 46 | wget https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh -O miniconda.sh; 47 | fi 48 | else 49 | if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then 50 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 51 | else 52 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh; 53 | fi 54 | fi 55 | bash miniconda.sh -b -p $HOME/miniconda 56 | export PATH="$HOME/miniconda/bin:$PATH" 57 | hash -r 58 | conda config --set always_yes yes --set changeps1 no 59 | conda update -q conda 60 | # Useful for debugging any issues with conda 61 | conda info -a 62 | conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION cython numpy pandas scipy scikit-learn nose 63 | source activate test-environment 64 | fi 65 | 66 | install: 67 | - | 68 | git submodule update --init --recursive 69 | if [ "$DEPLOY_TARGET" = "manylinux1" ]; then 70 | : 71 | else 72 | make 73 | python setup.py bdist_wheel 74 | pip install dist/*.whl 75 | fi 76 | 77 | script: 78 | - | 79 | if [ "$DEPLOY_TARGET" = "manylinux1" ]; then 80 | #build for 64-bit 81 | docker run --rm -v `pwd`:/io quay.io/pypa/manylinux1_x86_64 /io/.travis/build-wheels.sh 82 | else 83 | nosetests 84 | fi 85 | 86 | deploy: 87 | provider: releases 88 | api_key: 89 | secure: 
AJcZoe2+OiMJ4VlSkASAeMc/ii0ZRnj2PFaaL7zlSbx1THMpY/49U5BSyqX1PQioPSlTV3ZsIXI3u7KyqoXIQSXWzAuaBzpLTLS85fGSuTvUuexmaJtKU92OC143tuVVLCPnjC992+1uyctjrxMSqgoaUolfYkEftt5RGrMIKl2duGfDXrPXIueHSl8FQGXkmlY6NqkRx2v5kxsAjFcurvwTNU8ptJ84jVKjrE6t1IB61vp2eUcqVR/z6Lwau6mdvIybglnbH4lCMXP98zEIibLA8vbn3XxrC+0uU7Kjz37K6/CsJEPNL5tujJDMRKAupnrkgPsAGTpsAn6O6uLUz0ISgcen8R6KJ7cBli+cq08OZ3JLLoJpqkni62YVSQV+uYkQk9b5Pu09vUTOozJMnOqLSj9hVIswyxGiFPcTFskMgqMdx15M59gd0YpXH633YqwBgRmWNsctp4BKnTaE3iGW6aZc8lrXxpL7qcVAosjmpjLp3jiPXVSRdYf0yHl6pDUj5ZVyu27kAn1/I9JL0nH19zjXF2tUlEjuT9ydHwnhmsgBN/V+JhZxi7ZeEbOZfY1MfekKM/NwSRehVEp/J0XWqWg+kIXRU/rqY1/w0vLVNFeQirpEjUp39eCBydXeS3Bik8uANW2UTxojJo3LBfLLoAT8ZWFb3YrIBAYkzjc= 90 | file_glob: true 91 | file: dist/fastFM-*.whl 92 | skip_cleanup: true 93 | on: 94 | tags: true 95 | condition: $DEPLOYABLE = "true" 96 | -------------------------------------------------------------------------------- /.travis/build-wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | #Author: Likhith Chitneni 4 | #License: BSD 3 Clause license - https://opensource.org/licenses/BSD-3-Clause 5 | # 6 | 7 | set -e -x 8 | 9 | # Install any system packages required here 10 | #yum install -y $PACKAGE_TO_BE_INSTALLED 11 | 12 | #Remove Python 2.6 and 3.3 since numpy requires >=2.7 or >=3.4 13 | rm -rf /opt/python/cpython-2.6.9-* 14 | rm -rf /opt/python/cp33-cp33m 15 | 16 | #Make fastFM-core 17 | cd /io/fastFM-core 18 | make clean && make 19 | cd / 20 | 21 | #Compile wheels 22 | for PYBIN in /opt/python/*/bin; do 23 | "${PYBIN}/pip" install -r /io/requirements.txt 24 | "${PYBIN}/pip" wheel /io/ -w wheelhouse/ 25 | done 26 | 27 | # Bundle external shared libraries into the wheels 28 | for whl in wheelhouse/*.whl; do 29 | auditwheel repair "$whl" -w /io/wheelhouse/ 30 | done 31 | 32 | # Install packages and test 33 | for PYBIN in /opt/python/*/bin; do 34 | "${PYBIN}/pip" install fastFM --no-index -f /io/wheelhouse 35 | "${PYBIN}/pip" 
install nose 36 | (cd "$HOME"; "${PYBIN}/nosetests" /io/fastFM/tests) 37 | done 38 | 39 | mv /io/wheelhouse /io/dist 40 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | New BSD License 2 | 3 | Copyright (c) 2014–2015 Immanuel Bayer 4 | All rights reserved. 5 | 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of the developers nor the names of 16 | its contributors may be used to endorse or promote products 17 | derived from this software without specific prior written 18 | permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | DAMAGE. 
32 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include fastFM/ffm.c 3 | include fastFM/ffm.pyx 4 | include fastFM/cffm.pxd 5 | recursive-include fastFM-core/include * 6 | include fastFM-core/bin/libfastfm.a 7 | include fastFM-core/externals/CXSparse/Lib/libcxsparse.a 8 | recursive-include fastFM-core/externals/CXSparse/Include * 9 | recursive-include fastFM-core/externals/CXSparse/SuiteSparse_config * 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | 3 | all: 4 | ( cd fastFM-core ; $(MAKE) lib ) 5 | $(PYTHON) setup.py build_ext --inplace 6 | 7 | .PHONY : clean 8 | clean: 9 | ( cd fastFM-core ; $(MAKE) clean ) 10 | rm -f fastFM/*.so 11 | rm -f *.so 12 | rm -rf build/ 13 | rm -f fastFM/ffm.c 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Citing fastFM 2 | ============= 3 | 4 | The library fastFM is an academic project. The time and resources spent 5 | developing fastFM are therefore justified by the number of citations of 6 | the software. If you publish scientific articles using fastFM, please 7 | cite the following article (bibtex entry 8 | [citation.bib](http://jmlr.org/papers/v17/15-355.bib)). 9 | 10 | > Bayer, I. \"fastFM: A Library for Factorization Machines\" Journal of 11 | > Machine Learning Research 17, pp. 
1-5 (2016) 12 | 13 | fastFM: A Library for Factorization Machines 14 | ============================================ 15 | 16 | [![image](https://travis-ci.org/ibayer/fastFM.svg?branch=master)](https://travis-ci.org/ibayer/fastFM) 17 | [![image](https://img.shields.io/badge/platform-OSX%7CLinux-lightgrey.svg)](https://travis-ci.org/ibayer/fastFM) 18 | [![image](https://img.shields.io/pypi/l/Django.svg)](https://travis-ci.org/ibayer/fastFM) 19 | 20 | This repository allows you to use Factorization Machines in **Python** 21 | (2.7 & 3.x) with the well known **scikit-learn API**. All performance 22 | critical code has been written in C and wrapped with Cython. fastFM 23 | provides stochastic gradient descent (SGD) and coordinate descent (CD) 24 | optimization routines as well as Markov Chain Monte Carlo (MCMC) for 25 | Bayesian inference. The solvers can be used for regression, 26 | classification and ranking problems. Detailed usage instructions can be 27 | found in the [online documentation](http://ibayer.github.io/fastFM) and 28 | on [arXiv](http://arxiv.org/abs/1505.00641). 29 | 30 | Supported Operating Systems 31 | --------------------------- 32 | 33 | fastFM has a continuous integration / testing servers (Travis) for 34 | **Linux (Ubuntu 14.04 LTS)** and **OS X Mavericks**. Other OSs are not 35 | actively supported. 36 | 37 | Usage 38 | ----- 39 | 40 | ``` {.python} 41 | from fastFM import als 42 | fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=2, l2_reg_w=0.1, l2_reg_V=0.5) 43 | fm.fit(X_train, y_train) 44 | y_pred = fm.predict(X_test) 45 | ``` 46 | 47 | Tutorials and other information are available 48 | [here](http://arxiv.org/abs/1505.00641). The C code is available as 49 | [subrepository](https://github.com/ibayer/fastFM-core) and provides a 50 | stand alone command line interface. If you still have **questions** 51 | after reading the documentation please open an issue at GitHub. 
52 | 53 | | Task | Solver | Loss | 54 | | :------------- | :----------: | -----------: | 55 | | Regression | als, mcmc, sgd | Square Loss | 56 | | Classification | als, mcmc, sgd | Probit(Map), Probit, Sigmoid| 57 | | Ranking | sgd | BPR | 58 | 59 | *Supported solvers and tasks* 60 | 61 | Installation 62 | ------------ 63 | 64 | **binary install (64bit only)** 65 | 66 | `pip install fastFM` 67 | 68 | **source install** 69 | 70 | *Please make sure, that Python and OS bit version agree, e.g. 32bit 71 | Python on 64bit OS won\'t work.* 72 | 73 | ``` {.bash} 74 | # Install cblas and python-dev header (Linux only). 75 | # - cblas can be installed with libatlas-base-dev or libopenblas-dev (Ubuntu) 76 | $ sudo apt-get install python-dev libopenblas-dev 77 | 78 | # Clone the repo including submodules (or clone + `git submodule update --init --recursive`) 79 | $ git clone --recursive https://github.com/ibayer/fastFM.git 80 | 81 | # Enter the root directory 82 | $ cd fastFM 83 | 84 | # Install Python dependencies (Cython>=0.22, numpy, pandas, scipy, scikit-learn) 85 | $ pip install -r ./requirements.txt 86 | 87 | # Compile the C extension. 88 | $ make # build with default python version (python) 89 | $ PYTHON=python3 make # build with custom python version (python3) 90 | 91 | # Install fastFM 92 | $ pip install . 93 | ``` 94 | 95 | Tests 96 | ----- 97 | 98 | The Python tests (`pip install nose`) can be run with: 99 | `nosetests fastFM/fastFM/tests` 100 | 101 | Please refer to the fastFM-core README for instruction on how to run the 102 | C tests at `fastFM/fastFM-core/src/tests`. 
103 | 104 | Contribution 105 | ------------ 106 | 107 | - Star this repository: keeps contributors motivated 108 | - Open an issue: report bugs or suggest improvements 109 | - Fix errors in the documentation: small changes matter 110 | - Contribute code 111 | 112 | **Contributions are very welcome!** Since this project lives on GitHub 113 | we recommend to open a pull request (PR) for code contributions as early 114 | as possible. This is the fastest way to get feedback and allows [Travis 115 | CI](https://travis-ci.org/ibayer/fastFM) to run checks on your changes. 116 | 117 | Most information you need to setup your **development environment** can 118 | be learned by adapting the great instructions on 119 | 120 | . Please ensure that your contribution conforms to the 121 | [PEP8](http://www.python.org/dev/peps/pep-0008/) Coding Style and 122 | includes unit tests where appropriate. More valuable guidelines that 123 | apply to fastFM can be found at 124 | 125 | . 126 | 127 | **Contributors** 128 | 129 | - [aaossa](https://github.com/aaossa/) 130 | - [altimin](https://github.com/altimin) 131 | - [bdaskalov](https://github.com/bdaskalov) 132 | - [chezou](https://github.com/chezou) 133 | - [macks22](https://github.com/macks22) 134 | - [takuti](https://github.com/takuti) 135 | - [ibayer](https://github.com/ibayer) 136 | 137 | License: BSD 138 | ------------ 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make 
Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/fastFM.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/fastFM.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. 
The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/fastFM" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/fastFM" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
193 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | The fastFM API reference 2 | ======================== 3 | 4 | 5 | The MCMC module 6 | --------------- 7 | 8 | .. automodule:: fastFM.mcmc 9 | :members: 10 | :inherited-members: predict 11 | 12 | The ALS module 13 | --------------- 14 | 15 | .. automodule:: fastFM.als 16 | :members: 17 | :inherited-members: predict 18 | 19 | The SGD module 20 | -------------- 21 | 22 | .. automodule:: fastFM.sgd 23 | :members: 24 | :inherited-members: predict 25 | 26 | The Ranking module 27 | ------------------ 28 | 29 | .. automodule:: fastFM.bpr 30 | :members: 31 | :inherited-members: predict 32 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # fastFM documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Jun 15 01:42:19 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | import shlex 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
22 | #sys.path.insert(0, os.path.abspath('.')) 23 | #sys.path.insert(0, os.path.abspath('../fastFM')) 24 | sys.path.append(os.path.abspath('../fastFM')) 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | #needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'matplotlib.sphinxext.plot_directive', 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.doctest', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.napoleon', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # source_suffix = ['.rst', '.md'] 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = u'fastFM' 59 | copyright = u'2016, Immanuel Bayer' 60 | author = u'Immanuel Bayer' 61 | 62 | # The version info for the project you're documenting, acts as replacement for 63 | # |version| and |release|, also used in various other places throughout the 64 | # built documents. 65 | # 66 | # The short X.Y version. 67 | version = '0.2' 68 | # The full version, including alpha/beta/rc tags. 69 | release = '0.2.11' 70 | 71 | # The language for content autogenerated by Sphinx. Refer to documentation 72 | # for a list of supported languages. 73 | # 74 | # This is also used if you do content translation via gettext catalogs. 75 | # Usually you set "language" from the command line for these cases. 
76 | language = None 77 | 78 | # There are two options for replacing |today|: either, you set today to some 79 | # non-false value, then it is used: 80 | #today = '' 81 | # Else, today_fmt is used as the format for a strftime call. 82 | #today_fmt = '%B %d, %Y' 83 | 84 | # List of patterns, relative to source directory, that match files and 85 | # directories to ignore when looking for source files. 86 | exclude_patterns = ['_build'] 87 | 88 | # The reST default role (used for this markup: `text`) to use for all 89 | # documents. 90 | #default_role = None 91 | 92 | # If true, '()' will be appended to :func: etc. cross-reference text. 93 | #add_function_parentheses = True 94 | 95 | # If true, the current module name will be prepended to all description 96 | # unit titles (such as .. function::). 97 | #add_module_names = True 98 | 99 | # If true, sectionauthor and moduleauthor directives will be shown in the 100 | # output. They are ignored by default. 101 | #show_authors = False 102 | 103 | # The name of the Pygments (syntax highlighting) style to use. 104 | #pygments_style = 'sphinx' 105 | pygments_style = 'colorful' 106 | 107 | # A list of ignored prefixes for module index sorting. 108 | #modindex_common_prefix = [] 109 | 110 | # If true, keep warnings as "system message" paragraphs in the built documents. 111 | #keep_warnings = False 112 | 113 | # If true, `todo` and `todoList` produce output, else they produce nothing. 114 | todo_include_todos = False 115 | 116 | 117 | # -- Options for HTML output ---------------------------------------------- 118 | 119 | # The theme to use for HTML and HTML Help pages. See the documentation for 120 | # a list of builtin themes. 121 | #html_theme = 'alabaster' 122 | html_theme = 'haiku' 123 | 124 | # Theme options are theme-specific and customize the look and feel of a theme 125 | # further. For a list of options available for each theme, see the 126 | # documentation. 
127 | #html_theme_options = {} 128 | 129 | # Add any paths that contain custom themes here, relative to this directory. 130 | #html_theme_path = [] 131 | 132 | # The name for this set of Sphinx documents. If None, it defaults to 133 | # " v documentation". 134 | #html_title = None 135 | 136 | # A shorter title for the navigation bar. Default is the same as html_title. 137 | #html_short_title = None 138 | 139 | # The name of an image file (relative to this directory) to place at the top 140 | # of the sidebar. 141 | #html_logo = None 142 | 143 | # The name of an image file (within the static path) to use as favicon of the 144 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 145 | # pixels large. 146 | #html_favicon = None 147 | 148 | # Add any paths that contain custom static files (such as style sheets) here, 149 | # relative to this directory. They are copied after the builtin static files, 150 | # so a file named "default.css" will overwrite the builtin "default.css". 151 | html_static_path = ['_static'] 152 | 153 | # Add any extra paths that contain custom files (such as robots.txt or 154 | # .htaccess) here, relative to this directory. These files are copied 155 | # directly to the root of the documentation. 156 | #html_extra_path = [] 157 | 158 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 159 | # using the given strftime format. 160 | #html_last_updated_fmt = '%b %d, %Y' 161 | 162 | # If true, SmartyPants will be used to convert quotes and dashes to 163 | # typographically correct entities. 164 | #html_use_smartypants = True 165 | 166 | # Custom sidebar templates, maps document names to template names. 167 | #html_sidebars = {} 168 | 169 | # Additional templates that should be rendered to pages, maps page names to 170 | # template names. 171 | #html_additional_pages = {} 172 | 173 | # If false, no module index is generated. 
174 | #html_domain_indices = True 175 | 176 | # If false, no index is generated. 177 | #html_use_index = True 178 | 179 | # If true, the index is split into individual pages for each letter. 180 | #html_split_index = False 181 | 182 | # If true, links to the reST sources are added to the pages. 183 | #html_show_sourcelink = True 184 | 185 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 186 | #html_show_sphinx = True 187 | 188 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 189 | #html_show_copyright = True 190 | 191 | # If true, an OpenSearch description file will be output, and all pages will 192 | # contain a tag referring to it. The value of this option must be the 193 | # base URL from which the finished HTML is served. 194 | #html_use_opensearch = '' 195 | 196 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 197 | #html_file_suffix = None 198 | 199 | # Language to be used for generating the HTML full-text search index. 200 | # Sphinx supports the following languages: 201 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 202 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 203 | #html_search_language = 'en' 204 | 205 | # A dictionary with options for the search language support, empty by default. 206 | # Now only 'ja' uses this config value 207 | #html_search_options = {'type': 'default'} 208 | 209 | # The name of a javascript file (relative to the configuration directory) that 210 | # implements a search results scorer. If empty, the default will be used. 211 | #html_search_scorer = 'scorer.js' 212 | 213 | # Output file base name for HTML help builder. 214 | htmlhelp_basename = 'fastFMdoc' 215 | 216 | # -- Options for LaTeX output --------------------------------------------- 217 | 218 | latex_elements = { 219 | # The paper size ('letterpaper' or 'a4paper'). 220 | #'papersize': 'letterpaper', 221 | 222 | # The font size ('10pt', '11pt' or '12pt'). 
223 | #'pointsize': '10pt', 224 | 225 | # Additional stuff for the LaTeX preamble. 226 | #'preamble': '', 227 | 228 | # Latex figure (float) alignment 229 | #'figure_align': 'htbp', 230 | } 231 | 232 | # Grouping the document tree into LaTeX files. List of tuples 233 | # (source start file, target name, title, 234 | # author, documentclass [howto, manual, or own class]). 235 | latex_documents = [ 236 | (master_doc, 'fastFM.tex', u'fastFM Documentation', 237 | u'Immanuel Bayer', 'manual'), 238 | ] 239 | 240 | # The name of an image file (relative to this directory) to place at the top of 241 | # the title page. 242 | #latex_logo = None 243 | 244 | # For "manual" documents, if this is true, then toplevel headings are parts, 245 | # not chapters. 246 | #latex_use_parts = False 247 | 248 | # If true, show page references after internal links. 249 | #latex_show_pagerefs = False 250 | 251 | # If true, show URL addresses after external links. 252 | #latex_show_urls = False 253 | 254 | # Documents to append as an appendix to all manuals. 255 | #latex_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | #latex_domain_indices = True 259 | 260 | 261 | # -- Options for manual page output --------------------------------------- 262 | 263 | # One entry per manual page. List of tuples 264 | # (source start file, name, description, authors, manual section). 265 | man_pages = [ 266 | (master_doc, 'fastfm', u'fastFM Documentation', 267 | [author], 1) 268 | ] 269 | 270 | # If true, show URL addresses after external links. 271 | #man_show_urls = False 272 | 273 | 274 | # -- Options for Texinfo output ------------------------------------------- 275 | 276 | # Grouping the document tree into Texinfo files. 
List of tuples 277 | # (source start file, target name, title, author, 278 | # dir menu entry, description, category) 279 | texinfo_documents = [ 280 | (master_doc, 'fastFM', u'fastFM Documentation', 281 | author, 'fastFM', 'One line description of project.', 282 | 'Miscellaneous'), 283 | ] 284 | 285 | # Documents to append as an appendix to all manuals. 286 | #texinfo_appendices = [] 287 | 288 | # If false, no module index is generated. 289 | #texinfo_domain_indices = True 290 | 291 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 292 | #texinfo_show_urls = 'footnote' 293 | 294 | # If true, do not generate a @detailmenu in the "Top" node's menu. 295 | #texinfo_no_detailmenu = False 296 | -------------------------------------------------------------------------------- /doc/dev_notes.txt: -------------------------------------------------------------------------------- 1 | update doc: 2 | ghp-import doc/_build/html/ -p -n 3 | 4 | -n Include a .nojekyll file in the branch. 5 | 6 | for new releases: 7 | 8 | - update version & release in `conf.py` 9 | - make sure doc is still correct `make doctest` 10 | 11 | run test server 12 | cd fastFM/doc/ 13 | python -m SimpleHTTPServer 14 | -------------------------------------------------------------------------------- /doc/guide.rst: -------------------------------------------------------------------------------- 1 | Guide 2 | ===== 3 | 4 | How to choose the right Solver. 5 | ------------------------------- 6 | 7 | This section explains the trade off between the three solvers available in fastFM. 8 | The following applies for both **classification** and **regression** tasks. 9 | 10 | .. testcode:: 11 | 12 | import fastFM.mcmc 13 | 14 | - (+) smallest number of hyper parameter 15 | - (+) automatic regularization 16 | - (-) predictions need to be calculated at training time 17 | 18 | `Note: The predict method of the mcmc model returns predictions based on only 19 | the last draw of the model parameters. 
This evaluation is fast 20 | but usually of low quality. Don't use mcmc if you need fast predictions!` 21 | 22 | .. testcode:: 23 | 24 | import fastFM.als 25 | 26 | - (+) fast predictions 27 | - (+) less hyper parameter then SGD 28 | - (-) regularization must be specified 29 | 30 | .. testcode:: 31 | 32 | import fastFM.sgd 33 | 34 | - (+) fast predictions 35 | - (+) can iterate over large datasets (split and iterate over junks using warm start) 36 | - (-) regularization must be specified 37 | - (-) highest number of hyper parameter (requires, `step_size`) 38 | 39 | 40 | Learning Curves 41 | --------------- 42 | 43 | Learning curves are an important tool to understand the model behavior and 44 | enable us to use techniques such as early stopping to avoid over fitting. We can 45 | `warm_start` every fastFM model which allows us to calculate custom statistics during 46 | the model fitting process efficiently. The following example uses `RMSE` and 47 | `R^2` to demonstrate how we can monitor model performance on train and test set 48 | efficiently. Please note that we can replace them with any metric we want. 49 | 50 | .. plot:: 51 | :include-source: 52 | 53 | from fastFM import als 54 | from fastFM.datasets import make_user_item_regression 55 | from sklearn.metrics import mean_squared_error, r2_score 56 | import numpy as np 57 | 58 | X, y, coef = make_user_item_regression(label_stdev=.4) 59 | from sklearn.model_selection import train_test_split 60 | X_train, X_test, y_train, y_test = train_test_split( 61 | X, y, test_size=0.33, random_state=42) 62 | 63 | n_iter = 20 64 | step_size = 1 65 | l2_reg_w = 0 66 | l2_reg_V = 0 67 | 68 | fm = als.FMRegression(n_iter=0, l2_reg_w=0.1, l2_reg_V=0.1, rank=4) 69 | # Allocates and initalizes the model parameter. 
70 | fm.fit(X_train, y_train) 71 | 72 | rmse_train = [] 73 | rmse_test = [] 74 | r2_score_train = [] 75 | r2_score_test = [] 76 | 77 | for i in range(1, n_iter): 78 | fm.fit(X_train, y_train, n_more_iter=step_size) 79 | y_pred = fm.predict(X_test) 80 | 81 | rmse_train.append(np.sqrt(mean_squared_error(fm.predict(X_train), y_train))) 82 | rmse_test.append(np.sqrt(mean_squared_error(fm.predict(X_test), y_test))) 83 | 84 | r2_score_train.append(r2_score(fm.predict(X_train), y_train)) 85 | r2_score_test.append(r2_score(fm.predict(X_test), y_test)) 86 | 87 | 88 | from matplotlib import pyplot as plt 89 | fig, axes = plt.subplots(ncols=2, figsize=(15, 4)) 90 | 91 | x = np.arange(1, n_iter) * step_size 92 | with plt.style.context('fivethirtyeight'): 93 | axes[0].plot(x, rmse_train, label='RMSE-train', color='r', ls="--") 94 | axes[0].plot(x, rmse_test, label='RMSE-test', color='r') 95 | axes[1].plot(x, r2_score_train, label='R^2-train', color='b', ls="--") 96 | axes[1].plot(x, r2_score_test, label='R^2-test', color='b') 97 | axes[0].set_ylabel('RMSE', color='r') 98 | axes[1].set_ylabel('R^2', color='b') 99 | axes[0].legend() 100 | axes[1].legend() 101 | 102 | Visualizing MCMC Traces 103 | ----------------------- 104 | 105 | Our MCMC implementation samples model and hyper parameter at every iteration 106 | and calculates a running mean of the predictions. MCMC traces are an important tool 107 | for evaluating convergence and mixing behavior MCMC chains. The following example 108 | demonstrates how to calculate statistics for predictions, hyper parameter and 109 | model parameter efficiently using the `warm_start` option. 110 | 111 | 112 | .. 
plot:: 113 | :include-source: 114 | 115 | import numpy as np 116 | from sklearn.metrics import mean_squared_error 117 | from sklearn.model_selection import train_test_split 118 | 119 | from fastFM.datasets import make_user_item_regression 120 | from fastFM import mcmc 121 | 122 | n_iter = 100 123 | step_size = 10 124 | seed = 123 125 | rank = 3 126 | 127 | X, y, coef = make_user_item_regression(label_stdev=.4) 128 | X_train, X_test, y_train, y_test = train_test_split( 129 | X, y, test_size=0.33) 130 | 131 | fm = mcmc.FMRegression(n_iter=0, rank=rank, random_state=seed) 132 | # Allocates and initalizes the model and hyper parameter. 133 | fm.fit_predict(X_train, y_train, X_test) 134 | 135 | rmse_test = [] 136 | rmse_new = [] 137 | hyper_param = np.zeros((n_iter -1, 3 + 2 * rank), dtype=np.float64) 138 | for nr, i in enumerate(range(1, n_iter)): 139 | fm.random_state = i * seed 140 | y_pred = fm.fit_predict(X_train, y_train, X_test, n_more_iter=step_size) 141 | rmse_test.append(np.sqrt(mean_squared_error(y_pred, y_test))) 142 | hyper_param[nr, :] = fm.hyper_param_ 143 | 144 | values = np.arange(1, n_iter) 145 | x = values * step_size 146 | burn_in = 5 147 | x = x[burn_in:] 148 | 149 | from matplotlib import pyplot as plt 150 | fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, figsize=(15, 8)) 151 | 152 | axes[0, 0].plot(x, rmse_test[burn_in:], label='test rmse', color="r") 153 | axes[0, 0].legend() 154 | axes[0, 1].plot(x, hyper_param[burn_in:,0], label='alpha', color="b") 155 | axes[0, 1].legend() 156 | axes[1, 0].plot(x, hyper_param[burn_in:,1], label='lambda_w', color="g") 157 | axes[1, 0].legend() 158 | axes[1, 1].plot(x, hyper_param[burn_in:,3], label='mu_w', color="g") 159 | axes[1, 1].legend() 160 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. 
fastFM documentation master file, created by 2 | sphinx-quickstart on Mon Jun 15 01:42:19 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to fastFM's documentation! 7 | ================================== 8 | 9 | This is the documentation for fastFM's python interface. 10 | **Source code** and **install instructions** can be found on https://github.com/ibayer/fastFM. 11 | A short paper describing the library is available on arXiv http://arxiv.org/abs/1505.00641 12 | 13 | 14 | Supported Operating Systems 15 | --------------------------- 16 | fastFM has a continous integration / testing servers (Travis) for **Linux (Ubuntu 14.04 LTS)** 17 | and **OS X Mavericks**. Other OS are not actively supported. 18 | 19 | .. toctree:: 20 | :maxdepth: 3 21 | 22 | tutorial 23 | guide 24 | api 25 | 26 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. 
qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 2> nul 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 
80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\fastFM.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\fastFM.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 
141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 
209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /doc/tutorial.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | 4 | The following sections show how to use different features of the fastFM 5 | library. 
This is mostly a demonstration on of the library and no background 6 | on the Factorization Machine (FM) model is given. 7 | I recommend to read [TIST2012]. This paper contains many examples on how FM's 8 | can emulate and extend matrix factorization models through feature engineering. 9 | 10 | 11 | Regression with ALS Solver 12 | -------------------------- 13 | 14 | We first set up a small toy dataset for a regression problem. Please 15 | refere to [SIGIR2011] for background information on the implemented ALS solver. 16 | 17 | .. testcode:: 18 | 19 | from fastFM.datasets import make_user_item_regression 20 | from sklearn.model_selection import train_test_split 21 | 22 | # This sets up a small test dataset. 23 | X, y, _ = make_user_item_regression(label_stdev=.4) 24 | X_train, X_test, y_train, y_test = train_test_split(X, y) 25 | 26 | The number of iterations `n_iter`, the standard deviation `init_stdev` used to 27 | initialize the model parameter and the number of hidden variables `rank` per feature. 28 | This are the parameters that have to be specified for every solver and task. The ALS 29 | solver requires in addition the regularization values for the first `l2_reg_w` 30 | and second order `l2_reg_V` interactions. 31 | 32 | .. testcode:: 33 | 34 | from fastFM import als 35 | fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=2, l2_reg_w=0.1, l2_reg_V=0.5) 36 | fm.fit(X_train, y_train) 37 | y_pred = fm.predict(X_test) 38 | 39 | We can easily evaluate our model using the scikit-learn library. 40 | 41 | .. testcode:: 42 | 43 | from sklearn.metrics import mean_squared_error 44 | 'mse:', mean_squared_error(y_test, y_pred) 45 | 46 | 47 | Logit Classification with SGD Solver 48 | ------------------------------------ 49 | 50 | We first have to convert the target of our toy dataset to -1/1 values 51 | in order to work with the classification implementation. Currently only 52 | binary classification is supported. 53 | 54 | .. 
testcode:: 55 | 56 | import numpy as np 57 | # Convert dataset to binary classification task. 58 | y_labels = np.ones_like(y) 59 | y_labels[y < np.mean(y)] = -1 60 | X_train, X_test, y_train, y_test = train_test_split(X, y_labels) 61 | 62 | 63 | We could have used the ALS solver module for this problem as well but 64 | we will use the SGD module instead. In addition to the 65 | hyper parameter needed for the ALS module we need to specify 66 | the SGD specific `step_size` parameter. 67 | 68 | .. testcode:: 69 | 70 | from fastFM import sgd 71 | fm = sgd.FMClassification(n_iter=1000, init_stdev=0.1, l2_reg_w=0, 72 | l2_reg_V=0, rank=2, step_size=0.1) 73 | fm.fit(X_train, y_train) 74 | y_pred = fm.predict(X_test) 75 | 76 | 77 | All classifier implementations can not only return the most likely labels 78 | but also class probabilities via the `predict_proba`. 79 | 80 | .. testcode:: 81 | 82 | y_pred_proba = fm.predict_proba(X_test) 83 | 84 | This is important for classification metrics such as the AUC score that require the class probabilities 85 | as input. 86 | 87 | .. testcode:: 88 | 89 | from sklearn.metrics import accuracy_score, roc_auc_score 90 | 'acc:', accuracy_score(y_test, y_pred) 91 | 'auc:', roc_auc_score(y_test, y_pred_proba) 92 | 93 | 94 | Bayesian Probit Classification with MCMC Solver 95 | ----------------------------------------------- 96 | 97 | The MCMC module needs fewer hyper parameter that any other solver. 98 | This solver is able to integrate out the regularization parameter and frees us 99 | from selecting them manually. Please see [Freuden2011] for the detail on the implemented 100 | Gibbs sampler. 101 | The major drawback of the MCMC solver is that it forces us to calculate predictions 102 | during fitting time using the `fit_predict` function. 103 | It's however possible to select a subset of parameter draws to speed up prediction [RecSys2013]. 
104 | It's also possible to just call `predict` on a trained MCMC model but this returns predictions 105 | that are solely based on the last parameters draw. 106 | These predictions can be used for diagnostic purposes but 107 | are usually not as good as averaged predictions returned by `fit_predict`. 108 | 109 | 110 | .. testcode:: 111 | 112 | from fastFM import mcmc 113 | fm = mcmc.FMClassification(n_iter=1000, rank=2, init_stdev=0.1) 114 | 115 | Our last example shows how to use the MCMC module for binary classification. 116 | Probit regression uses the Cumulative Distribution Function (CDF) of the standard normal Distribution 117 | as link function. Mainly because the CDF leads to an easier Gibbs solver then the 118 | sigmoid function used in the SGD classifier implementation. The results 119 | are in practice usually very similar. 120 | 121 | .. testcode:: 122 | 123 | y_pred = fm.fit_predict(X_train, y_train, X_test) 124 | y_pred_proba = fm.fit_predict_proba(X_train, y_train, X_test) 125 | 126 | 127 | .. testcode:: 128 | 129 | from sklearn.metrics import accuracy_score, roc_auc_score 130 | 'acc:', accuracy_score(y_test, y_pred) 131 | 'auc:', roc_auc_score(y_test, y_pred_proba) 132 | 133 | 134 | 135 | .. [TIST2012] Rendle, Steffen. "Factorization machines with libfm." ACM Transactions on Intelligent Systems and Technology (TIST) 3.3 (2012): 57. 136 | .. [SIGIR2011] Rendle, Steffen, et al. "Fast context-aware recommendations with factorization machines." Proceedings of the 34th international ACM SIGIR conference on Research and development in Information Retrieval. ACM, 2011. 137 | .. [Freuden2011] C Freudenthaler, L Schmidt-Thieme, S Rendle "Bayesian factorization machines" - 2011 - Citeseer 138 | .. 
[RecSys2013] Silbermann, Bayer, and Rendle "Sample selection for MCMC-based recommender systems" Proceedings of the 7th ACM conference on Recommender systems 2013 139 | -------------------------------------------------------------------------------- /examples/warm_start_als.py: -------------------------------------------------------------------------------- 1 | from fastFM.datasets import make_user_item_regression 2 | from fastFM import als 3 | from sklearn.metrics import mean_squared_error 4 | import scipy.sparse as sp 5 | import numpy as np 6 | 7 | if __name__ == "__main__": 8 | 9 | X, y, coef = make_user_item_regression(label_stdev=.4) 10 | from sklearn.cross_validation import train_test_split 11 | X_train, X_test, y_train, y_test = train_test_split( 12 | X, y, test_size=0.33, random_state=42) 13 | X_train = sp.csc_matrix(X_train) 14 | X_test = sp.csc_matrix(X_test) 15 | n_iter = 50 16 | 17 | """ 18 | offset = '../../fastFM-notes/benchmarks/' 19 | train_path = offset + "data/ml-100k/u1.base.libfm" 20 | test_path = offset + "data/ml-100k/u1.test.libfm" 21 | 22 | from sklearn.datasets import load_svmlight_file 23 | X_train, y_train = load_svmlight_file(train_path) 24 | X_test, y_test= load_svmlight_file(test_path) 25 | X_train = sp.csc_matrix(X_train) 26 | X_test = sp.csc_matrix(X_test) 27 | # add padding for features not in test 28 | X_test = sp.hstack([X_test, sp.csc_matrix((X_test.shape[0], X_train.shape[1] - X_test.shape[1]))]) 29 | """ 30 | 31 | n_iter = 50 32 | rank = 4 33 | seed = 333 34 | step_size = 1 35 | l2_reg_w = 0 36 | l2_reg_V = 0 37 | 38 | fm = als.FMRegression(n_iter=0, l2_reg_w=l2_reg_w, 39 | l2_reg_V=l2_reg_V, rank=rank, random_state=seed) 40 | # initalize coefs 41 | fm.fit(X_train, y_train) 42 | 43 | rmse_train = [] 44 | rmse_test = [] 45 | for i in range(1, n_iter): 46 | fm.fit(X_train, y_train, n_more_iter=step_size) 47 | y_pred = fm.predict(X_test) 48 | rmse_train.append(np.sqrt(mean_squared_error(fm.predict(X_train), y_train))) 49 | 
rmse_test.append(np.sqrt(mean_squared_error(fm.predict(X_test), y_test))) 50 | 51 | print '------- restart ----------' 52 | values = np.arange(1, n_iter) 53 | rmse_test_re = [] 54 | rmse_train_re = [] 55 | for i in values: 56 | fm = als.FMRegression(n_iter=i, l2_reg_w=l2_reg_w, 57 | l2_reg_V=l2_reg_V, rank=rank, random_state=seed) 58 | fm.fit(X_train, y_train) 59 | rmse_test_re.append(np.sqrt(mean_squared_error(fm.predict(X_test), y_test))) 60 | rmse_train_re.append(np.sqrt(mean_squared_error(fm.predict(X_train), y_train))) 61 | 62 | from matplotlib import pyplot as plt 63 | 64 | x = np.arange(1, n_iter) * step_size 65 | 66 | with plt.style.context('fivethirtyeight'): 67 | plt.plot(x, rmse_train, label='train') 68 | plt.plot(x, rmse_test, label='test') 69 | plt.plot(values, rmse_train_re, label='train re', linestyle='--') 70 | plt.plot(values, rmse_test_re, label='test re', ls='--') 71 | plt.legend() 72 | plt.show() 73 | -------------------------------------------------------------------------------- /examples/warm_start_mcmc.py: -------------------------------------------------------------------------------- 1 | from fastFM.datasets import make_user_item_regression 2 | from fastFM import mcmc 3 | from sklearn.metrics import mean_squared_error 4 | import scipy.sparse as sp 5 | import numpy as np 6 | 7 | 8 | if __name__ == "__main__": 9 | 10 | 11 | offset = '../../fastFM-notes/benchmarks/' 12 | train_path = offset + "data/ml-100k/u1.base.libfm" 13 | test_path = offset + "data/ml-100k/u1.test.libfm" 14 | 15 | from sklearn.datasets import load_svmlight_file 16 | X_train, y_train = load_svmlight_file(train_path) 17 | X_test, y_test= load_svmlight_file(test_path) 18 | X_train = sp.csc_matrix(X_train) 19 | X_test = sp.csc_matrix(X_test) 20 | # add padding for features not in test 21 | X_test = sp.hstack([X_test, sp.csc_matrix((X_test.shape[0], X_train.shape[1] - X_test.shape[1]))]) 22 | 23 | """ 24 | X_train = sp.csc_matrix(np.array([[6, 1], 25 | [2, 3], 26 | [3, 0], 27 
| [6, 1], 28 | [4, 5]]), dtype=np.float64) 29 | y_train = np.array([298, 266, 29, 298, 848], dtype=np.float64) 30 | X_test = X_train 31 | y_test = y_train 32 | """ 33 | 34 | n_iter = 50 35 | rank = 4 36 | seed = 333 37 | step_size = 1 38 | 39 | """ 40 | X, y, coef = make_user_item_regression(label_stdev=.4, random_state=seed) 41 | from sklearn.cross_validation import train_test_split 42 | X_train, X_test, y_train, y_test = train_test_split( 43 | X, y, test_size=0.33, random_state=seed) 44 | X_train = sp.csc_matrix(X_train) 45 | X_test = sp.csc_matrix(X_test) 46 | X_test = X_train 47 | y_test = y_train 48 | """ 49 | 50 | fm = mcmc.FMRegression(n_iter=0, rank=rank, random_state=seed) 51 | # initalize coefs 52 | fm.fit_predict(X_train, y_train, X_test) 53 | 54 | rmse_test = [] 55 | rmse_new = [] 56 | hyper_param = np.zeros((n_iter -1, 3 + 2 * rank), dtype=np.float64) 57 | for nr, i in enumerate(range(1, n_iter)): 58 | fm.random_state = i * seed 59 | y_pred = fm.fit_predict(X_train, y_train, X_test, n_more_iter=step_size) 60 | rmse_test.append(np.sqrt(mean_squared_error(y_pred, y_test))) 61 | hyper_param[nr, :] = fm.hyper_param_ 62 | 63 | print '------- restart ----------' 64 | values = np.arange(1, n_iter) 65 | rmse_test_re = [] 66 | hyper_param_re = np.zeros((len(values), 3 + 2 * rank), dtype=np.float64) 67 | for nr, i in enumerate(values): 68 | fm = mcmc.FMRegression(n_iter=i, rank=rank, random_state=seed) 69 | y_pred = fm.fit_predict(X_train, y_train, X_test) 70 | rmse_test_re.append(np.sqrt(mean_squared_error(y_pred, y_test))) 71 | hyper_param_re[nr, :] = fm.hyper_param_ 72 | 73 | from matplotlib import pyplot as plt 74 | fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, figsize=(15, 8)) 75 | 76 | x = values * step_size 77 | burn_in = 5 78 | x = x[burn_in:] 79 | 80 | #with plt.style.context('ggplot'): 81 | axes[0, 0].plot(x, rmse_test[burn_in:], label='test rmse', color="r") 82 | axes[0, 0].plot(values[burn_in:], rmse_test_re[burn_in:], ls="--", color="r") 
83 | axes[0, 0].legend() 84 | 85 | axes[0, 1].plot(x, hyper_param[burn_in:,0], label='alpha', color="b") 86 | axes[0, 1].plot(values[burn_in:], hyper_param_re[burn_in:,0], ls="--", color="b") 87 | axes[0, 1].legend() 88 | 89 | axes[1, 0].plot(x, hyper_param[burn_in:,1], label='lambda_w', color="g") 90 | #axes[2].plot(x, hyper_param[:,2], label='lambda_V', color="r") 91 | axes[1, 0].plot(values[burn_in:], hyper_param_re[burn_in:,1], ls="--", color="g") 92 | #axes[2].plot(values, hyper_param_re[:,2], label='lambda_V', ls="--", color="r") 93 | axes[1, 0].legend() 94 | 95 | axes[1, 1].plot(x, hyper_param[burn_in:,3], label='mu_w', color="g") 96 | #axes[3].plot(x, hyper_param[:,4], label='mu_V', color="r") 97 | axes[1, 1].plot(values[burn_in:], hyper_param_re[burn_in:,3], ls="--", color="g") 98 | #axes[3].plot(values, hyper_param_re[:,4], label='mu_V', ls="--", color="r") 99 | axes[1, 1].legend() 100 | 101 | plt.show() 102 | #plt.savefig("../../fastFM-notes/jmlr/figs/mcmc_trace.pdf", bbox_inches='tight') 103 | -------------------------------------------------------------------------------- /fastFM/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibayer/fastFM/9f30c5564a8d365105876f4e5d751c46e57dc983/fastFM/__init__.py -------------------------------------------------------------------------------- /fastFM/als.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | import ffm 5 | import numpy as np 6 | from sklearn.base import RegressorMixin 7 | from .validation import check_consistent_length, check_array 8 | from .base import (FactorizationMachine, BaseFMClassifier, 9 | _validate_class_labels, _check_warm_start) 10 | 11 | 12 | class FMRegression(FactorizationMachine, RegressorMixin): 13 | 14 | """ Factorization Machine Regression trained with a als (coordinate descent) 15 | solver. 
def fit(self, X_train, y_train, n_more_iter=0):
    """ Fit model with specified loss.

    Parameters
    ----------
    X_train : scipy.sparse.csc_matrix, (n_samples, n_features)
        Training design matrix.

    y_train : float | ndarray, shape = (n_samples, )
        Regression targets.

    n_more_iter : int
        Number of iterations to continue from the current Coefficients.
    """
    check_consistent_length(X_train, y_train)
    X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64,
                          order="F")
    y_train = check_array(y_train, ensure_2d=False, dtype=np.float64)

    # n_iter is cumulative, the C solver always receives the total budget
    self.n_iter += n_more_iter

    # a continuation run requires previously fitted coefficients
    if n_more_iter > 0:
        _check_warm_start(self, X_train)
        self.warm_start = True

    self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)

    # keep track of the total number of iterations spent so far
    self.iter_count = (self.iter_count + n_more_iter
                       if self.iter_count != 0 else self.n_iter)

    # reset to default setting
    self.warm_start = False
    return self
147 | """ 148 | def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, 149 | l2_reg_w=0.1, l2_reg_V=0.1, l2_reg=None): 150 | super(FMClassification, self).__init__(n_iter=n_iter, 151 | init_stdev=init_stdev, 152 | rank=rank, 153 | random_state=random_state) 154 | if (l2_reg is not None): 155 | self.l2_reg_V = l2_reg 156 | self.l2_reg_w = l2_reg 157 | else: 158 | self.l2_reg_w = l2_reg_w 159 | self.l2_reg_V = l2_reg_V 160 | self.l2_reg = l2_reg 161 | self.task = "classification" 162 | 163 | def fit(self, X_train, y_train, n_more_iter=0): 164 | """ Fit model with specified loss. 165 | 166 | Parameters 167 | ---------- 168 | X : scipy.sparse.csc_matrix, (n_samples, n_features) 169 | 170 | y : float | ndarray, shape = (n_samples, ) 171 | the targets have to be encodes as {-1, 1}. 172 | 173 | n_more_iter : int 174 | Number of iterations to continue from the current Coefficients. 175 | """ 176 | check_consistent_length(X_train, y_train) 177 | X_train = check_array(X_train, accept_sparse="csc", dtype=np.float64, 178 | order="F") 179 | y_train = _validate_class_labels(y_train) 180 | 181 | self.classes_ = np.unique(y_train) 182 | if len(self.classes_) != 2: 183 | raise ValueError("This solver only supports binary classification" 184 | " but the data contains" 185 | " class: %r" % self.classes_) 186 | 187 | # fastFM-core expects labels to be in {-1,1} 188 | y_train = y_train.copy() 189 | i_class1 = (y_train == self.classes_[0]) 190 | y_train[i_class1] = -1 191 | y_train[~i_class1] = 1 192 | 193 | self.n_iter = self.n_iter + n_more_iter 194 | 195 | if n_more_iter > 0: 196 | _check_warm_start(self, X_train) 197 | self.warm_start = True 198 | 199 | self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train) 200 | 201 | if self.iter_count != 0: 202 | self.iter_count = self.iter_count + n_more_iter 203 | else: 204 | self.iter_count = self.n_iter 205 | 206 | # reset to default setting 207 | self.warm_start = False 208 | return self 209 | 
class FactorizationMachine(BaseEstimator):

    """ Factorization Machine trained with MCMC (Gibbs) sampling.
    The predictions need to be calculated at training time since the
    individual parameter samples are too expensive to store.

    Parameters
    ----------
    n_iter : int, optional
        The number of samples for the MCMC sampler, number or iterations over
        the training set for ALS and number of steps for SGD.

    init_stdev: float, optional
        Sets the stdev for the initialization of the parameter

    random_state: int, optional
        The seed of the pseudo random number generator that
        initializes the parameters and mcmc chain.

    rank: int
        The rank of the factorization used for the second order interactions.

    copy_X : boolean, optional, default True
        If ``True``, X will be copied; else, it may be overwritten.

    Attributes
    ----------
    Attention: these Coefficients are the last sample from the MCMC chain
    and can't be used to calculate predictions.

    w0_ : float
        bias term

    w_ : float | array, shape = (n_features)
        Coefficients for linear combination.

    V_ : float | array, shape = (rank_pair, n_features)
        Coefficients of second order factor matrix.
    """
    def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
                 copy_X=True):
        # hyper parameters exposed via sklearn's get_params()/set_params()
        self.n_iter = n_iter
        self.random_state = random_state
        self.init_stdev = init_stdev
        self.rank = rank
        # warm start bookkeeping: total number of iterations run so far
        self.iter_count = 0
        self.warm_start = False
        # solver flags: skip fitting the bias / the linear part when set
        self.ignore_w_0 = False
        self.ignore_w = False
        # default L2 penalties; subclasses that take l2_reg_* constructor
        # arguments overwrite these values
        self.l2_reg_w = 0.1
        self.l2_reg_V = 0.1
        # fixed step size used by the SGD based solvers (overwritten there)
        self.step_size = 0
        # NOTE(review): copy_X is stored but never read in this class —
        # confirm whether the solvers honour it.
        self.copy_X = copy_X

    def predict(self, X_test):
        """ Return predictions

        Parameters
        ----------
        X : scipy.sparse.csc_matrix, (n_samples, n_features)

        Returns
        ------

        T : array, shape (n_samples)
            The labels are returned for classification.
        """
        # the C code requires fortran ordered float64 CSC input
        X_test = check_array(X_test, accept_sparse="csc", dtype=np.float64,
                             order="F")
        assert sp.isspmatrix_csc(X_test)
        # the model must have been fitted on the same number of features
        assert X_test.shape[1] == len(self.w_)
        return ffm.ffm_predict(self.w0_, self.w_, self.V_, X_test)
class FMRecommender(FactorizationMachine):

    """ Factorization Machine Recommender with pairwise (BPR) loss solver.

    Parameters
    ----------
    n_iter : int, optional
        The number of iterations over individual samples.

    init_stdev: float, optional
        Sets the stdev for the initialization of the parameter

    random_state: int, optional
        The seed of the pseudo random number generator that
        initializes the parameters and mcmc chain.

    rank: int
        The rank of the factorization used for the second order interactions.

    l2_reg_w : float
        L2 penalty weight for linear coefficients.

    l2_reg_V : float
        L2 penalty weight for pairwise coefficients.

    l2_reg : float
        L2 penalty weight for all coefficients (default=0).

    step_size : float
        Stepsize for the SGD solver, the solver uses a fixed step size and
        might require a tunning of the number of iterations `n_iter`.

    Attributes
    ----------

    w0_ : float
        bias term

    w_ : float | array, shape = (n_features)
        Coefficients for linear combination.

    V_ : float | array, shape = (rank_pair, n_features)
        Coefficients of second order factor matrix.
    """

    def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123,
                 l2_reg_w=0.1, l2_reg_V=0.1, l2_reg=0, step_size=0.1):
        super(FMRecommender, self).\
            __init__(n_iter=n_iter, init_stdev=init_stdev, rank=rank,
                     random_state=random_state)
        # a non-zero l2_reg overrides the individual penalty weights
        if (l2_reg != 0):
            self.l2_reg_V = l2_reg
            self.l2_reg_w = l2_reg
        else:
            self.l2_reg_w = l2_reg_w
            self.l2_reg_V = l2_reg_V
        # store the constructor argument unchanged so that sklearn's
        # get_params()/clone() can recover it (was missing; als and sgd
        # store it, its absence makes get_params raise AttributeError)
        self.l2_reg = l2_reg
        self.step_size = step_size
        self.task = "ranking"

    def fit(self, X, pairs):
        """ Fit model with specified loss.

        Parameters
        ----------
        X : scipy.sparse.csc_matrix, (n_samples, n_features)

        pairs : ndarray, shape = (n_compares, 2)
            Each row `i` defines a pair of sample indices such that
            the first returns a higher value than the second:
            FM(X[pairs[i, 0]]) > FM(X[pairs[i, 1]]).
        """
        # The sgd solver expects a transposed design matrix in column major
        # order (csc_matrix) so individual samples can be accessed
        # column-wise.
        X = X.T
        X = check_array(X, accept_sparse="csc", dtype=np.float64)
        assert_all_finite(pairs)

        pairs = pairs.astype(np.float64)

        # check that pairs contain no real values
        assert_array_equal(pairs, pairs.astype(np.int32))
        # valid sample indices are 0 .. n_samples - 1; after the transpose
        # above n_samples == X.shape[1]. The previous `<=` bound was
        # off-by-one and accepted an out-of-bounds index equal to
        # n_samples.
        assert pairs.max() < X.shape[1]
        assert pairs.min() >= 0
        self.w0_, self.w_, self.V_ = ffm.ffm_fit_sgd_bpr(self, X, pairs)
        return self
def make_user_item_regression(random_state=123, n_user=20, n_item=20,
                              label_stdev=0.4, rank=2, bias=True,
                              first_order=True, stdev_w0=.2, stdev_w=0.3,
                              stdev_V=0.4, mean_w0=2, mean_w=5, mean_V=10):
    """Create a synthetic regression problem over a dense user/item grid.

    Every (user, item) combination yields one sample whose design row
    one-hot encodes the user (first ``n_user`` columns) and the item
    (remaining ``n_item`` columns). Targets are FM predictions under
    randomly drawn parameters, optionally perturbed by Gaussian noise.

    Returns
    -------
    X : scipy.sparse.csc_matrix, (n_user * n_item, n_user + n_item)
    y : ndarray, shape (n_user * n_item, )
    coef : tuple (w0, w, V)
        The ground-truth model parameters.
    """
    n_features = n_user + n_item
    n_samples = n_user * n_item

    # two non-zero entries per sample row: its user and its item column
    user_cols = np.repeat(np.arange(n_user), n_item)
    item_cols = n_user + np.tile(np.arange(n_item), n_user)
    col_idx = np.hstack((user_cols, item_cols))
    row_idx = np.tile(np.arange(n_samples), 2)
    ones = np.ones_like(col_idx, dtype=np.float64)

    X = sp.csc_matrix(sp.coo_matrix((ones, (row_idx, col_idx))))
    assert X.shape == (n_samples, n_features)

    # draw the ground-truth model parameters
    rng = check_random_state(random_state)
    w0 = rng.normal(mean_w0, stdev_w0)
    w = rng.normal(mean_w, stdev_w, n_features)
    V = rng.normal(mean_V, stdev_V, (rank, n_features))

    y = ffm_predict(w0, w, V, X)
    if label_stdev > 0:
        y = rng.normal(y, label_stdev)

    return X, y, (w0, w, V)
# Create a CsMatrix object and return it as a capsule.
# The CSparse struct only *borrows* the pointers of X's indptr/indices/data
# arrays — no reference to X is stored, so the caller must keep X (and those
# arrays) alive for the lifetime of the capsule.
# NOTE(review): the `<...>` pointer casts (e.g. around malloc here and around
# the free'd pointer in del_CsMatrix) appear to have been stripped when this
# file was extracted — confirm against the repository before building.
def CsMatrix(X not None):
    cdef cffm.cs_di *p
    p = malloc(sizeof(cffm.cs_di))
    if p == NULL:
        # NOTE(review): message says "Point" — looks like a copy/paste
        # artefact from an example.
        raise MemoryError("No memory to make a Point")

    cdef int i  # (unused)
    # typed views guarantee c-contiguous int32/float64 buffers
    cdef np.ndarray[int, ndim=1, mode = 'c'] indptr = X.indptr
    cdef np.ndarray[int, ndim=1, mode = 'c'] indices = X.indices
    cdef np.ndarray[double, ndim=1, mode = 'c'] data = X.data

    # Put the scipy data into the CSparse struct. This is just copying some
    # pointers.
    p.nzmax = X.data.shape[0]
    p.m = X.shape[0]
    p.n = X.shape[1]
    p.p = &indptr[0]
    p.i = &indices[0]
    p.x = &data[0]
    p.nz = -1 # to indicate CSC format
    return PyCapsule_New(p, "CsMatrix",
                         del_CsMatrix)
def ffm_als_fit(fm, X, double[:] y):
    """ Run the ALS (coordinate descent) solver from fastFM-core.

    Hyper parameters are read from the estimator object ``fm``; ``X`` is a
    scipy CSC design matrix of shape (n_samples, n_features) and ``y`` the
    target vector. Returns the fitted coefficients (w_0, w, V).
    """
    assert X.shape[0] == len(y) # test shapes
    n_features = X.shape[1]
    # wrap the design matrix and the hyper parameters for the C code.
    # NOTE(review): the `<...>` pointer casts around the
    # PyCapsule_GetPointer calls appear to have been lost when this file
    # was extracted — confirm against the repository.
    X_ = CsMatrix(X)
    pt_X = PyCapsule_GetPointer(X_, "CsMatrix")
    param = FFMParam(fm)
    pt_param = PyCapsule_GetPointer(param, "FFMParam")
    cdef double w_0
    cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w
    cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V

    if fm.warm_start:
        # continue from the estimator's current coefficients; ignored or
        # rank-0 parts are replaced by zero buffers of the right size
        w_0 = 0 if fm.ignore_w_0 else fm.w0_
        w = np.zeros(n_features, dtype=np.float64) if fm.ignore_w else fm.w_
        V = np.zeros((fm.rank, n_features), dtype=np.float64)\
            if fm.rank == 0 else fm.V_
    else:
        # cold start from all-zero coefficients
        w_0 = 0
        w = np.zeros(n_features, dtype=np.float64)
        V = np.zeros((fm.rank, n_features), dtype=np.float64)

    # the C solver writes the results into w_0, w and V in place
    cffm.ffm_als_fit(&w_0, w.data, V.data,
                     pt_X, &y[0], pt_param)
    return w_0, w, V
122 | """ 123 | assert X.shape[1] == len(y) # test shapes 124 | n_features = X.shape[0] 125 | X_ = CsMatrix(X) 126 | pt_X = PyCapsule_GetPointer(X_, "CsMatrix") 127 | param = FFMParam(fm) 128 | pt_param = PyCapsule_GetPointer(param, "FFMParam") 129 | 130 | # allocate the coefs 131 | cdef double w_0 = 0 132 | cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w =\ 133 | np.zeros(n_features, dtype=np.float64) 134 | cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V =\ 135 | np.zeros((fm.rank, n_features), dtype=np.float64) 136 | 137 | cffm.ffm_sgd_fit(&w_0, w.data, V.data, 138 | pt_X, &y[0], pt_param) 139 | return w_0, w, V 140 | 141 | 142 | def ffm_fit_sgd_bpr(fm, X, np.ndarray[np.float64_t, ndim=2, mode='c'] pairs): 143 | n_features = X.shape[0] 144 | X_ = CsMatrix(X) 145 | pt_X = PyCapsule_GetPointer(X_, "CsMatrix") 146 | param = FFMParam(fm) 147 | pt_param = PyCapsule_GetPointer(param, "FFMParam") 148 | 149 | #allocate the coefs 150 | cdef double w_0 = 0 151 | cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w =\ 152 | np.zeros(n_features, dtype=np.float64) 153 | cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V =\ 154 | np.zeros((fm.rank, n_features), dtype=np.float64) 155 | 156 | cffm.ffm_sgd_bpr_fit(&w_0, w.data, V.data, 157 | pt_X, pairs.data, pairs.shape[0], pt_param) 158 | return w_0, w, V 159 | 160 | 161 | def ffm_mcmc_fit_predict(fm, X_train, X_test, double[:] y): 162 | assert X_train.shape[0] == len(y) 163 | assert X_train.shape[1] == X_test.shape[1] 164 | n_features = X_train.shape[1] 165 | param = FFMParam(fm) 166 | pt_param = PyCapsule_GetPointer(param, "FFMParam") 167 | X_train_ = CsMatrix(X_train) 168 | pt_X_train = PyCapsule_GetPointer(X_train_, "CsMatrix") 169 | X_test_ = CsMatrix(X_test) 170 | pt_X_test = PyCapsule_GetPointer(X_test_, "CsMatrix") 171 | 172 | cdef double w_0 173 | cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w 174 | cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V 175 | # allocate the results vector 176 | cdef 
def find_init_stdev(fm, X_train, y_train, X_vali=None, y_vali=None,
                    stdev_range=None):
    """ Search ``stdev_range`` for the ``init_stdev`` with the lowest MSE.

    Each candidate stdev is evaluated by refitting ``fm`` from scratch and
    measuring the mean squared error — on the validation set if one is
    given, otherwise on the training set itself.

    Parameters
    ----------
    fm : mcmc.FMRegression
        The estimator to tune; its ``init_stdev`` attribute is modified.
    X_train : scipy.sparse.csc_matrix, (n_samples, n_features)
    y_train : array, shape (n_samples)
    X_vali : scipy.sparse.csc_matrix, optional
    y_vali : array, optional
        Required whenever ``X_vali`` is given.
    stdev_range : list of float, optional
        Candidate values; defaults to a coarse grid.

    Returns
    -------
    (best_init_stdev, best_mse) : tuple
    """
    if not stdev_range:
        # coarse default grid (a duplicated 0.1 entry used to waste one
        # full refit without changing the result)
        stdev_range = [0.1, 0.2, 0.5, 1.0]

    if not isinstance(fm, FMRegression):
        raise Exception("only implemented for FMRegression")

    # fail early with a clear message instead of crashing inside
    # mean_squared_error(None, ...) further down
    if X_vali is not None and y_vali is None:
        raise ValueError("y_vali is required when X_vali is given")

    # just using a dummy here; its predictions are ignored
    if X_vali is None:
        X_test = X_train[:2, :]
    else:
        X_test = X_vali

    best_init_stdev = 0
    best_mse = np.finfo(np.float64).max
    for init_stdev in stdev_range:
        fm.init_stdev = init_stdev
        y_pred_vali = fm.fit_predict(X_train, y_train, X_test)
        if X_vali is None:
            # no validation data: score on the training set
            y_pred = fm.predict(X_train)
            mse = mean_squared_error(y_pred, y_train)
        else:
            mse = mean_squared_error(y_pred_vali, y_vali)
        if mse < best_mse:
            best_mse = mse
            best_init_stdev = init_stdev
    return best_init_stdev, best_mse
def fit_predict(self, X_train, y_train, X_test, n_more_iter=0):
    """Return average of posterior estimates of the test samples.

    The MCMC sampler evaluates the test predictions while sampling and
    averages them, since the individual parameter samples are too
    expensive to store (see FactorizationMachine).

    Parameters
    ----------
    X_train : scipy.sparse.csc_matrix, (n_samples, n_features)

    y_train : array, shape (n_samples)

    X_test : scipy.sparse.csc_matrix, (n_test_samples, n_features)

    n_more_iter : int
        Number of iterations to continue from the current Coefficients.

    Returns
    -------
    T : array, shape (n_test_samples)
    """
    self.task = "regression"
    X_train, y_train, X_test = _validate_mcmc_fit_input(X_train, y_train,
                                                        X_test)

    # n_iter is cumulative over warm started calls
    self.n_iter = self.n_iter + n_more_iter

    if n_more_iter > 0:
        # continue the chain: requires coefficients, the running
        # prediction average and the hyper parameters of a previous call
        _check_warm_start(self, X_train)
        assert self.prediction_.shape[0] == X_test.shape[0]
        assert self.hyper_param_.shape
        self.warm_start = True
    else:
        self.iter_count = 0

    coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train,
                                            X_test, y_train)
    self.w0_, self.w_, self.V_ = coef
    # keep the running prediction average so the chain can be continued
    self.prediction_ = y_pred
    self.warm_start = False

    # keep track of the total number of iterations spent so far
    if self.iter_count != 0:
        self.iter_count = self.iter_count + n_more_iter
    else:
        self.iter_count = self.n_iter

    return y_pred
def fit_predict(self, X_train, y_train, X_test):
    """Return predicted class labels of the test samples, derived from
    the averaged posterior class probabilities.
    Use only with MCMC!

    Parameters
    ----------
    X_train : scipy.sparse.csc_matrix, (n_samples, n_features)

    y_train : array, shape (n_samples)
        Binary class labels (mapped internally to {-1, 1}).

    X_test : scipy.sparse.csc_matrix, (n_test_samples, n_features)

    Returns
    -------
    y_pred : array, shape (n_test_samples)
        Returns predicted class labels.
    """
    proba = self.fit_predict_proba(X_train, y_train, X_test)
    # threshold the posterior mean probability at 0.5 to pick the label
    labels = np.full_like(proba, self.classes_[0], dtype=np.float64)
    labels[proba > .5] = self.classes_[1]
    return labels
203 | 204 | X_test : scipy.sparse.csc_matrix, (n_test_samples, n_features) 205 | 206 | Returns 207 | ------- 208 | y_pred : array, shape (n_test_samples) 209 | Returns probability estimates for the class with lowest 210 | classification label. 211 | 212 | """ 213 | self.task = "classification" 214 | 215 | self.classes_ = np.unique(y_train) 216 | if len(self.classes_) != 2: 217 | raise ValueError("This solver only supports binary classification" 218 | " but the data contains" 219 | " class: %r" % self.classes_) 220 | 221 | # fastFM-core expects labels to be in {-1,1} 222 | y_train = y_train.copy() 223 | i_class1 = (y_train == self.classes_[0]) 224 | y_train[i_class1] = -1 225 | y_train[~i_class1] = 1 226 | 227 | X_train, y_train, X_test = _validate_mcmc_fit_input(X_train, y_train, 228 | X_test) 229 | y_train = _validate_class_labels(y_train) 230 | 231 | coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train, 232 | X_test, y_train) 233 | self.w0_, self.w_, self.V_ = coef 234 | return y_pred 235 | -------------------------------------------------------------------------------- /fastFM/sgd.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | 5 | import ffm 6 | import numpy as np 7 | from sklearn.base import RegressorMixin 8 | from .validation import check_array, check_consistent_length 9 | from .base import (FactorizationMachine, BaseFMClassifier, 10 | _validate_class_labels) 11 | 12 | 13 | class FMRegression(FactorizationMachine, RegressorMixin): 14 | 15 | """ Factorization Machine Regression trained with a stochastic gradient 16 | descent solver. 17 | 18 | Parameters 19 | ---------- 20 | n_iter : int, optional 21 | The number of interations of individual samples . 
might require tuning of the number of iterations `n_iter`.
init_stdev: float, optional
141 | """ 142 | 143 | def __init__(self, n_iter=100, init_stdev=0.1, rank=8, random_state=123, 144 | l2_reg_w=0, l2_reg_V=0, l2_reg=None, step_size=0.1): 145 | super(FMClassification, self).\ 146 | __init__(n_iter=n_iter, init_stdev=init_stdev, rank=rank, 147 | random_state=random_state) 148 | if (l2_reg is not None): 149 | self.l2_reg_V = l2_reg 150 | self.l2_reg_w = l2_reg 151 | else: 152 | self.l2_reg_w = l2_reg_w 153 | self.l2_reg_V = l2_reg_V 154 | self.l2_reg = l2_reg 155 | self.step_size = step_size 156 | self.task = "classification" 157 | 158 | def fit(self, X, y): 159 | """ Fit model with specified loss. 160 | 161 | Parameters 162 | ---------- 163 | X : scipy.sparse.csc_matrix, (n_samples, n_features) 164 | 165 | y : float | ndarray, shape = (n_samples, ) 166 | 167 | the targets have to be encodes as {-1, 1}. 168 | """ 169 | y = _validate_class_labels(y) 170 | self.classes_ = np.unique(y) 171 | if len(self.classes_) != 2: 172 | raise ValueError("This solver only supports binary classification" 173 | " but the data contains" 174 | " class: %r" % self.classes_) 175 | 176 | # fastFM-core expects labels to be in {-1,1} 177 | y_train = y.copy() 178 | i_class1 = (y_train == self.classes_[0]) 179 | y_train[i_class1] = -1 180 | y_train[~i_class1] = 1 181 | 182 | check_consistent_length(X, y) 183 | y = y.astype(np.float64) 184 | 185 | # The sgd solver expects a transposed design matrix in column major 186 | # order (csc_matrix). 
187 | X = X.T # creates a copy 188 | X = check_array(X, accept_sparse="csc", dtype=np.float64) 189 | 190 | self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y) 191 | return self 192 | -------------------------------------------------------------------------------- /fastFM/tests/test_als.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | from sklearn import metrics 7 | from fastFM import als 8 | from fastFM.datasets import make_user_item_regression 9 | from sklearn.metrics import mean_squared_error 10 | from numpy.testing import assert_almost_equal 11 | 12 | 13 | def get_test_problem(task='regression'): 14 | X = sp.csc_matrix(np.array([[6, 1], 15 | [2, 3], 16 | [3, 0], 17 | [6, 1], 18 | [4, 5]]), dtype=np.float64) 19 | y = np.array([298, 266, 29, 298, 848], dtype=np.float64) 20 | V = np.array([[6, 0], 21 | [5, 8]], dtype=np.float64) 22 | w = np.array([9, 2], dtype=np.float64) 23 | w0 = 2 24 | if task == 'classification': 25 | y_labels = np.ones_like(y) 26 | y_labels[y < np.median(y)] = -1 27 | y = y_labels 28 | return w0, w, V, y, X 29 | 30 | 31 | def get_small_data(): 32 | X = sp.csc_matrix(np.array([[1, 2], 33 | [3, 4], 34 | [5, 6]]), dtype=np.float64) 35 | y = np.array([600, 2800, 10000], dtype=np.float64) 36 | return X, y 37 | 38 | 39 | def _test_fm_regression_only_w0(): 40 | X, y = get_small_data() 41 | 42 | fm = als.FMRegression(n_iter=0, l2_reg_w=0, l2_reg_V=0, rank=0) 43 | fm.ignore_w = True 44 | fm.w0_ = 2 45 | fm.fit(X, y, warm_start=True) 46 | assert_almost_equal(fm.w0_, 2, 6) 47 | 48 | fm = als.FMRegression(n_iter=1, l2_reg_w=0, l2_reg_V=0, rank=0) 49 | fm.ignore_w = True 50 | fm.w0_ = 2 51 | fm.fit(X, y, warm_start=True) 52 | assert_almost_equal(fm.w0_, 4466.6666666666661, 6) 53 | 54 | 55 | def _test_raise_when_input_is_dense(): 56 | fm = als.FMRegression(n_iter=0, l2_reg_w=0, l2_reg_V=0, rank=0) 57 
| X = np.arange(3, 4, dtype=np.float64) 58 | y = np.arange(3, dtype=np.float64) 59 | fm.fit(X, y, warm_start=True) 60 | 61 | 62 | def test_fm_linear_regression(): 63 | X, y = get_small_data() 64 | 65 | fm = als.FMRegression(n_iter=1, l2_reg_w=1, l2_reg_V=1, rank=0) 66 | fm.fit(X, y) 67 | 68 | 69 | def test_fm_regression(): 70 | w0, w, V, y, X = get_test_problem() 71 | 72 | fm = als.FMRegression(n_iter=1000, l2_reg_w=0, l2_reg_V=0, rank=2) 73 | fm.fit(X, y) 74 | y_pred = fm.predict(X) 75 | assert_almost_equal(y_pred, y, 3) 76 | # check different size 77 | fm = als.FMRegression(n_iter=1000, l2_reg_w=0, l2_reg_V=0, rank=5) 78 | X_big = sp.hstack([X, X]) 79 | fm.fit(X_big, y) 80 | y_pred = fm.predict(X_big[:2, ]) 81 | 82 | 83 | def test_fm_classification(): 84 | w0, w, V, y, X = get_test_problem(task='classification') 85 | 86 | fm = als.FMClassification(n_iter=1000, 87 | init_stdev=0.1, l2_reg_w=0, l2_reg_V=0, rank=2) 88 | fm.fit(X, y) 89 | y_pred = fm.predict(X) 90 | print(y_pred) 91 | assert metrics.accuracy_score(y, y_pred) > 0.95 92 | # check different size 93 | fm.fit(X[:2, ], y[:2]) 94 | 95 | 96 | def test_als_warm_start(): 97 | X, y, coef = make_user_item_regression(label_stdev=0) 98 | from sklearn.model_selection import train_test_split 99 | X_train, X_test, y_train, y_test = train_test_split( 100 | X, y, test_size=0.33, random_state=42) 101 | X_train = sp.csc_matrix(X_train) 102 | X_test = sp.csc_matrix(X_test) 103 | 104 | fm = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2) 105 | fm.fit(X_train, y_train) 106 | y_pred = fm.predict(X_test) 107 | error_10_iter = mean_squared_error(y_pred, y_test) 108 | 109 | fm = als.FMRegression(n_iter=5, l2_reg_w=0, l2_reg_V=0, rank=2) 110 | fm.fit(X_train, y_train) 111 | print(fm.iter_count) 112 | y_pred = fm.predict(X_test) 113 | error_5_iter = mean_squared_error(y_pred, y_test) 114 | 115 | fm.fit(sp.csc_matrix(X_train), y_train, n_more_iter=5) 116 | print(fm.iter_count) 117 | y_pred = fm.predict(X_test) 118 | 
# initialize coefficients
= fm.predict(X) 180 | score = metrics.accuracy_score(y, y_pred) 181 | 182 | # 5 iter + 5 more iter 183 | fm = als.FMClassification(n_iter=5, 184 | init_stdev=0.1, l2_reg_w=0, l2_reg_V=0, rank=2) 185 | fm.fit(X, y) 186 | fm.fit(X, y, n_more_iter=5) 187 | y_pred = fm.predict(X) 188 | score_warm_start = metrics.accuracy_score(y, y_pred) 189 | 190 | # 0 iter + 10 more iter 191 | fm = als.FMClassification(n_iter=0, 192 | init_stdev=0.1, l2_reg_w=0, l2_reg_V=0, rank=2) 193 | fm.fit(X, y) 194 | fm.fit(X, y, n_more_iter=10) 195 | y_pred = fm.predict(X) 196 | score_warm_start_2 = metrics.accuracy_score(y, y_pred) 197 | 198 | assert_almost_equal(score, score_warm_start) 199 | assert_almost_equal(score, score_warm_start_2) 200 | 201 | 202 | def test_clone(): 203 | from sklearn.base import clone 204 | 205 | a = als.FMRegression() 206 | b = clone(a) 207 | assert a.get_params() == b.get_params() 208 | 209 | a = als.FMClassification() 210 | b = clone(a) 211 | assert a.get_params() == b.get_params() 212 | 213 | 214 | if __name__ == '__main__': 215 | # test_fm_regression_only_w0() 216 | test_fm_linear_regression() 217 | -------------------------------------------------------------------------------- /fastFM/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | from sklearn import metrics 7 | from fastFM import als 8 | 9 | 10 | def get_test_problem(task='regression'): 11 | X = sp.csc_matrix(np.array([[6, 1], 12 | [2, 3], 13 | [3, 0], 14 | [6, 1], 15 | [4, 5]]), dtype=np.float64) 16 | y = np.array([298, 266, 29, 298, 848], dtype=np.float64) 17 | V = np.array([[6, 0], 18 | [5, 8]], dtype=np.float64) 19 | w = np.array([9, 2], dtype=np.float64) 20 | w0 = 2 21 | if task == 'classification': 22 | y_labels = np.ones_like(y) 23 | y_labels[y < np.median(y)] = -1 24 | y = y_labels 25 | return w0, w, V, y, X 26 | 27 | 28 | def 
test_fm_classification_predict_proba(): 29 | w0, w, V, y, X = get_test_problem(task='classification') 30 | 31 | fm = als.FMClassification(n_iter=1000, 32 | init_stdev=0.1, l2_reg_w=0, l2_reg_V=0, rank=2) 33 | fm.fit(X, y) 34 | y_pred = fm.predict(X) 35 | y_pred = fm.predict_proba(X) 36 | 37 | y[y == -1] = 0 38 | assert metrics.roc_auc_score(y, y_pred) > 0.95 39 | 40 | if __name__ == '__main__': 41 | test_fm_classification_predict_proba() 42 | -------------------------------------------------------------------------------- /fastFM/tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | from fastFM.datasets import make_user_item_regression 5 | from sklearn.metrics import mean_squared_error 6 | import scipy.sparse as sp 7 | 8 | 9 | def test_make_user_item_regression(): 10 | from fastFM.mcmc import FMRegression 11 | X, y, coef = make_user_item_regression(label_stdev=0) 12 | from sklearn.model_selection import train_test_split 13 | X_train, X_test, y_train, y_test = train_test_split( 14 | X, y, test_size=0.33, random_state=42) 15 | 16 | fm = FMRegression(rank=2) 17 | y_pred = fm.fit_predict(sp.csc_matrix(X_train), 18 | y_train, sp.csc_matrix(X_test)) 19 | 20 | # generate data with noisy lables 21 | X, y, coef = make_user_item_regression(label_stdev=2) 22 | from sklearn.model_selection import train_test_split 23 | X_train, X_test, y_train, y_test = train_test_split( 24 | X, y, test_size=0.33, random_state=42) 25 | 26 | fm = FMRegression(rank=2) 27 | y_pred_noise = fm.fit_predict(sp.csc_matrix(X_train), 28 | y_train, sp.csc_matrix(X_test)) 29 | assert mean_squared_error(y_pred_noise, y_test) > \ 30 | mean_squared_error(y_pred, y_test) 31 | -------------------------------------------------------------------------------- /fastFM/tests/test_ffm.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # 
License: BSD 3 clause 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | from numpy.testing import assert_almost_equal, assert_equal 7 | import ffm 8 | 9 | 10 | def get_test_problem(): 11 | X = sp.csc_matrix(np.array([[6, 1], 12 | [2, 3], 13 | [3, 0], 14 | [6, 1], 15 | [4, 5]]), dtype=np.float64) 16 | y = np.array([298, 266, 29, 298, 848], dtype=np.float64) 17 | V = np.array([[6, 0], 18 | [5, 8]], dtype=np.float64) 19 | w = np.array([9, 2], dtype=np.float64) 20 | w0 = 2 21 | return w0, w, V, y, X 22 | 23 | 24 | def test_ffm_predict(): 25 | w0, w, V, y, X = get_test_problem() 26 | y_pred = ffm.ffm_predict(w0, w, V, X) 27 | assert_equal(y_pred, y) 28 | 29 | if __name__ == '__main__': 30 | pass 31 | -------------------------------------------------------------------------------- /fastFM/tests/test_mcmc.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | from sklearn import metrics 7 | from fastFM import mcmc 8 | from fastFM.datasets import make_user_item_regression 9 | from sklearn.metrics import mean_squared_error 10 | from numpy.testing import assert_array_equal, assert_almost_equal 11 | 12 | def get_test_problem(task='regression'): 13 | X = sp.csc_matrix(np.array([[6, 1], 14 | [2, 3], 15 | [3, 0], 16 | [6, 1], 17 | [4, 5]]), dtype=np.float64) 18 | y = np.array([298, 266, 29, 298, 848], dtype=np.float64) 19 | V = np.array([[6, 0], 20 | [5, 8]], dtype=np.float64) 21 | w = np.array([9, 2], dtype=np.float64) 22 | w0 = 2 23 | if task == 'classification': 24 | y_labels = np.ones_like(y) 25 | y_labels[y < np.median(y)] = -1 26 | y = y_labels 27 | return w0, w, V, y, X 28 | 29 | 30 | def test_fm_regression(): 31 | w0, w, V, y, X = get_test_problem() 32 | 33 | fm = mcmc.FMRegression(n_iter=1000, rank=2, init_stdev=0.1) 34 | 35 | y_pred = fm.fit_predict(X, y, X) 36 | assert metrics.r2_score(y_pred, y) > 0.99 37 | 38 
# transform to labels: an easier problem than the default one
mean_squared_error(y_pred, y_test) 94 | 95 | fm = mcmc.FMRegression(n_iter=50, rank=2) 96 | y_pred = fm.fit_predict(X_train, y_train, X_test) 97 | error_5_iter = mean_squared_error(y_pred, y_test) 98 | 99 | y_pred = fm.fit_predict(X_train, y_train, X_test, n_more_iter=50) 100 | error_5_iter_plus_5 = mean_squared_error(y_pred, y_test) 101 | print(error_5_iter, error_5_iter_plus_5, error_10_iter) 102 | print(fm.hyper_param_) 103 | assert_almost_equal(error_10_iter, error_5_iter_plus_5, decimal=2) 104 | 105 | 106 | def test_find_init_stdev(): 107 | X, y, coef = make_user_item_regression(label_stdev=.5) 108 | from sklearn.model_selection import train_test_split 109 | X_train, X_test, y_train, y_test = train_test_split( 110 | X, y, test_size=0.33, random_state=44) 111 | X_train = sp.csc_matrix(X_train) 112 | X_test = sp.csc_matrix(X_test) 113 | 114 | fm = mcmc.FMRegression(n_iter=10, rank=5) 115 | best_init_stdev, mse = mcmc.find_init_stdev(fm, X_train, y_train, 116 | stdev_range=[0.2, 0.5, 1.0]) 117 | best_init_stdev_bad, _ = mcmc.find_init_stdev(fm, X_train, y_train, 118 | stdev_range=[5.]) 119 | print('--' * 30) 120 | best_init_stdev_vali, mse_vali = mcmc.find_init_stdev(fm, 121 | X_train, y_train, 122 | X_test, y_test, 123 | stdev_range=[ 124 | 0.2, 0.5, 1.0]) 125 | assert best_init_stdev < best_init_stdev_bad 126 | assert best_init_stdev_vali == best_init_stdev 127 | assert mse_vali > mse 128 | 129 | 130 | def test_clone(): 131 | from sklearn.base import clone 132 | 133 | a = mcmc.FMRegression() 134 | b = clone(a) 135 | assert a.get_params() == b.get_params() 136 | 137 | a = mcmc.FMClassification() 138 | b = clone(a) 139 | assert a.get_params() == b.get_params() 140 | 141 | 142 | if __name__ == "__main__": 143 | test_linear_fm_classification() 144 | -------------------------------------------------------------------------------- /fastFM/tests/test_ranking.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel 
Bayer 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | from fastFM import bpr 7 | from fastFM import utils 8 | 9 | 10 | def get_test_problem(task='regression'): 11 | X = sp.csc_matrix(np.array([[6, 1], 12 | [2, 3], 13 | [3, 0], 14 | [6, 1], 15 | [4, 5]]), dtype=np.float64) 16 | y = np.array([298, 266, 29, 298, 848], dtype=np.float64) 17 | V = np.array([[6, 0], 18 | [5, 8]], dtype=np.float64) 19 | w = np.array([9, 2], dtype=np.float64) 20 | w0 = 2 21 | if task == 'classification': 22 | y_labels = np.ones_like(y) 23 | y_labels[y < np.median(y)] = -1 24 | y = y_labels 25 | return w0, w, V, y, X 26 | 27 | 28 | def test_fm_sgr_ranking(): 29 | w0, w, V, y, X = get_test_problem() 30 | X_test = X.copy() 31 | X_train = X.copy() 32 | 33 | import itertools 34 | pairs = [p for p in itertools.combinations(range(len(y)), 2)] 35 | compares = np.zeros((len(pairs), 2), dtype=np.float64) 36 | 37 | for i, p in enumerate(pairs): 38 | if y[p[0]] > y[p[1]]: 39 | compares[i, 0] = p[0] 40 | compares[i, 1] = p[1] 41 | else: 42 | compares[i, 0] = p[1] 43 | compares[i, 1] = p[0] 44 | 45 | print(compares) 46 | fm = bpr.FMRecommender(n_iter=2000, 47 | init_stdev=0.01, l2_reg_w=.5, l2_reg_V=.5, rank=2, 48 | step_size=.002, random_state=11) 49 | fm.fit(X_train, compares) 50 | y_pred = fm.predict(X_test) 51 | y_pred = np.argsort(y_pred) 52 | print(y) 53 | print(y_pred) 54 | print(np.argsort(y)) 55 | assert utils.kendall_tau(np.argsort(y), y_pred) == 1 56 | -------------------------------------------------------------------------------- /fastFM/tests/test_sgd.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | import scipy.sparse as sp 6 | from sklearn import metrics 7 | from sklearn.datasets import make_regression 8 | from numpy.testing import assert_almost_equal 9 | from fastFM import sgd 10 | from fastFM import als 11 | 12 | 13 | def 
get_test_problem(task='regression'): 14 | X = sp.csc_matrix(np.array([[6, 1], 15 | [2, 3], 16 | [3, 0], 17 | [6, 1], 18 | [4, 5]]), dtype=np.float64) 19 | y = np.array([298, 266, 29, 298, 848], dtype=np.float64) 20 | V = np.array([[6, 0], 21 | [5, 8]], dtype=np.float64) 22 | w = np.array([9, 2], dtype=np.float64) 23 | w0 = 2 24 | if task == 'classification': 25 | y_labels = np.ones_like(y) 26 | y_labels[y < np.median(y)] = -1 27 | y = y_labels 28 | return w0, w, V, y, X 29 | 30 | 31 | def test_sgd_regression_small_example(): 32 | w0, w, V, y, X = get_test_problem() 33 | X_test = X.copy() 34 | X_train = sp.csc_matrix(X) 35 | 36 | fm = sgd.FMRegression(n_iter=10000, 37 | init_stdev=0.01, l2_reg_w=0.5, l2_reg_V=50.5, rank=2, 38 | step_size=0.0001) 39 | 40 | fm.fit(X_train, y) 41 | y_pred = fm.predict(X_test) 42 | assert metrics.r2_score(y_pred, y) > 0.99 43 | 44 | 45 | def test_first_order_sgd_vs_als_regression(): 46 | X, y = make_regression(n_samples=100, n_features=50, random_state=123) 47 | X = sp.csc_matrix(X) 48 | 49 | fm_sgd = sgd.FMRegression(n_iter=900, init_stdev=0.01, l2_reg_w=0.0, 50 | l2_reg_V=50.5, rank=0, step_size=0.01) 51 | fm_als = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=0) 52 | 53 | y_pred_sgd = fm_sgd.fit(X, y).predict(X) 54 | y_pred_als = fm_als.fit(X, y).predict(X) 55 | 56 | score_als = metrics.r2_score(y_pred_als, y) 57 | score_sgd = metrics.r2_score(y_pred_sgd, y) 58 | 59 | assert_almost_equal(score_als, score_sgd, decimal=2) 60 | 61 | 62 | def test_second_order_sgd_vs_als_regression(): 63 | X, y = make_regression(n_samples=100, n_features=50, random_state=123) 64 | X = sp.csc_matrix(X) 65 | 66 | fm_sgd = sgd.FMRegression(n_iter=50000, init_stdev=0.00, l2_reg_w=0.0, 67 | l2_reg_V=50.5, rank=2, step_size=0.0002) 68 | fm_als = als.FMRegression(n_iter=10, l2_reg_w=0, l2_reg_V=0, rank=2) 69 | 70 | y_pred_als = fm_als.fit(X, y).predict(X) 71 | y_pred_sgd = fm_sgd.fit(X, y).predict(X) 72 | 73 | score_als = metrics.r2_score(y_pred_als, 
y) 74 | score_sgd = metrics.r2_score(y_pred_sgd, y) 75 | 76 | assert_almost_equal(score_sgd, score_als, decimal=2) 77 | 78 | 79 | def test_sgd_classification_small_example(): 80 | w0, w, V, y, X = get_test_problem(task='classification') 81 | X_test = X.copy() 82 | X_train = sp.csc_matrix(X) 83 | 84 | fm = sgd.FMClassification(n_iter=1000, 85 | init_stdev=0.1, l2_reg_w=0, l2_reg_V=0, rank=2, 86 | step_size=0.1) 87 | fm.fit(X_train, y) 88 | y_pred = fm.predict(X_test) 89 | print(y_pred) 90 | assert metrics.accuracy_score(y, y_pred) > 0.95 91 | 92 | 93 | def test_clone(): 94 | from sklearn.base import clone 95 | 96 | a = sgd.FMRegression() 97 | b = clone(a) 98 | assert a.get_params() == b.get_params() 99 | 100 | a = sgd.FMClassification() 101 | b = clone(a) 102 | assert a.get_params() == b.get_params() 103 | 104 | 105 | if __name__ == '__main__': 106 | test_sgd_regression_small_example() 107 | test_first_order_sgd_vs_als_regression() 108 | test_second_order_sgd_vs_als_regression() 109 | -------------------------------------------------------------------------------- /fastFM/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | from fastFM.utils import kendall_tau 6 | 7 | 8 | def test_ffm_vector_kendall_tau(): 9 | order = np.array([1, 2, 3, 4, 5]) 10 | order_wrong = np.array([5, 3, 4, 2, 1]) 11 | order_inv = np.array([5, 4, 3, 2, 1]) 12 | 13 | assert kendall_tau(order, order) == 1 14 | assert kendall_tau(order, order_inv) == -1 15 | assert kendall_tau(order, order_wrong) != -1 16 | 17 | 18 | if __name__ == '__main__': 19 | test_ffm_vector_kendall_tau() 20 | -------------------------------------------------------------------------------- /fastFM/utils.py: -------------------------------------------------------------------------------- 1 | # Author: Immanuel Bayer 2 | # License: BSD 3 clause 3 | 4 | 5 | def kendall_tau(a, b): 6 | 
n_samples = a.shape[0] 7 | assert a.shape == b.shape 8 | n_concordant = 0 9 | n_disconcordant = 0 10 | 11 | for i in range(n_samples): 12 | for j in range(i+1, n_samples): 13 | if a[i] > a[j] and b[i] > b[j]: 14 | n_concordant = n_concordant + 1 15 | if a[i] < a[j] and b[i] < b[j]: 16 | n_concordant = n_concordant + 1 17 | 18 | if a[i] > a[j] and b[i] < b[j]: 19 | n_disconcordant = n_disconcordant + 1 20 | if a[i] < a[j] and b[i] > b[j]: 21 | n_disconcordant = n_disconcordant + 1 22 | return (n_concordant - n_disconcordant) / (.5 * n_samples * 23 | (n_samples - 1)) 24 | -------------------------------------------------------------------------------- /fastFM/validation.py: -------------------------------------------------------------------------------- 1 | # Static versions of non-core sklearn.utils functions. 2 | # Placed here since they are subject to change. 3 | 4 | """Utilities for input validation""" 5 | 6 | # Authors: Olivier Grisel 7 | # Gael Varoquaux 8 | # Andreas Mueller 9 | # Lars Buitinck 10 | # Alexandre Gramfort 11 | # Nicolas Tresegnie 12 | # License: BSD 3 clause 13 | 14 | import numbers 15 | import warnings 16 | 17 | import numpy as np 18 | import scipy.sparse as sparse 19 | from functools import wraps 20 | 21 | 22 | def _check_matrix_is_sparse(func): 23 | """ 24 | Check that input is a scipy sparse matrix and raise warning otherwise. 25 | """ 26 | @wraps(func) 27 | def wrapper(*args, **kwargs): 28 | if 'accept_sparse' in kwargs and not sparse.isspmatrix(args[0]): 29 | raise TypeError('A dense matrix was passed in, but sparse' 30 | 'data is required.') 31 | result = func(*args, **kwargs) 32 | return result 33 | return wrapper 34 | 35 | 36 | def _ensure_sparse_format(spmatrix, accept_sparse, dtype, order, copy, 37 | force_all_finite): 38 | """Convert a sparse matrix to a given format. 39 | Checks the sparse format of spmatrix and converts if necessary. 
dtype : string, type or None (default=None)
By default, the input is converted to an at least 2d numpy array.
127 | copy : boolean (default=False) 128 | Whether a forced copy will be triggered. If copy=False, a copy might 129 | be triggered by a conversion. 130 | force_all_finite : boolean (default=True) 131 | Whether to raise an error on np.inf and np.nan in X. 132 | ensure_2d : boolean (default=True) 133 | Whether to make X at least 2d. 134 | allow_nd : boolean (default=False) 135 | Whether to allow X.ndim > 2. 136 | ensure_min_samples : int (default=1) 137 | Make sure that the array has a minimum number of samples in its first 138 | axis (rows for a 2D array). Setting to 0 disables this check. 139 | ensure_min_features : int (default=1) 140 | Make sure that the 2D array has some minimum number of features 141 | (columns). The default value of 1 rejects empty datasets. 142 | This check is only enforced when the input data has effectively 2 143 | dimensions or is originally 1D and ``ensure_2d`` is True. Setting to 0 144 | disables this check. 145 | Returns 146 | ------- 147 | X_converted : object 148 | The converted and validated X. 149 | """ 150 | if isinstance(accept_sparse, str): 151 | accept_sparse = [accept_sparse] 152 | 153 | # store whether originally we wanted numeric dtype 154 | dtype_numeric = dtype == "numeric" 155 | 156 | if sparse.issparse(array): 157 | if dtype_numeric: 158 | dtype = None 159 | array = _ensure_sparse_format(array, accept_sparse, dtype, order, 160 | copy, force_all_finite) 161 | else: 162 | if ensure_2d: 163 | array = np.atleast_2d(array) 164 | if dtype_numeric: 165 | if (hasattr(array, "dtype") and 166 | getattr(array.dtype, "kind", None) == "O"): 167 | # if input is object, convert to float. 
168 | dtype = np.float64 169 | else: 170 | dtype = None 171 | array = np.array(array, dtype=dtype, order=order, copy=copy) 172 | # make sure we actually converted to numeric: 173 | if dtype_numeric and array.dtype.kind == "O": 174 | array = array.astype(np.float64) 175 | if not allow_nd and array.ndim >= 3: 176 | raise ValueError("Found array with dim %d. Expected <= 2" % 177 | array.ndim) 178 | if force_all_finite: 179 | assert_all_finite(array) 180 | 181 | shape_repr = _shape_repr(array.shape) 182 | if ensure_min_samples > 0: 183 | n_samples = _num_samples(array) 184 | if n_samples < ensure_min_samples: 185 | raise ValueError("Found array with %d sample(s) (shape=%s) while a" 186 | " minimum of %d is required." 187 | % (n_samples, shape_repr, ensure_min_samples)) 188 | 189 | if ensure_min_features > 0 and array.ndim == 2: 190 | n_features = array.shape[1] 191 | if n_features < ensure_min_features: 192 | raise ValueError("Found array with %d feature(s) (shape=%s) while" 193 | " a minimum of %d is required." 194 | % (n_features, shape_repr, ensure_min_features)) 195 | return array 196 | 197 | 198 | def check_consistent_length(x1, x2): 199 | return x1.shape[0] == x2.shape[0] 200 | 201 | 202 | def check_random_state(seed): 203 | """Turn seed into a np.random.RandomState instance 204 | If seed is None, return the RandomState singleton used by np.random. 205 | If seed is an int, return a new RandomState instance seeded with seed. 206 | If seed is already a RandomState instance, return it. 207 | Otherwise raise ValueError. 
208 | """ 209 | if seed is None or seed is np.random: 210 | return np.random.mtrand._rand 211 | if isinstance(seed, (numbers.Integral, np.integer)): 212 | return np.random.RandomState(seed) 213 | if isinstance(seed, np.random.RandomState): 214 | return seed 215 | raise ValueError('%r cannot be used to seed a numpy.random.RandomState' 216 | ' instance' % seed) 217 | 218 | 219 | def _shape_repr(shape): 220 | """Return a platform independent reprensentation of an array shape 221 | Under Python 2, the `long` type introduces an 'L' suffix when using the 222 | default %r format for tuples of integers (typically used to store the shape 223 | of an array). 224 | Under Windows 64 bit (and Python 2), the `long` type is used by default 225 | in numpy shapes even when the integer dimensions are well below 32 bit. 226 | The platform specific type causes string messages or doctests to change 227 | from one platform to another which is not desirable. 228 | Under Python 3, there is no more `long` type so the `L` suffix is never 229 | introduced in string representation. 230 | >>> _shape_repr((1, 2)) 231 | '(1, 2)' 232 | >>> one = 2 ** 64 / 2 ** 64 # force an upcast to `long` under Python 2 233 | >>> _shape_repr((one, 2 * one)) 234 | '(1, 2)' 235 | >>> _shape_repr((1,)) 236 | '(1,)' 237 | >>> _shape_repr(()) 238 | '()' 239 | """ 240 | if len(shape) == 0: 241 | return "()" 242 | joined = ", ".join("%d" % e for e in shape) 243 | if len(shape) == 1: 244 | # special notation for singleton tuples 245 | joined += ',' 246 | return "(%s)" % joined 247 | 248 | 249 | def _num_samples(x): 250 | """Return number of samples in array-like x.""" 251 | if hasattr(x, 'fit'): 252 | # Don't get num_samples from an ensembles length! 
253 | raise TypeError('Expected sequence or array-like, got ' 254 | 'estimator %s' % x) 255 | if not hasattr(x, '__len__') and not hasattr(x, 'shape'): 256 | if hasattr(x, '__array__'): 257 | x = np.asarray(x) 258 | else: 259 | raise TypeError("Expected sequence or array-like, got %s" % 260 | type(x)) 261 | if hasattr(x, 'shape'): 262 | if len(x.shape) == 0: 263 | raise TypeError("Singleton array %r cannot be considered" 264 | " a valid collection." % x) 265 | return x.shape[0] 266 | else: 267 | return len(x) 268 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython>=0.22 2 | numpy>=1.9.1 3 | scipy>=0.16.0 4 | scikit-learn>=0.18.0 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | # This flag says that the code is written to work on both Python 2 and Python 3 | # 3. If at all possible, it is good practice to do this. If you cannot, you 4 | # will need to generate wheels for each Python version that you support. 
5 | universal=1 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | import numpy 5 | 6 | ext_modules = [ 7 | Extension('ffm', ['fastFM/ffm.pyx'], 8 | libraries=['m', 'fastfm'], 9 | library_dirs=['fastFM/', 'fastFM-core/bin/'], 10 | include_dirs=['fastFM/', 'fastFM-core/include/', 11 | 'fastFM-core/externals/CXSparse/Include/', 12 | numpy.get_include()])] 13 | 14 | setup( 15 | name='fastFM', 16 | cmdclass={'build_ext': build_ext}, 17 | ext_modules=ext_modules, 18 | 19 | packages=['fastFM'], 20 | 21 | package_data={'fastFM': ['fastFM/*.pxd']}, 22 | 23 | version='0.2.11', 24 | url='http://ibayer.github.io/fastFM', 25 | author='Immanuel Bayer', 26 | author_email='immanuel.bayer@uni-konstanz.de', 27 | 28 | # Choose your license 29 | license='BSD', 30 | 31 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 32 | classifiers=[ 33 | # How mature is this project? Common values are 34 | # 3 - Alpha 35 | # 4 - Beta 36 | # 5 - Production/Stable 37 | 'Development Status :: 4 - Beta', 38 | 39 | # Indicate who your project is intended for 40 | 'Intended Audience :: Developers', 41 | 'Intended Audience :: Science/Research', 42 | 'Topic :: Scientific/Engineering', 43 | 44 | 'License :: OSI Approved :: BSD License', 45 | 'Operating System :: Unix', 46 | 47 | # Specify the Python versions you support here. In particular, ensure 48 | # that you indicate whether you support Python 2, Python 3 or both. 
49 | 'Programming Language :: Python :: 2', 50 | 'Programming Language :: Python :: 2.6', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Programming Language :: Python :: 3', 53 | 'Programming Language :: Python :: 3.2', 54 | 'Programming Language :: Python :: 3.3', 55 | 'Programming Language :: Python :: 3.4', 56 | 'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: 3.6', 58 | ], 59 | 60 | # List run-time dependencies here. These will be installed by pip when 61 | # your project is installed. For an analysis of "install_requires" vs pip's 62 | # requirements files see: 63 | # https://packaging.python.org/en/latest/requirements.html 64 | install_requires=['numpy', 'scikit-learn', 'scipy', 'cython'] 65 | ) 66 | --------------------------------------------------------------------------------