is one of"
26 | @echo " html to make standalone HTML files"
27 | @echo " dirhtml to make HTML files named index.html in directories"
28 | @echo " singlehtml to make a single large HTML file"
29 | @echo " pickle to make pickle files"
30 | @echo " json to make JSON files"
31 | @echo " htmlhelp to make HTML files and an HTML help project"
32 | @echo " qthelp to make HTML files and a qthelp project"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 |
49 | clean:
50 | -rm -rf $(BUILDDIR)/*
51 | -rm -rf auto_examples/
52 | -rm -rf generated/*
53 | -rm -rf modules/generated/*
54 |
55 | html:
56 | # These two lines make the build a bit more lengthy, and the
57 | # embedding of images more robust
58 | rm -rf $(BUILDDIR)/html/_images
59 | #rm -rf _build/doctrees/
60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
61 | @echo
62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
63 |
64 | dirhtml:
65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
66 | @echo
67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
68 |
69 | singlehtml:
70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
71 | @echo
72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
73 |
74 | pickle:
75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
76 | @echo
77 | @echo "Build finished; now you can process the pickle files."
78 |
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | htmlhelp:
85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
86 | @echo
87 | @echo "Build finished; now you can run HTML Help Workshop with the" \
88 | ".hhp project file in $(BUILDDIR)/htmlhelp."
89 |
90 | qthelp:
91 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
92 | @echo
93 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
94 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
95 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/polylearn.qhcp"
96 | @echo "To view the help file:"
97 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/polylearn.qhc"
98 |
99 | devhelp:
100 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
101 | @echo
102 | @echo "Build finished."
103 | @echo "To view the help file:"
104 | @echo "# mkdir -p $$HOME/.local/share/devhelp/polylearn"
105 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/polylearn"
106 | @echo "# devhelp"
107 |
108 | epub:
109 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
110 | @echo
111 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
112 |
113 | latex:
114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | @echo
116 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
117 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
118 | "(use \`make latexpdf' here to do that automatically)."
119 |
120 | latexpdf:
121 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
122 | @echo "Running LaTeX files through pdflatex..."
123 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
124 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
125 |
126 | latexpdfja:
127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
128 | @echo "Running LaTeX files through platex and dvipdfmx..."
129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
130 | @echo "platex/dvipdfmx finished; the PDF files are in $(BUILDDIR)/latex."
131 |
132 | text:
133 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
134 | @echo
135 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
136 |
137 | man:
138 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
139 | @echo
140 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
141 |
142 | texinfo:
143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | @echo
145 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
146 | @echo "Run \`make' in that directory to run these through makeinfo" \
147 | "(use \`make info' here to do that automatically)."
148 |
149 | info:
150 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
151 | @echo "Running Texinfo files through makeinfo..."
152 | $(MAKE) -C $(BUILDDIR)/texinfo info
153 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
154 |
155 | gettext:
156 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
157 | @echo
158 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
159 |
160 | changes:
161 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
162 | @echo
163 | @echo "The overview file is in $(BUILDDIR)/changes."
164 |
165 | linkcheck:
166 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
167 | @echo
168 | @echo "Link check complete; look for any errors in the above output " \
169 | "or in $(BUILDDIR)/linkcheck/output.txt."
170 |
171 | doctest:
172 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
173 | @echo "Testing of doctests in the sources finished; look at the " \
174 | "results in $(BUILDDIR)/doctest/output.txt."
175 |
176 | xml:
177 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
178 | @echo
179 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
180 |
181 | pseudoxml:
182 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
183 | @echo
184 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
185 |
--------------------------------------------------------------------------------
/doc/_templates/class.rst:
--------------------------------------------------------------------------------
1 | {{ fullname }}
2 | {{ underline }}
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | {% block methods %}
9 | .. automethod:: __init__
10 | {% endblock %}
11 |
12 |
13 |
--------------------------------------------------------------------------------
/doc/_templates/function.rst:
--------------------------------------------------------------------------------
1 | {{ fullname }}
2 | {{ underline }}
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 |
9 |
--------------------------------------------------------------------------------
/doc/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {# Import the theme's layout. #}
2 | {% extends "!layout.html" %}
3 |
4 | {# remove site and page menus #}
5 | {%- block sidebartoc %}
6 | {% endblock %}
7 | {%- block sidebarrel %}
8 | {% endblock %}
9 |
10 | {%- block navbartoc %}
11 | {% endblock %}
12 |
13 | {# Include our new CSS file into existing ones. #}
14 | {% set css_files = css_files + ['_static/lightning.css']%}
15 | {% set css_files = css_files + ['_static/bootstrap.min.css']%}
16 |
17 | {%- block content %}
18 | {{ navBar() }}
19 |
20 | {% block body %}{% endblock %}
21 |
22 |
23 | {%- endblock %}
24 |
25 |
--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # polylearn documentation build configuration file, created by
4 | # sphinx-quickstart on Mon Jan 18 14:44:12 2016.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | import sys
16 | import os
17 |
18 | import sphinx_bootstrap_theme
19 |
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | sys.path.insert(0, os.path.abspath('sphinxext'))
24 |
25 |
26 | # -- General configuration ---------------------------------------------------
27 |
28 | # Try to override the matplotlib configuration as early as possible
29 | try:
30 | import gen_rst
31 | except:
32 | pass
33 |
34 |
35 | # If your documentation needs a minimal Sphinx version, state it here.
36 | #needs_sphinx = '1.0'
37 |
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 | 'gen_rst',
43 | 'sphinx.ext.autodoc',
44 | 'sphinx.ext.autosummary',
45 | 'sphinx.ext.doctest',
46 | 'sphinx.ext.intersphinx',
47 | 'sphinx.ext.todo',
48 | 'numpy_ext.numpydoc',
49 | 'sphinx.ext.pngmath',
50 | 'sphinx.ext.ifconfig',
51 | 'sphinx.ext.viewcode',
52 | # 'sphinx_gallery.gen_gallery'
53 |
54 | ]
55 |
56 | sphinx_gallery_conf = {
57 | # path to your examples scripts
58 | 'examples_dirs': '../examples',
59 | # path where to save gallery generated examples
60 | 'gallery_dirs': 'auto_examples'}
61 |
62 | autosummary_generate = True
63 |
64 | autodoc_default_flags = ['members', 'inherited-members']
65 |
66 | # Add any paths that contain templates here, relative to this directory.
67 | templates_path = ['_templates']
68 |
69 | # The suffix of source filenames.
70 | source_suffix = '.rst'
71 |
72 | # The encoding of source files.
73 | #source_encoding = 'utf-8-sig'
74 |
75 | # Generate the plots for the gallery
76 | plot_gallery = True
77 |
78 | # The master toctree document.
79 | master_doc = 'index'
80 |
81 | # General information about the project.
82 | project = u'polylearn'
83 | copyright = u'2016, Vlad Niculae'
84 |
85 | # The version info for the project you're documenting, acts as replacement for
86 | # |version| and |release|, also used in various other places throughout the
87 | # built documents.
88 | #
89 | # The short X.Y version.
90 | version = '0.1'
91 | # The full version, including alpha/beta/rc tags.
92 | release = '0.1.0'
93 |
94 | # The language for content autogenerated by Sphinx. Refer to documentation
95 | # for a list of supported languages.
96 | #language = None
97 |
98 | # There are two options for replacing |today|: either, you set today to some
99 | # non-false value, then it is used:
100 | #today = ''
101 | # Else, today_fmt is used as the format for a strftime call.
102 | #today_fmt = '%B %d, %Y'
103 |
104 | # List of patterns, relative to source directory, that match files and
105 | # directories to ignore when looking for source files.
106 | exclude_patterns = ['_build']
107 |
108 | # The reST default role (used for this markup: `text`) to use for all
109 | # documents.
110 | #default_role = None
111 |
112 | # If true, '()' will be appended to :func: etc. cross-reference text.
113 | #add_function_parentheses = True
114 |
115 | # If true, the current module name will be prepended to all description
116 | # unit titles (such as .. function::).
117 | #add_module_names = True
118 |
119 | # If true, sectionauthor and moduleauthor directives will be shown in the
120 | # output. They are ignored by default.
121 | #show_authors = False
122 |
123 | # The name of the Pygments (syntax highlighting) style to use.
124 | pygments_style = 'sphinx'
125 |
126 | # A list of ignored prefixes for module index sorting.
127 | #modindex_common_prefix = []
128 |
129 | # If true, keep warnings as "system message" paragraphs in the built documents.
130 | #keep_warnings = False
131 |
132 |
133 | # -- Options for HTML output ----------------------------------------------
134 |
135 | # The theme to use for HTML and HTML Help pages. See the documentation for
136 | # a list of builtin themes.
137 | html_theme = 'bootstrap'
138 |
139 | # Theme options are theme-specific and customize the look and feel of a theme
140 | # further. For a list of options available for each theme, see the
141 | # documentation.
142 | html_theme_options = {
143 | 'navbar_links': [
144 | # ('Introduction', 'intro'),
145 | ('References', 'references'),
146 | ('Examples', 'auto_examples/index'),
147 | ],
148 | 'globaltoc_includehidden': "true",
149 |
150 | # Render the next and previous page links in navbar. (Default: true)
151 | 'navbar_sidebarrel': False,
152 |
153 | # Render the current page's TOC in the navbar. (Default: true)
154 | 'navbar_pagenav': False,
155 |
156 | }
157 |
158 | # Add any paths that contain custom themes here, relative to this directory.
159 |
160 | html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
161 |
162 | # The name for this set of Sphinx documents. If None, it defaults to
163 | # "<project> v<release> documentation".
164 | #html_title = None
165 |
166 | # A shorter title for the navigation bar. Default is the same as html_title.
167 | #html_short_title = None
168 |
169 | # The name of an image file (relative to this directory) to place at the top
170 | # of the sidebar.
171 | #html_logo = None
172 |
173 | # The name of an image file (within the static path) to use as favicon of the
174 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
175 | # pixels large.
176 | #html_favicon = None
177 |
178 | # Add any paths that contain custom static files (such as style sheets) here,
179 | # relative to this directory. They are copied after the builtin static files,
180 | # so a file named "default.css" will overwrite the builtin "default.css".
181 | html_static_path = ['_static']
182 |
183 | # Add any extra paths that contain custom files (such as robots.txt or
184 | # .htaccess) here, relative to this directory. These files are copied
185 | # directly to the root of the documentation.
186 | #html_extra_path = []
187 |
188 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
189 | # using the given strftime format.
190 | #html_last_updated_fmt = '%b %d, %Y'
191 |
192 | # If true, SmartyPants will be used to convert quotes and dashes to
193 | # typographically correct entities.
194 | #html_use_smartypants = True
195 |
196 | # Custom sidebar templates, maps document names to template names.
197 | #html_sidebars = {}
198 |
199 | # Additional templates that should be rendered to pages, maps page names to
200 | # template names.
201 | #html_additional_pages = {}
202 |
203 | # If false, no module index is generated.
204 | #html_domain_indices = True
205 |
206 | # If false, no index is generated.
207 | #html_use_index = True
208 |
209 | # If true, the index is split into individual pages for each letter.
210 | #html_split_index = False
211 |
212 | # If true, links to the reST sources are added to the pages.
213 | #html_show_sourcelink = True
214 |
215 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
216 | #html_show_sphinx = True
217 |
218 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
219 | #html_show_copyright = True
220 |
221 | # If true, an OpenSearch description file will be output, and all pages will
222 | # contain a <link> tag referring to it. The value of this option must be the
223 | # base URL from which the finished HTML is served.
224 | #html_use_opensearch = ''
225 |
226 | # This is the file name suffix for HTML files (e.g. ".xhtml").
227 | #html_file_suffix = None
228 |
229 | # Output file base name for HTML help builder.
230 | htmlhelp_basename = 'polylearndoc'
231 |
232 |
233 | # -- Options for LaTeX output ---------------------------------------------
234 |
235 | latex_elements = {
236 | # The paper size ('letterpaper' or 'a4paper').
237 | #'papersize': 'letterpaper',
238 |
239 | # The font size ('10pt', '11pt' or '12pt').
240 | #'pointsize': '10pt',
241 |
242 | # Additional stuff for the LaTeX preamble.
243 | #'preamble': '',
244 | }
245 |
246 | # Grouping the document tree into LaTeX files. List of tuples
247 | # (source start file, target name, title,
248 | # author, documentclass [howto, manual, or own class]).
249 | latex_documents = [
250 | ('index', 'polylearn.tex', u'polylearn documentation',
251 | u'Vlad Niculae', 'manual'),
252 | ]
253 |
254 | # The name of an image file (relative to this directory) to place at the top of
255 | # the title page.
256 | #latex_logo = None
257 |
258 | # For "manual" documents, if this is true, then toplevel headings are parts,
259 | # not chapters.
260 | #latex_use_parts = False
261 |
262 | # If true, show page references after internal links.
263 | #latex_show_pagerefs = False
264 |
265 | # If true, show URL addresses after external links.
266 | #latex_show_urls = False
267 |
268 | # Documents to append as an appendix to all manuals.
269 | #latex_appendices = []
270 |
271 | # If false, no module index is generated.
272 | #latex_domain_indices = True
273 |
274 |
275 | # -- Options for manual page output ---------------------------------------
276 |
277 | # One entry per manual page. List of tuples
278 | # (source start file, name, description, authors, manual section).
279 | man_pages = [
280 | ('index', 'polylearn', u'polylearn documentation',
281 | [u'Vlad Niculae'], 1)
282 | ]
283 |
284 | # If true, show URL addresses after external links.
285 | #man_show_urls = False
286 |
287 |
288 | # -- Options for Texinfo output -------------------------------------------
289 |
290 | # Grouping the document tree into Texinfo files. List of tuples
291 | # (source start file, target name, title, author,
292 | # dir menu entry, description, category)
293 | texinfo_documents = [
294 | ('index', 'polylearn', u'polylearn documentation',
295 | u'Vlad Niculae', 'polylearn',
296 | 'Factorization machines and polynomial models for machine learning.',
297 | 'Miscellaneous'),
298 | ]
299 |
300 | def generate_example_rst(app, what, name, obj, options, lines):
301 | # generate empty examples files, so that we don't get
302 | # inclusion errors if there are no examples for a class / module
303 | examples_path = os.path.join(app.srcdir, "modules", "generated",
304 | "%s.examples" % name)
305 | if not os.path.exists(examples_path):
306 | # touch file
307 | open(examples_path, 'w').close()
308 |
309 |
310 | def setup(app):
311 | app.connect('autodoc-process-docstring', generate_example_rst)
312 |
313 | # Documents to append as an appendix to all manuals.
314 | #texinfo_appendices = []
315 |
316 | # If false, no module index is generated.
317 | #texinfo_domain_indices = True
318 |
319 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
320 | #texinfo_show_urls = 'footnote'
321 |
322 | # If true, do not generate a @detailmenu in the "Top" node's menu.
323 | #texinfo_no_detailmenu = False
324 |
325 |
326 | # Example configuration for intersphinx: refer to the Python standard library.
327 | intersphinx_mapping = {'http://docs.python.org/': None}
328 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | .. include:: ../README.rst
2 |
3 | .. toctree::
4 | :hidden:
5 |
6 | auto_examples/index
7 | references.rst
8 |
--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and an HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview of all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | goto end
41 | )
42 |
43 | if "%1" == "clean" (
44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
45 | del /q /s %BUILDDIR%\*
46 | goto end
47 | )
48 |
49 |
50 | %SPHINXBUILD% 2> nul
51 | if errorlevel 9009 (
52 | echo.
53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
54 | echo.installed, then set the SPHINXBUILD environment variable to point
55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
56 | echo.may add the Sphinx directory to PATH.
57 | echo.
58 | echo.If you don't have Sphinx installed, grab it from
59 | echo.http://sphinx-doc.org/
60 | exit /b 1
61 | )
62 |
63 | if "%1" == "html" (
64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
65 | if errorlevel 1 exit /b 1
66 | echo.
67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
68 | goto end
69 | )
70 |
71 | if "%1" == "dirhtml" (
72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
73 | if errorlevel 1 exit /b 1
74 | echo.
75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
76 | goto end
77 | )
78 |
79 | if "%1" == "singlehtml" (
80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
81 | if errorlevel 1 exit /b 1
82 | echo.
83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
84 | goto end
85 | )
86 |
87 | if "%1" == "pickle" (
88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
89 | if errorlevel 1 exit /b 1
90 | echo.
91 | echo.Build finished; now you can process the pickle files.
92 | goto end
93 | )
94 |
95 | if "%1" == "json" (
96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
97 | if errorlevel 1 exit /b 1
98 | echo.
99 | echo.Build finished; now you can process the JSON files.
100 | goto end
101 | )
102 |
103 | if "%1" == "htmlhelp" (
104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | if errorlevel 1 exit /b 1
106 | echo.
107 | echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | goto end
110 | )
111 |
112 | if "%1" == "qthelp" (
113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | if errorlevel 1 exit /b 1
115 | echo.
116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\polylearn.qhcp
119 | echo.To view the help file:
120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\polylearn.qhc
121 | goto end
122 | )
123 |
124 | if "%1" == "devhelp" (
125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished.
129 | goto end
130 | )
131 |
132 | if "%1" == "epub" (
133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | if errorlevel 1 exit /b 1
135 | echo.
136 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | goto end
138 | )
139 |
140 | if "%1" == "latex" (
141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | if errorlevel 1 exit /b 1
143 | echo.
144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | goto end
146 | )
147 |
148 | if "%1" == "latexpdf" (
149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | cd %BUILDDIR%/latex
151 | make all-pdf
152 | cd %BUILDDIR%/..
153 | echo.
154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | goto end
156 | )
157 |
158 | if "%1" == "latexpdfja" (
159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | cd %BUILDDIR%/latex
161 | make all-pdf-ja
162 | cd %BUILDDIR%/..
163 | echo.
164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | goto end
166 | )
167 |
168 | if "%1" == "text" (
169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | if errorlevel 1 exit /b 1
171 | echo.
172 | echo.Build finished. The text files are in %BUILDDIR%/text.
173 | goto end
174 | )
175 |
176 | if "%1" == "man" (
177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
178 | if errorlevel 1 exit /b 1
179 | echo.
180 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
181 | goto end
182 | )
183 |
184 | if "%1" == "texinfo" (
185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
186 | if errorlevel 1 exit /b 1
187 | echo.
188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
189 | goto end
190 | )
191 |
192 | if "%1" == "gettext" (
193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
194 | if errorlevel 1 exit /b 1
195 | echo.
196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
197 | goto end
198 | )
199 |
200 | if "%1" == "changes" (
201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
202 | if errorlevel 1 exit /b 1
203 | echo.
204 | echo.The overview file is in %BUILDDIR%/changes.
205 | goto end
206 | )
207 |
208 | if "%1" == "linkcheck" (
209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
210 | if errorlevel 1 exit /b 1
211 | echo.
212 | echo.Link check complete; look for any errors in the above output ^
213 | or in %BUILDDIR%/linkcheck/output.txt.
214 | goto end
215 | )
216 |
217 | if "%1" == "doctest" (
218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
219 | if errorlevel 1 exit /b 1
220 | echo.
221 | echo.Testing of doctests in the sources finished; look at the ^
222 | results in %BUILDDIR%/doctest/output.txt.
223 | goto end
224 | )
225 |
226 | if "%1" == "xml" (
227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
228 | if errorlevel 1 exit /b 1
229 | echo.
230 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
231 | goto end
232 | )
233 |
234 | if "%1" == "pseudoxml" (
235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
236 | if errorlevel 1 exit /b 1
237 | echo.
238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
239 | goto end
240 | )
241 |
242 | :end
243 |
--------------------------------------------------------------------------------
/doc/references.rst:
--------------------------------------------------------------------------------
1 | .. toctree::
2 | :maxdepth: 2
3 |
4 | polylearn reference
5 | ===================
6 |
7 | .. _factorization_machine:
8 |
9 | Factorization Machines
10 | ----------------------
11 |
12 | .. automodule:: polylearn.factorization_machine
13 | :no-members:
14 | :no-inherited-members:
15 |
16 | .. currentmodule:: polylearn
17 |
18 | .. autosummary::
19 | :toctree: generated/
20 | :template: class.rst
21 |
22 | FactorizationMachineClassifier
23 | FactorizationMachineRegressor
24 |
25 |
26 | .. _polynomial_network:
27 |
28 | Polynomial Networks
29 | -------------------
30 |
31 | .. automodule:: polylearn.polynomial_network
32 | :no-members:
33 | :no-inherited-members:
34 |
35 | .. currentmodule:: polylearn
36 |
37 | .. autosummary::
38 | :toctree: generated/
39 | :template: class.rst
40 |
41 | PolynomialNetworkClassifier
42 | PolynomialNetworkRegressor
43 |
44 |
45 | .. _kernels:
46 |
47 | Utilities for computing kernels
48 | -------------------------------
49 |
50 | .. currentmodule:: polylearn
51 |
52 | .. autosummary::
53 | :toctree: generated/
54 | :template: function.rst
55 |
56 | kernels.anova_kernel
57 | kernels.homogeneous_kernel
58 | kernels.safe_power
--------------------------------------------------------------------------------
/doc/sphinxext/LICENSE.txt:
--------------------------------------------------------------------------------
1 | -------------------------------------------------------------------------------
2 | The files
3 | - numpydoc.py
4 | - autosummary.py
5 | - autosummary_generate.py
6 | - docscrape.py
7 | - docscrape_sphinx.py
8 | - phantom_import.py
9 | have the following license:
10 |
11 | Copyright (C) 2008 Stefan van der Walt, Pauli Virtanen
12 |
13 | Redistribution and use in source and binary forms, with or without
14 | modification, are permitted provided that the following conditions are
15 | met:
16 |
17 | 1. Redistributions of source code must retain the above copyright
18 | notice, this list of conditions and the following disclaimer.
19 | 2. Redistributions in binary form must reproduce the above copyright
20 | notice, this list of conditions and the following disclaimer in
21 | the documentation and/or other materials provided with the
22 | distribution.
23 |
24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 | POSSIBILITY OF SUCH DAMAGE.
35 |
36 | -------------------------------------------------------------------------------
37 | The files
38 | - compiler_unparse.py
39 | - comment_eater.py
40 | - traitsdoc.py
41 | have the following license:
42 |
43 | This software is OSI Certified Open Source Software.
44 | OSI Certified is a certification mark of the Open Source Initiative.
45 |
46 | Copyright (c) 2006, Enthought, Inc.
47 | All rights reserved.
48 |
49 | Redistribution and use in source and binary forms, with or without
50 | modification, are permitted provided that the following conditions are met:
51 |
52 | * Redistributions of source code must retain the above copyright notice, this
53 | list of conditions and the following disclaimer.
54 | * Redistributions in binary form must reproduce the above copyright notice,
55 | this list of conditions and the following disclaimer in the documentation
56 | and/or other materials provided with the distribution.
57 | * Neither the name of Enthought, Inc. nor the names of its contributors may
58 | be used to endorse or promote products derived from this software without
59 | specific prior written permission.
60 |
61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 |
72 |
73 | -------------------------------------------------------------------------------
74 | The files
75 | - only_directives.py
76 | - plot_directive.py
77 | originate from Matplotlib (http://matplotlib.sf.net/) which has
78 | the following license:
79 |
80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved.
81 |
82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation.
83 |
84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee.
85 |
86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3.
87 |
88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
89 |
90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
91 |
92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
93 |
94 | 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
95 |
96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement.
97 |
98 |
--------------------------------------------------------------------------------
/doc/sphinxext/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *.py
2 | include *.txt
3 |
--------------------------------------------------------------------------------
/doc/sphinxext/README.txt:
--------------------------------------------------------------------------------
1 | =====================================
2 | numpydoc -- Numpy's Sphinx extensions
3 | =====================================
4 |
5 | Numpy's documentation uses several custom extensions to Sphinx. These
6 | are shipped in this ``numpydoc`` package, in case you want to make use
7 | of them in third-party projects.
8 |
9 | The following extensions are available:
10 |
11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, and add
12 | the code description directives ``np-function``, ``np-cfunction``, etc.
13 | that support the Numpy docstring syntax.
14 |
15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes.
16 |
17 | - ``numpydoc.plot_directives``: Adaptation of Matplotlib's ``plot::``
18 | directive. Note that this implementation may still undergo severe
19 | changes or eventually be deprecated.
20 |
21 | - ``numpydoc.only_directives``: (DEPRECATED)
22 |
23 | - ``numpydoc.autosummary``: (DEPRECATED) An ``autosummary::`` directive.
24 |   Available in Sphinx 0.6.2 and (to-be) 1.0 as ``sphinx.ext.autosummary``,
25 |   and the Sphinx 1.0 version is recommended over the one included in
26 |   Numpydoc.
27 |
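A sketch of how a project opts in: list the extension in Sphinx's
``conf.py``. This repository registers its vendored copy as
``numpy_ext.numpydoc`` (see ``doc/conf.py`` above); a standalone
installation would list ``numpydoc`` itself::

    # Hedged conf.py sketch; assumes the package is importable by Sphinx
    # (doc/conf.py above puts doc/sphinxext on sys.path for this).
    extensions = [
        'sphinx.ext.autodoc',   # numpydoc hooks into autodoc (see below)
        'numpy_ext.numpydoc',   # or simply 'numpydoc' for a standalone copy
    ]
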
28 |
29 | numpydoc
30 | ========
31 |
32 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings
33 | following the Numpy/Scipy format to a form palatable to Sphinx.
34 |
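For illustration, the underlying parser in ``docscrape.py`` (shipped below)
can be driven by hand. A minimal sketch, assuming ``doc/sphinxext`` is on
``sys.path`` so that ``numpy_ext`` imports, as this project's ``conf.py``
arranges::

    from numpy_ext.docscrape import NumpyDocString

    docstring = """
    Add two numbers.

    Parameters
    ----------
    a : int
        First operand.
    b : int
        Second operand.
    """

    # Sections are exposed dict-style; Parameters entries are
    # (name, type, description-lines) tuples.
    doc = NumpyDocString(docstring)
    print(doc['Summary'])     # ['Add two numbers.']
    print(doc['Parameters'])  # [('a', 'int', ['First operand.']), ...]
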
35 | Options
36 | -------
37 |
38 | The following options can be set in conf.py:
39 |
40 | - numpydoc_use_plots: bool
41 |
42 | Whether to produce ``plot::`` directives for Examples sections that
43 | contain ``import matplotlib``.
44 |
45 | - numpydoc_show_class_members: bool
46 |
47 | Whether to show all members of a class in the Methods and Attributes
48 | sections automatically.
49 |
50 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead)
51 |
52 | Whether to insert an edit link after docstrings.
53 |
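Put together, a hedged ``conf.py`` fragment exercising these options might
read as follows; the values are illustrative, not the package defaults::

    numpydoc_use_plots = False          # don't wrap matplotlib Examples in plot::
    numpydoc_show_class_members = True  # auto-list Methods and Attributes
    # numpydoc_edit_link is DEPRECATED -- edit your HTML template instead
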
--------------------------------------------------------------------------------
/doc/sphinxext/numpy_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/doc/sphinxext/numpy_ext/__init__.py
--------------------------------------------------------------------------------
/doc/sphinxext/numpy_ext/docscrape.py:
--------------------------------------------------------------------------------
1 | """Extract reference documentation from the NumPy source tree.
2 |
3 | """
4 |
5 | import inspect
6 | import textwrap
7 | import re
8 | import pydoc
9 | from warnings import warn
10 | # Try Python 2 first, otherwise load from Python 3
11 | try:
12 | from StringIO import StringIO
13 | except ImportError:
14 | from io import StringIO
15 |
16 |
17 | class Reader(object):
18 | """A line-based string reader.
19 |
20 | """
21 | def __init__(self, data):
22 | """
23 | Parameters
24 | ----------
25 | data : str
26 | String with lines separated by '\n'.
27 |
28 | """
29 | if isinstance(data, list):
30 | self._str = data
31 | else:
32 | self._str = data.split('\n') # store string as list of lines
33 |
34 | self.reset()
35 |
36 | def __getitem__(self, n):
37 | return self._str[n]
38 |
39 | def reset(self):
40 | self._l = 0 # current line nr
41 |
42 | def read(self):
43 | if not self.eof():
44 | out = self[self._l]
45 | self._l += 1
46 | return out
47 | else:
48 | return ''
49 |
50 | def seek_next_non_empty_line(self):
51 | for l in self[self._l:]:
52 | if l.strip():
53 | break
54 | else:
55 | self._l += 1
56 |
57 | def eof(self):
58 | return self._l >= len(self._str)
59 |
60 | def read_to_condition(self, condition_func):
61 | start = self._l
62 | for line in self[start:]:
63 | if condition_func(line):
64 | return self[start:self._l]
65 | self._l += 1
66 | if self.eof():
67 | return self[start:self._l + 1]
68 | return []
69 |
70 | def read_to_next_empty_line(self):
71 | self.seek_next_non_empty_line()
72 |
73 | def is_empty(line):
74 | return not line.strip()
75 | return self.read_to_condition(is_empty)
76 |
77 | def read_to_next_unindented_line(self):
78 | def is_unindented(line):
79 | return (line.strip() and (len(line.lstrip()) == len(line)))
80 | return self.read_to_condition(is_unindented)
81 |
82 | def peek(self, n=0):
83 | if self._l + n < len(self._str):
84 | return self[self._l + n]
85 | else:
86 | return ''
87 |
88 | def is_empty(self):
89 | return not ''.join(self._str).strip()
90 |
91 |
92 | class NumpyDocString(object):
93 | def __init__(self, docstring, config={}):
94 | docstring = textwrap.dedent(docstring).split('\n')
95 |
96 | self._doc = Reader(docstring)
97 | self._parsed_data = {
98 | 'Signature': '',
99 | 'Summary': [''],
100 | 'Extended Summary': [],
101 | 'Parameters': [],
102 | 'Returns': [],
103 | 'Raises': [],
104 | 'Warns': [],
105 | 'Other Parameters': [],
106 | 'Attributes': [],
107 | 'Methods': [],
108 | 'See Also': [],
109 | 'Notes': [],
110 | 'Warnings': [],
111 | 'References': '',
112 | 'Examples': '',
113 | 'index': {}
114 | }
115 |
116 | self._parse()
117 |
118 | def __getitem__(self, key):
119 | return self._parsed_data[key]
120 |
121 | def __setitem__(self, key, val):
122 | if key not in self._parsed_data:
123 | warn("Unknown section %s" % key)
124 | else:
125 | self._parsed_data[key] = val
126 |
127 | def _is_at_section(self):
128 | self._doc.seek_next_non_empty_line()
129 |
130 | if self._doc.eof():
131 | return False
132 |
133 | l1 = self._doc.peek().strip() # e.g. Parameters
134 |
135 | if l1.startswith('.. index::'):
136 | return True
137 |
138 | l2 = self._doc.peek(1).strip() # ---------- or ==========
139 | return l2.startswith('-' * len(l1)) or l2.startswith('=' * len(l1))
140 |
141 | def _strip(self, doc):
142 | i = 0
143 | j = 0
144 | for i, line in enumerate(doc):
145 | if line.strip():
146 | break
147 |
148 | for j, line in enumerate(doc[::-1]):
149 | if line.strip():
150 | break
151 |
152 | return doc[i:len(doc) - j]
153 |
154 | def _read_to_next_section(self):
155 | section = self._doc.read_to_next_empty_line()
156 |
157 | while not self._is_at_section() and not self._doc.eof():
158 | if not self._doc.peek(-1).strip(): # previous line was empty
159 | section += ['']
160 |
161 | section += self._doc.read_to_next_empty_line()
162 |
163 | return section
164 |
165 | def _read_sections(self):
166 | while not self._doc.eof():
167 | data = self._read_to_next_section()
168 | name = data[0].strip()
169 |
170 | if name.startswith('..'): # index section
171 | yield name, data[1:]
172 | elif len(data) < 2:
173 | yield StopIteration
174 | else:
175 | yield name, self._strip(data[2:])
176 |
177 | def _parse_param_list(self, content):
178 | r = Reader(content)
179 | params = []
180 | while not r.eof():
181 | header = r.read().strip()
182 | if ' : ' in header:
183 | arg_name, arg_type = header.split(' : ')[:2]
184 | else:
185 | arg_name, arg_type = header, ''
186 |
187 | desc = r.read_to_next_unindented_line()
188 | desc = dedent_lines(desc)
189 |
190 | params.append((arg_name, arg_type, desc))
191 |
192 | return params
193 |
194 |     _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
195 |                            r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)
196 |
197 | def _parse_see_also(self, content):
198 | """
199 | func_name : Descriptive text
200 | continued text
201 | another_func_name : Descriptive text
202 | func_name1, func_name2, :meth:`func_name`, func_name3
203 |
204 | """
205 | items = []
206 |
207 | def parse_item_name(text):
208 | """Match ':role:`name`' or 'name'"""
209 | m = self._name_rgx.match(text)
210 | if m:
211 | g = m.groups()
212 | if g[1] is None:
213 | return g[3], None
214 | else:
215 | return g[2], g[1]
216 |             raise ValueError("%s is not an item name" % text)
217 |
218 | def push_item(name, rest):
219 | if not name:
220 | return
221 | name, role = parse_item_name(name)
222 | items.append((name, list(rest), role))
223 | del rest[:]
224 |
225 | current_func = None
226 | rest = []
227 |
228 | for line in content:
229 | if not line.strip():
230 | continue
231 |
232 | m = self._name_rgx.match(line)
233 | if m and line[m.end():].strip().startswith(':'):
234 | push_item(current_func, rest)
235 | current_func, line = line[:m.end()], line[m.end():]
236 | rest = [line.split(':', 1)[1].strip()]
237 | if not rest[0]:
238 | rest = []
239 | elif not line.startswith(' '):
240 | push_item(current_func, rest)
241 | current_func = None
242 | if ',' in line:
243 | for func in line.split(','):
244 | push_item(func, [])
245 | elif line.strip():
246 | current_func = line
247 | elif current_func is not None:
248 | rest.append(line.strip())
249 | push_item(current_func, rest)
250 | return items
251 |
252 | def _parse_index(self, section, content):
253 | """
254 |         .. index:: default
255 | :refguide: something, else, and more
256 |
257 | """
258 | def strip_each_in(lst):
259 | return [s.strip() for s in lst]
260 |
261 | out = {}
262 | section = section.split('::')
263 | if len(section) > 1:
264 | out['default'] = strip_each_in(section[1].split(','))[0]
265 | for line in content:
266 | line = line.split(':')
267 | if len(line) > 2:
268 | out[line[1]] = strip_each_in(line[2].split(','))
269 | return out
270 |
271 | def _parse_summary(self):
272 | """Grab signature (if given) and summary"""
273 | if self._is_at_section():
274 | return
275 |
276 | summary = self._doc.read_to_next_empty_line()
277 | summary_str = " ".join([s.strip() for s in summary]).strip()
278 | if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
279 | self['Signature'] = summary_str
280 | if not self._is_at_section():
281 | self['Summary'] = self._doc.read_to_next_empty_line()
282 | else:
283 | self['Summary'] = summary
284 |
285 | if not self._is_at_section():
286 | self['Extended Summary'] = self._read_to_next_section()
287 |
288 | def _parse(self):
289 | self._doc.reset()
290 | self._parse_summary()
291 |
292 | for (section, content) in self._read_sections():
293 | if not section.startswith('..'):
294 | section = ' '.join([s.capitalize()
295 | for s in section.split(' ')])
296 | if section in ('Parameters', 'Attributes', 'Methods',
297 | 'Returns', 'Raises', 'Warns'):
298 | self[section] = self._parse_param_list(content)
299 | elif section.startswith('.. index::'):
300 | self['index'] = self._parse_index(section, content)
301 | elif section == 'See Also':
302 | self['See Also'] = self._parse_see_also(content)
303 | else:
304 | self[section] = content
305 |
306 | # string conversion routines
307 |
308 | def _str_header(self, name, symbol='-'):
309 | return [name, len(name) * symbol]
310 |
311 | def _str_indent(self, doc, indent=4):
312 | out = []
313 | for line in doc:
314 | out += [' ' * indent + line]
315 | return out
316 |
317 | def _str_signature(self):
318 | if self['Signature']:
319 | return [self['Signature'].replace('*', '\*')] + ['']
320 | else:
321 | return ['']
322 |
323 | def _str_summary(self):
324 | if self['Summary']:
325 | return self['Summary'] + ['']
326 | else:
327 | return []
328 |
329 | def _str_extended_summary(self):
330 | if self['Extended Summary']:
331 | return self['Extended Summary'] + ['']
332 | else:
333 | return []
334 |
335 | def _str_param_list(self, name):
336 | out = []
337 | if self[name]:
338 | out += self._str_header(name)
339 | for param, param_type, desc in self[name]:
340 | out += ['%s : %s' % (param, param_type)]
341 | out += self._str_indent(desc)
342 | out += ['']
343 | return out
344 |
345 | def _str_section(self, name):
346 | out = []
347 | if self[name]:
348 | out += self._str_header(name)
349 | out += self[name]
350 | out += ['']
351 | return out
352 |
353 | def _str_see_also(self, func_role):
354 | if not self['See Also']:
355 | return []
356 | out = []
357 | out += self._str_header("See Also")
358 | last_had_desc = True
359 | for func, desc, role in self['See Also']:
360 | if role:
361 | link = ':%s:`%s`' % (role, func)
362 | elif func_role:
363 | link = ':%s:`%s`' % (func_role, func)
364 | else:
365 | link = "`%s`_" % func
366 | if desc or last_had_desc:
367 | out += ['']
368 | out += [link]
369 | else:
370 | out[-1] += ", %s" % link
371 | if desc:
372 | out += self._str_indent([' '.join(desc)])
373 | last_had_desc = True
374 | else:
375 | last_had_desc = False
376 | out += ['']
377 | return out
378 |
379 | def _str_index(self):
380 | idx = self['index']
381 | out = []
382 | out += ['.. index:: %s' % idx.get('default', '')]
383 |         for section, references in idx.items():
384 | if section == 'default':
385 | continue
386 | out += [' :%s: %s' % (section, ', '.join(references))]
387 | return out
388 |
389 | def __str__(self, func_role=''):
390 | out = []
391 | out += self._str_signature()
392 | out += self._str_summary()
393 | out += self._str_extended_summary()
394 | for param_list in ('Parameters', 'Returns', 'Raises'):
395 | out += self._str_param_list(param_list)
396 | out += self._str_section('Warnings')
397 | out += self._str_see_also(func_role)
398 | for s in ('Notes', 'References', 'Examples'):
399 | out += self._str_section(s)
400 | for param_list in ('Attributes', 'Methods'):
401 | out += self._str_param_list(param_list)
402 | out += self._str_index()
403 | return '\n'.join(out)
404 |
405 |
406 | def indent(str, indent=4):
407 | indent_str = ' ' * indent
408 | if str is None:
409 | return indent_str
410 | lines = str.split('\n')
411 | return '\n'.join(indent_str + l for l in lines)
412 |
413 |
414 | def dedent_lines(lines):
415 | """Deindent a list of lines maximally"""
416 | return textwrap.dedent("\n".join(lines)).split("\n")
417 |
418 |
419 | def header(text, style='-'):
420 | return text + '\n' + style * len(text) + '\n'
421 |
422 |
423 | class FunctionDoc(NumpyDocString):
424 | def __init__(self, func, role='func', doc=None, config={}):
425 | self._f = func
426 | self._role = role # e.g. "func" or "meth"
427 |
428 | if doc is None:
429 | if func is None:
430 | raise ValueError("No function or docstring given")
431 | doc = inspect.getdoc(func) or ''
432 | NumpyDocString.__init__(self, doc)
433 |
434 | if not self['Signature'] and func is not None:
435 | func, func_name = self.get_func()
436 | try:
437 | # try to read signature
438 | argspec = inspect.getargspec(func)
439 | argspec = inspect.formatargspec(*argspec)
440 | argspec = argspec.replace('*', '\*')
441 | signature = '%s%s' % (func_name, argspec)
442 | except TypeError as e:
443 | signature = '%s()' % func_name
444 | self['Signature'] = signature
445 |
446 | def get_func(self):
447 | func_name = getattr(self._f, '__name__', self.__class__.__name__)
448 | if inspect.isclass(self._f):
449 | func = getattr(self._f, '__call__', self._f.__init__)
450 | else:
451 | func = self._f
452 | return func, func_name
453 |
454 | def __str__(self):
455 | out = ''
456 |
457 | func, func_name = self.get_func()
458 | signature = self['Signature'].replace('*', '\*')
459 |
460 | roles = {'func': 'function',
461 | 'meth': 'method'}
462 |
463 | if self._role:
464 | if self._role not in roles:
465 | print("Warning: invalid role %s" % self._role)
466 | out += '.. %s:: %s\n \n\n' % (roles.get(self._role, ''),
467 | func_name)
468 |
469 | out += super(FunctionDoc, self).__str__(func_role=self._role)
470 | return out
471 |
472 |
473 | class ClassDoc(NumpyDocString):
474 | def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc,
475 | config=None):
476 | if not inspect.isclass(cls) and cls is not None:
477 | raise ValueError("Expected a class or None, but got %r" % cls)
478 | self._cls = cls
479 |
480 | if modulename and not modulename.endswith('.'):
481 | modulename += '.'
482 | self._mod = modulename
483 |
484 | if doc is None:
485 | if cls is None:
486 | raise ValueError("No class or documentation string given")
487 | doc = pydoc.getdoc(cls)
488 |
489 | NumpyDocString.__init__(self, doc)
490 |
491 | if config is not None and config.get('show_class_members', True):
492 | if not self['Methods']:
493 | self['Methods'] = [(name, '', '')
494 | for name in sorted(self.methods)]
495 | if not self['Attributes']:
496 | self['Attributes'] = [(name, '', '')
497 | for name in sorted(self.properties)]
498 |
499 | @property
500 | def methods(self):
501 | if self._cls is None:
502 | return []
503 | return [name for name, func in inspect.getmembers(self._cls)
504 | if not name.startswith('_') and callable(func)]
505 |
506 | @property
507 | def properties(self):
508 | if self._cls is None:
509 | return []
510 | return [name for name, func in inspect.getmembers(self._cls)
511 | if not name.startswith('_') and func is None]
512 |
--------------------------------------------------------------------------------
/doc/sphinxext/numpy_ext/docscrape_sphinx.py:
--------------------------------------------------------------------------------
1 | import re
2 | import inspect
3 | import textwrap
4 | import pydoc
5 | from .docscrape import NumpyDocString
6 | from .docscrape import FunctionDoc
7 | from .docscrape import ClassDoc
8 |
9 |
10 | class SphinxDocString(NumpyDocString):
11 | def __init__(self, docstring, config=None):
12 | config = {} if config is None else config
13 | self.use_plots = config.get('use_plots', False)
14 | NumpyDocString.__init__(self, docstring, config=config)
15 |
16 | # string conversion routines
17 | def _str_header(self, name, symbol='`'):
18 | return ['.. rubric:: ' + name, '']
19 |
20 | def _str_field_list(self, name):
21 | return [':' + name + ':']
22 |
23 | def _str_indent(self, doc, indent=4):
24 | out = []
25 | for line in doc:
26 | out += [' ' * indent + line]
27 | return out
28 |
29 | def _str_signature(self):
30 |         return ['']  # NOTE: early return disables signature output; code below is unreachable
31 | if self['Signature']:
32 | return ['``%s``' % self['Signature']] + ['']
33 | else:
34 | return ['']
35 |
36 | def _str_summary(self):
37 | return self['Summary'] + ['']
38 |
39 | def _str_extended_summary(self):
40 | return self['Extended Summary'] + ['']
41 |
42 | def _str_param_list(self, name):
43 | out = []
44 | if self[name]:
45 | out += self._str_field_list(name)
46 | out += ['']
47 | for param, param_type, desc in self[name]:
48 | out += self._str_indent(['**%s** : %s' % (param.strip(),
49 | param_type)])
50 | out += ['']
51 | out += self._str_indent(desc, 8)
52 | out += ['']
53 | return out
54 |
55 | @property
56 | def _obj(self):
57 | if hasattr(self, '_cls'):
58 | return self._cls
59 | elif hasattr(self, '_f'):
60 | return self._f
61 | return None
62 |
63 | def _str_member_list(self, name):
64 | """
65 | Generate a member listing, autosummary:: table where possible,
66 | and a table where not.
67 |
68 | """
69 | out = []
70 | if self[name]:
71 | out += ['.. rubric:: %s' % name, '']
72 | prefix = getattr(self, '_name', '')
73 |
74 | if prefix:
75 | prefix = '~%s.' % prefix
76 |
77 | autosum = []
78 | others = []
79 | for param, param_type, desc in self[name]:
80 | param = param.strip()
81 | if not self._obj or hasattr(self._obj, param):
82 | autosum += [" %s%s" % (prefix, param)]
83 | else:
84 | others.append((param, param_type, desc))
85 |
86 | if autosum:
87 | # GAEL: Toctree commented out below because it creates
88 | # hundreds of sphinx warnings
89 | # out += ['.. autosummary::', ' :toctree:', '']
90 | out += ['.. autosummary::', '']
91 | out += autosum
92 |
93 | if others:
94 | maxlen_0 = max([len(x[0]) for x in others])
95 | maxlen_1 = max([len(x[1]) for x in others])
96 | hdr = "=" * maxlen_0 + " " + "=" * maxlen_1 + " " + "=" * 10
97 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1)
98 | n_indent = maxlen_0 + maxlen_1 + 4
99 | out += [hdr]
100 | for param, param_type, desc in others:
101 | out += [fmt % (param.strip(), param_type)]
102 | out += self._str_indent(desc, n_indent)
103 | out += [hdr]
104 | out += ['']
105 | return out
106 |
107 | def _str_section(self, name):
108 | out = []
109 | if self[name]:
110 | out += self._str_header(name)
111 | out += ['']
112 | content = textwrap.dedent("\n".join(self[name])).split("\n")
113 | out += content
114 | out += ['']
115 | return out
116 |
117 | def _str_see_also(self, func_role):
118 | out = []
119 | if self['See Also']:
120 | see_also = super(SphinxDocString, self)._str_see_also(func_role)
121 | out = ['.. seealso::', '']
122 | out += self._str_indent(see_also[2:])
123 | return out
124 |
125 | def _str_warnings(self):
126 | out = []
127 | if self['Warnings']:
128 | out = ['.. warning::', '']
129 | out += self._str_indent(self['Warnings'])
130 | return out
131 |
132 | def _str_index(self):
133 | idx = self['index']
134 | out = []
135 | if len(idx) == 0:
136 | return out
137 |
138 | out += ['.. index:: %s' % idx.get('default', '')]
139 |         for section, references in idx.items():
140 | if section == 'default':
141 | continue
142 | elif section == 'refguide':
143 | out += [' single: %s' % (', '.join(references))]
144 | else:
145 | out += [' %s: %s' % (section, ','.join(references))]
146 | return out
147 |
148 | def _str_references(self):
149 | out = []
150 | if self['References']:
151 | out += self._str_header('References')
152 | if isinstance(self['References'], str):
153 | self['References'] = [self['References']]
154 | out.extend(self['References'])
155 | out += ['']
156 | # Latex collects all references to a separate bibliography,
157 | # so we need to insert links to it
158 | import sphinx # local import to avoid test dependency
159 | if sphinx.__version__ >= "0.6":
160 | out += ['.. only:: latex', '']
161 | else:
162 | out += ['.. latexonly::', '']
163 | items = []
164 | for line in self['References']:
165 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I)
166 | if m:
167 | items.append(m.group(1))
168 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), '']
169 | return out
170 |
171 | def _str_examples(self):
172 | examples_str = "\n".join(self['Examples'])
173 |
174 | if (self.use_plots and 'import matplotlib' in examples_str
175 | and 'plot::' not in examples_str):
176 | out = []
177 | out += self._str_header('Examples')
178 | out += ['.. plot::', '']
179 | out += self._str_indent(self['Examples'])
180 | out += ['']
181 | return out
182 | else:
183 | return self._str_section('Examples')
184 |
185 | def __str__(self, indent=0, func_role="obj"):
186 | out = []
187 | out += self._str_signature()
188 | out += self._str_index() + ['']
189 | out += self._str_summary()
190 | out += self._str_extended_summary()
191 | for param_list in ('Parameters', 'Returns', 'Raises', 'Attributes'):
192 | out += self._str_param_list(param_list)
193 | out += self._str_warnings()
194 | out += self._str_see_also(func_role)
195 | out += self._str_section('Notes')
196 | out += self._str_references()
197 | out += self._str_examples()
198 | for param_list in ('Methods',):
199 | out += self._str_member_list(param_list)
200 | out = self._str_indent(out, indent)
201 | return '\n'.join(out)
202 |
203 |
204 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc):
205 | def __init__(self, obj, doc=None, config={}):
206 | self.use_plots = config.get('use_plots', False)
207 | FunctionDoc.__init__(self, obj, doc=doc, config=config)
208 |
209 |
210 | class SphinxClassDoc(SphinxDocString, ClassDoc):
211 | def __init__(self, obj, doc=None, func_doc=None, config={}):
212 | self.use_plots = config.get('use_plots', False)
213 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config)
214 |
215 |
216 | class SphinxObjDoc(SphinxDocString):
217 | def __init__(self, obj, doc=None, config=None):
218 | self._f = obj
219 | SphinxDocString.__init__(self, doc, config=config)
220 |
221 |
222 | def get_doc_object(obj, what=None, doc=None, config={}):
223 | if what is None:
224 | if inspect.isclass(obj):
225 | what = 'class'
226 | elif inspect.ismodule(obj):
227 | what = 'module'
228 | elif callable(obj):
229 | what = 'function'
230 | else:
231 | what = 'object'
232 | if what == 'class':
233 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc,
234 | config=config)
235 | elif what in ('function', 'method'):
236 | return SphinxFunctionDoc(obj, doc=doc, config=config)
237 | else:
238 | if doc is None:
239 | doc = pydoc.getdoc(obj)
240 | return SphinxObjDoc(obj, doc, config=config)
241 |
--------------------------------------------------------------------------------
/doc/sphinxext/numpy_ext/numpydoc.py:
--------------------------------------------------------------------------------
1 | """
2 | ========
3 | numpydoc
4 | ========
5 |
6 | Sphinx extension that handles docstrings in the Numpy standard format. [1]
7 |
8 | It will:
9 |
10 | - Convert Parameters etc. sections to field lists.
11 | - Convert See Also section to a See also entry.
12 | - Renumber references.
13 | - Extract the signature from the docstring, if it can't be determined
14 | otherwise.
15 |
16 | .. [1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard
17 |
18 | """
19 |
20 | from __future__ import unicode_literals
21 |
22 | import sys # Only needed to check Python version
23 | import os
24 | import re
25 | import pydoc
26 | from .docscrape_sphinx import get_doc_object
27 | from .docscrape_sphinx import SphinxDocString
28 | import inspect
29 |
30 |
31 | def mangle_docstrings(app, what, name, obj, options, lines,
32 |                       reference_offset=[0]):  # mutable default: a counter persisting across calls
33 |
34 | cfg = dict(use_plots=app.config.numpydoc_use_plots,
35 | show_class_members=app.config.numpydoc_show_class_members)
36 |
37 | if what == 'module':
38 | # Strip top title
39 | title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*',
40 | re.I | re.S)
41 | lines[:] = title_re.sub('', "\n".join(lines)).split("\n")
42 | else:
43 | doc = get_doc_object(obj, what, "\n".join(lines), config=cfg)
44 | if sys.version_info[0] < 3:
45 | lines[:] = unicode(doc).splitlines()
46 | else:
47 | lines[:] = str(doc).splitlines()
48 |
49 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \
50 | obj.__name__:
51 | if hasattr(obj, '__module__'):
52 | v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__))
53 | else:
54 | v = dict(full_name=obj.__name__)
55 |         lines += [u'', u'.. htmlonly::', u'']
56 | lines += [u' %s' % x for x in
57 | (app.config.numpydoc_edit_link % v).split("\n")]
58 |
59 | # replace reference numbers so that there are no duplicates
60 | references = []
61 | for line in lines:
62 | line = line.strip()
63 |         m = re.match(r'^.. \[([a-z0-9_.-]+)\]', line, re.I)
64 | if m:
65 | references.append(m.group(1))
66 |
67 | # start renaming from the longest string, to avoid overwriting parts
68 | references.sort(key=lambda x: -len(x))
69 | if references:
70 | for i, line in enumerate(lines):
71 | for r in references:
72 | if re.match(r'^\d+$', r):
73 | new_r = "R%d" % (reference_offset[0] + int(r))
74 | else:
75 | new_r = u"%s%d" % (r, reference_offset[0])
76 | lines[i] = lines[i].replace(u'[%s]_' % r,
77 | u'[%s]_' % new_r)
78 | lines[i] = lines[i].replace(u'.. [%s]' % r,
79 | u'.. [%s]' % new_r)
80 |
81 | reference_offset[0] += len(references)
82 |
83 |
84 | def mangle_signature(app, what, name, obj,
85 | options, sig, retann):
86 | # Do not try to inspect classes that don't define `__init__`
87 | if (inspect.isclass(obj) and
88 | (not hasattr(obj, '__init__') or
89 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))):
90 | return '', ''
91 |
92 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')):
93 | return
94 | if not hasattr(obj, '__doc__'):
95 | return
96 |
97 | doc = SphinxDocString(pydoc.getdoc(obj))
98 | if doc['Signature']:
99 | sig = re.sub("^[^(]*", "", doc['Signature'])
100 | return sig, ''
101 |
102 |
103 | def setup(app, get_doc_object_=get_doc_object):
104 | global get_doc_object
105 | get_doc_object = get_doc_object_
106 |
107 | if sys.version_info[0] < 3:
108 | app.connect(b'autodoc-process-docstring', mangle_docstrings)
109 | app.connect(b'autodoc-process-signature', mangle_signature)
110 | else:
111 | app.connect('autodoc-process-docstring', mangle_docstrings)
112 | app.connect('autodoc-process-signature', mangle_signature)
113 | app.add_config_value('numpydoc_edit_link', None, False)
114 | app.add_config_value('numpydoc_use_plots', None, False)
115 | app.add_config_value('numpydoc_show_class_members', True, True)
116 |
117 | # Extra mangling domains
118 | app.add_domain(NumpyPythonDomain)
119 | app.add_domain(NumpyCDomain)
120 |
121 | #-----------------------------------------------------------------------------
122 | # Docstring-mangling domains
123 | #-----------------------------------------------------------------------------
124 |
125 | try:
126 | import sphinx # lazy to avoid test dependency
127 | except ImportError:
128 | CDomain = PythonDomain = object
129 | else:
130 | from sphinx.domains.c import CDomain
131 | from sphinx.domains.python import PythonDomain
132 |
133 |
134 | class ManglingDomainBase(object):
135 | directive_mangling_map = {}
136 |
137 | def __init__(self, *a, **kw):
138 | super(ManglingDomainBase, self).__init__(*a, **kw)
139 | self.wrap_mangling_directives()
140 |
141 | def wrap_mangling_directives(self):
142 | for name, objtype in self.directive_mangling_map.items():
143 | self.directives[name] = wrap_mangling_directive(
144 | self.directives[name], objtype)
145 |
146 |
147 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain):
148 | name = 'np'
149 | directive_mangling_map = {
150 | 'function': 'function',
151 | 'class': 'class',
152 | 'exception': 'class',
153 | 'method': 'function',
154 | 'classmethod': 'function',
155 | 'staticmethod': 'function',
156 | 'attribute': 'attribute',
157 | }
158 |
159 |
160 | class NumpyCDomain(ManglingDomainBase, CDomain):
161 | name = 'np-c'
162 | directive_mangling_map = {
163 | 'function': 'function',
164 | 'member': 'attribute',
165 | 'macro': 'function',
166 | 'type': 'class',
167 | 'var': 'object',
168 | }
169 |
170 |
171 | def wrap_mangling_directive(base_directive, objtype):
172 | class directive(base_directive):
173 | def run(self):
174 | env = self.state.document.settings.env
175 |
176 | name = None
177 | if self.arguments:
178 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0])
179 | name = m.group(2).strip()
180 |
181 | if not name:
182 | name = self.arguments[0]
183 |
184 | lines = list(self.content)
185 | mangle_docstrings(env.app, objtype, name, None, None, lines)
186 | # local import to avoid testing dependency
187 | from docutils.statemachine import ViewList
188 | self.content = ViewList(lines, self.content.parent)
189 |
190 | return base_directive.run(self)
191 |
192 | return directive
193 |
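
In practice this extension is enabled from a Sphinx project's conf.py. A
hedged sketch, assuming the numpy_ext package lives under a `sphinxext/`
directory next to conf.py (paths and option values are illustrative only):

# conf.py (excerpt)
import os
import sys
sys.path.insert(0, os.path.abspath('sphinxext'))  # make numpy_ext importable

extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'numpy_ext.numpydoc',   # registers mangle_docstrings / mangle_signature
]

# config values declared by setup() above
numpydoc_show_class_members = False
numpydoc_use_plots = True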
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | Examples of using factorization machines and polynomial networks with the polylearn library.
--------------------------------------------------------------------------------
/examples/plot_regularization_path.py:
--------------------------------------------------------------------------------
1 | """
2 | ==================================================
3 | Plotting regularization paths using warm restarts.
4 | ==================================================
5 |
6 | In this example we show how to use the `warm_start` parameter to efficiently
7 | compute the regularization path for a polynomial network when tuning
8 | the `beta` regularization hyperparameter.
9 | """
10 | print(__doc__)
11 |
12 | # Author: Vlad Niculae
13 | # License: Simplified BSD
14 |
15 | import numpy as np
16 |
17 | import matplotlib.pyplot as plt
18 |
19 | from sklearn.linear_model import Ridge
20 | from sklearn.kernel_ridge import KernelRidge
21 | from sklearn.datasets import load_boston
22 | from sklearn.cross_validation import train_test_split
23 | from sklearn.metrics.scorer import mean_squared_error_scorer
24 | from sklearn.preprocessing import StandardScaler
25 |
26 | from polylearn import PolynomialNetworkRegressor
27 |
28 | boston = load_boston()
29 | X, y = boston.data, boston.target
30 | data_split = train_test_split(X, y, test_size=100, random_state=0)
31 | X_train, X_test, y_train, y_test = data_split
32 |
33 | # Scale both the features (X) and the target (y) to zero mean, unit variance
34 | # (This is not necessary but makes the plots clearer)
35 |
36 | scaler_X = StandardScaler(with_mean=True, with_std=True)
37 | X_train_sc = scaler_X.fit_transform(X_train)
38 | X_test_sc = scaler_X.transform(X_test)
39 |
40 | scaler_y = StandardScaler(with_mean=True, with_std=True)
41 | y_train_sc = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
42 | y_test_sc = scaler_y.transform(y_test.reshape(-1, 1)).ravel()
43 |
44 | n_alphas = 50
45 | alphas = np.logspace(-1, 8, n_alphas)
46 | ridge = Ridge(fit_intercept=True)
47 | kernel_ridge = KernelRidge(kernel='poly', gamma=1, degree=3, coef0=1)
48 |
49 | test_scores_ridge = []
50 | test_scores_kernel = []
51 |
52 | for alpha in alphas:
53 | ridge.set_params(alpha=alpha)
54 | ridge.fit(X_train_sc, y_train_sc)
55 | test_mse = mean_squared_error_scorer(ridge, X_test_sc, y_test_sc)
56 | test_scores_ridge.append(test_mse)
57 |
58 | kernel_ridge.set_params(alpha=alpha)
59 | kernel_ridge.fit(X_train_sc, y_train_sc)
60 | test_mse = mean_squared_error_scorer(kernel_ridge, X_test_sc, y_test_sc)
61 | test_scores_kernel.append(test_mse)
62 |
63 |
64 | poly = PolynomialNetworkRegressor(degree=3, n_components=2, tol=1e-3,
65 | warm_start=True, random_state=0)
66 |
67 | test_scores_poly = []
68 |
69 | for alpha in alphas:
70 | poly.set_params(beta=alpha)
71 | poly.fit(X_train_sc, y_train_sc)
72 | test_mse = mean_squared_error_scorer(poly, X_test_sc, y_test_sc)
73 | test_scores_poly.append(test_mse)
74 |
75 | plt.plot(alphas, test_scores_ridge, label="Linear ridge")
76 | plt.plot(alphas, test_scores_kernel, label="Kernel ridge")
77 | plt.plot(alphas, test_scores_poly, label="Poly. network (rank=2)")
78 | plt.ylabel("Negated mean squared error (higher is better)")
79 | plt.xlabel("Regularization amount")
80 | plt.ylim(-1, -0.15)
81 | plt.semilogx()
82 | plt.legend()
83 | plt.show()
84 |
--------------------------------------------------------------------------------
/examples/plot_xor.py:
--------------------------------------------------------------------------------
1 | """
2 | ===============================================
3 | Factorization machine decision boundary for XOR
4 | ===============================================
5 |
6 | Plots the decision function learned by a factorization machine for a noisy,
7 | non-linearly separable XOR problem.
8 |
9 | This problem is a perfect example of feature interactions. As such,
10 | factorization machines can model it very robustly with a very small number of
11 | parameters. (In this case, n_features * n_components = 2 * 1 = 2 params.)
12 |
13 | Example based on:
14 | http://scikit-learn.org/stable/auto_examples/svm/plot_svm_nonlinear.html
15 | """
16 | print(__doc__)
17 |
18 | # Author: Vlad Niculae
19 | # License: Simplified BSD
20 |
21 | import numpy as np
22 | import matplotlib.pyplot as plt
23 | from sklearn.svm import NuSVC
24 |
25 | from polylearn import FactorizationMachineClassifier
26 |
27 | xx, yy = np.meshgrid(np.linspace(-3, 3, 500),
28 | np.linspace(-3, 3, 500))
29 |
30 | rng = np.random.RandomState(42)
31 | X = rng.randn(300, 2)
32 | y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
33 |
34 | # XOR is too easy for factorization machines, so add noise :)
35 | flip = rng.randint(300, size=15)
36 | y[flip] = ~y[flip]
37 |
38 | # fit the model
39 | fm = FactorizationMachineClassifier(n_components=1, fit_linear=False,
40 | random_state=0)
41 | fm.fit(X, y)
42 |
43 | # fit a NuSVC for comparison
44 | svc = NuSVC(kernel='poly', degree=2)
45 | svc.fit(X, y)
46 |
47 | # plot the decision function for each datapoint on the grid
48 | Z = fm.decision_function(np.c_[xx.ravel(), yy.ravel()])
49 | Z = Z.reshape(xx.shape)
50 |
51 | Z_svc = svc.decision_function(np.c_[xx.ravel(), yy.ravel()])
52 | Z_svc = Z_svc.reshape(xx.shape)
53 |
54 | plt.imshow(Z, interpolation='nearest',
55 | extent=(xx.min(), xx.max(), yy.min(), yy.max()), aspect='auto',
56 | origin='lower', cmap=plt.cm.PuOr_r)
57 |
58 | contour_fm = plt.contour(xx, yy, Z, levels=[0], linewidths=2)
59 |
60 | contour_svc = plt.contour(xx, yy, Z_svc, levels=[0], linestyles='dashed')
61 |
62 | plt.scatter(X[:, 0], X[:, 1], s=30, c=y, cmap=plt.cm.Paired)
63 | plt.xticks(())
64 | plt.yticks(())
65 | plt.axis([-3, 3, -3, 3])
66 | plt.legend((contour_fm.collections[0], contour_svc.collections[0]),
67 | ('rank-1 factorization machine', 'SVC with polynomial kernel'))
68 | plt.show()
69 |
--------------------------------------------------------------------------------
/polylearn/__init__.py:
--------------------------------------------------------------------------------
1 | from .factorization_machine import FactorizationMachineRegressor
2 | from .factorization_machine import FactorizationMachineClassifier
3 | from .polynomial_network import PolynomialNetworkRegressor
4 | from .polynomial_network import PolynomialNetworkClassifier
5 |
--------------------------------------------------------------------------------
/polylearn/base.py:
--------------------------------------------------------------------------------
1 | # Author: Vlad Niculae
2 | # License: Simplified BSD
3 |
4 | from abc import ABCMeta
5 | import numpy as np
6 | from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
7 | from sklearn.preprocessing import LabelBinarizer
8 | from sklearn.utils.validation import check_X_y
9 | from sklearn.utils.multiclass import type_of_target
10 | import six
11 |
12 | from .loss import CLASSIFICATION_LOSSES, REGRESSION_LOSSES
13 |
14 |
15 | class _BasePoly(six.with_metaclass(ABCMeta, BaseEstimator)):
16 |
17 | def _get_loss(self, loss):
18 | # classification losses
19 | if loss not in self._LOSSES:
20 | raise ValueError(
21 | 'Loss function "{}" not supported. The available options '
22 | 'are: "{}".'.format(loss,
23 | '", "'.join(self._LOSSES)))
24 | return self._LOSSES[loss]
25 |
26 |
27 | class _PolyRegressorMixin(RegressorMixin):
28 |
29 | _LOSSES = REGRESSION_LOSSES
30 |
31 | def _check_X_y(self, X, y):
32 | X, y = check_X_y(X, y, accept_sparse='csc', multi_output=False,
33 | dtype=np.double, y_numeric=True)
34 | y = y.astype(np.double).ravel()
35 | return X, y
36 |
37 | def predict(self, X):
38 | """Predict regression output for the samples in X.
39 |
40 | Parameters
41 | ----------
42 | X : {array-like, sparse matrix}, shape = [n_samples, n_features]
43 | Samples.
44 |
45 | Returns
46 | -------
47 | y_pred : array, shape = [n_samples]
48 | Returns predicted values.
49 | """
50 | return self._predict(X)
51 |
52 |
53 | class _PolyClassifierMixin(ClassifierMixin):
54 |
55 | _LOSSES = CLASSIFICATION_LOSSES
56 |
57 | def decision_function(self, X):
58 | """Compute the output of the factorization machine before thresholding.
59 |
60 | Parameters
61 | ----------
62 | X : {array-like, sparse matrix}, shape = [n_samples, n_features]
63 | Samples.
64 |
65 | Returns
66 | -------
67 | y_scores : array, shape = [n_samples]
68 | Returns predicted values.
69 | """
70 | return self._predict(X)
71 |
72 | def predict(self, X):
73 | """Predict using the factorization machine
74 |
75 | Parameters
76 | ----------
77 | X : {array-like, sparse matrix}, shape = [n_samples, n_features]
78 | Samples.
79 |
80 | Returns
81 | -------
82 | y_pred : array, shape = [n_samples]
83 | Returns predicted values.
84 | """
85 | y_pred = self.decision_function(X) > 0
86 | return self.label_binarizer_.inverse_transform(y_pred)
87 |
88 | def predict_proba(self, X):
89 | """Compute probability estimates for the test samples.
90 |
91 | Only available if `loss='logistic'`.
92 |
93 | Parameters
94 | ----------
95 | X : {array-like, sparse matrix}, shape = [n_samples, n_features]
96 | Samples.
97 |
98 | Returns
99 | -------
100 |         y_proba : array, shape = [n_samples]
101 |             Probability estimates that the samples belong to the positive class.
102 | """
103 | if self.loss == 'logistic':
104 | return 1 / (1 + np.exp(-self.decision_function(X)))
105 | else:
106 | raise ValueError("Probability estimates only available for "
107 | "loss='logistic'. You may use probability "
108 | "calibration methods from scikit-learn instead.")
109 |
110 | def _check_X_y(self, X, y):
111 |
112 |         # helpful error message for sklearn < 0.17
113 | is_2d = hasattr(y, 'shape') and len(y.shape) > 1 and y.shape[1] >= 2
114 |
115 | if is_2d or type_of_target(y) != 'binary':
116 |             raise TypeError("Only binary targets supported. For training "
117 |                             "multiclass or multilabel models, you may use "
118 |                             "the OneVsRestClassifier or OneVsOneClassifier "
119 |                             "meta-estimators in scikit-learn.")
120 |
121 | X, Y = check_X_y(X, y, dtype=np.double, accept_sparse='csc',
122 | multi_output=False)
123 |
124 | self.label_binarizer_ = LabelBinarizer(pos_label=1, neg_label=-1)
125 | y = self.label_binarizer_.fit_transform(Y).ravel().astype(np.double)
126 | return X, y
127 |
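
To make the classifier-mixin contract concrete, here is a small sketch of how
decision_function, predict and predict_proba compose, using the
FactorizationMachineClassifier defined later in this package (the data is
random and purely illustrative):

import numpy as np
from polylearn import FactorizationMachineClassifier

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = rng.randint(2, size=100)

clf = FactorizationMachineClassifier(loss='logistic', random_state=0)
clf.fit(X, y)

scores = clf.decision_function(X)   # raw model output
proba = clf.predict_proba(X)        # sigmoid(scores); logistic loss only
pred = clf.predict(X)               # thresholds the scores at zero
assert np.allclose(proba, 1.0 / (1.0 + np.exp(-scores)))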
--------------------------------------------------------------------------------
/polylearn/cd_direct_fast.pyx:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # cython: language_level=3
3 | # cython: cdivision=True
4 | # cython: boundscheck=False
5 | # cython: wraparound=False
6 | #
7 | # Author: Vlad Niculae
8 | # License: BSD
9 |
10 | from libc.math cimport fabs
11 | from cython.view cimport array
12 |
13 | from lightning.impl.dataset_fast cimport ColumnDataset
14 |
15 | from .loss_fast cimport LossFunction
16 | from .cd_linear_fast cimport _cd_linear_epoch
17 |
18 |
19 | cdef void _precompute(ColumnDataset X,
20 | double[:, :, ::1] P,
21 | Py_ssize_t order,
22 | double[:, ::1] out,
23 | Py_ssize_t s,
24 | unsigned int degree):
25 |
26 | cdef Py_ssize_t n_samples = X.get_n_samples()
27 | cdef Py_ssize_t n_features = P.shape[2]
28 |
29 | # Data pointers
30 | cdef double* data
31 | cdef int* indices
32 | cdef int n_nz
33 |
34 |     cdef Py_ssize_t i, j, ii
37 |
38 | for i in range(n_samples):
39 | out[degree - 1, i] = 0
40 |
41 | for j in range(n_features):
42 | X.get_column_ptr(j, &indices, &data, &n_nz)
43 | for ii in range(n_nz):
44 | i = indices[ii]
45 | out[degree - 1, i] += (data[ii] * P[order, s, j]) ** degree
46 |
47 |
48 | cdef inline double _update(int* indices,
49 | double* data,
50 | int n_nz,
51 | double p_js,
52 | double[:] y,
53 | double[:] y_pred,
54 | LossFunction loss,
55 | unsigned int degree,
56 | double lam,
57 | double beta,
58 | double[:, ::1] D,
59 | double[:] cache_kp):
60 |
61 | cdef double l1_reg = 2 * beta * fabs(lam)
62 |
63 | cdef Py_ssize_t i, ii
64 |
65 | cdef double inv_step_size = 0
66 |
67 | cdef double kp # derivative of the ANOVA kernel
68 | cdef double update = 0
69 |
70 | for ii in range(n_nz):
71 | i = indices[ii]
72 |
73 | if degree == 2:
74 | kp = D[0, i] - p_js * data[ii]
75 | else: # degree == 3:
76 | kp = 0.5 * (D[0, i] ** 2 - D[1, i])
77 | kp -= p_js * data[ii] * D[0, i]
78 | kp += p_js ** 2 * data[ii] ** 2
79 |
80 | kp *= lam * data[ii]
81 | cache_kp[ii] = kp
82 |
83 | update += loss.dloss(y_pred[i], y[i]) * kp
84 | inv_step_size += kp ** 2
85 |
86 | inv_step_size *= loss.mu
87 | inv_step_size += l1_reg
88 |
89 | update += l1_reg * p_js
90 | update /= inv_step_size
91 |
92 | return update
93 |
94 |
95 | cdef inline double _cd_direct_epoch(double[:, :, ::1] P,
96 | Py_ssize_t order,
97 | ColumnDataset X,
98 | double[:] y,
99 | double[:] y_pred,
100 | double[:] lams,
101 | unsigned int degree,
102 | double beta,
103 | LossFunction loss,
104 | double[:, ::1] D,
105 | double[:] cache_kp):
106 |
107 |     cdef Py_ssize_t s, j, i, ii
108 |     cdef double p_old, update
109 | cdef double sum_viol = 0
110 | cdef Py_ssize_t n_components = P.shape[1]
111 | cdef Py_ssize_t n_features = P.shape[2]
112 |
113 | # Data pointers
114 | cdef double* data
115 | cdef int* indices
116 | cdef int n_nz
117 |
118 | for s in range(n_components):
119 |
120 | # initialize the cached ds for this s
121 | _precompute(X, P, order, D, s, 1)
122 | if degree == 3:
123 | _precompute(X, P, order, D, s, 2)
124 |
125 | for j in range(n_features):
126 |
127 | X.get_column_ptr(j, &indices, &data, &n_nz)
128 |
129 | # compute coordinate update
130 | p_old = P[order, s, j]
131 | update = _update(indices, data, n_nz, p_old, y, y_pred,
132 | loss, degree, lams[s], beta, D, cache_kp)
133 | P[order, s, j] -= update
134 | sum_viol += fabs(update)
135 |
136 | # Synchronize predictions and ds
137 | for ii in range(n_nz):
138 | i = indices[ii]
139 |
140 | if degree == 3:
141 | D[1, i] -= ((p_old ** 2 - P[order, s, j] ** 2) *
142 | data[ii] ** 2)
143 |
144 | D[0, i] -= update * data[ii]
145 | y_pred[i] -= update * cache_kp[ii]
146 | return sum_viol
147 |
148 |
149 | def _cd_direct_ho(double[:, :, ::1] P not None,
150 | double[:] w not None,
151 | ColumnDataset X,
152 | double[:] col_norm_sq not None,
153 | double[:] y not None,
154 | double[:] y_pred not None,
155 | double[:] lams not None,
156 | unsigned int degree,
157 | double alpha,
158 | double beta,
159 | bint fit_linear,
160 | bint fit_lower,
161 | LossFunction loss,
162 | unsigned int max_iter,
163 | double tol,
164 | int verbose):
165 |
166 | cdef Py_ssize_t n_samples = X.get_n_samples()
167 | cdef unsigned int it
168 |
169 | cdef double viol
170 | cdef bint converged = False
171 |
172 | # precomputed values
173 | cdef double[:, ::1] D = array((degree - 1, n_samples), sizeof(double), 'd')
174 | cdef double[:] cache_kp = array((n_samples,), sizeof(double), 'd')
175 |
176 | for it in range(max_iter):
177 | viol = 0
178 |
179 | if fit_linear:
180 | viol += _cd_linear_epoch(w, X, y, y_pred, col_norm_sq, alpha, loss)
181 |
182 | if fit_lower and degree == 3: # fit degree 2. Will be looped later.
183 | viol += _cd_direct_epoch(P, 1, X, y, y_pred, lams, 2, beta, loss,
184 | D, cache_kp)
185 |
186 | viol += _cd_direct_epoch(P, 0, X, y, y_pred, lams, degree, beta, loss,
187 | D, cache_kp)
188 |
189 | if verbose:
190 | print("Iteration", it + 1, "violation sum", viol)
191 |
192 | if viol < tol:
193 | if verbose:
194 | print("Converged at iteration", it + 1)
195 | converged = True
196 | break
197 |
198 | return converged, it
199 |
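
The quantity `kp` computed in `_update` is the derivative of the ANOVA kernel
with respect to the coordinate being updated. For degree 2,
K_A(x, p_s) = 0.5 * ((sum_k x_k p_sk)**2 - sum_k (x_k p_sk)**2), so
dK_A/dp_sj = x_j * (D - p_sj * x_j) with D = sum_k x_k p_sk, which is exactly
the cached D[0, i] expression above (times lam). A NumPy sketch checking this
against a finite difference (illustrative only, not part of the module):

import numpy as np

def anova2(x, p):
    # degree-2 ANOVA kernel: sum over pairs k < l of x_k p_k x_l p_l
    s1 = np.dot(x, p)
    s2 = np.dot(x ** 2, p ** 2)
    return 0.5 * (s1 ** 2 - s2)

rng = np.random.RandomState(0)
x, p = rng.randn(6), rng.randn(6)
j, eps = 2, 1e-6

D = np.dot(x, p)
analytic = x[j] * (D - p[j] * x[j])      # kp for degree == 2, lam == 1

p_hi, p_lo = p.copy(), p.copy()
p_hi[j] += eps
p_lo[j] -= eps
numeric = (anova2(x, p_hi) - anova2(x, p_lo)) / (2 * eps)
assert np.isclose(analytic, numeric)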
--------------------------------------------------------------------------------
/polylearn/cd_lifted_fast.pyx:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # cython: language_level=3
3 | # cython: cdivision=True
4 | # cython: boundscheck=False
5 | # cython: wraparound=False
6 | #
7 | # Author: Vlad Niculae
8 | # License: BSD
9 |
10 | from libc.math cimport fabs
11 | from cython.view cimport array
12 |
13 | from lightning.impl.dataset_fast cimport ColumnDataset
14 |
15 | from .loss_fast cimport LossFunction
16 |
17 |
18 | def _fast_lifted_predict(double[:, :, ::1] U,
19 | ColumnDataset X,
20 | double[:] out):
21 |
22 |     # np.product(safe_sparse_dot(U, X.T), axis=0).sum(axis=0)
23 |     #
24 |     # "fast" is a bit of a misnomer: at least for dense data this is a bit
25 |     # slower than the expression above, but it is more memory efficient.
26 |
27 | cdef Py_ssize_t degree = U.shape[0]
28 | cdef Py_ssize_t n_components = U.shape[1]
29 |
30 | cdef Py_ssize_t n_samples = X.get_n_samples()
31 | cdef Py_ssize_t n_features = X.get_n_features()
32 |
33 | cdef double* data
34 | cdef int* indices
35 | cdef int n_nz
36 |
37 |     cdef Py_ssize_t i, j, ii, s, t
38 |
39 | cdef double[:] middle = array((n_samples,), sizeof(double), 'd')
40 | cdef double[:] inner = array((n_samples,), sizeof(double), 'd')
41 |
42 | for s in range(n_components):
43 |
44 | for i in range(n_samples):
45 | middle[i] = 1
46 |
47 | for t in range(degree):
48 | # inner = np.dot(U[t, s, :], X.T)
49 |
50 | for i in range(n_samples):
51 | inner[i] = 0
52 |
53 | for j in range(n_features):
54 | X.get_column_ptr(j, &indices, &data, &n_nz)
55 | for ii in range(n_nz):
56 | i = indices[ii]
57 | inner[i] += data[ii] * U[t, s, j]
58 |
59 | # middle *= inner
60 | for i in range(n_samples):
61 | middle[i] *= inner[i]
62 |
63 | for i in range(n_samples):
64 | out[i] += middle[i]
65 |
66 |
67 | cdef void _precompute(double[:, :, ::1] U,
68 | ColumnDataset X,
69 | Py_ssize_t s,
70 | Py_ssize_t t,
71 | double[:] out,
72 | double[:] tmp):
73 |
74 | cdef Py_ssize_t degree = U.shape[0]
75 | cdef Py_ssize_t n_components = U.shape[1]
76 |
77 | cdef Py_ssize_t n_samples = X.get_n_samples()
78 | cdef Py_ssize_t n_features = X.get_n_features()
79 |
80 | cdef double* data
81 | cdef int* indices
82 | cdef int n_nz
83 |
84 |     cdef Py_ssize_t i, j, ii, t_prime
85 |
86 | for i in range(n_samples):
87 | out[i] = 1
88 |
89 | for t_prime in range(degree):
90 |
91 | if t == t_prime:
92 | continue
93 |
94 | for i in range(n_samples):
95 | tmp[i] = 0
96 |
97 | for j in range(n_features):
98 | X.get_column_ptr(j, &indices, &data, &n_nz)
99 | for ii in range(n_nz):
100 | i = indices[ii]
101 | tmp[i] += data[ii] * U[t_prime, s, j]
102 | for i in range(n_samples):
103 | out[i] *= tmp[i]
104 |
105 |
106 | def _cd_lifted(double[:, :, ::1] U,
107 | ColumnDataset X,
108 | double[:] y,
109 | double[:] y_pred,
110 | double beta,
111 | LossFunction loss,
112 | int max_iter,
113 | double tol,
114 | int verbose):
115 |
116 | cdef Py_ssize_t n_samples = X.get_n_samples()
117 | cdef Py_ssize_t n_features = X.get_n_features()
118 | cdef Py_ssize_t degree = U.shape[0]
119 | cdef Py_ssize_t n_components = U.shape[1]
120 |     cdef Py_ssize_t t, s, j, i, ii
121 | cdef int it
122 |
123 | cdef double sum_viol
124 | cdef bint converged = False
125 |
126 | cdef double inv_step_size
127 | cdef double update
128 | cdef double u_old
129 |
130 | cdef double[:] xi = array((n_samples,), sizeof(double), 'd')
131 | cdef double[:] tmp = array((n_samples,), sizeof(double), 'd')
132 |
133 | # Data pointers
134 | cdef double* data
135 | cdef int* indices
136 | cdef int n_nz
137 |
138 | for it in range(max_iter):
139 | sum_viol = 0
140 | for t in range(degree):
141 | for s in range(n_components):
142 | _precompute(U, X, s, t, xi, tmp)
143 | for j in range(n_features):
144 |
145 | u_old = U[t, s, j]
146 | X.get_column_ptr(j, &indices, &data, &n_nz)
147 |
148 | inv_step_size = 0
149 | update = 0
150 |
151 | for ii in range(n_nz):
152 | i = indices[ii]
153 | inv_step_size += xi[i] ** 2 * data[ii] ** 2
154 | update += xi[i] * data[ii] * loss.dloss(y_pred[i],
155 | y[i])
156 |
157 | inv_step_size *= loss.mu
158 | inv_step_size += beta
159 |
160 | update += beta * u_old
161 | update /= inv_step_size
162 |
163 | U[t, s, j] -= update
164 | sum_viol += fabs(update)
165 |
166 | # synchronize predictions
167 | for ii in range(n_nz):
168 | i = indices[ii]
169 | y_pred[i] -= data[ii] * xi[i] * update
170 |
171 | if verbose:
172 | print("Iteration", it + 1, "violation sum", sum_viol)
173 |
174 | if sum_viol < tol:
175 | if verbose:
176 | print("Converged at iteration", it + 1)
177 | converged = True
178 | break
179 |
180 | return converged, it
181 |
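
As the comment at the top of `_fast_lifted_predict` notes, the Cython loops
compute the same quantity as a short dense NumPy expression; the loop version
simply avoids materializing the (degree, n_components, n_samples) intermediate.
A dense reference sketch, useful for checking correctness on small inputs
(illustrative, not part of the module):

import numpy as np

def lifted_predict_dense(U, X):
    # U: (degree, n_components, n_features), X: (n_samples, n_features)
    # out_i = sum_s prod_t <U[t, s], x_i>
    inner = np.einsum('tsf,nf->tsn', U, X)   # all inner products at once
    return inner.prod(axis=0).sum(axis=0)

rng = np.random.RandomState(0)
U = 0.01 * rng.randn(3, 4, 5)
X = rng.randn(10, 5)
print(lifted_predict_dense(U, X).shape)      # (10,)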
--------------------------------------------------------------------------------
/polylearn/cd_linear_fast.pxd:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 |
3 | from lightning.impl.dataset_fast cimport ColumnDataset
4 | from .loss_fast cimport LossFunction
5 |
6 | cpdef double _cd_linear_epoch(double[:] w, ColumnDataset X,
7 | double[:] y,
8 | double[:] y_pred,
9 | double[:] col_norm_sq,
10 | double alpha,
11 | LossFunction loss)
12 |
--------------------------------------------------------------------------------
/polylearn/cd_linear_fast.pyx:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | # cython: language_level=3
3 | # cython: cdivision=True
4 | # cython: boundscheck=False
5 | # cython: wraparound=False
6 | #
7 | # Author: Vlad Niculae
8 | # License: BSD
9 |
10 | from libc.math cimport fabs
11 |
12 | from lightning.impl.dataset_fast cimport ColumnDataset
13 |
14 | from .loss_fast cimport LossFunction
15 |
16 |
17 | cpdef double _cd_linear_epoch(double[:] w,
18 | ColumnDataset X,
19 | double[:] y,
20 | double[:] y_pred,
21 | double[:] col_norm_sq,
22 | double alpha,
23 | LossFunction loss):
24 |
25 | cdef Py_ssize_t i, j, ii
26 | cdef double sum_viol = 0
27 | cdef Py_ssize_t n_features = w.shape[0]
28 | cdef double update
29 | cdef double inv_step_size
30 |
31 | # Data pointers
32 | cdef double* data
33 | cdef int* indices
34 | cdef int n_nz
35 |
36 | for j in range(n_features):
37 | X.get_column_ptr(j, &indices, &data, &n_nz)
38 |
39 | # compute gradient with respect to w_j
40 | update = alpha * w[j]
41 | for ii in range(n_nz):
42 | i = indices[ii]
43 | update += loss.dloss(y_pred[i], y[i]) * data[ii]
44 |
45 | # compute second derivative upper bound
46 | inv_step_size = loss.mu * col_norm_sq[j] + alpha
47 | update /= inv_step_size
48 |
49 | w[j] -= update
50 | sum_viol += fabs(update)
51 |
52 | # update predictions
53 | for ii in range(n_nz):
54 | i = indices[ii]
55 | y_pred[i] -= update * data[ii]
56 |
57 | return sum_viol
58 |
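
Each coordinate update above is a regularized Newton-style step: the gradient
of the objective in w_j divided by an upper bound on its curvature,
mu * ||X[:, j]||**2 + alpha. A dense NumPy sketch of one epoch for the squared
loss (where mu = 1 and dloss(p, y) = p - y), under those assumptions only:

import numpy as np

def cd_linear_epoch_dense(w, X, y, y_pred, alpha):
    # squared loss: dloss(p, y) = p - y, curvature constant mu = 1
    sum_viol = 0.0
    for j in range(X.shape[1]):
        grad = alpha * w[j] + np.dot(X[:, j], y_pred - y)
        inv_step_size = np.dot(X[:, j], X[:, j]) + alpha
        update = grad / inv_step_size
        w[j] -= update
        y_pred -= update * X[:, j]   # keep predictions in sync
        sum_viol += abs(update)
    return sum_viol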
--------------------------------------------------------------------------------
/polylearn/factorization_machine.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | # Author: Vlad Niculae
4 | # License: Simplified BSD
5 |
6 | import warnings
7 | from abc import ABCMeta, abstractmethod
8 |
9 | import numpy as np
10 | from sklearn.preprocessing import add_dummy_feature
11 | from sklearn.utils import check_random_state
12 | from sklearn.utils.validation import check_array
13 | from sklearn.utils.extmath import safe_sparse_dot, row_norms
14 | import six
15 |
16 | try:
17 | from sklearn.exceptions import NotFittedError
18 | except ImportError:
19 | class NotFittedError(ValueError, AttributeError):
20 | pass
21 |
22 | from lightning.impl.dataset_fast import get_dataset
23 |
24 | from .base import _BasePoly, _PolyClassifierMixin, _PolyRegressorMixin
25 | from .kernels import _poly_predict
26 | from .cd_direct_fast import _cd_direct_ho
27 |
28 |
29 | class _BaseFactorizationMachine(six.with_metaclass(ABCMeta, _BasePoly)):
30 |
31 | @abstractmethod
32 | def __init__(self, degree=2, loss='squared', n_components=2, alpha=1,
33 | beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True,
34 | warm_start=False, init_lambdas='ones', max_iter=10000,
35 | verbose=False, random_state=None):
36 | self.degree = degree
37 | self.loss = loss
38 | self.n_components = n_components
39 | self.alpha = alpha
40 | self.beta = beta
41 | self.tol = tol
42 | self.fit_lower = fit_lower
43 | self.fit_linear = fit_linear
44 | self.warm_start = warm_start
45 | self.init_lambdas = init_lambdas
46 | self.max_iter = max_iter
47 | self.verbose = verbose
48 | self.random_state = random_state
49 |
50 | def _augment(self, X):
51 | # for factorization machines, we add a dummy column for each order.
52 |
53 | if self.fit_lower == 'augment':
54 | k = 2 if self.fit_linear else 1
55 | for _ in range(self.degree - k):
56 | X = add_dummy_feature(X, value=1)
57 | return X
58 |
59 | def fit(self, X, y):
60 | """Fit factorization machine to training data.
61 |
62 | Parameters
63 | ----------
64 | X : array-like or sparse, shape = [n_samples, n_features]
65 | Training vectors, where n_samples is the number of samples
66 | and n_features is the number of features.
67 |
68 | y : array-like, shape = [n_samples]
69 | Target values.
70 |
71 | Returns
72 | -------
73 | self : Estimator
74 | Returns self.
75 | """
76 | if self.degree > 3:
77 | raise ValueError("FMs with degree >3 not yet supported.")
78 |
79 | X, y = self._check_X_y(X, y)
80 | X = self._augment(X)
81 | n_features = X.shape[1] # augmented
82 | X_col_norms = row_norms(X.T, squared=True)
83 | dataset = get_dataset(X, order="fortran")
84 | rng = check_random_state(self.random_state)
85 | loss_obj = self._get_loss(self.loss)
86 |
87 | if not (self.warm_start and hasattr(self, 'w_')):
88 | self.w_ = np.zeros(n_features, dtype=np.double)
89 |
90 | if self.fit_lower == 'explicit':
91 | n_orders = self.degree - 1
92 | else:
93 | n_orders = 1
94 |
95 | if not (self.warm_start and hasattr(self, 'P_')):
96 | self.P_ = 0.01 * rng.randn(n_orders, self.n_components, n_features)
97 |
98 | if not (self.warm_start and hasattr(self, 'lams_')):
99 | if self.init_lambdas == 'ones':
100 | self.lams_ = np.ones(self.n_components)
101 | elif self.init_lambdas == 'random_signs':
102 | self.lams_ = np.sign(rng.randn(self.n_components))
103 | else:
104 | raise ValueError("Lambdas must be initialized as ones "
105 | "(init_lambdas='ones') or as random "
106 | "+/- 1 (init_lambdas='random_signs').")
107 |
108 | y_pred = self._get_output(X)
109 |
110 | converged, self.n_iter_ = _cd_direct_ho(
111 | self.P_, self.w_, dataset, X_col_norms, y, y_pred,
112 | self.lams_, self.degree, self.alpha, self.beta, self.fit_linear,
113 | self.fit_lower == 'explicit', loss_obj, self.max_iter,
114 | self.tol, self.verbose)
115 | if not converged:
116 | warnings.warn("Objective did not converge. Increase max_iter.")
117 |
118 | return self
119 |
120 | def _get_output(self, X):
121 | y_pred = _poly_predict(X, self.P_[0, :, :], self.lams_, kernel='anova',
122 | degree=self.degree)
123 |
124 | if self.fit_linear:
125 | y_pred += safe_sparse_dot(X, self.w_)
126 |
127 | if self.fit_lower == 'explicit' and self.degree == 3:
128 | # degree cannot currently be > 3
129 | y_pred += _poly_predict(X, self.P_[1, :, :], self.lams_,
130 | kernel='anova', degree=2)
131 |
132 | return y_pred
133 |
134 | def _predict(self, X):
135 | if not hasattr(self, "P_"):
136 | raise NotFittedError("Estimator not fitted.")
137 | X = check_array(X, accept_sparse='csc', dtype=np.double)
138 | X = self._augment(X)
139 | return self._get_output(X)
140 |
141 |
142 | class FactorizationMachineRegressor(_BaseFactorizationMachine,
143 | _PolyRegressorMixin):
144 | """Factorization machine for regression (with squared loss).
145 |
146 | Parameters
147 | ----------
148 |
149 | degree : int >= 2, default: 2
150 | Degree of the polynomial. Corresponds to the order of feature
151 | interactions captured by the model. Currently only supports
152 | degrees up to 3.
153 |
154 | n_components : int, default: 2
155 | Number of basis vectors to learn, a.k.a. the dimension of the
156 | low-rank parametrization.
157 |
158 | alpha : float, default: 1
159 | Regularization amount for linear term (if ``fit_linear=True``).
160 |
161 | beta : float, default: 1
162 | Regularization amount for higher-order weights.
163 |
164 | tol : float, default: 1e-6
165 | Tolerance for the stopping condition.
166 |
167 | fit_lower : {'explicit'|'augment'|None}, default: 'explicit'
168 | Whether and how to fit lower-order, non-homogeneous terms.
169 |
170 | - 'explicit': fits a separate P directly for each lower order.
171 |
172 | - 'augment': adds the required number of dummy columns (columns
173 | that are 1 everywhere) in order to capture lower-order terms.
174 | Adds ``degree - 2`` columns if ``fit_linear`` is true, or
175 | ``degree - 1`` columns otherwise, to account for the linear term.
176 |
177 | - None: only learns weights for the degree given. If ``degree == 3``,
178 | for example, the model will only have weights for third-order
179 | feature interactions.
180 |
181 | fit_linear : {True|False}, default: True
182 | Whether to fit an explicit linear term to the model, using
183 | coordinate descent. If False, the model can still capture linear
184 | effects if ``fit_lower == 'augment'``.
185 |
186 | warm_start : boolean, optional, default: False
187 | Whether to use the existing solution, if available. Useful for
188 | computing regularization paths or pre-initializing the model.
189 |
190 | init_lambdas : {'ones'|'random_signs'}, default: 'ones'
191 | How to initialize the predictive weights of each learned basis. The
192 | lambdas are not trained; using alternate signs can theoretically
193 | improve performance if the kernel degree is even. The default value
194 | of 'ones' matches the original formulation of factorization machines
195 | (Rendle, 2010).
196 |
197 | To use custom values for the lambdas, ``warm_start`` may be used.
198 |
199 | max_iter : int, optional, default: 10000
200 | Maximum number of passes over the dataset to perform.
201 |
202 | verbose : boolean, optional, default: False
203 | Whether to print debugging information.
204 |
205 | random_state : int seed, RandomState instance, or None (default)
206 | The seed of the pseudo random number generator to use for
207 | initializing the parameters.
208 |
209 | Attributes
210 | ----------
211 |
212 | self.P_ : array, shape [n_orders, n_components, n_features]
213 | The learned basis functions.
214 |
215 | ``self.P_[0, :, :]`` is always available, and corresponds to
216 | interactions of order ``self.degree``.
217 |
218 | ``self.P_[i, :, :]`` for i > 0 corresponds to interactions of order
219 | ``self.degree - i``, available only if ``self.fit_lower='explicit'``.
220 |
221 | self.w_ : array, shape [n_features]
222 | The learned linear model, completing the FM.
223 |
224 | Only present if ``self.fit_linear`` is true.
225 |
226 | self.lams_ : array, shape [n_components]
227 | The predictive weights.
228 |
229 | References
230 | ----------
231 | Polynomial Networks and Factorization Machines:
232 | New Insights and Efficient Training Algorithms.
233 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda.
234 | In: Proceedings of ICML 2016.
235 | http://mblondel.org/publications/mblondel-icml2016.pdf
236 |
237 |     Factorization machines.
238 |     Steffen Rendle.
239 |     In: Proceedings of IEEE ICDM 2010.
240 | """
241 | def __init__(self, degree=2, n_components=2, alpha=1, beta=1, tol=1e-6,
242 | fit_lower='explicit', fit_linear=True, warm_start=False,
243 | init_lambdas='ones', max_iter=10000, verbose=False,
244 | random_state=None):
245 |
246 | super(FactorizationMachineRegressor, self).__init__(
247 | degree, 'squared', n_components, alpha, beta, tol, fit_lower,
248 | fit_linear, warm_start, init_lambdas, max_iter, verbose,
249 | random_state)
250 |
251 |
252 | class FactorizationMachineClassifier(_BaseFactorizationMachine,
253 | _PolyClassifierMixin):
254 | """Factorization machine for classification.
255 |
256 | Parameters
257 | ----------
258 |
259 | degree : int >= 2, default: 2
260 | Degree of the polynomial. Corresponds to the order of feature
261 | interactions captured by the model. Currently only supports
262 | degrees up to 3.
263 |
264 | loss : {'logistic'|'squared_hinge'|'squared'}, default: 'squared_hinge'
265 | Which loss function to use.
266 |
267 | - logistic: L(y, p) = log(1 + exp(-yp))
268 |
269 | - squared hinge: L(y, p) = max(1 - yp, 0)²
270 |
271 | - squared: L(y, p) = 0.5 * (y - p)²
272 |
273 | n_components : int, default: 2
274 | Number of basis vectors to learn, a.k.a. the dimension of the
275 | low-rank parametrization.
276 |
277 | alpha : float, default: 1
278 | Regularization amount for linear term (if ``fit_linear=True``).
279 |
280 | beta : float, default: 1
281 | Regularization amount for higher-order weights.
282 |
283 | tol : float, default: 1e-6
284 | Tolerance for the stopping condition.
285 |
286 | fit_lower : {'explicit'|'augment'|None}, default: 'explicit'
287 | Whether and how to fit lower-order, non-homogeneous terms.
288 |
289 | - 'explicit': fits a separate P directly for each lower order.
290 |
291 | - 'augment': adds the required number of dummy columns (columns
292 | that are 1 everywhere) in order to capture lower-order terms.
293 | Adds ``degree - 2`` columns if ``fit_linear`` is true, or
294 | ``degree - 1`` columns otherwise, to account for the linear term.
295 |
296 | - None: only learns weights for the degree given. If ``degree == 3``,
297 | for example, the model will only have weights for third-order
298 | feature interactions.
299 |
300 | fit_linear : {True|False}, default: True
301 | Whether to fit an explicit linear term to the model, using
302 | coordinate descent. If False, the model can still capture linear
303 | effects if ``fit_lower == 'augment'``.
304 |
305 | warm_start : boolean, optional, default: False
306 | Whether to use the existing solution, if available. Useful for
307 | computing regularization paths or pre-initializing the model.
308 |
309 | init_lambdas : {'ones'|'random_signs'}, default: 'ones'
310 | How to initialize the predictive weights of each learned basis. The
311 | lambdas are not trained; using alternate signs can theoretically
312 | improve performance if the kernel degree is even. The default value
313 | of 'ones' matches the original formulation of factorization machines
314 | (Rendle, 2010).
315 |
316 | To use custom values for the lambdas, ``warm_start`` may be used.
317 |
318 | max_iter : int, optional, default: 10000
319 | Maximum number of passes over the dataset to perform.
320 |
321 | verbose : boolean, optional, default: False
322 | Whether to print debugging information.
323 |
324 | random_state : int seed, RandomState instance, or None (default)
325 | The seed of the pseudo random number generator to use for
326 | initializing the parameters.
327 |
328 | Attributes
329 | ----------
330 |
331 | self.P_ : array, shape [n_orders, n_components, n_features]
332 | The learned basis functions.
333 |
334 | ``self.P_[0, :, :]`` is always available, and corresponds to
335 | interactions of order ``self.degree``.
336 |
337 | ``self.P_[i, :, :]`` for i > 0 corresponds to interactions of order
338 | ``self.degree - i``, available only if ``self.fit_lower='explicit'``.
339 |
340 | self.w_ : array, shape [n_features]
341 | The learned linear model, completing the FM.
342 |
343 | Only present if ``self.fit_linear`` is true.
344 |
345 | self.lams_ : array, shape [n_components]
346 | The predictive weights.
347 |
348 | References
349 | ----------
350 | Polynomial Networks and Factorization Machines:
351 | New Insights and Efficient Training Algorithms.
352 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda.
353 | In: Proceedings of ICML 2016.
354 | http://mblondel.org/publications/mblondel-icml2016.pdf
355 |
356 |     Factorization machines.
357 |     Steffen Rendle.
358 |     In: Proceedings of IEEE ICDM 2010.
359 | """
360 |
361 | def __init__(self, degree=2, loss='squared_hinge', n_components=2, alpha=1,
362 | beta=1, tol=1e-6, fit_lower='explicit', fit_linear=True,
363 | warm_start=False, init_lambdas='ones', max_iter=10000,
364 | verbose=False, random_state=None):
365 |
366 | super(FactorizationMachineClassifier, self).__init__(
367 | degree, loss, n_components, alpha, beta, tol, fit_lower,
368 | fit_linear, warm_start, init_lambdas, max_iter, verbose,
369 | random_state)
370 |
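
A small end-to-end sketch of the regressor on synthetic data, including the
warm_start pattern that the docstring recommends for regularization paths
(data and hyperparameter values are illustrative only):

import numpy as np
from polylearn import FactorizationMachineRegressor

rng = np.random.RandomState(0)
X = rng.randn(200, 10)
y = X[:, 0] * X[:, 1] + 0.1 * rng.randn(200)   # a pure interaction target

fm = FactorizationMachineRegressor(degree=2, n_components=2,
                                   warm_start=True, random_state=0)
for beta in [10.0, 1.0, 0.1]:   # strong -> weak regularization
    fm.set_params(beta=beta)
    fm.fit(X, y)                # warm_start reuses P_ and w_ between fits
    print(beta, fm.score(X, y))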
--------------------------------------------------------------------------------
/polylearn/kernels.py:
--------------------------------------------------------------------------------
1 | # Author: Vlad Niculae
2 | # License: Simplified BSD
3 |
4 | from sklearn.metrics.pairwise import polynomial_kernel
5 | from sklearn.utils.extmath import safe_sparse_dot
6 | from scipy.sparse import issparse
7 |
8 | import numpy as np
9 |
10 |
11 | def safe_power(X, degree=2):
12 | """Element-wise power supporting both sparse and dense data.
13 |
14 | Parameters
15 | ----------
16 | X : ndarray or sparse
17 | The array whose entries to raise to the power.
18 |
19 | degree : int, default: 2
20 | The power to which to raise the elements.
21 |
22 | Returns
23 | -------
24 |
25 | X_ret : ndarray or sparse
26 | Same shape as X, but (x_ret)_ij = (x)_ij ^ degree
27 | """
28 | if issparse(X):
29 | if hasattr(X, 'power'):
30 | return X.power(degree)
31 | else:
32 | # old scipy
33 | X = X.copy()
34 | X.data **= degree
35 | return X
36 | else:
37 | return X ** degree
38 |
39 |
40 | def _D(X, P, degree=2):
41 | """The "replacement" part of the homogeneous polynomial kernel.
42 |
43 | D[i, j] = sum_k [(X_ik * P_jk) ** degree]
44 | """
45 | return safe_sparse_dot(safe_power(X, degree), P.T ** degree)
46 |
47 |
48 | def homogeneous_kernel(X, P, degree=2):
49 | """Convenience alias for homogeneous polynomial kernel between X and P::
50 |
51 |         K_P(x, p) = <x, p> ^ degree
52 |
53 | Parameters
54 | ----------
55 | X : ndarray of shape (n_samples_1, n_features)
56 |
57 |     P : ndarray of shape (n_samples_2, n_features)
58 |
59 | degree : int, default 2
60 |
61 | Returns
62 | -------
63 | Gram matrix : array of shape (n_samples_1, n_samples_2)
64 | """
65 | return polynomial_kernel(X, P, degree=degree, gamma=1, coef0=0)
66 |
67 |
68 | def anova_kernel(X, P, degree=2):
69 | """ANOVA kernel between X and P::
70 |
71 | K_A(x, p) = sum_i1>i2>...>id x_i1 p_i1 x_i2 p_i2 ... x_id p_id
72 |
73 | See John Shawe-Taylor and Nello Cristianini,
74 | Kernel Methods for Pattern Analysis section 9.2.
75 |
76 | Parameters
77 | ----------
78 | X : ndarray of shape (n_samples_1, n_features)
79 |
80 |     P : ndarray of shape (n_samples_2, n_features)
81 |
82 | degree : int, default 2
83 |
84 | Returns
85 | -------
86 | Gram matrix : array of shape (n_samples_1, n_samples_2)
87 | """
88 | if degree == 2:
89 | K = homogeneous_kernel(X, P, degree=2)
90 | K -= _D(X, P, degree=2)
91 | K /= 2
92 | elif degree == 3:
93 | K = homogeneous_kernel(X, P, degree=3)
94 | K -= 3 * _D(X, P, degree=2) * _D(X, P, degree=1)
95 | K += 2 * _D(X, P, degree=3)
96 | K /= 6
97 | else:
98 | raise NotImplementedError("ANOVA kernel for degree >= 4 not yet "
99 | "implemented efficiently.")
100 | return K
101 |
102 |
103 | def _poly_predict(X, P, lams, kernel, degree=2):
104 | if kernel == "anova":
105 | K = anova_kernel(X, P, degree)
106 | elif kernel == "poly":
107 | K = homogeneous_kernel(X, P, degree)
108 | else:
109 |         raise ValueError(("Unsupported kernel: {}. Use one "
110 |                           "of {{'anova'|'poly'}}").format(kernel))
111 |
112 | return np.dot(K, lams)
113 |
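
The closed forms in `anova_kernel` are Newton's identities for elementary
symmetric polynomials in the products x_k * p_k. A brute-force check for
degree 2 on a single pair of vectors (illustrative only):

import numpy as np
from itertools import combinations
from polylearn.kernels import anova_kernel

rng = np.random.RandomState(0)
x, p = rng.randn(5), rng.randn(5)

brute = sum(x[i] * p[i] * x[j] * p[j]
            for i, j in combinations(range(5), 2))
fast = anova_kernel(x.reshape(1, -1), p.reshape(1, -1), degree=2)[0, 0]
assert np.isclose(brute, fast)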
--------------------------------------------------------------------------------
/polylearn/loss.py:
--------------------------------------------------------------------------------
1 | # Author: Vlad Niculae
2 | # License: Simplified BSD
3 |
4 | from .loss_fast import Squared, SquaredHinge, Logistic
5 |
6 |
7 | REGRESSION_LOSSES = {
8 | 'squared': Squared()
9 | }
10 |
11 | CLASSIFICATION_LOSSES = {
12 | 'squared': Squared(),
13 | 'squared_hinge': SquaredHinge(),
14 | 'logistic': Logistic()
15 | }
16 |
--------------------------------------------------------------------------------
/polylearn/loss_fast.pxd:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 |
3 | cdef class LossFunction:
4 |
5 | cdef double mu
6 | cdef double loss(self, double p, double y)
7 | cdef double dloss(self, double p, double y)
8 |
--------------------------------------------------------------------------------
/polylearn/loss_fast.pyx:
--------------------------------------------------------------------------------
1 | # cython: language_level=3
2 | # cython: cdivision=True
3 |
4 | from libc.math cimport log, exp
5 |
6 | cdef class LossFunction:
7 |
8 | cdef double loss(self, double p, double y):
9 | raise NotImplementedError()
10 |
11 | cdef double dloss(self, double p, double y):
12 | raise NotImplementedError()
13 |
14 |
15 | cdef class Squared(LossFunction):
16 | """Squared loss: L(p, y) = 0.5 * (y - p)²"""
17 |
18 | def __init__(self):
19 | self.mu = 1
20 |
21 | cdef double loss(self, double p, double y):
22 | return 0.5 * (p - y) ** 2
23 |
24 | cdef double dloss(self, double p, double y):
25 | return p - y
26 |
27 |
28 | cdef class Logistic(LossFunction):
29 | """Logistic loss: L(p, y) = log(1 + exp(-yp))"""
30 |
31 | def __init__(self):
32 | self.mu = 0.25
33 |
34 | cdef double loss(self, double p, double y):
35 | cdef double z = p * y
36 | # log(1 + exp(-z))
37 | if z > 18:
38 | return exp(-z)
39 | if z < -18:
40 | return -z
41 | return log(1.0 + exp(-z))
42 |
43 | cdef double dloss(self, double p, double y):
44 | cdef double z = p * y
45 | # cdef double tau = 1 / (1 + exp(-z))
46 | # return y * (tau - 1)
47 | if z > 18.0:
48 | return -y * exp(-z)
49 | if z < -18.0:
50 | return -y
51 | return -y / (exp(z) + 1.0)
52 |
53 |
54 | cdef class SquaredHinge(LossFunction):
55 | """Squared hinge loss: L(p, y) = max(1 - yp, 0)²"""
56 |
57 | def __init__(self):
58 | self.mu = 2
59 |
60 | cdef double loss(self, double p, double y):
61 | cdef double z = 1 - p * y
62 | if z > 0:
63 | return z * z
64 | return 0.0
65 |
66 | cdef double dloss(self, double p, double y):
67 | cdef double z = 1 - p * y
68 | if z > 0:
69 | return -2 * y * z
70 | return 0.0
71 |
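
The `mu` attribute set by each loss is the curvature bound the coordinate
descent solvers rely on: an upper bound on the second derivative of the loss
in its first argument (1 for squared, 1/4 for logistic, 2 for squared hinge).
A quick NumPy sketch verifying the logistic bound numerically (illustrative):

import numpy as np

p = np.linspace(-10, 10, 10001)
y = 1.0
dloss = -y / (np.exp(p * y) + 1.0)      # same formula as Logistic.dloss
curvature = np.gradient(dloss, p)       # numerical second derivative of the loss
print(curvature.max() <= 0.25 + 1e-6)   # True: mu = 0.25 is an upper bound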
--------------------------------------------------------------------------------
/polylearn/polynomial_network.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | """Polynomial networks for regression and classification."""
4 |
5 | # Author: Vlad Niculae
6 | # License: Simplified BSD
7 |
8 | import warnings
9 | from abc import ABCMeta, abstractmethod
10 |
11 | import numpy as np
12 | from sklearn.preprocessing import add_dummy_feature
13 | from sklearn.utils import check_random_state
14 | from sklearn.utils.validation import check_array
15 | import six
16 |
17 | try:
18 | from sklearn.exceptions import NotFittedError
19 | except ImportError:
20 | class NotFittedError(ValueError, AttributeError):
21 | pass
22 |
23 | from lightning.impl.dataset_fast import get_dataset
24 |
25 | from .base import _BasePoly, _PolyClassifierMixin, _PolyRegressorMixin
26 | from .cd_lifted_fast import _cd_lifted, _fast_lifted_predict
27 |
28 |
29 | def _lifted_predict(U, dataset):
30 | out = np.zeros(dataset.get_n_samples(), dtype=np.double)
31 | _fast_lifted_predict(U, dataset, out)
32 | return out
33 |
34 |
35 | class _BasePolynomialNetwork(six.with_metaclass(ABCMeta, _BasePoly)):
36 | @abstractmethod
37 | def __init__(self, degree=2, loss='squared', n_components=5, beta=1,
38 | tol=1e-6, fit_lower='augment', warm_start=False,
39 | max_iter=10000, verbose=False, random_state=None):
40 | self.degree = degree
41 | self.loss = loss
42 | self.n_components = n_components
43 | self.beta = beta
44 | self.tol = tol
45 | self.fit_lower = fit_lower
46 | self.warm_start = warm_start
47 | self.max_iter = max_iter
48 | self.verbose = verbose
49 | self.random_state = random_state
50 |
51 | def _augment(self, X):
52 | # for polynomial nets, we add a single dummy column
53 | if self.fit_lower == 'augment':
54 | X = add_dummy_feature(X, value=1)
55 | return X
56 |
57 | def fit(self, X, y):
58 | """Fit polynomial network to training data.
59 |
60 | Parameters
61 | ----------
62 | X : array-like or sparse, shape = [n_samples, n_features]
63 | Training vectors, where n_samples is the number of samples
64 | and n_features is the number of features.
65 |
66 | y : array-like, shape = [n_samples]
67 | Target values.
68 |
69 | Returns
70 | -------
71 | self : Estimator
72 | Returns self.
73 | """
74 | if self.fit_lower == 'explicit':
75 |             raise NotImplementedError('Explicit fitting of lower orders '
76 |                                       'not yet implemented for polynomial '
77 |                                       'network models.')
78 |
79 | X, y = self._check_X_y(X, y)
80 | X = self._augment(X)
81 | n_features = X.shape[1] # augmented
82 | dataset = get_dataset(X, order="fortran")
83 | rng = check_random_state(self.random_state)
84 | loss_obj = self._get_loss(self.loss)
85 |
86 | if not (self.warm_start and hasattr(self, 'U_')):
87 | self.U_ = 0.01 * rng.randn(self.degree, self.n_components,
88 | n_features)
89 |
90 | y_pred = _lifted_predict(self.U_, dataset)
91 |
92 | converged, self.n_iter_ = _cd_lifted(
93 | self.U_, dataset, y, y_pred, self.beta, loss_obj, self.max_iter,
94 | self.tol, self.verbose)
95 |
96 | if not converged:
97 | warnings.warn("Objective did not converge. Increase max_iter.")
98 |
99 | return self
100 |
101 | def _predict(self, X):
102 | if not hasattr(self, "U_"):
103 | raise NotFittedError("Estimator not fitted.")
104 |
105 | X = check_array(X, accept_sparse='csc', dtype=np.double)
106 | X = self._augment(X)
107 | X = get_dataset(X, order='fortran')
108 | return _lifted_predict(self.U_, X)
109 |
110 |
111 | class PolynomialNetworkRegressor(_BasePolynomialNetwork, _PolyRegressorMixin):
112 | """Polynomial network for regression (with squared loss).
113 |
114 | Parameters
115 | ----------
116 |
117 | degree : int >= 2, default: 2
118 | Degree of the polynomial. Corresponds to the order of feature
119 | interactions captured by the model. Currently only supports
120 | degrees up to 3.
121 |
122 | n_components : int, default: 2
123 | Dimension of the lifted tensor.
124 |
125 | beta : float, default: 1
126 | Regularization amount for higher-order weights.
127 |
128 | tol : float, default: 1e-6
129 | Tolerance for the stopping condition.
130 |
131 | fit_lower : {'augment'|None}, default: 'augment'
132 | Whether and how to fit lower-order, non-homogeneous terms.
133 |
134 | - 'augment': adds a dummy column (1 everywhere) in order to capture
135 | lower-order terms (including linear terms).
136 |
137 | - None: only learns weights for the degree given.
138 |
139 | warm_start : boolean, optional, default: False
140 | Whether to use the existing solution, if available. Useful for
141 | computing regularization paths or pre-initializing the model.
142 |
143 | max_iter : int, optional, default: 10000
144 | Maximum number of passes over the dataset to perform.
145 |
146 | verbose : boolean, optional, default: False
147 | Whether to print debugging information.
148 |
149 | random_state : int seed, RandomState instance, or None (default)
150 | The seed of the pseudo random number generator to use for
151 | initializing the parameters.
152 |
153 | Attributes
154 | ----------
155 |
156 |     self.U_ : array, shape [degree, n_components, n_features]
157 | The learned weights in the lifted tensor parametrization.
158 |
159 | References
160 | ----------
161 | Polynomial Networks and Factorization Machines:
162 | New Insights and Efficient Training Algorithms.
163 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda.
164 | In: Proceedings of ICML 2016.
165 | http://mblondel.org/publications/mblondel-icml2016.pdf
166 |
167 | On the computational efficiency of training neural networks.
168 | Roi Livni, Shai Shalev-Shwartz, Ohad Shamir.
169 | In: Proceedings of NIPS 2014.
170 | """
171 |
172 | def __init__(self, degree=2, n_components=2, beta=1, tol=1e-6,
173 | fit_lower='augment', warm_start=False,
174 | max_iter=10000, verbose=False, random_state=None):
175 |
176 | super(PolynomialNetworkRegressor, self).__init__(
177 | degree, 'squared', n_components, beta, tol, fit_lower,
178 | warm_start, max_iter, verbose, random_state)
179 |
180 |
181 | class PolynomialNetworkClassifier(_BasePolynomialNetwork,
182 | _PolyClassifierMixin):
183 | """Polynomial network for classification.
184 |
185 | Parameters
186 | ----------
187 |
188 | degree : int >= 2, default: 2
189 | Degree of the polynomial. Corresponds to the order of feature
190 | interactions captured by the model. Currently only supports
191 | degrees up to 3.
192 |
193 | loss : {'logistic'|'squared_hinge'|'squared'}, default: 'squared_hinge'
194 | Which loss function to use.
195 |
196 | - logistic: L(y, p) = log(1 + exp(-yp))
197 |
198 |         - squared_hinge: L(y, p) = max(1 - yp, 0)²
199 |
200 | - squared: L(y, p) = 0.5 * (y - p)²
201 |
202 | n_components : int, default: 2
203 | Dimension of the lifted tensor.
204 |
205 | beta : float, default: 1
206 | Regularization amount for higher-order weights.
207 |
208 | tol : float, default: 1e-6
209 | Tolerance for the stopping condition.
210 |
211 | fit_lower : {'augment'|None}, default: 'augment'
212 | Whether and how to fit lower-order, non-homogeneous terms.
213 |
214 | - 'augment': adds a dummy column (1 everywhere) in order to capture
215 | lower-order terms (including linear terms).
216 |
217 | - None: only learns weights for the degree given.
218 |
219 | warm_start : boolean, optional, default: False
220 | Whether to use the existing solution, if available. Useful for
221 | computing regularization paths or pre-initializing the model.
222 |
223 | max_iter : int, optional, default: 10000
224 | Maximum number of passes over the dataset to perform.
225 |
226 | verbose : boolean, optional, default: False
227 | Whether to print debugging information.
228 |
229 | random_state : int seed, RandomState instance, or None (default)
230 | The seed of the pseudo random number generator to use for
231 | initializing the parameters.
232 |
233 | Attributes
234 | ----------
235 |
236 |     U_ : array, shape [degree, n_components, n_features]
237 | The learned weights in the lifted tensor parametrization.
238 |
239 | References
240 | ----------
241 | Polynomial Networks and Factorization Machines:
242 | New Insights and Efficient Training Algorithms.
243 | Mathieu Blondel, Masakazu Ishihata, Akinori Fujino, Naonori Ueda.
244 | In: Proceedings of ICML 2016.
245 | http://mblondel.org/publications/mblondel-icml2016.pdf
246 |
247 | On the computational efficiency of training neural networks.
248 | Roi Livni, Shai Shalev-Shwartz, Ohad Shamir.
249 | In: Proceedings of NIPS 2014.
250 | """
251 |
252 | def __init__(self, degree=2, loss='squared_hinge', n_components=2, beta=1,
253 | tol=1e-6, fit_lower='augment', warm_start=False,
254 | max_iter=10000, verbose=False, random_state=None):
255 |
256 | super(PolynomialNetworkClassifier, self).__init__(
257 | degree, loss, n_components, beta, tol, fit_lower,
258 | warm_start, max_iter, verbose, random_state)
259 |
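
A minimal usage sketch for the two estimators above (not part of the source
file; it assumes polylearn and its dependencies are installed). Mirroring the
XOR test in test_common.py, a degree-2 polynomial network captures the feature
interaction that defeats any linear model:

    import numpy as np
    from polylearn import PolynomialNetworkClassifier

    # XOR: the label is the product of the two feature signs.
    X = np.array([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.]])
    y = np.array([1, -1, -1, 1])

    clf = PolynomialNetworkClassifier(degree=2, n_components=2,
                                      tol=1e-2, random_state=0)
    clf.fit(X, y)
    print(clf.score(X, y))  # expected to reach 1.0 on this toy problem
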
--------------------------------------------------------------------------------
/polylearn/setup.py:
--------------------------------------------------------------------------------
1 | import os.path
2 |
3 | import numpy
4 |
5 |
6 | def configuration(parent_package='', top_path=None):
7 | from numpy.distutils.misc_util import Configuration
8 |
9 | config = Configuration('polylearn', parent_package, top_path)
10 |
11 | config.add_extension('loss_fast', sources=['loss_fast.cpp'],
12 | include_dirs=[numpy.get_include()])
13 |
14 | config.add_extension('cd_direct_fast', sources=['cd_direct_fast.cpp'],
15 | include_dirs=[numpy.get_include()])
16 |
17 | config.add_extension('cd_linear_fast', sources=['cd_linear_fast.cpp'],
18 | include_dirs=[numpy.get_include()])
19 |
20 | config.add_extension('cd_lifted_fast', sources=['cd_lifted_fast.cpp'],
21 | include_dirs=[numpy.get_include()])
22 |
23 | config.add_subpackage('tests')
24 |
25 | return config
26 |
27 |
28 | if __name__ == '__main__':
29 | from numpy.distutils.core import setup
30 | setup(**configuration(top_path='').todict())
31 |
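
A note on building (not part of the file): the four extensions above are
numpy.distutils extensions compiled from the listed C++ sources, so a typical
development build is `python setup.py build_ext --inplace`, run from the
repository root with NumPy and a C++ compiler available.
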
--------------------------------------------------------------------------------
/polylearn/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scikit-learn-contrib/polylearn/4dd9d4b8aca029628a4c934829526b8552db2e1b/polylearn/tests/__init__.py
--------------------------------------------------------------------------------
/polylearn/tests/test_cd_linear.py:
--------------------------------------------------------------------------------
1 | from nose.tools import assert_less_equal, assert_greater_equal
2 | from numpy.testing import assert_array_almost_equal
3 |
4 | import numpy as np
5 | from sklearn.utils.validation import assert_all_finite
6 | from polylearn.cd_linear_fast import _cd_linear_epoch
7 | from polylearn.loss_fast import Squared, SquaredHinge, Logistic
8 | from lightning.impl.dataset_fast import get_dataset
9 |
10 | rng = np.random.RandomState(0)
11 | X = rng.randn(50, 10)
12 | w_true = rng.randn(10)
13 |
14 | y = np.dot(X, w_true)
15 | X_ds = get_dataset(X, order='fortran')
16 | X_col_norm_sq = (X ** 2).sum(axis=0)
17 |
18 | n_iter = 100
19 |
20 |
21 | def _fit_linear(X, y, alpha, n_iter, loss, callback=None):
22 | n_samples, n_features = X.shape
23 | X_col_norm_sq = (X ** 2).sum(axis=0)
24 | X_ds = get_dataset(X, order='fortran')
25 | w_init = np.zeros(n_features)
26 | y_pred = np.zeros(n_samples)
27 |
28 | for _ in range(n_iter):
29 | viol = _cd_linear_epoch(w_init, X_ds, y, y_pred, X_col_norm_sq,
30 | alpha, loss)
31 | if callback is not None:
32 | callback(w_init, viol)
33 | return w_init
34 |
35 |
36 | class Callback(object):
37 | def __init__(self, X, y, alpha):
38 | self.X = X
39 | self.y = y
40 | self.alpha = alpha
41 |
42 | self.losses_ = []
43 |
44 | def __call__(self, w, viol):
45 | y_pred = np.dot(self.X, w)
46 | lv = np.mean((y_pred - self.y) ** 2)
47 | lv += 2 * self.alpha * np.sum(w ** 2)
48 | self.losses_.append(lv)
49 |
50 |
51 | def test_cd_linear_fit():
52 | loss = Squared()
53 | alpha = 1e-6
54 | cb = Callback(X, y, alpha)
55 | w = _fit_linear(X, y, alpha, n_iter, loss, cb)
56 |
57 | assert_array_almost_equal(w_true, w)
58 | assert_less_equal(cb.losses_[1], cb.losses_[0])
59 | assert_less_equal(cb.losses_[-1], cb.losses_[0])
60 |
61 |
62 | def check_cd_linear_clf(loss):
63 | alpha = 1e-3
64 | y_bin = np.sign(y)
65 |
66 | w = _fit_linear(X, y_bin, alpha, n_iter, loss)
67 | y_pred = np.dot(X, w)
68 | accuracy = np.mean(np.sign(y_pred) == y_bin)
69 |
70 | assert_greater_equal(accuracy, 0.97,
71 | msg="classification loss {}".format(loss))
72 |
73 |
74 | def test_cd_linear_clf():
75 | for loss in (Squared(), SquaredHinge(), Logistic()):
76 | yield check_cd_linear_clf, loss
77 |
78 |
79 | def test_cd_linear_offset():
80 | loss = Squared()
81 | alpha = 1e-3
82 | w_a = np.zeros_like(w_true)
83 | w_b = np.zeros_like(w_true)
84 |
 85 |     n_samples = X.shape[0]
 86 |     y_pred_a = np.zeros(n_samples)
 87 |     y_pred_b = np.zeros(n_samples)
 88 |     y_offset = np.arange(n_samples).astype(np.double)
89 |
90 | # one epoch with offset
91 | _cd_linear_epoch(w_a, X_ds, y, y_pred_a + y_offset, X_col_norm_sq, alpha,
92 | loss)
93 |
94 | # one epoch with shifted target
95 | _cd_linear_epoch(w_b, X_ds, y - y_offset, y_pred_b, X_col_norm_sq, alpha,
96 | loss)
97 |
98 | assert_array_almost_equal(w_a, w_b)
99 |
100 |
101 | def test_cd_linear_trivial():
102 | # trivial example that failed due to gh#4
103 | loss = Squared()
104 | alpha = 1e-5
105 | n_features = 100
106 | x = np.zeros((1, n_features))
107 | x[0, 1] = 1
108 | y = np.ones(1)
109 | cb = Callback(x, y, alpha)
110 | w = _fit_linear(x, y, alpha, n_iter=20, loss=loss, callback=cb)
111 |
112 | assert_all_finite(w)
113 | assert_all_finite(cb.losses_)
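
The tests above treat _cd_linear_epoch as a black box: one call sweeps all
coordinates once, updates w and y_pred in place, and returns the summed
absolute step sizes (the "violation"). A pure-NumPy sketch of one squared-loss
epoch, under the assumed objective
0.5 * ||y_pred - y||^2 + 0.5 * alpha * ||w||^2 (the exact scaling inside
cd_linear_fast may differ):

    import numpy as np

    def cd_linear_epoch_numpy(w, X, y, y_pred, alpha):
        # One cyclic coordinate-descent pass; exact minimization along
        # each coordinate of the quadratic objective above.
        sum_viol = 0.
        for j in range(X.shape[1]):
            x_j = X[:, j]
            grad_j = np.dot(x_j, y_pred - y) + alpha * w[j]
            step = grad_j / (np.dot(x_j, x_j) + alpha)
            w[j] -= step
            y_pred -= step * x_j  # keep cached predictions in sync
            sum_viol += abs(step)
        return sum_viol

Only the residual y_pred - y enters the update, which is why
test_cd_linear_offset can add a constant to the predictions or subtract it
from the targets and obtain identical weights.
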
--------------------------------------------------------------------------------
/polylearn/tests/test_common.py:
--------------------------------------------------------------------------------
1 | from nose import SkipTest
2 | from nose.tools import assert_raises, assert_greater
3 | from nose.tools import assert_equal
4 | import numpy as np
5 | from numpy.testing import assert_array_almost_equal
6 | from scipy.sparse import csc_matrix
7 |
8 | from polylearn import (PolynomialNetworkClassifier, PolynomialNetworkRegressor,
9 | FactorizationMachineClassifier,
10 | FactorizationMachineRegressor)
11 |
12 |
13 | def test_check_estimator():
14 | # TODO: classifiers that provide predict_proba but are not multiclass fail
15 | # No trivial way to use OneVsRestClassifier even if it actually works.
16 |
17 | try:
18 | from sklearn.utils.estimator_checks import check_estimator
19 | except ImportError:
20 | raise SkipTest('Common scikit-learn tests not available. '
21 | 'You must be running an older version of scikit-learn.')
22 | yield check_estimator, PolynomialNetworkRegressor
23 | # FM Regressor fails because 5 iter is not enough :(
24 | # yield check_estimator, FactorizationMachineRegressor
25 |
26 |
27 | X = np.array([[-10, -10], [-10, 10], [10, -10], [10, 10]])
28 | y = np.array(['true', 'false', 'false', 'true'])
29 |
30 |
31 | def check_classify_xor(Clf):
32 |     """Tests that the classifier can solve XOR"""
33 | clf = Clf(tol=1e-2, fit_lower=None, random_state=0)
34 |
35 | # temporary workaround until fit_linear is implemented
36 | try:
37 | clf.set_params(fit_linear=False)
38 | except ValueError:
39 | pass
40 |
41 | assert_equal(clf.fit(X, y).score(X, y), 1.0)
42 |
43 |
44 | def test_classify_xor():
45 | yield check_classify_xor, PolynomialNetworkClassifier
46 | yield check_classify_xor, FactorizationMachineClassifier
47 |
48 |
49 | def check_predict_proba(Clf):
50 | clf = Clf(loss='logistic', tol=1e-2, random_state=0).fit(X, y)
51 | y_proba = clf.predict_proba(X)
52 | assert_greater(y_proba[0], y_proba[1])
53 | assert_greater(y_proba[3], y_proba[2])
54 |
55 |
56 | def test_predict_proba():
57 | yield check_predict_proba, FactorizationMachineClassifier
58 | yield check_predict_proba, PolynomialNetworkClassifier
59 |
60 |
61 | def check_predict_proba_raises(Clf):
62 | """Test that predict_proba doesn't work with hinge loss"""
63 | pp = Clf(loss='squared_hinge', random_state=0).predict_proba
64 | assert_raises(ValueError, pp, X)
65 |
66 |
67 | def test_predict_proba_raises():
68 | yield check_predict_proba_raises, FactorizationMachineClassifier
69 | yield check_predict_proba_raises, PolynomialNetworkClassifier
70 |
71 |
72 | def check_loss_raises(Clf):
73 | """Test error on unsupported loss"""
74 | clf = Clf(loss='hinge', random_state=0)
75 | assert_raises(ValueError, clf.fit, X, y)
76 |
77 |
78 | def test_loss_raises():
79 | yield check_loss_raises, FactorizationMachineClassifier
80 | yield check_loss_raises, PolynomialNetworkClassifier
81 |
82 |
83 | def check_clf_multiclass_error(Clf):
84 | """Test that classifier raises TypeError on multiclass/multilabel y"""
85 | y_ = np.column_stack([y, y])
86 | clf = Clf(random_state=0)
87 | assert_raises(TypeError, clf.fit, X, y_)
88 |
89 |
90 | def test_clf_multiclass_error():
91 | yield check_clf_multiclass_error, FactorizationMachineClassifier
92 | yield check_clf_multiclass_error, PolynomialNetworkClassifier
93 |
94 |
95 | def check_clf_float_error(Clf):
96 |     """Test that classifier raises TypeError on continuous (float) y"""
97 | y_ = [0.1, 0.2, 0.3, 0.4]
98 | clf = Clf(random_state=0)
99 | assert_raises(TypeError, clf.fit, X, y_)
100 |
101 |
102 | def test_clf_float_error():
103 | yield check_clf_float_error, FactorizationMachineClassifier
104 | yield check_clf_float_error, PolynomialNetworkClassifier
105 |
106 |
107 | def check_not_fitted(Est):
108 | est = Est()
109 | assert_raises(ValueError, est.predict, X)
110 |
111 |
112 | def test_not_fitted():
113 | yield check_not_fitted, FactorizationMachineClassifier
114 | yield check_not_fitted, PolynomialNetworkClassifier
115 | yield check_not_fitted, FactorizationMachineRegressor
116 | yield check_not_fitted, PolynomialNetworkRegressor
117 |
118 |
119 | def test_augment():
120 |     # The following linearly separable dataset cannot be modeled with just an FM
121 | X_evil = np.array([[-1, -1], [1, 1]])
122 | y_evil = np.array([-1, 1])
123 | clf = FactorizationMachineClassifier(fit_linear=False, fit_lower=None,
124 | random_state=0)
125 | clf.fit(X_evil, y_evil)
126 | assert_equal(0.5, clf.score(X_evil, y_evil))
127 |
128 | # However, by adding a dummy feature (a column of all ones), the linear
129 | # effect can be captured.
130 | clf = FactorizationMachineClassifier(fit_linear=False, fit_lower='augment',
131 | random_state=0)
132 | clf.fit(X_evil, y_evil)
133 | assert_equal(1.0, clf.score(X_evil, y_evil))
134 |
135 |
136 | def check_sparse(Clf):
137 | X_sp = csc_matrix(X)
138 | # simple y that works for both clf and regression
139 | y_simple = [0, 1, 0, 1]
140 | clf = Clf(tol=1e-2, random_state=0)
141 | assert_array_almost_equal(clf.fit(X, y_simple).predict(X),
142 | clf.fit(X_sp, y_simple).predict(X_sp))
143 |
144 |
145 | def test_sparse():
146 | yield check_sparse, FactorizationMachineClassifier
147 | yield check_sparse, PolynomialNetworkClassifier
148 | yield check_sparse, FactorizationMachineRegressor
149 | yield check_sparse, PolynomialNetworkRegressor
150 |
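
For reference, the 'augment' option exercised by test_augment simply prepends
a constant column, which is exactly what the estimators' _augment method does
via scikit-learn's add_dummy_feature:

    import numpy as np
    from sklearn.preprocessing import add_dummy_feature

    X_evil = np.array([[-1., -1.],
                       [ 1.,  1.]])
    print(add_dummy_feature(X_evil, value=1))
    # [[ 1. -1. -1.]
    #  [ 1.  1.  1.]]

With the constant column in place, the purely homogeneous second-order terms
can emulate linear and bias effects, which is why the second classifier above
reaches a perfect score.
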
--------------------------------------------------------------------------------
/polylearn/tests/test_factorization_machine.py:
--------------------------------------------------------------------------------
1 | # Author: Vlad Niculae
2 | # License: Simplified BSD
3 |
4 | import warnings
5 |
6 | from nose.tools import assert_less_equal, assert_equal
7 |
8 | import numpy as np
9 | from numpy.testing import assert_array_almost_equal
10 |
11 | from sklearn.metrics import mean_squared_error
12 | from sklearn.utils.testing import assert_warns_message
13 |
14 | from polylearn.kernels import _poly_predict
15 | from polylearn import FactorizationMachineRegressor
16 | from polylearn import FactorizationMachineClassifier
17 |
18 |
19 | def cd_direct_slow(X, y, lams=None, degree=2, n_components=5, beta=1.,
20 | n_iter=10, tol=1e-5, verbose=False, random_state=None):
21 | from sklearn.utils import check_random_state
22 | from polylearn.kernels import anova_kernel
23 |
24 | n_samples, n_features = X.shape
25 |
26 | rng = check_random_state(random_state)
27 | P = 0.01 * rng.randn(n_components, n_features)
28 | if lams is None:
29 | lams = np.ones(n_components)
30 |
31 | K = anova_kernel(X, P, degree=degree)
32 | pred = np.dot(lams, K.T)
33 |
34 | mu = 1 # squared loss
35 | converged = False
36 |
37 | for i in range(n_iter):
38 | sum_viol = 0
39 | for s in range(n_components):
40 | ps = P[s]
41 | for j in range(n_features):
42 |
43 | # trivial approach:
44 | # multilinearity allows us to isolate the term with ps_j * x_j
45 | x = X[:, j]
46 | notj_mask = np.arange(n_features) != j
47 | X_notj = X[:, notj_mask]
48 | ps_notj = ps[notj_mask]
49 |
50 | if degree == 2:
51 | grad_y = lams[s] * x * np.dot(X_notj, ps_notj)
52 | elif degree == 3:
53 | grad_y = lams[s] * x * anova_kernel(np.atleast_2d(ps_notj),
54 | X_notj, degree=2)
55 | else:
56 | raise NotImplementedError("Degree > 3 not supported.")
57 |
58 | l1_reg = 2 * beta * np.abs(lams[s])
59 | inv_step_size = mu * (grad_y ** 2).sum() + l1_reg
60 |
61 | dloss = pred - y # squared loss
62 | step = (dloss * grad_y).sum() + l1_reg * ps[j]
63 | step /= inv_step_size
64 |
65 | P[s, j] -= step
66 | sum_viol += np.abs(step)
67 |
68 | # stupidly recompute all predictions. No rush yet.
69 | K = anova_kernel(X, P, degree=degree)
70 | pred = np.dot(lams, K.T)
71 |
72 | reg_obj = beta * np.sum((P ** 2).sum(axis=1) * np.abs(lams))
73 |
74 | if verbose:
75 | print("Epoch", i, "violations", sum_viol, "obj",
76 | 0.5 * ((pred - y) ** 2).sum() + reg_obj)
77 |
78 | if sum_viol < tol:
79 | converged = True
80 | break
81 |
82 | if not converged:
83 | warnings.warn("Objective did not converge. Increase max_iter.")
84 |
85 | return P
86 |
87 |
88 | n_components = 5
89 | n_features = 4
90 | n_samples = 20
91 |
92 | rng = np.random.RandomState(1)
93 |
94 | X = rng.randn(n_samples, n_features)
95 | P = rng.randn(n_components, n_features)
96 |
97 | lams = rng.randn(n_components)
98 |
99 |
100 | def test_augment():
101 | """Test that augmenting the data increases the dimension as expected"""
102 | y = _poly_predict(X, P, lams, kernel="anova", degree=3)
103 | fm = FactorizationMachineRegressor(degree=3, fit_lower='augment',
104 | fit_linear=True, tol=0.1)
105 | fm.fit(X, y)
106 | assert_equal(n_features + 1, fm.P_.shape[2],
107 | msg="Augmenting is wrong with explicit linear term.")
108 |
109 | fm.set_params(fit_linear=False)
110 | fm.fit(X, y)
111 | assert_equal(n_features + 2, fm.P_.shape[2],
112 | msg="Augmenting is wrong with augmented linear term.")
113 |
114 |
115 | def check_fit(degree):
116 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
117 |
118 | est = FactorizationMachineRegressor(degree=degree, n_components=5,
119 | fit_linear=False, fit_lower=None,
120 | max_iter=15000, beta=1e-6, tol=1e-3,
121 | random_state=0)
122 | est.fit(X, y)
123 | y_pred = est.predict(X)
124 | err = mean_squared_error(y, y_pred)
125 |
126 | assert_less_equal(
127 | err,
128 | 1e-6,
129 | msg="Error {} too big for degree {}.".format(err, degree))
130 |
131 |
132 | def test_fit():
133 | yield check_fit, 2
134 | yield check_fit, 3
135 |
136 |
137 | def check_improve(degree):
138 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
139 |
140 | est = FactorizationMachineRegressor(degree=degree, n_components=5,
141 | fit_lower=None, fit_linear=False,
142 | beta=0.0001, max_iter=5, tol=0,
143 | random_state=0)
144 | with warnings.catch_warnings():
145 | warnings.simplefilter("ignore")
146 | y_pred_5 = est.fit(X, y).predict(X)
147 | est.set_params(max_iter=10)
148 | y_pred_10 = est.fit(X, y).predict(X)
149 |
150 | assert_less_equal(mean_squared_error(y, y_pred_10),
151 | mean_squared_error(y, y_pred_5),
152 | msg="More iterations do not improve fit.")
153 |
154 |
155 | def test_improve():
156 | yield check_improve, 2
157 | yield check_improve, 3
158 |
159 |
160 | def check_overfit(degree):
161 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
162 | noisy_y += 5. * rng.randn(noisy_y.shape[0])
163 | X_train, X_test = X[:10], X[10:]
164 | y_train, y_test = noisy_y[:10], noisy_y[10:]
165 |
166 | # weak regularization, should overfit
167 | est = FactorizationMachineRegressor(degree=degree, n_components=5,
168 | fit_linear=False, fit_lower=None,
169 | beta=1e-4, tol=0.01, random_state=0)
170 | y_train_pred_weak = est.fit(X_train, y_train).predict(X_train)
171 | y_test_pred_weak = est.predict(X_test)
172 |
173 | est.set_params(beta=10) # high value of beta -> strong regularization
174 | y_train_pred_strong = est.fit(X_train, y_train).predict(X_train)
175 | y_test_pred_strong = est.predict(X_test)
176 |
177 | assert_less_equal(mean_squared_error(y_train, y_train_pred_weak),
178 | mean_squared_error(y_train, y_train_pred_strong),
179 | msg="Training error does not get worse with regul.")
180 |
181 | assert_less_equal(mean_squared_error(y_test, y_test_pred_strong),
182 | mean_squared_error(y_test, y_test_pred_weak),
183 | msg="Test error does not get better with regul.")
184 |
185 |
186 | def test_overfit():
187 | yield check_overfit, 2
188 | yield check_overfit, 3
189 |
190 |
191 | def test_convergence_warning():
192 | y = _poly_predict(X, P, lams, kernel="anova", degree=3)
193 |
194 | est = FactorizationMachineRegressor(degree=3, beta=1e-8, max_iter=1,
195 | random_state=0)
196 | assert_warns_message(UserWarning, "converge", est.fit, X, y)
197 |
198 |
199 | def test_random_starts():
200 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
201 | noisy_y += 5. * rng.randn(noisy_y.shape[0])
202 | X_train, X_test = X[:10], X[10:]
203 | y_train, y_test = noisy_y[:10], noisy_y[10:]
204 |
205 | scores = []
206 | # init_lambdas='ones' is important to reduce variance here
207 | reg = FactorizationMachineRegressor(degree=2, n_components=n_components,
208 | beta=5, fit_lower=None,
209 | fit_linear=False, max_iter=2000,
210 | init_lambdas='ones', tol=0.001)
211 | for k in range(10):
212 | reg.set_params(random_state=k)
213 | y_pred = reg.fit(X_train, y_train).predict(X_test)
214 | scores.append(mean_squared_error(y_test, y_pred))
215 |
216 | assert_less_equal(np.std(scores), 0.001)
217 |
218 |
219 | def check_same_as_slow(degree):
220 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
221 |
222 | reg = FactorizationMachineRegressor(degree=degree, n_components=5,
223 | fit_lower=None, fit_linear=False,
224 | beta=1, warm_start=False, tol=1e-3,
225 | max_iter=5, random_state=0)
226 |
227 | with warnings.catch_warnings():
228 | warnings.simplefilter('ignore')
229 | reg.fit(X, y)
230 |
231 | P_fit_slow = cd_direct_slow(X, y, lams=reg.lams_, degree=degree,
232 | n_components=5, beta=1, n_iter=5,
233 | tol=1e-3, random_state=0)
234 |
235 | assert_array_almost_equal(reg.P_[0, :, :], P_fit_slow, decimal=4)
236 |
237 |
238 | def test_same_as_slow():
239 | yield check_same_as_slow, 2
240 | yield check_same_as_slow, 3
241 |
242 |
243 | def check_classification_losses(loss, degree):
244 | y = np.sign(_poly_predict(X, P, lams, kernel="anova", degree=degree))
245 | clf = FactorizationMachineClassifier(degree=degree, loss=loss, beta=1e-3,
246 | fit_lower=None, fit_linear=False,
247 | tol=1e-3, random_state=0)
248 | clf.fit(X, y)
249 | assert_equal(1.0, clf.score(X, y))
250 |
251 |
252 | def test_classification_losses():
253 | for loss in ('squared_hinge', 'logistic'):
254 | for degree in (2, 3):
255 | yield check_classification_losses, loss, degree
256 |
257 |
258 | def check_warm_start(degree):
259 | y = _poly_predict(X, P, lams, kernel="anova", degree=degree)
260 | # Result should be the same if:
261 | # (a) running 10 iterations
262 | clf_10 = FactorizationMachineRegressor(degree=degree, n_components=5,
263 | fit_lower=None, fit_linear=False,
264 | max_iter=10, warm_start=False,
265 | random_state=0)
266 | with warnings.catch_warnings():
267 | warnings.simplefilter("ignore")
268 | clf_10.fit(X, y)
269 |
270 | # (b) running 5 iterations and 5 more
271 | clf_5_5 = FactorizationMachineRegressor(degree=degree, n_components=5,
272 | fit_lower=None, fit_linear=False,
273 | max_iter=5, warm_start=True,
274 | random_state=0)
275 | with warnings.catch_warnings():
276 | warnings.simplefilter("ignore")
277 | clf_5_5.fit(X, y)
278 | P_fit = clf_5_5.P_.copy()
279 | lams_fit = clf_5_5.lams_.copy()
280 | clf_5_5.fit(X, y)
281 |
282 | # (c) running 5 iterations when starting from previous point.
283 | clf_5 = FactorizationMachineRegressor(degree=degree, n_components=5,
284 | fit_lower=None, fit_linear=False,
285 | max_iter=5, warm_start=True,
286 | random_state=0)
287 | clf_5.P_ = P_fit
288 | clf_5.lams_ = lams_fit
289 | with warnings.catch_warnings():
290 | warnings.simplefilter("ignore")
291 | clf_5.fit(X, y)
292 |
293 | assert_array_almost_equal(clf_10.P_, clf_5_5.P_)
294 | assert_array_almost_equal(clf_10.P_, clf_5.P_)
295 |
296 | # Prediction results should also be the same if:
297 | # (note: could not get this test to work for the exact P_.)
298 |
299 | noisy_y = _poly_predict(X, P, lams, kernel="anova", degree=2)
300 | noisy_y += rng.randn(noisy_y.shape[0])
301 | X_train, X_test = X[:10], X[10:]
302 | y_train, y_test = noisy_y[:10], noisy_y[10:]
303 |
304 | beta_low = 0.5
305 | beta = 0.1
306 | beta_hi = 1
307 | ref = FactorizationMachineRegressor(degree=degree, n_components=5,
308 | fit_linear=False, fit_lower=None,
309 | beta=beta, max_iter=20000,
310 | random_state=0)
311 | ref.fit(X_train, y_train)
312 | y_pred_ref = ref.predict(X_test)
313 |
314 |     # (a) warm-starting from beta_low, then refitting at the target beta
315 | from_low = FactorizationMachineRegressor(degree=degree, n_components=5,
316 | fit_lower=None, fit_linear=False,
317 | beta=beta_low, warm_start=True,
318 | random_state=0)
319 | from_low.fit(X_train, y_train)
320 | from_low.set_params(beta=beta)
321 | from_low.fit(X_train, y_train)
322 | y_pred_low = from_low.predict(X_test)
323 |
324 | # (b) starting from higher beta, decreasing and refitting
325 | from_hi = FactorizationMachineRegressor(degree=degree, n_components=5,
326 | fit_lower=None, fit_linear=False,
327 | beta=beta_hi, warm_start=True,
328 | random_state=0)
329 | from_hi.fit(X_train, y_train)
330 | from_hi.set_params(beta=beta)
331 | from_hi.fit(X_train, y_train)
332 | y_pred_hi = from_hi.predict(X_test)
333 |
334 | assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=4)
335 | assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=4)
336 |
337 |
338 | def test_warm_start():
339 | yield check_warm_start, 2
340 | yield check_warm_start, 3
341 |
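
check_warm_start above verifies the pattern users rely on for regularization
paths. As a sketch (hypothetical user code on small random data; the parameter
values are illustrative only):

    import numpy as np
    from polylearn import FactorizationMachineRegressor

    rng = np.random.RandomState(0)
    X_train, y_train = rng.randn(20, 4), rng.randn(20)

    fm = FactorizationMachineRegressor(degree=2, n_components=5,
                                       warm_start=True, tol=1e-2,
                                       random_state=0)
    for beta in (10., 1., 0.1):    # decreasing regularization path
        fm.set_params(beta=beta)
        fm.fit(X_train, y_train)   # each fit reuses the previous P_ and lams_
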
--------------------------------------------------------------------------------
/polylearn/tests/test_kernels.py:
--------------------------------------------------------------------------------
1 | # Author: Vlad Niculae
2 | # License: Simplified BSD
3 |
4 | from itertools import product, combinations
5 | from functools import reduce
6 | from nose.tools import assert_true, assert_raises
7 |
8 | import numpy as np
9 | from numpy.testing import assert_array_almost_equal
10 | from scipy import sparse as sp
11 |
12 | from polylearn.kernels import homogeneous_kernel, anova_kernel, safe_power
13 | from polylearn.kernels import _poly_predict
14 |
15 |
16 | def _product(x):
17 | return reduce(lambda a, b: a * b, x, 1)
18 |
19 |
20 | def _power_iter(x, degree):
21 | return product(*([x] * degree))
22 |
23 |
24 | def dumb_homogeneous(x, p, degree=2):
25 | return sum(_product(x[k] * p[k] for k in ix)
26 | for ix in _power_iter(range(len(x)), degree))
27 |
28 |
29 | def dumb_anova(x, p, degree=2):
30 | return sum(_product(x[k] * p[k] for k in ix)
31 | for ix in combinations(range(len(x)), degree))
32 |
33 |
34 | n_samples = 5
35 | n_bases = 4
36 | n_features = 7
37 | rng = np.random.RandomState(0)
38 | X = rng.randn(n_samples, n_features)
39 | P = rng.randn(n_bases, n_features)
40 | lams = np.array([2, 1, -1, 3])
41 |
42 |
43 | def test_homogeneous():
44 | for m in range(1, 5):
45 | expected = np.zeros((n_samples, n_bases))
46 | for i in range(n_samples):
47 | for j in range(n_bases):
48 | expected[i, j] = dumb_homogeneous(X[i], P[j], degree=m)
49 | got = homogeneous_kernel(X, P, degree=m)
50 | assert_array_almost_equal(got, expected, err_msg=(
51 | "Homogeneous kernel incorrect for degree {}".format(m)))
52 |
53 |
54 | def test_anova():
55 | for m in (2, 3):
56 | expected = np.zeros((n_samples, n_bases))
57 | for i in range(n_samples):
58 | for j in range(n_bases):
59 | expected[i, j] = dumb_anova(X[i], P[j], degree=m)
60 | got = anova_kernel(X, P, degree=m)
61 | assert_array_almost_equal(got, expected, err_msg=(
62 | "ANOVA kernel incorrect for degree {}".format(m)))
63 |
64 |
65 | def test_anova_ignore_diag_equivalence():
66 | # predicting using anova kernel
67 | K = 2 * anova_kernel(X, P, degree=2)
68 | y_pred = np.dot(K, lams)
69 |
70 | # explicit
71 | Z = np.dot(P.T, (lams[:, np.newaxis] * P))
72 | y_manual = np.zeros_like(y_pred)
73 | for i in range(n_samples):
74 | x = X[i].ravel()
75 | xx = np.outer(x, x) - np.diag(x ** 2)
76 | y_manual[i] = np.trace(np.dot(Z.T, xx))
77 |
78 | assert_array_almost_equal(y_pred, y_manual)
79 |
80 |
81 | def test_safe_power_sparse():
82 | # TODO maybe move to a util module or something
83 | # scikit-learn has safe_sqr but not general power
84 |
85 | X_quad = X ** 4
86 | # assert X stays sparse
87 | X_sp = sp.csr_matrix(X)
88 | for sp_format in ('csr', 'csc', 'coo'): # not working with lil for now
89 | X_sp = X_sp.asformat(sp_format)
90 | X_sp_quad = safe_power(X_sp, degree=4)
91 | assert_true(sp.issparse(X_sp_quad),
92 | msg="safe_power breaks {} sparsity".format(sp_format))
93 | assert_array_almost_equal(X_quad,
94 | X_sp_quad.A,
95 | err_msg="safe_power differs for {} and "
96 | "dense".format(sp_format))
97 |
98 |
99 | def test_anova_sparse():
100 | X_sp = sp.csr_matrix(X)
101 | for m in (2, 3):
102 | dense = anova_kernel(X, P, degree=m)
103 | sparse = anova_kernel(X_sp, P, degree=m)
104 | assert_array_almost_equal(dense, sparse, err_msg=(
105 | "ANOVA kernel sparse != dense for degree {}".format(m)))
106 |
107 |
108 | def test_predict():
109 | # predict with homogeneous kernel
110 | y_pred_poly = _poly_predict(X, P, lams, kernel='poly', degree=3)
111 | K = homogeneous_kernel(X, P, degree=3)
112 | y_pred = np.dot(K, lams)
113 | assert_array_almost_equal(y_pred_poly, y_pred,
114 | err_msg="Homogeneous prediction incorrect.")
115 |
116 |     # predict with ANOVA kernel
117 | y_pred_poly = _poly_predict(X, P, lams, kernel='anova', degree=3)
118 | K = anova_kernel(X, P, degree=3)
119 | y_pred = np.dot(K, lams)
120 | assert_array_almost_equal(y_pred_poly, y_pred,
121 | err_msg="ANOVA prediction incorrect.")
122 |
123 |
124 | def test_unsupported_degree():
125 | assert_raises(NotImplementedError, anova_kernel, X, P, degree=4)
126 |
127 |
128 | def test_unsupported_kernel():
129 | assert_raises(ValueError, _poly_predict, X, P, lams, kernel='rbf')
130 |
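
The degree-2 ANOVA kernel tested above also has a well-known closed form,
A(x, p) = (<x, p>^2 - sum_j x_j^2 p_j^2) / 2, i.e. the homogeneous polynomial
kernel with the squared "diagonal" terms removed. A quick numerical check
(a sketch, not part of the test suite):

    import numpy as np
    from polylearn.kernels import anova_kernel

    rng = np.random.RandomState(0)
    X = rng.randn(5, 7)
    P = rng.randn(4, 7)

    closed_form = (np.dot(X, P.T) ** 2 - np.dot(X ** 2, (P ** 2).T)) / 2.
    print(np.allclose(anova_kernel(X, P, degree=2), closed_form))  # True
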
--------------------------------------------------------------------------------
/polylearn/tests/test_polynomial_network.py:
--------------------------------------------------------------------------------
1 | # Author: Vlad Niculae
2 | # License: Simplified BSD
3 |
4 | import warnings
5 |
6 | from nose.tools import assert_less_equal, assert_equal
7 |
8 | import numpy as np
9 | from numpy.testing import assert_array_almost_equal
10 | from sklearn.metrics import mean_squared_error
11 | from sklearn.utils.testing import assert_warns_message
12 | from sklearn.utils.extmath import fast_dot
13 |
14 | from lightning.impl.dataset_fast import get_dataset
15 |
16 | from polylearn import PolynomialNetworkClassifier, PolynomialNetworkRegressor
17 | from polylearn.polynomial_network import _lifted_predict as _ds_lifted_predict
18 |
19 |
20 | # Since the test data is tiny, this dense helper shaves off some test seconds.
21 | def _lifted_predict(U, X):
22 | return np.product(fast_dot(U, X.T), axis=0).sum(axis=0)
23 |
24 | max_degree = 5
25 | n_components = 3
26 | n_features = 7
27 | n_samples = 10
28 |
29 | rng = np.random.RandomState(1)
30 | U = rng.randn(max_degree, n_components, n_features)
31 | X = rng.randn(n_samples, n_features)
32 |
33 |
34 | def cd_lifted_slow(X, y, degree=2, n_components=5, beta=1., n_iter=10000,
35 | tol=1e-5, verbose=False, random_state=None):
36 | from sklearn.utils import check_random_state
37 |
38 | n_samples, n_features = X.shape
39 | rng = check_random_state(random_state)
40 | U = 0.01 * rng.randn(degree, n_components, n_features)
41 |
42 | # homogeneous kernel
43 | pred = np.product(np.dot(U, X.T), axis=0).sum(axis=0)
44 |
45 | mu = 1 # squared loss
46 | converged = False
47 |
48 | for i in range(n_iter):
49 | sum_viol = 0
50 | for t in range(degree):
51 | deg_idx = np.zeros(degree, dtype=np.bool)
52 | deg_idx[t] = True
53 | for s in range(n_components):
54 | xi = np.product(np.dot(U[~deg_idx, s, :], X.T), axis=0)
55 | for j in range(n_features):
56 | x = X[:, j]
57 |
58 | inv_step_size = mu * (xi ** 2 * x ** 2).sum()
59 | inv_step_size += beta
60 |
61 | dloss = pred - y # squared loss
62 | step = (xi * x * dloss).sum()
63 | step += beta * U[t, s, j]
64 | step /= inv_step_size
65 |
66 | U[t, s, j] -= step
67 | sum_viol += np.abs(step)
68 |
69 | # dumb synchronize
70 | pred = np.product(np.dot(U, X.T), axis=0).sum(axis=0)
71 | xi = np.product(np.dot(U[~deg_idx, s, :], X.T), axis=0)
72 | nrm = np.sum(U.ravel() ** 2)
73 | if verbose:
74 | print("Epoch", i, "violations", sum_viol, "loss",
75 | 0.5 * (np.sum((y - pred) ** 2) + beta * nrm))
76 |
77 | if sum_viol < tol:
78 | converged = True
79 | break
80 |
81 | if not converged:
82 | warnings.warn("Objective did not converge. Increase max_iter.")
83 |
84 | return U
85 |
86 |
87 | def test_lifted_predict():
88 | y_ref = _lifted_predict(U, X)
89 | ds = get_dataset(X, order='fortran')
90 | y = _ds_lifted_predict(U, ds)
91 | assert_array_almost_equal(y_ref, y)
92 |
93 |
94 | def check_fit(degree):
95 | y = _lifted_predict(U[:degree], X)
96 |
97 | est = PolynomialNetworkRegressor(degree=degree, n_components=n_components,
98 | max_iter=50000, beta=0.001, tol=1e-2,
99 | random_state=0)
100 | y_pred = est.fit(X, y).predict(X)
101 | assert_less_equal(mean_squared_error(y, y_pred), 1e-4,
102 | msg="Cannot learn degree {} function.".format(degree))
103 |
104 |
105 | def test_fit():
106 | for degree in range(2, max_degree + 1):
107 | yield check_fit, degree
108 |
109 |
110 | def check_improve(degree):
111 | y = _lifted_predict(U[:degree], X)
112 |
113 | common_settings = dict(degree=degree, n_components=n_components,
114 | beta=1e-10, tol=0, random_state=0)
115 |
116 | est_5 = PolynomialNetworkRegressor(max_iter=5, **common_settings)
117 | est_10 = PolynomialNetworkRegressor(max_iter=10, **common_settings)
118 |
119 | with warnings.catch_warnings():
120 | warnings.simplefilter("ignore")
121 | est_5.fit(X, y)
122 | est_10.fit(X, y)
123 |
124 | y_pred_5 = est_5.predict(X)
125 | y_pred_10 = est_10.predict(X)
126 |
127 | assert_less_equal(mean_squared_error(y, y_pred_10),
128 | mean_squared_error(y, y_pred_5),
129 | msg="More iterations do not improve fit.")
130 |
131 |
132 | def test_improve():
133 | for degree in range(2, max_degree + 1):
134 | yield check_improve, degree
135 |
136 |
137 | def test_convergence_warning():
138 | degree = 4
139 | y = _lifted_predict(U[:degree], X)
140 |
141 | est = PolynomialNetworkRegressor(degree=degree, n_components=n_components,
142 | beta=1e-10, max_iter=1, tol=1e-5,
143 | random_state=0)
144 | assert_warns_message(UserWarning, "converge", est.fit, X, y)
145 |
146 |
147 | def test_random_starts():
148 | # not as strong a test as the direct case!
149 | # using training error here, and a higher threshold.
150 |     # We observe the lifted solver reaches rather different solutions.
151 | degree = 3
152 | noisy_y = _lifted_predict(U[:degree], X)
153 | noisy_y += 5. * rng.randn(noisy_y.shape[0])
154 |
155 | common_settings = dict(degree=degree, n_components=n_components,
156 | beta=0.01, tol=0.01)
157 | scores = []
158 | for k in range(5):
159 | est = PolynomialNetworkRegressor(random_state=k, **common_settings)
160 | y_pred = est.fit(X, noisy_y).predict(X)
161 | scores.append(mean_squared_error(noisy_y, y_pred))
162 |
163 | assert_less_equal(np.std(scores), 1e-4)
164 |
165 |
166 | def check_same_as_slow(degree):
167 | y = _lifted_predict(U[:degree], X)
168 | reg = PolynomialNetworkRegressor(degree=degree, n_components=n_components,
169 | fit_lower=None, beta=1, max_iter=5,
170 | random_state=0)
171 |
172 | with warnings.catch_warnings():
173 | warnings.simplefilter("ignore")
174 | reg.fit(X, y)
175 |
176 | U_fit_slow = cd_lifted_slow(X, y, degree=degree,
177 | n_components=n_components, beta=1,
178 | random_state=0, n_iter=5)
179 |
180 | assert_array_almost_equal(reg.U_, U_fit_slow)
181 |
182 |
183 | def test_same_as_slow():
184 | for degree in range(2, max_degree + 1):
185 | yield check_same_as_slow, degree
186 |
187 |
188 | def check_classification_losses(loss, degree):
189 | y = np.sign(_lifted_predict(U[:degree], X))
190 |
191 | clf = PolynomialNetworkClassifier(degree=degree, n_components=n_components,
192 | loss=loss, beta=1e-4, tol=1e-2,
193 | random_state=0)
194 | clf.fit(X, y)
195 | assert_equal(1.0, clf.score(X, y))
196 |
197 |
198 | def test_classification_losses():
199 | for loss in ('squared_hinge', 'logistic'):
200 | for degree in range(2, max_degree + 1):
201 | yield check_classification_losses, loss, degree
202 |
203 |
204 | def check_warm_start(degree):
205 | y = np.sign(_lifted_predict(U[:degree], X))
206 | # Result should be the same if:
207 | # (a) running 10 iterations
208 |
209 | common_settings = dict(fit_lower=None, degree=degree, n_components=2,
210 | random_state=0)
211 | clf_10 = PolynomialNetworkRegressor(max_iter=10, warm_start=False,
212 | **common_settings)
213 | with warnings.catch_warnings():
214 | warnings.simplefilter("ignore")
215 | clf_10.fit(X, y)
216 |
217 | # (b) running 5 iterations and 5 more
218 | clf_5_5 = PolynomialNetworkRegressor(max_iter=5, warm_start=True,
219 | **common_settings)
220 | with warnings.catch_warnings():
221 | warnings.simplefilter("ignore")
222 | clf_5_5.fit(X, y)
223 | U_fit = clf_5_5.U_.copy()
224 | clf_5_5.fit(X, y)
225 |
226 | # (c) running 5 iterations when starting from previous point.
227 | clf_5 = PolynomialNetworkRegressor(max_iter=5, warm_start=True,
228 | **common_settings)
229 | clf_5.U_ = U_fit
230 | with warnings.catch_warnings():
231 | warnings.simplefilter("ignore")
232 | clf_5.fit(X, y)
233 |
234 | assert_array_almost_equal(clf_10.U_, clf_5_5.U_)
235 | assert_array_almost_equal(clf_10.U_, clf_5.U_)
236 |
237 | # Prediction results should also be the same if:
238 |     # (note: could not get this test to work for the exact U_.)
239 | # This test is very flimsy!
240 |
241 | y = np.sign(_lifted_predict(U[:degree], X))
242 |
243 |     beta_low = 0.49
244 |     beta = 0.5
245 |     beta_hi = 0.51
246 |
247 | common_settings = dict(degree=degree, n_components=n_components,
248 | tol=1e-3, random_state=0)
249 | ref = PolynomialNetworkRegressor(beta=beta, **common_settings)
250 | ref.fit(X, y)
251 | y_pred_ref = ref.predict(X)
252 |
253 |     # (a) starting from lower beta, increasing and refitting
254 | from_low = PolynomialNetworkRegressor(beta=beta_low, warm_start=True,
255 | **common_settings)
256 | from_low.fit(X, y)
257 | from_low.set_params(beta=beta)
258 | from_low.fit(X, y)
259 | y_pred_low = from_low.predict(X)
260 |
261 | # (b) starting from higher beta, decreasing and refitting
262 | from_hi = PolynomialNetworkRegressor(beta=beta_hi, warm_start=True,
263 | **common_settings)
264 | from_hi.fit(X, y)
265 | from_hi.set_params(beta=beta)
266 | from_hi.fit(X, y)
267 | y_pred_hi = from_hi.predict(X)
268 |
269 | decimal = 3
270 | assert_array_almost_equal(y_pred_low, y_pred_ref, decimal=decimal)
271 | assert_array_almost_equal(y_pred_hi, y_pred_ref, decimal=decimal)
272 |
273 |
274 | def test_warm_start():
275 | for degree in range(2, max_degree + 1):
276 | yield check_warm_start, degree
277 |
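
The _lifted_predict helper at the top of this file scores a sample x as
y(x) = sum_s prod_t <U[t, s], x>: a sum over components of a product over the
degree "slots" of the lifted tensor. An explicit-loop cross-check of the
vectorized expression (sketch):

    import numpy as np

    rng = np.random.RandomState(0)
    U = rng.randn(3, 2, 4)   # (degree, n_components, n_features)
    X = rng.randn(5, 4)

    y_vec = np.product(np.dot(U, X.T), axis=0).sum(axis=0)
    y_loops = np.array([sum(np.prod([np.dot(U[t, s], x)
                                     for t in range(U.shape[0])])
                            for s in range(U.shape[1]))
                        for x in X])
    print(np.allclose(y_vec, y_loops))  # True
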
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os.path
3 | import sys
4 | import setuptools
5 | from numpy.distutils.core import setup
6 |
7 |
8 | try:
9 | import numpy
10 | except ImportError:
11 | print('numpy is required during installation')
12 | sys.exit(1)
13 |
14 |
15 | DISTNAME = 'polylearn'
16 | DESCRIPTION = ("Factorization machines and polynomial networks "
17 | "for classification and regression in Python.")
18 | LONG_DESCRIPTION = open('README.rst').read()
19 | MAINTAINER = 'Vlad Niculae'
20 | MAINTAINER_EMAIL = 'vlad@vene.ro'
21 | URL = 'https://contrib.scikit-learn.org/polylearn'
22 | LICENSE = 'Simplified BSD'
23 | DOWNLOAD_URL = 'https://github.com/scikit-learn-contrib/polylearn'
24 | VERSION = '0.1.dev0'
25 |
26 |
27 | def configuration(parent_package='', top_path=None):
28 | from numpy.distutils.misc_util import Configuration
29 |
30 | config = Configuration(None, parent_package, top_path)
31 |
32 | config.add_subpackage('polylearn')
33 |
34 | return config
35 |
36 |
37 | if __name__ == '__main__':
38 | old_path = os.getcwd()
39 | local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
40 |
41 | os.chdir(local_path)
42 | sys.path.insert(0, local_path)
43 |
44 | setup(configuration=configuration,
45 | name=DISTNAME,
46 | maintainer=MAINTAINER,
47 | include_package_data=True,
48 | install_requires=[
49 | 'six',
50 | 'scikit-learn'
51 | ],
52 | maintainer_email=MAINTAINER_EMAIL,
53 | description=DESCRIPTION,
54 | license=LICENSE,
55 | url=URL,
56 | version=VERSION,
57 | download_url=DOWNLOAD_URL,
58 | long_description=LONG_DESCRIPTION,
59 |           zip_safe=False,  # the package cannot be run from a zipped .egg
60 | classifiers=[
61 | 'Intended Audience :: Science/Research',
62 | 'Intended Audience :: Developers', 'License :: OSI Approved',
63 | 'Programming Language :: C', 'Programming Language :: Python',
64 | 'Topic :: Software Development',
65 | 'Topic :: Scientific/Engineering',
66 | 'Operating System :: Microsoft :: Windows',
67 | 'Operating System :: POSIX', 'Operating System :: Unix',
68 | 'Operating System :: MacOS'
69 | ]
70 | )
--------------------------------------------------------------------------------