├── .gitignore
├── LICENSE.txt
├── MANIFEST.in
├── README.md
├── doc
    ├── Makefile
    ├── api.rst
    ├── conf.py
    ├── index.rst
    ├── intro.rst
    ├── make.bat
    ├── notebooks
    │   ├── pybroom-example-multi-datasets-minimize.ipynb
    │   ├── pybroom-example-multi-datasets-scipy-robust-fit.ipynb
    │   ├── pybroom-example-multi-datasets.ipynb
    │   └── pybroom-example.ipynb
    ├── rtd_requirements.txt
    └── whatsnew.rst
├── pybroom.py
├── requirements.txt
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | *.pyc
3 | doc/_build
4 | build
5 | *.egg-info
6 | dist
7 | recipe
8 | doc/notebooks/_*
9 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Antonino Ingargiola and contributors.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE.txt
2 | include README.md
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pybroom
 2 | 
 3 | > *pybroom, the python's broom to tidy up messy fit results!*
 4 | 
 5 | **Pybroom** is a small python 3 library for converting fitting results
 6 | (curve fitting or other optimizations)
 7 | to [Pandas](http://pandas.pydata.org/)
 8 | [DataFrame](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe)
 9 | in tidy format
10 | [(Wickham 2014)](http://dx.doi.org/10.18637/jss.v059.i10).
11 | DataFrames in tidy format (or long-form) follow a simple rule:
12 | one "observation" per row and one "variable" per column.
13 | This simple structure makes it easy to process the data with clear and
14 | [well-understood idioms](http://tomaugspurger.github.io/modern-5-tidy.html)
15 | (for filtering, aggregation, etc.) and allows
16 | plot libraries to automatically generate complex plots in which many
17 | variables are compared. Plotting libraries supporting tidy DataFrames
18 | include [seaborn](https://web.stanford.edu/~mwaskom/software/seaborn/),
19 | recent versions of [matplotlib](http://matplotlib.org/),
20 | [bokeh](http://bokeh.pydata.org/) and
21 | [altair](https://github.com/ellisonbg/altair).
22 | pybroom development was inspired by the R library
23 | [broom](https://github.com/dgrtwo/broom).
24 | 
25 | Like the R library broom, *pybroom* provides 3 functions: `tidy`, `augment` and `glance`.
26 | 
27 | For details see the [documentation](http://pybroom.readthedocs.io/)
28 | which includes example notebooks (you can find the source notebooks in
29 | [docs/notebooks](docs/notebooks)).
30 | 
31 | Pybroom was started after watching this presentation by
32 | David Robinson (broom's author):
33 | 
34 | - [broom: Converting statistical models to tidy data frames](https://www.youtube.com/watch?v=eM3Ha0kTAz4).
35 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | 	$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help
 23 | help:
 24 | 	@echo "Please use \`make <target>' where <target> is one of"
 25 | 	@echo "  html       to make standalone HTML files"
 26 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 27 | 	@echo "  singlehtml to make a single large HTML file"
 28 | 	@echo "  pickle     to make pickle files"
 29 | 	@echo "  json       to make JSON files"
 30 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 31 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 32 | 	@echo "  applehelp  to make an Apple Help Book"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  epub3      to make an epub3"
 36 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 37 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 38 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 39 | 	@echo "  text       to make text files"
 40 | 	@echo "  man        to make manual pages"
 41 | 	@echo "  texinfo    to make Texinfo files"
 42 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 43 | 	@echo "  gettext    to make PO message catalogs"
 44 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 45 | 	@echo "  xml        to make Docutils-native XML files"
 46 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 47 | 	@echo "  linkcheck  to check all external links for integrity"
 48 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 49 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 50 | 	@echo "  dummy      to check syntax errors of document sources"
 51 | 
 52 | .PHONY: clean
 53 | clean:
 54 | 	rm -rf $(BUILDDIR)/*
 55 | 
 56 | .PHONY: html
 57 | html:
 58 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 61 | 
 62 | .PHONY: dirhtml
 63 | dirhtml:
 64 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 65 | 	@echo
 66 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 67 | 
 68 | .PHONY: singlehtml
 69 | singlehtml:
 70 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 71 | 	@echo
 72 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 73 | 
 74 | .PHONY: pickle
 75 | pickle:
 76 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 77 | 	@echo
 78 | 	@echo "Build finished; now you can process the pickle files."
 79 | 
 80 | .PHONY: json
 81 | json:
 82 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 83 | 	@echo
 84 | 	@echo "Build finished; now you can process the JSON files."
 85 | 
 86 | .PHONY: htmlhelp
 87 | htmlhelp:
 88 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 89 | 	@echo
 90 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 91 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 92 | 
 93 | .PHONY: qthelp
 94 | qthelp:
 95 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 96 | 	@echo
 97 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 98 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 99 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pybroom.qhcp"
100 | 	@echo "To view the help file:"
101 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pybroom.qhc"
102 | 
103 | .PHONY: applehelp
104 | applehelp:
105 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
106 | 	@echo
107 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
108 | 	@echo "N.B. You won't be able to view it unless you put it in" \
109 | 	      "~/Library/Documentation/Help or install it in your application" \
110 | 	      "bundle."
111 | 
112 | .PHONY: devhelp
113 | devhelp:
114 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
115 | 	@echo
116 | 	@echo "Build finished."
117 | 	@echo "To view the help file:"
118 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/pybroom"
119 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pybroom"
120 | 	@echo "# devhelp"
121 | 
122 | .PHONY: epub
123 | epub:
124 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
125 | 	@echo
126 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
127 | 
128 | .PHONY: epub3
129 | epub3:
130 | 	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
131 | 	@echo
132 | 	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
133 | 
134 | .PHONY: latex
135 | latex:
136 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | 	@echo
138 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
139 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
140 | 	      "(use \`make latexpdf' here to do that automatically)."
141 | 
142 | .PHONY: latexpdf
143 | latexpdf:
144 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
145 | 	@echo "Running LaTeX files through pdflatex..."
146 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
147 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
148 | 
149 | .PHONY: latexpdfja
150 | latexpdfja:
151 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
152 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
153 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
154 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
155 | 
156 | .PHONY: text
157 | text:
158 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
159 | 	@echo
160 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
161 | 
162 | .PHONY: man
163 | man:
164 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
165 | 	@echo
166 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
167 | 
168 | .PHONY: texinfo
169 | texinfo:
170 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | 	@echo
172 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
173 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
174 | 	      "(use \`make info' here to do that automatically)."
175 | 
176 | .PHONY: info
177 | info:
178 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
179 | 	@echo "Running Texinfo files through makeinfo..."
180 | 	make -C $(BUILDDIR)/texinfo info
181 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
182 | 
183 | .PHONY: gettext
184 | gettext:
185 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
186 | 	@echo
187 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
188 | 
189 | .PHONY: changes
190 | changes:
191 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
192 | 	@echo
193 | 	@echo "The overview file is in $(BUILDDIR)/changes."
194 | 
195 | .PHONY: linkcheck
196 | linkcheck:
197 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
198 | 	@echo
199 | 	@echo "Link check complete; look for any errors in the above output " \
200 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
201 | 
202 | .PHONY: doctest
203 | doctest:
204 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
205 | 	@echo "Testing of doctests in the sources finished, look at the " \
206 | 	      "results in $(BUILDDIR)/doctest/output.txt."
207 | 
208 | .PHONY: coverage
209 | coverage:
210 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
211 | 	@echo "Testing of coverage in the sources finished, look at the " \
212 | 	      "results in $(BUILDDIR)/coverage/python.txt."
213 | 
214 | .PHONY: xml
215 | xml:
216 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
217 | 	@echo
218 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
219 | 
220 | .PHONY: pseudoxml
221 | pseudoxml:
222 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
223 | 	@echo
224 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
225 | 
226 | .PHONY: dummy
227 | dummy:
228 | 	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
229 | 	@echo
230 | 	@echo "Build finished. Dummy builder generates no files."
231 | 


--------------------------------------------------------------------------------
/doc/api.rst:
--------------------------------------------------------------------------------
 1 | Pybroom API Documentation
 2 | =========================
 3 | 
 4 | .. automodule :: pybroom
 5 | 
 6 | Main Functions
 7 | --------------
 8 | 
 9 | The 3 high-level functions :func:`glance`, :func:`tidy` and :func:`augment`
10 | allows tidying one or more fit results.
11 | These are pybroom's most generic functions, accepting all the
12 | the supported fit result objects, as well as a list/dict of such objects.
13 | See also the examples at the beginning of this page and the example notebooks.
14 | 
15 | .. autofunction :: glance
16 | 
17 | .. autofunction :: tidy
18 | 
19 | .. autofunction :: augment
20 | 
21 | 
22 | Dictionary conversions
23 | ----------------------
24 | 
25 | The two functions :func:`tidy_to_dict` and :func:`dict_to_tidy` provide
26 | the ability to convert a tidy DataFrame to and from a python dictionary.
27 | 
28 | .. autofunction :: tidy_to_dict
29 | 
30 | .. autofunction :: dict_to_tidy
31 | 
32 | 
33 | Specialized functions
34 | ---------------------
35 | 
36 | These are the specialized (i.e. low-level) functions, each converting one
37 | specific object to a tidy DataFrame.
38 | 
39 | .. autofunction :: glance_scipy_result
40 | 
41 | .. autofunction :: tidy_scipy_result
42 | 
43 | .. autofunction :: glance_lmfit_result
44 | 
45 | .. autofunction :: tidy_lmfit_result
46 | 
47 | .. autofunction :: _augment_lmfit_modelresult
48 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #
  4 | # pybroom documentation build configuration file, created by
  5 | # sphinx-quickstart on Mon Jul 25 15:02:27 2016.
  6 | #
  7 | # This file is execfile()d with the current directory set to its
  8 | # containing dir.
  9 | #
 10 | # Note that not all possible configuration values are present in this
 11 | # autogenerated file.
 12 | #
 13 | # All configuration values have a default; values that are commented out
 14 | # serve to show the default.
 15 | 
 16 | import sys
 17 | import os
 18 | 
 19 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
 20 | 
 21 | # If extensions (or modules to document with autodoc) are in another directory,
 22 | # add these directories to sys.path here. If the directory is relative to the
 23 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 24 | #sys.path.insert(0, os.path.abspath('.'))
 25 | 
 26 | # -- General configuration ------------------------------------------------
 27 | 
 28 | # If your documentation needs a minimal Sphinx version, state it here.
 29 | #needs_sphinx = '1.0'
 30 | 
 31 | # Add any Sphinx extension module names here, as strings. They can be
 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 33 | # ones.
 34 | extensions = [
 35 |     'nbsphinx',
 36 |     'sphinx.ext.autodoc',
 37 |     'sphinx.ext.intersphinx',
 38 |     'sphinx.ext.coverage',
 39 |     'sphinx.ext.mathjax',
 40 |     'sphinx.ext.napoleon',
 41 |     'IPython.sphinxext.ipython_console_highlighting',
 42 | ]
 43 | nbsphinx_allow_errors = False
 44 | 
 45 | 
 46 | # Add any paths that contain templates here, relative to this directory.
 47 | templates_path = ['_templates']
 48 | 
 49 | # The suffix(es) of source filenames.
 50 | # You can specify multiple suffix as a list of string:
 51 | # source_suffix = ['.rst', '.md']
 52 | source_suffix = '.rst'
 53 | 
 54 | # The encoding of source files.
 55 | #source_encoding = 'utf-8-sig'
 56 | 
 57 | # The master toctree document.
 58 | master_doc = 'index'
 59 | 
 60 | # General information about the project.
 61 | project = 'pybroom'
 62 | copyright = '2016, Antonino Ingargiola'
 63 | author = 'Antonino Ingargiola'
 64 | 
 65 | # The version info for the project you're documenting, acts as replacement for
 66 | # |version| and |release|, also used in various other places throughout the
 67 | # built documents.
 68 | #
 69 | # The short X.Y version.
 70 | sys.path.insert(0, os.path.abspath('..'))
 71 | import pybroom
 72 | version = pybroom.__version__
 73 | # The full version, including alpha/beta/rc tags.
 74 | release = version
 75 | 
 76 | # The language for content autogenerated by Sphinx. Refer to documentation
 77 | # for a list of supported languages.
 78 | #
 79 | # This is also used if you do content translation via gettext catalogs.
 80 | # Usually you set "language" from the command line for these cases.
 81 | language = None
 82 | 
 83 | # There are two options for replacing |today|: either, you set today to some
 84 | # non-false value, then it is used:
 85 | #today = ''
 86 | # Else, today_fmt is used as the format for a strftime call.
 87 | #today_fmt = '%B %d, %Y'
 88 | 
 89 | # List of patterns, relative to source directory, that match files and
 90 | # directories to ignore when looking for source files.
 91 | # This patterns also effect to html_static_path and html_extra_path
 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store',
 93 |                     '**/.ipynb_checkpoints', '**/_*.ipynb']
 94 | 
 95 | # The reST default role (used for this markup: `text`) to use for all
 96 | # documents.
 97 | #default_role = None
 98 | 
 99 | # If true, '()' will be appended to :func: etc. cross-reference text.
100 | #add_function_parentheses = True
101 | 
102 | # If true, the current module name will be prepended to all description
103 | # unit titles (such as .. function::).
104 | #add_module_names = True
105 | 
106 | # If true, sectionauthor and moduleauthor directives will be shown in the
107 | # output. They are ignored by default.
108 | #show_authors = False
109 | 
110 | # The name of the Pygments (syntax highlighting) style to use.
111 | pygments_style = 'sphinx'
112 | 
113 | # A list of ignored prefixes for module index sorting.
114 | #modindex_common_prefix = []
115 | 
116 | # If true, keep warnings as "system message" paragraphs in the built documents.
117 | #keep_warnings = False
118 | 
119 | # If true, `todo` and `todoList` produce output, else they produce nothing.
120 | todo_include_todos = False
121 | 
122 | 
123 | # -- Options for HTML output ----------------------------------------------
124 | 
125 | # The theme to use for HTML and HTML Help pages.  See the documentation for
126 | # a list of builtin themes.
127 | #html_theme = 'alabaster'
128 | if not on_rtd:
129 |     import sphinx_rtd_theme
130 |     html_theme = 'sphinx_rtd_theme'
131 |     html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
132 | 
133 | # Theme options are theme-specific and customize the look and feel of a theme
134 | # further.  For a list of options available for each theme, see the
135 | # documentation.
136 | #html_theme_options = {}
137 | 
138 | # Add any paths that contain custom themes here, relative to this directory.
139 | #html_theme_path = []
140 | 
141 | # The name for this set of Sphinx documents.
142 | # "<project> v<release> documentation" by default.
143 | #html_title = 'pybroom v0.1'
144 | 
145 | # A shorter title for the navigation bar.  Default is the same as html_title.
146 | #html_short_title = None
147 | 
148 | # The name of an image file (relative to this directory) to place at the top
149 | # of the sidebar.
150 | #html_logo = None
151 | 
152 | # The name of an image file (relative to this directory) to use as a favicon of
153 | # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
154 | # pixels large.
155 | #html_favicon = None
156 | 
157 | # Add any paths that contain custom static files (such as style sheets) here,
158 | # relative to this directory. They are copied after the builtin static files,
159 | # so a file named "default.css" will overwrite the builtin "default.css".
160 | html_static_path = ['_static']
161 | 
162 | # Add any extra paths that contain custom files (such as robots.txt or
163 | # .htaccess) here, relative to this directory. These files are copied
164 | # directly to the root of the documentation.
165 | #html_extra_path = []
166 | 
167 | # If not None, a 'Last updated on:' timestamp is inserted at every page
168 | # bottom, using the given strftime format.
169 | # The empty string is equivalent to '%b %d, %Y'.
170 | #html_last_updated_fmt = None
171 | 
172 | # If true, SmartyPants will be used to convert quotes and dashes to
173 | # typographically correct entities.
174 | #html_use_smartypants = True
175 | 
176 | # Custom sidebar templates, maps document names to template names.
177 | #html_sidebars = {}
178 | 
179 | # Additional templates that should be rendered to pages, maps page names to
180 | # template names.
181 | #html_additional_pages = {}
182 | 
183 | # If false, no module index is generated.
184 | #html_domain_indices = True
185 | 
186 | # If false, no index is generated.
187 | #html_use_index = True
188 | 
189 | # If true, the index is split into individual pages for each letter.
190 | #html_split_index = False
191 | 
192 | # If true, links to the reST sources are added to the pages.
193 | #html_show_sourcelink = True
194 | 
195 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
196 | #html_show_sphinx = True
197 | 
198 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
199 | #html_show_copyright = True
200 | 
201 | # If true, an OpenSearch description file will be output, and all pages will
202 | # contain a <link> tag referring to it.  The value of this option must be the
203 | # base URL from which the finished HTML is served.
204 | #html_use_opensearch = ''
205 | 
206 | # This is the file name suffix for HTML files (e.g. ".xhtml").
207 | #html_file_suffix = None
208 | 
209 | # Language to be used for generating the HTML full-text search index.
210 | # Sphinx supports the following languages:
211 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
212 | #   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh'
213 | #html_search_language = 'en'
214 | 
215 | # A dictionary with options for the search language support, empty by default.
216 | # 'ja' uses this config value.
217 | # 'zh' user can custom change `jieba` dictionary path.
218 | #html_search_options = {'type': 'default'}
219 | 
220 | # The name of a javascript file (relative to the configuration directory) that
221 | # implements a search results scorer. If empty, the default will be used.
222 | #html_search_scorer = 'scorer.js'
223 | 
224 | # Output file base name for HTML help builder.
225 | htmlhelp_basename = 'pybroomdoc'
226 | 
227 | # -- Options for LaTeX output ---------------------------------------------
228 | 
229 | latex_elements = {
230 | # The paper size ('letterpaper' or 'a4paper').
231 | #'papersize': 'letterpaper',
232 | 
233 | # The font size ('10pt', '11pt' or '12pt').
234 | #'pointsize': '10pt',
235 | 
236 | # Additional stuff for the LaTeX preamble.
237 | #'preamble': '',
238 | 
239 | # Latex figure (float) alignment
240 | #'figure_align': 'htbp',
241 | }
242 | 
243 | # Grouping the document tree into LaTeX files. List of tuples
244 | # (source start file, target name, title,
245 | #  author, documentclass [howto, manual, or own class]).
246 | latex_documents = [
247 |     (master_doc, 'pybroom.tex', 'pybroom Documentation',
248 |      'Antonino Ingargiola', 'manual'),
249 | ]
250 | 
251 | # The name of an image file (relative to this directory) to place at the top of
252 | # the title page.
253 | #latex_logo = None
254 | 
255 | # For "manual" documents, if this is true, then toplevel headings are parts,
256 | # not chapters.
257 | #latex_use_parts = False
258 | 
259 | # If true, show page references after internal links.
260 | #latex_show_pagerefs = False
261 | 
262 | # If true, show URL addresses after external links.
263 | #latex_show_urls = False
264 | 
265 | # Documents to append as an appendix to all manuals.
266 | #latex_appendices = []
267 | 
268 | # If false, no module index is generated.
269 | #latex_domain_indices = True
270 | 
271 | 
272 | # -- Options for manual page output ---------------------------------------
273 | 
274 | # One entry per manual page. List of tuples
275 | # (source start file, name, description, authors, manual section).
276 | man_pages = [
277 |     (master_doc, 'pybroom', 'pybroom Documentation',
278 |      [author], 1)
279 | ]
280 | 
281 | # If true, show URL addresses after external links.
282 | #man_show_urls = False
283 | 
284 | 
285 | # -- Options for Texinfo output -------------------------------------------
286 | 
287 | # Grouping the document tree into Texinfo files. List of tuples
288 | # (source start file, target name, title, author,
289 | #  dir menu entry, description, category)
290 | texinfo_documents = [
291 |     (master_doc, 'pybroom', 'pybroom Documentation',
292 |      author, 'pybroom', 'One line description of project.',
293 |      'Miscellaneous'),
294 | ]
295 | 
296 | # Documents to append as an appendix to all manuals.
297 | #texinfo_appendices = []
298 | 
299 | # If false, no module index is generated.
300 | #texinfo_domain_indices = True
301 | 
302 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
303 | #texinfo_show_urls = 'footnote'
304 | 
305 | # If true, do not generate a @detailmenu in the "Top" node's menu.
306 | #texinfo_no_detailmenu = False
307 | 
308 | 
309 | # Example configuration for intersphinx: refer to the Python standard library.
310 | #intersphinx_mapping = {'https://docs.python.org/': None}
311 | 


--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
 1 | .. pybroom documentation master file, created by
 2 |    sphinx-quickstart on Mon Jul 25 15:02:27 2016.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to pybroom's documentation!
 7 | ===================================
 8 | 
 9 | :Latest Version: |version|
10 | 
11 | **Pybroom** is a small python 3+ library for converting collections of
12 | fit results (curve fitting or other optimizations)
13 | to `Pandas <http://pandas.pydata.org/>`__
14 | `DataFrame <http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe>`__
15 | in tidy format (or long-form)
16 | `(Wickham 2014) <http://dx.doi.org/10.18637/jss.v059.i10>`__.
17 | Once fit results are in tidy DataFrames, it is possible to leverage
18 | `common patterns <http://tomaugspurger.github.io/modern-5-tidy.html>`__
19 | for tidy data analysis. Furthermore powerful visual
20 | explorations using multi-facet plots becomes easy thanks to libraries
21 | like `seaborn <https://pypi.python.org/pypi/seaborn/>`__ natively
22 | supporting tidy DataFrames.
23 | 
24 | 
25 | Installation
26 | ------------
27 | 
28 | You can install pybroom from PyPI using the following command::
29 | 
30 |     pip install pybroom
31 | 
32 | or from `conda-forge <https://conda-forge.github.io/>`__ using::
33 | 
34 |     conda install -c conda-forge pybroom
35 | 
36 | Dependencies are python 3.4+, pandas and lmfit (0.9.5+, which in turn requires scipy).
37 | However, matplotlib and seaborn are strongly recommended (and necessary
38 | to run the example notebooks).
39 | 
40 | .. toctree::
41 |     :maxdepth: 1
42 |     :caption: Documentation
43 | 
44 |     intro
45 |     whatsnew
46 |     api
47 | 
48 | .. toctree::
49 |    :maxdepth: 1
50 |    :caption: Notebook Tutorials
51 | 
52 |    notebooks/pybroom-example.ipynb
53 |    notebooks/pybroom-example-multi-datasets.ipynb
54 |    notebooks/pybroom-example-multi-datasets-minimize.ipynb
55 |    notebooks/pybroom-example-multi-datasets-scipy-robust-fit.ipynb
56 | 
57 | 
58 | Indices and tables
59 | ==================
60 | 
61 | * :ref:`genindex`
62 | * :ref:`modindex`
63 | * :ref:`search`
64 | 


--------------------------------------------------------------------------------
/doc/intro.rst:
--------------------------------------------------------------------------------
 1 | Why Pybroom?
 2 | ============
 3 | 
 4 | The Problem
 5 | -----------
 6 | 
 7 | DataFrames in tidy format (or long-form) follow a simple rule:
 8 | one "observation" per row and one "variable" per column.
 9 | This simple structure makes it easy to process the data with clear and
10 | `well-understood idioms <http://tomaugspurger.github.io/modern-5-tidy.html>`__
11 | (filtering, aggregation, etc.) and allows
12 | plot libraries to automatically generate complex plots in which many
13 | variables are compared. Plotting libraries supporting tidy DataFrames
14 | include `seaborn <https://web.stanford.edu/~mwaskom/software/seaborn/>`__,
15 | recent versions of `matplotlib <http://matplotlib.org/>`__,
16 | `bokeh <http://bokeh.pydata.org/>`__ and
17 | `altair <https://github.com/ellisonbg/altair>`__.
18 | 
19 | But, while data is oftentimes represented in tidy DataFrames, fit results
20 | are usually stored in a variety of custom objects and are harder
21 | to manipulate, compare and plot.
22 | 
23 | 
24 | Pybroom to the rescue!
25 | ----------------------
26 | 
27 | Pybroom allows to convert several types of fit results to tidy
28 | DataFrames, and is particularly useful for handling collections
29 | of such fit results.
30 | Pybroom development was inspired by the R library
31 | `broom <https://github.com/dgrtwo/broom>`__.
32 | You can watch `this video <https://www.youtube.com/watch?v=eM3Ha0kTAz4>`__
33 | for details of the philosophy behind broom (and by extension pybroom).
34 | 
35 | Like the R library broom, pybroom provides 3 functions: `glance`, `tidy` and
36 | `augment`. The `glance` function returns fit statistics, one for each
37 | fit result (e.g. fit method, number of iterations, chi-square etc.).
38 | The `tidy` function returns data for each fitted parameter
39 | (e.g. fitted value, gradient, bounds, etc.).
40 | The `augment` function returns data with the same size as the fitted
41 | data points (evaluated best-fit model, residuals, etc.).
42 | Additionally, pybroom has two functions `tidy_to_dict` and `dict_to_tidy`
43 | for conversion between dictionaries and 2-columns tidy DataFrames.
44 | 
45 | Collections of fit results can be in `list`, `dict`,
46 | or any nested `dict`/`list` combination.
47 | When a collection of fit result is used as input, pybroom functions
48 | return a DataFrame with additional "categorical" column(s) containing
49 | the dict keys or the list index.
50 | 
51 | Currently, supported fit result object are:
52 | 
53 | - `scipy.optimize.OptimizeResult` returned by several functions in
54 |   `scipy.optimize`;
55 | - `lmfit.model.ModelResult` (returned by `lmfit.Model.fit()`);
56 | - `lmfit.minimizer.MinimizerResult` (returned by `lmfit.minimizer()`).
57 | 
58 | Note that the 3 functions (glance, tidy and augment) are implemented only for
59 | the fit-result objects that are relevant. For example, `augment` cannot
60 | process lmfit's `MinimizerResult` or scipy's `OptimizeResult` because
61 | there is little or no data relevant to each data point.
62 | 
63 | Support for result objects from other libraries such as
64 | `sklearn` can be added based on user request
65 | (`PR welcome! <https://github.com/tritemio/pybroom>`__).
66 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  epub3      to make an epub3
 31 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 32 | 	echo.  text       to make text files
 33 | 	echo.  man        to make manual pages
 34 | 	echo.  texinfo    to make Texinfo files
 35 | 	echo.  gettext    to make PO message catalogs
 36 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 37 | 	echo.  xml        to make Docutils-native XML files
 38 | 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 39 | 	echo.  linkcheck  to check all external links for integrity
 40 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 41 | 	echo.  coverage   to run coverage check of the documentation if enabled
 42 | 	echo.  dummy      to check syntax errors of document sources
 43 | 	goto end
 44 | )
 45 | 
 46 | if "%1" == "clean" (
 47 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 48 | 	del /q /s %BUILDDIR%\*
 49 | 	goto end
 50 | )
 51 | 
 52 | 
 53 | REM Check if sphinx-build is available and fallback to Python version if any
 54 | %SPHINXBUILD% 1>NUL 2>NUL
 55 | if errorlevel 9009 goto sphinx_python
 56 | goto sphinx_ok
 57 | 
 58 | :sphinx_python
 59 | 
 60 | set SPHINXBUILD=python -m sphinx.__init__
 61 | %SPHINXBUILD% 2> nul
 62 | if errorlevel 9009 (
 63 | 	echo.
 64 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 65 | 	echo.installed, then set the SPHINXBUILD environment variable to point
 66 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 67 | 	echo.may add the Sphinx directory to PATH.
 68 | 	echo.
 69 | 	echo.If you don't have Sphinx installed, grab it from
 70 | 	echo.http://sphinx-doc.org/
 71 | 	exit /b 1
 72 | )
 73 | 
 74 | :sphinx_ok
 75 | 
 76 | 
 77 | if "%1" == "html" (
 78 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 79 | 	if errorlevel 1 exit /b 1
 80 | 	echo.
 81 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 82 | 	goto end
 83 | )
 84 | 
 85 | if "%1" == "dirhtml" (
 86 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 87 | 	if errorlevel 1 exit /b 1
 88 | 	echo.
 89 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 90 | 	goto end
 91 | )
 92 | 
 93 | if "%1" == "singlehtml" (
 94 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 95 | 	if errorlevel 1 exit /b 1
 96 | 	echo.
 97 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 98 | 	goto end
 99 | )
100 | 
101 | if "%1" == "pickle" (
102 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
103 | 	if errorlevel 1 exit /b 1
104 | 	echo.
105 | 	echo.Build finished; now you can process the pickle files.
106 | 	goto end
107 | )
108 | 
109 | if "%1" == "json" (
110 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
111 | 	if errorlevel 1 exit /b 1
112 | 	echo.
113 | 	echo.Build finished; now you can process the JSON files.
114 | 	goto end
115 | )
116 | 
117 | if "%1" == "htmlhelp" (
118 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
119 | 	if errorlevel 1 exit /b 1
120 | 	echo.
121 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
122 | .hhp project file in %BUILDDIR%/htmlhelp.
123 | 	goto end
124 | )
125 | 
126 | if "%1" == "qthelp" (
127 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
128 | 	if errorlevel 1 exit /b 1
129 | 	echo.
130 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
131 | .qhcp project file in %BUILDDIR%/qthelp, like this:
132 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pybroom.qhcp
133 | 	echo.To view the help file:
134 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pybroom.ghc
135 | 	goto end
136 | )
137 | 
138 | if "%1" == "devhelp" (
139 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
140 | 	if errorlevel 1 exit /b 1
141 | 	echo.
142 | 	echo.Build finished.
143 | 	goto end
144 | )
145 | 
146 | if "%1" == "epub" (
147 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
148 | 	if errorlevel 1 exit /b 1
149 | 	echo.
150 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
151 | 	goto end
152 | )
153 | 
154 | if "%1" == "epub3" (
155 | 	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
156 | 	if errorlevel 1 exit /b 1
157 | 	echo.
158 | 	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
159 | 	goto end
160 | )
161 | 
162 | if "%1" == "latex" (
163 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
164 | 	if errorlevel 1 exit /b 1
165 | 	echo.
166 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
167 | 	goto end
168 | )
169 | 
170 | if "%1" == "latexpdf" (
171 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | 	cd %BUILDDIR%/latex
173 | 	make all-pdf
174 | 	cd %~dp0
175 | 	echo.
176 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | 	goto end
178 | )
179 | 
180 | if "%1" == "latexpdfja" (
181 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
182 | 	cd %BUILDDIR%/latex
183 | 	make all-pdf-ja
184 | 	cd %~dp0
185 | 	echo.
186 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
187 | 	goto end
188 | )
189 | 
190 | if "%1" == "text" (
191 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
192 | 	if errorlevel 1 exit /b 1
193 | 	echo.
194 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
195 | 	goto end
196 | )
197 | 
198 | if "%1" == "man" (
199 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
200 | 	if errorlevel 1 exit /b 1
201 | 	echo.
202 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
203 | 	goto end
204 | )
205 | 
206 | if "%1" == "texinfo" (
207 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
208 | 	if errorlevel 1 exit /b 1
209 | 	echo.
210 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
211 | 	goto end
212 | )
213 | 
214 | if "%1" == "gettext" (
215 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
216 | 	if errorlevel 1 exit /b 1
217 | 	echo.
218 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
219 | 	goto end
220 | )
221 | 
222 | if "%1" == "changes" (
223 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
224 | 	if errorlevel 1 exit /b 1
225 | 	echo.
226 | 	echo.The overview file is in %BUILDDIR%/changes.
227 | 	goto end
228 | )
229 | 
230 | if "%1" == "linkcheck" (
231 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
232 | 	if errorlevel 1 exit /b 1
233 | 	echo.
234 | 	echo.Link check complete; look for any errors in the above output ^
235 | or in %BUILDDIR%/linkcheck/output.txt.
236 | 	goto end
237 | )
238 | 
239 | if "%1" == "doctest" (
240 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
241 | 	if errorlevel 1 exit /b 1
242 | 	echo.
243 | 	echo.Testing of doctests in the sources finished, look at the ^
244 | results in %BUILDDIR%/doctest/output.txt.
245 | 	goto end
246 | )
247 | 
248 | if "%1" == "coverage" (
249 | 	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
250 | 	if errorlevel 1 exit /b 1
251 | 	echo.
252 | 	echo.Testing of coverage in the sources finished, look at the ^
253 | results in %BUILDDIR%/coverage/python.txt.
254 | 	goto end
255 | )
256 | 
257 | if "%1" == "xml" (
258 | 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
259 | 	if errorlevel 1 exit /b 1
260 | 	echo.
261 | 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
262 | 	goto end
263 | )
264 | 
265 | if "%1" == "pseudoxml" (
266 | 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
267 | 	if errorlevel 1 exit /b 1
268 | 	echo.
269 | 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
270 | 	goto end
271 | )
272 | 
273 | if "%1" == "dummy" (
274 | 	%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
275 | 	if errorlevel 1 exit /b 1
276 | 	echo.
277 | 	echo.Build finished. Dummy builder generates no files.
278 | 	goto end
279 | )
280 | 
281 | :end
282 | 


--------------------------------------------------------------------------------
/doc/notebooks/pybroom-example-multi-datasets-minimize.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyBroom Example - Multiple Datasets - Minimize\n",
  8 |     "\n",
  9 |     "*This notebook is part of* [pybroom](https://github.com/tritemio/pybroom).\n",
 10 |     "\n",
 11 |     ">This notebook demonstrate using pybroom when performing **Maximum-Likelihood fitting**\n",
 12 |     ">(scalar minimization as opposed to curve fitting) of a set of datasets with *lmfit.minimize*.\n",
 13 |     ">We will show that *pybroom* greatly simplifies comparing, filtering and plotting fit results \n",
 14 |     ">from multiple datasets.\n",
 15 |     ">For an example using curve fitting see\n",
 16 |     ">[pybroom-example-multi-datasets](pybroom-example-multi-datasets.ipynb)."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "metadata": {
 23 |     "collapsed": false
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "%matplotlib inline\n",
 28 |     "%config InlineBackend.figure_format='retina'  # for hi-dpi displays\n",
 29 |     "import numpy as np\n",
 30 |     "import pandas as pd\n",
 31 |     "import matplotlib.pyplot as plt\n",
 32 |     "from matplotlib.pylab import normpdf\n",
 33 |     "import seaborn as sns\n",
 34 |     "from lmfit import Model\n",
 35 |     "import lmfit\n",
 36 |     "print('lmfit: %s' % lmfit.__version__)"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": null,
 42 |    "metadata": {
 43 |     "collapsed": true
 44 |    },
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "sns.set_style('whitegrid')"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "metadata": {
 54 |     "collapsed": true
 55 |    },
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "import pybroom as br"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "## Create Noisy Data\n",
 66 |     "\n",
 67 |     "Simulate *N* datasets which are identical except for the additive noise."
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": true
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "N = 20  # number of datasets\n",
 79 |     "n = 1000  # number of sample in each dataset\n",
 80 |     "\n",
 81 |     "np.random.seed(1)\n",
 82 |     "d1 = np.random.randn(20, int(0.6*n))*0.5 - 2\n",
 83 |     "d2 = np.random.randn(20, int(0.4*n))*1.5 + 2\n",
 84 |     "d = np.hstack((d1, d2))"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {
 91 |     "collapsed": false
 92 |    },
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "ds = pd.DataFrame(data=d, columns=range(d.shape[1])).stack().reset_index()\n",
 96 |     "ds.columns = ['dataset', 'sample', 'data']\n",
 97 |     "ds.head()"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {
104 |     "collapsed": false
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "kws = dict(bins = np.arange(-5, 5.1, 0.1), histtype='step', \n",
109 |     "           lw=2, color='k', alpha=0.1)\n",
110 |     "for i in range(N):\n",
111 |     "    ds.loc[ds.dataset == i, :].data.plot.hist(**kws)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "## Model Fitting"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "### Two-peaks model"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "Here, we use a Gaussian mixture distribution for fitting the data.\n",
133 |     "\n",
134 |     "We fit the data using the Maximum-Likelihood method, i.e. we minimize the\n",
135 |     "(negative) log-likelihood function:"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": null,
141 |    "metadata": {
142 |     "collapsed": true
143 |    },
144 |    "outputs": [],
145 |    "source": [
146 |     "# Model PDF to be maximized\n",
147 |     "def model_pdf(x, a2, mu1, mu2, sig1, sig2):\n",
148 |     "    a1 = 1 - a2\n",
149 |     "    return (a1 * normpdf(x, mu1, sig1) + \n",
150 |     "            a2 * normpdf(x, mu2, sig2))\n",
151 |     "\n",
152 |     "# Function to be minimized by lmfit\n",
153 |     "def log_likelihood_lmfit(params, x):\n",
154 |     "    pnames = ('a2', 'mu1', 'mu2', 'sig1', 'sig2')\n",
155 |     "    kws = {n: params[n] for n in pnames}\n",
156 |     "    return -np.log(model_pdf(x, **kws)).sum()"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "We define the parameters and \"fit\" the $N$ datasets by minimizing the (scalar) function `log_likelihood_lmfit`:"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": null,
169 |    "metadata": {
170 |     "collapsed": false
171 |    },
172 |    "outputs": [],
173 |    "source": [
174 |     "params = lmfit.Parameters()\n",
175 |     "params.add('a2', 0.5, min=0, max=1)\n",
176 |     "params.add('mu1', -1, min=-5, max=5)\n",
177 |     "params.add('mu2', 1, min=-5, max=5)\n",
178 |     "params.add('sig1', 1, min=1e-6)\n",
179 |     "params.add('sig2', 1, min=1e-6)\n",
180 |     "params.add('ax', expr='a2')   # just a test for a derived parameter\n",
181 |     "\n",
182 |     "Results = [lmfit.minimize(log_likelihood_lmfit, params, args=(di,), \n",
183 |     "                          nan_policy='omit', method='least_squares')\n",
184 |     "           for di in d]"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "Fit results can be inspected with\n",
192 |     "`lmfit.fit_report()` or `params.pretty_print()`:"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "metadata": {
199 |     "collapsed": false
200 |    },
201 |    "outputs": [],
202 |    "source": [
203 |     "print(lmfit.fit_report(Results[0]))\n",
204 |     "print()\n",
205 |     "Results[0].params.pretty_print()"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "markdown",
210 |    "metadata": {},
211 |    "source": [
212 |     "This is good for peeking at the results. However,\n",
213 |     "extracting these data from lmfit objects is quite a chore\n",
214 |     "and requires good knowledge of lmfit objects structure.\n",
215 |     "\n",
216 |     "**pybroom** helps in this task: it extracts data from fit results and\n",
217 |     "returns familiar pandas DataFrame (in tidy format). \n",
218 |     "Thanks to the tidy format these data can be\n",
219 |     "much more easily manipulated, filtered and plotted.\n",
220 |     "\n",
221 |     "Let's use the [glance](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.glance) and \n",
222 |     "[tidy](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.tidy) functions:"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "code",
227 |    "execution_count": null,
228 |    "metadata": {
229 |     "collapsed": false
230 |    },
231 |    "outputs": [],
232 |    "source": [
233 |     "dg = br.glance(Results)\n",
234 |     "dg.drop('message', 1).head()"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "code",
239 |    "execution_count": null,
240 |    "metadata": {
241 |     "collapsed": false
242 |    },
243 |    "outputs": [],
244 |    "source": [
245 |     "dt = br.tidy(Results, var_names='dataset')\n",
246 |     "dt.query('dataset == 0')"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "Note that while glance returns one row per fit result, the tidy function\n",
254 |     "return one row per fitted parameter.\n",
255 |     "\n",
256 |     "We can query the value of one parameter (peak position) across the multiple datasets:"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "code",
261 |    "execution_count": null,
262 |    "metadata": {
263 |     "collapsed": false
264 |    },
265 |    "outputs": [],
266 |    "source": [
267 |     "dt.query('name == \"mu1\"').head()"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "metadata": {},
273 |    "source": [
274 |     "By computing the standard deviation of the peak positions:"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": null,
280 |    "metadata": {
281 |     "collapsed": false
282 |    },
283 |    "outputs": [],
284 |    "source": [
285 |     "dt.query('name == \"mu1\"')['value'].std()"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": null,
291 |    "metadata": {
292 |     "collapsed": false
293 |    },
294 |    "outputs": [],
295 |    "source": [
296 |     "dt.query('name == \"mu2\"')['value'].std()"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "markdown",
301 |    "metadata": {},
302 |    "source": [
303 |     "we see that the estimation of `mu1` as less error than the estimation\n",
304 |     "of `mu2`. \n",
305 |     "\n",
306 |     "This difference can be also observed in the histogram of \n",
307 |     "the fitted values:"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "code",
312 |    "execution_count": null,
313 |    "metadata": {
314 |     "collapsed": false
315 |    },
316 |    "outputs": [],
317 |    "source": [
318 |     "dt.query('name == \"mu1\"')['value'].hist()\n",
319 |     "dt.query('name == \"mu2\"')['value'].hist(ax=plt.gca());"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "markdown",
324 |    "metadata": {},
325 |    "source": [
326 |     "We can also use pybroom's [tidy_to_dict](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.tidy_to_dict) \n",
327 |     "and [dict_to_tidy](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.dict_to_tidy) \n",
328 |     "functions to convert\n",
329 |     "a set of fitted parameters to a dict (and vice-versa):"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "code",
334 |    "execution_count": null,
335 |    "metadata": {
336 |     "collapsed": false
337 |    },
338 |    "outputs": [],
339 |    "source": [
340 |     "kwd_params = br.tidy_to_dict(dt.loc[dt['dataset'] == 0])\n",
341 |     "kwd_params"
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "code",
346 |    "execution_count": null,
347 |    "metadata": {
348 |     "collapsed": false
349 |    },
350 |    "outputs": [],
351 |    "source": [
352 |     "br.dict_to_tidy(kwd_params)"
353 |    ]
354 |   },
355 |   {
356 |    "cell_type": "markdown",
357 |    "metadata": {},
358 |    "source": [
359 |     "This conversion is useful to call a python functions\n",
360 |     "passing argument values from a tidy DataFrame. \n",
361 |     "\n",
362 |     "For example, here we use `tidy_to_dict`\n",
363 |     "to easily plot the model distribution:"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "code",
368 |    "execution_count": null,
369 |    "metadata": {
370 |     "collapsed": false
371 |    },
372 |    "outputs": [],
373 |    "source": [
374 |     "bins = np.arange(-5, 5.01, 0.25)\n",
375 |     "x = bins[:-1] + 0.5*(bins[1] - bins[0])\n",
376 |     "grid = sns.FacetGrid(ds.query('dataset < 6'), col='dataset', hue='dataset', col_wrap=3)\n",
377 |     "grid.map(plt.hist, 'data', bins=bins, normed=True);\n",
378 |     "for i, ax in enumerate(grid.axes):\n",
379 |     "    kw_pars = br.tidy_to_dict(dt.loc[dt.dataset == i], keys_exclude=['ax'])\n",
380 |     "    y = model_pdf(x, **kw_pars)\n",
381 |     "    ax.plot(x, y, lw=2, color='k')"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "markdown",
386 |    "metadata": {},
387 |    "source": [
388 |     "### Single-peak model"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "markdown",
393 |    "metadata": {},
394 |    "source": [
395 |     "For the sake of the example we also fit the $N$ datasets with a single Gaussian distribution:"
396 |    ]
397 |   },
398 |   {
399 |    "cell_type": "code",
400 |    "execution_count": null,
401 |    "metadata": {
402 |     "collapsed": true
403 |    },
404 |    "outputs": [],
405 |    "source": [
406 |     "def model_pdf1(x, mu, sig):\n",
407 |     "    return normpdf(x, mu, sig)\n",
408 |     "\n",
409 |     "def log_likelihood_lmfit1(params, x):\n",
410 |     "    return -np.log(model_pdf1(x, **params.valuesdict())).sum()"
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "code",
415 |    "execution_count": null,
416 |    "metadata": {
417 |     "collapsed": false
418 |    },
419 |    "outputs": [],
420 |    "source": [
421 |     "params = lmfit.Parameters()\n",
422 |     "params.add('mu', 0, min=-5, max=5)\n",
423 |     "params.add('sig', 1, min=1e-6)\n",
424 |     "\n",
425 |     "Results1 = [lmfit.minimize(log_likelihood_lmfit1, params, args=(di,), \n",
426 |     "                          nan_policy='omit', method='least_squares')\n",
427 |     "           for di in d]"
428 |    ]
429 |   },
430 |   {
431 |    "cell_type": "code",
432 |    "execution_count": null,
433 |    "metadata": {
434 |     "collapsed": false
435 |    },
436 |    "outputs": [],
437 |    "source": [
438 |     "dg1 = br.glance(Results)\n",
439 |     "dg1.drop('message', 1).head()"
440 |    ]
441 |   },
442 |   {
443 |    "cell_type": "code",
444 |    "execution_count": null,
445 |    "metadata": {
446 |     "collapsed": false
447 |    },
448 |    "outputs": [],
449 |    "source": [
450 |     "dt1 = br.tidy(Results1, var_names='dataset')\n",
451 |     "dt1.query('dataset == 0')"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "markdown",
456 |    "metadata": {},
457 |    "source": [
458 |     "## Augment?\n",
459 |     "\n",
460 |     "Pybroom [augment](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.augment) function \n",
461 |     "extracts information that is the same size as the input dataset,\n",
462 |     "for example the array of residuals. In this case, however, we performed a scalar minimization\n",
463 |     "(the log-likelihood function returns a scalar) and therefore the `MinimizerResult` object\n",
464 |     "does not contain any residual array or other data of the same size as the dataset.\n",
465 |     "\n",
466 |     "## Comparing fit results\n",
467 |     "\n",
468 |     "We will do instead a comparison of single and two-peaks distribution using the results\n",
469 |     "from the `tidy` function obtained in the previous section.\n",
470 |     "\n",
471 |     "We start with the following plot:"
472 |    ]
473 |   },
474 |   {
475 |    "cell_type": "code",
476 |    "execution_count": null,
477 |    "metadata": {
478 |     "collapsed": false
479 |    },
480 |    "outputs": [],
481 |    "source": [
482 |     "dt['model'] = 'twopeaks'\n",
483 |     "dt1['model'] = 'onepeak'\n",
484 |     "dt_tot = pd.concat([dt, dt1], ignore_index=True)"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "code",
489 |    "execution_count": null,
490 |    "metadata": {
491 |     "collapsed": false
492 |    },
493 |    "outputs": [],
494 |    "source": [
495 |     "bins = np.arange(-5, 5.01, 0.25)\n",
496 |     "x = bins[:-1] + 0.5*(bins[1] - bins[0])\n",
497 |     "grid = sns.FacetGrid(ds.query('dataset < 6'), col='dataset', hue='dataset', col_wrap=3)\n",
498 |     "grid.map(plt.hist, 'data', bins=bins, normed=True);\n",
499 |     "for i, ax in enumerate(grid.axes):\n",
500 |     "    kw_pars = br.tidy_to_dict(dt_tot.loc[(dt_tot.dataset == i) & (dt_tot.model == 'onepeak')])\n",
501 |     "    y1 = model_pdf1(x, **kw_pars)\n",
502 |     "    li1, = ax.plot(x, y1, lw=2, color='k', alpha=0.5)\n",
503 |     "    kw_pars = br.tidy_to_dict(dt_tot.loc[(dt_tot.dataset == i) & (dt_tot.model == 'twopeaks')], keys_exclude=['ax'])\n",
504 |     "    y = model_pdf(x, **kw_pars)\n",
505 |     "    li, = ax.plot(x, y, lw=2, color='k')\n",
506 |     "grid.add_legend(legend_data=dict(onepeak=li1, twopeaks=li), \n",
507 |     "                label_order=['onepeak', 'twopeaks'], title='model');"
508 |    ]
509 |   },
510 |   {
511 |    "cell_type": "markdown",
512 |    "metadata": {},
513 |    "source": [
514 |     "The problem is that `FacetGrid` only takes one DataFrame as input. In the previous\n",
515 |     "example we provide the DataFrame of \"experimental\" data (`ds`) and use the `.map` method to plot\n",
516 |     "histograms of the different datasets. The fitted distributions, instead, are\n",
517 |     "plotted manually in the for loop.\n",
518 |     "\n",
519 |     "We can invert the approach, and pass to `FacetGrid` the DataFrame of fitted parameters (`dt_tot`),\n",
520 |     "while leaving the simple histogram for manual plotting. In this case we need to write an \n",
521 |     "helper function (`_plot`) that knows how to plot a distribution given a set of parameter:"
522 |    ]
523 |   },
524 |   {
525 |    "cell_type": "code",
526 |    "execution_count": null,
527 |    "metadata": {
528 |     "collapsed": true
529 |    },
530 |    "outputs": [],
531 |    "source": [
532 |     "def _plot(names, values, x, label=None, color=None):\n",
533 |     "    df = pd.concat([names, values], axis=1)\n",
534 |     "    kw_pars = br.tidy_to_dict(df, keys_exclude=['ax'])\n",
535 |     "    func = model_pdf1 if label == 'onepeak' else model_pdf\n",
536 |     "    y = func(x, **kw_pars)\n",
537 |     "    plt.plot(x, y, lw=2, color=color, label=label)    "
538 |    ]
539 |   },
540 |   {
541 |    "cell_type": "code",
542 |    "execution_count": null,
543 |    "metadata": {
544 |     "collapsed": false
545 |    },
546 |    "outputs": [],
547 |    "source": [
548 |     "bins = np.arange(-5, 5.01, 0.25)\n",
549 |     "x = bins[:-1] + 0.5*(bins[1] - bins[0])\n",
550 |     "grid = sns.FacetGrid(dt_tot.query('dataset < 6'), col='dataset', hue='model', col_wrap=3)\n",
551 |     "grid.map(_plot, 'name', 'value', x=x)\n",
552 |     "grid.add_legend()\n",
553 |     "for i, ax in enumerate(grid.axes):\n",
554 |     "    ax.hist(ds.query('dataset == %d' % i).data, bins=bins, histtype='stepfilled', normed=True, \n",
555 |     "            color='gray', alpha=0.5);"
556 |    ]
557 |   },
558 |   {
559 |    "cell_type": "markdown",
560 |    "metadata": {},
561 |    "source": [
562 |     "For an even better (i.e. simpler) example of plots of fit results see\n",
563 |     "[pybroom-example-multi-datasets](pybroom-example-multi-datasets.ipynb)."
564 |    ]
565 |   }
566 |  ],
567 |  "metadata": {
568 |   "kernelspec": {
569 |    "display_name": "Python 3",
570 |    "language": "python",
571 |    "name": "python3"
572 |   },
573 |   "language_info": {
574 |    "codemirror_mode": {
575 |     "name": "ipython",
576 |     "version": 3
577 |    },
578 |    "file_extension": ".py",
579 |    "mimetype": "text/x-python",
580 |    "name": "python",
581 |    "nbconvert_exporter": "python",
582 |    "pygments_lexer": "ipython3",
583 |    "version": "3.5.2"
584 |   }
585 |  },
586 |  "nbformat": 4,
587 |  "nbformat_minor": 0
588 | }
589 | 


--------------------------------------------------------------------------------
/doc/notebooks/pybroom-example-multi-datasets-scipy-robust-fit.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyBroom Example - Multiple Datasets - Scipy Robust Fit\n",
  8 |     "\n",
  9 |     "*This notebook is part of* [pybroom](https://github.com/tritemio/pybroom).\n",
 10 |     "\n",
 11 |     ">This notebook demonstrate using *pybroom* when fitting **a set of curves** (curve fitting) using robust fitting and scipy.\n",
 12 |     ">We will show that *pybroom* greatly simplifies comparing, filtering and plotting fit results \n",
 13 |     ">from multiple datasets.\n",
 14 |     "> See\n",
 15 |     ">[pybroom-example-multi-datasets](pybroom-example-multi-datasets.ipynb)\n",
 16 |     "> for an example using `lmfit.Model` instead of directly scipy."
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "metadata": {},
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "%matplotlib inline\n",
 26 |     "%config InlineBackend.figure_format='retina'  # for hi-dpi displays\n",
 27 |     "import numpy as np\n",
 28 |     "import pandas as pd\n",
 29 |     "import matplotlib.pyplot as plt\n",
 30 |     "from matplotlib.pylab import normpdf\n",
 31 |     "import seaborn as sns\n",
 32 |     "from lmfit import Model\n",
 33 |     "import lmfit\n",
 34 |     "print('lmfit: %s' % lmfit.__version__)"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "code",
 39 |    "execution_count": null,
 40 |    "metadata": {
 41 |     "collapsed": true
 42 |    },
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "sns.set_style('whitegrid')"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": null,
 51 |    "metadata": {
 52 |     "collapsed": true
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "import pybroom as br"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "## Create Noisy Data\n",
 64 |     "\n",
 65 |     "We start simulating *N* datasets which are identical except for the additive noise."
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": null,
 71 |    "metadata": {
 72 |     "collapsed": true
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "N = 200"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {
 83 |     "collapsed": true
 84 |    },
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "x = np.linspace(-10, 10, 101)"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {
 94 |     "collapsed": true
 95 |    },
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "peak1 = lmfit.models.GaussianModel(prefix='p1_')\n",
 99 |     "peak2 = lmfit.models.GaussianModel(prefix='p2_')\n",
100 |     "model = peak1 + peak2"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "metadata": {
107 |     "collapsed": true
108 |    },
109 |    "outputs": [],
110 |    "source": [
111 |     "#params = model.make_params(p1_amplitude=1.5, p2_amplitude=1, \n",
112 |     "#                           p1_sigma=1, p2_sigma=1)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": [
121 |     "Y_data = np.zeros((N, x.size))\n",
122 |     "Y_data.shape, x.shape"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {
129 |     "collapsed": true
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "for i in range(Y_data.shape[0]):\n",
134 |     "    Y_data[i] = model.eval(x=x, p1_center=-1, p2_center=2, \n",
135 |     "                           p1_sigma=0.5, p2_sigma=1, \n",
136 |     "                           p1_height=1, p2_height=0.5)\n",
137 |     "Y_data += np.random.randn(*Y_data.shape)/10"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "Add some outliers:"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "collapsed": true,
152 |     "scrolled": true
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "num_outliers = int(Y_data.size * 0.05)\n",
157 |     "idx_ol = np.random.randint(low=0, high=Y_data.size, size=num_outliers)\n",
158 |     "Y_data.reshape(-1)[idx_ol] = (np.random.rand(num_outliers) - 0.5)*4"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": [
167 |     "plt.plot(x, Y_data.T, 'ok', alpha=0.1);\n",
168 |     "plt.title('%d simulated datasets, with outliers' % N);"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "markdown",
173 |    "metadata": {},
174 |    "source": [
175 |     "## Model Fitting"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "markdown",
180 |    "metadata": {},
181 |    "source": [
182 |     "### curve_fit()"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {
189 |     "collapsed": true
190 |    },
191 |    "outputs": [],
192 |    "source": [
193 |     "import scipy.optimize as so"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": null,
199 |    "metadata": {
200 |     "collapsed": true
201 |    },
202 |    "outputs": [],
203 |    "source": [
204 |     "from collections import namedtuple"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": null,
210 |    "metadata": {
211 |     "collapsed": true
212 |    },
213 |    "outputs": [],
214 |    "source": [
215 |     "# Model PDF to be maximized\n",
216 |     "def model_pdf(x, a1, a2, mu1, mu2, sig1, sig2):\n",
217 |     "    return (a1 * normpdf(x, mu1, sig1) + \n",
218 |     "            a2 * normpdf(x, mu2, sig2))"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {
225 |     "collapsed": true
226 |    },
227 |    "outputs": [],
228 |    "source": [
229 |     "result = so.curve_fit(model_pdf, x, Y_data[0])"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": null,
235 |    "metadata": {},
236 |    "outputs": [],
237 |    "source": [
238 |     "type(result), type(result[0]), type(result[1])"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {},
245 |    "outputs": [],
246 |    "source": [
247 |     "result[0]"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "metadata": {},
253 |    "source": [
254 |     "Using a `namedtuple` is a clean way to assign names to an array of paramenters:"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": null,
260 |    "metadata": {
261 |     "collapsed": true
262 |    },
263 |    "outputs": [],
264 |    "source": [
265 |     "Params = namedtuple('Params', 'a1 a2 mu1 mu2 sig1 sig2')"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": null,
271 |    "metadata": {},
272 |    "outputs": [],
273 |    "source": [
274 |     "p = Params(*result[0])\n",
275 |     "p"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "markdown",
280 |    "metadata": {},
281 |    "source": [
282 |     "Unfortunately, not much data is returned by `curve_fit`, a 2-element tuple with:\n",
283 |     "\n",
284 |     "- array of best-fit parameters\n",
285 |     "- array of jacobian\n",
286 |     "\n",
287 |     "Therefore `curve_fit` is not very useful for detailed comparison of fit results. \n",
288 |     "A better interface for curve fitting would be *lmfit.Model* (see \n",
289 |     "[this other notebook](pybroom-example-multi-datasets.ipynb)).\n",
290 |     "\n",
291 |     "In the current notebook we keep exploring further options offered by `scipy.optimize`.\n",
292 |     "\n",
293 |     "\n",
294 |     "### least_squares()\n",
295 |     "\n",
296 |     "As an example, we use the `least_squares` function which supports robust loss functions and constraints.\n",
297 |     "\n",
298 |     "We need to define the residuals:"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "code",
303 |    "execution_count": null,
304 |    "metadata": {
305 |     "collapsed": true
306 |    },
307 |    "outputs": [],
308 |    "source": [
309 |     "def residuals(p, x, y):\n",
310 |     "    return y - model_pdf(x, *p)"
311 |    ]
312 |   },
313 |   {
314 |    "cell_type": "markdown",
315 |    "metadata": {},
316 |    "source": [
317 |     "Then, we fit the *N* datasets with different loss functions storing result in a dict containing lists:"
318 |    ]
319 |   },
320 |   {
321 |    "cell_type": "code",
322 |    "execution_count": null,
323 |    "metadata": {
324 |     "collapsed": true
325 |    },
326 |    "outputs": [],
327 |    "source": [
328 |     "losses = ('linear', 'huber', 'cauchy')\n",
329 |     "Results = {}\n",
330 |     "for loss in losses:\n",
331 |     "    Results[loss] = [so.least_squares(residuals, (1,1,0,1,1,1), args=(x, y), loss=loss, f_scale=0.5)\n",
332 |     "                     for y in Y_data]"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "markdown",
337 |    "metadata": {},
338 |    "source": [
339 |     "<div class=\"alert alert-info\">\n",
340 |     "\n",
341 |     "**NOTE**: For more details on robust fitting and on the different loss functions see\n",
342 |     "[Robust nonlinear regression in scipy](http://scipy-cookbook.readthedocs.io/items/robust_regression.html).\n",
343 |     "\n",
344 |     "</div>"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "code",
349 |    "execution_count": null,
350 |    "metadata": {
351 |     "collapsed": true
352 |    },
353 |    "outputs": [],
354 |    "source": [
355 |     "# result = Results['cauchy'][0]\n",
356 |     "# for k in result.keys():\n",
357 |     "#     print(k, type(result[k]))"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "markdown",
362 |    "metadata": {},
363 |    "source": [
364 |     "## Tidying the results\n",
365 |     "\n",
366 |     "Now we tidy the results, combining the results for the different loss functions\n",
367 |     "in a single DataFrames.\n",
368 |     "\n",
369 |     "We start with the `glance` function, which returns one row per fit result:"
370 |    ]
371 |   },
372 |   {
373 |    "cell_type": "code",
374 |    "execution_count": null,
375 |    "metadata": {},
376 |    "outputs": [],
377 |    "source": [
378 |     "dg_tot = br.glance(Results, var_names=['loss', 'dataset'])\n",
379 |     "dg_tot.head()"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": null,
385 |    "metadata": {},
386 |    "outputs": [],
387 |    "source": [
388 |     "dg_tot.success.all()"
389 |    ]
390 |   },
391 |   {
392 |    "cell_type": "markdown",
393 |    "metadata": {},
394 |    "source": [
395 |     "Then we apply `tidy`, which returns one row per parameter.\n",
396 |     "\n",
397 |     "Since the object `OptimzeResult` returned by `scipy.optimize` does \n",
398 |     "only contains an array of parameters, we need to pass the names as\n",
399 |     "as additional argument:"
400 |    ]
401 |   },
402 |   {
403 |    "cell_type": "code",
404 |    "execution_count": null,
405 |    "metadata": {},
406 |    "outputs": [],
407 |    "source": [
408 |     "pnames = 'a1 a2 mu1 mu2 sig1 sig2'\n",
409 |     "dt_tot = br.tidy(Results, var_names=['loss', 'dataset'], param_names=pnames)\n",
410 |     "dt_tot.head()"
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "markdown",
415 |    "metadata": {},
416 |    "source": [
417 |     "Finally, we cannot apply the\n",
418 |     "`augment` function, since the `OptimizeResult` object\n",
419 |     "does not include much per-data-point information \n",
420 |     "(it may contain the array of residuals)."
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "markdown",
425 |    "metadata": {},
426 |    "source": [
427 |     "## Plots"
428 |    ]
429 |   },
430 |   {
431 |    "cell_type": "markdown",
432 |    "metadata": {},
433 |    "source": [
434 |     "First we plot the peak position and sigmas distributions:"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "code",
439 |    "execution_count": null,
440 |    "metadata": {},
441 |    "outputs": [],
442 |    "source": [
443 |     "kws = dict(bins = np.arange(-2, 4, 0.1), histtype='step', lw=2)\n",
444 |     "for loss in losses:\n",
445 |     "    dt_tot.query('(name == \"mu1\" or name == \"mu2\") and loss == \"%s\"' % loss)['value'].hist(label=loss, **kws)\n",
446 |     "    kws['ax'] = plt.gca()\n",
447 |     "plt.title(' Distribution of peaks centers')\n",
448 |     "plt.legend();"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": null,
454 |    "metadata": {},
455 |    "outputs": [],
456 |    "source": [
457 |     "kws = dict(bins = np.arange(0, 4, 0.1), histtype='step', lw=2)\n",
458 |     "for loss in losses:\n",
459 |     "    dt_tot.query('(name == \"sig1\" or name == \"sig2\") and loss == \"%s\"' % loss)['value'].hist(label=loss, **kws)\n",
460 |     "    kws['ax'] = plt.gca()\n",
461 |     "plt.title(' Distribution of peaks sigmas')\n",
462 |     "plt.legend();"
463 |    ]
464 |   },
465 |   {
466 |    "cell_type": "markdown",
467 |    "metadata": {},
468 |    "source": [
469 |     "A more complete overview for all the fit paramenters can be obtained with a factorplot:"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "code",
474 |    "execution_count": null,
475 |    "metadata": {
476 |     "scrolled": false
477 |    },
478 |    "outputs": [],
479 |    "source": [
480 |     "sns.factorplot(x='loss', y='value', data=dt_tot, col='name', hue='loss',\n",
481 |     "               col_wrap=4, kind='box', sharey=False);"
482 |    ]
483 |   },
484 |   {
485 |    "cell_type": "markdown",
486 |    "metadata": {},
487 |    "source": [
488 |     "From all the previous plots we see that, as espected, using robust fitting \n",
489 |     "with higher damping of outlier (i.e. `cauchy` vs `huber` or `linear`) \n",
490 |     "results in more accurate fit results.\n",
491 |     "\n",
492 |     "Finally, we can have a peek at the comparison of raw data and fitted models\n",
493 |     "for a few datatsets.\n",
494 |     "\n",
495 |     "Since `OptimizeResults` does not include \"augmented\" data we need to \n",
496 |     "generate these data by evaluating the model with the best-fit parameters.\n",
497 |     "We use seaborn's `FacetGrid`, passing a custom function `_plot`\n",
498 |     "for model evaluation:"
499 |    ]
500 |   },
501 |   {
502 |    "cell_type": "code",
503 |    "execution_count": null,
504 |    "metadata": {
505 |     "collapsed": true
506 |    },
507 |    "outputs": [],
508 |    "source": [
509 |     "def _plot(names, values, x, label=None, color=None):\n",
510 |     "    df = pd.concat([names, values], axis=1)\n",
511 |     "    kw_pars = br.tidy_to_dict(df)\n",
512 |     "    y = model_pdf(x, **kw_pars)\n",
513 |     "    plt.plot(x, y, lw=2, color=color, label=label)    "
514 |    ]
515 |   },
516 |   {
517 |    "cell_type": "code",
518 |    "execution_count": null,
519 |    "metadata": {},
520 |    "outputs": [],
521 |    "source": [
522 |     "grid = sns.FacetGrid(dt_tot.query('dataset < 9'), col='dataset', hue='loss', col_wrap=3)\n",
523 |     "grid.map(_plot, 'name', 'value', x=x)\n",
524 |     "grid.add_legend()\n",
525 |     "for i, ax in enumerate(grid.axes):\n",
526 |     "    ax.plot(x, Y_data[i], 'o', ms=3, color='k')\n",
527 |     "plt.ylim(-1, 1.5)"
528 |    ]
529 |   },
530 |   {
531 |    "cell_type": "markdown",
532 |    "metadata": {},
533 |    "source": [
534 |     "For comparison, the `ModelResult` object returned by lmfit,\n",
535 |     "contains not only the evaluated model but also the evaluation\n",
536 |     "of the single components (each single peak in this case).\n",
537 |     "Therefore the above plot can be generated more straighforwardly\n",
538 |     "using the \"augmented\" data.\n",
539 |     "See the notebook [pybroom-example-multi-datasets](pybroom-example-multi-datasets.ipynb)\n",
540 |     "for an example."
541 |    ]
542 |   }
543 |  ],
544 |  "metadata": {
545 |   "kernelspec": {
546 |    "display_name": "Python [default]",
547 |    "language": "python",
548 |    "name": "python3"
549 |   },
550 |   "language_info": {
551 |    "codemirror_mode": {
552 |     "name": "ipython",
553 |     "version": 3
554 |    },
555 |    "file_extension": ".py",
556 |    "mimetype": "text/x-python",
557 |    "name": "python",
558 |    "nbconvert_exporter": "python",
559 |    "pygments_lexer": "ipython3",
560 |    "version": "3.5.2"
561 |   },
562 |   "toc": {
563 |    "colors": {
564 |     "hover_highlight": "#DAA520",
565 |     "running_highlight": "#FF0000",
566 |     "selected_highlight": "#FFD700"
567 |    },
568 |    "moveMenuLeft": true,
569 |    "nav_menu": {
570 |     "height": "135px",
571 |     "width": "253px"
572 |    },
573 |    "navigate_menu": true,
574 |    "number_sections": false,
575 |    "sideBar": true,
576 |    "threshold": 4,
577 |    "toc_cell": false,
578 |    "toc_section_display": "block",
579 |    "toc_window_display": false,
580 |    "widenNotebook": false
581 |   }
582 |  },
583 |  "nbformat": 4,
584 |  "nbformat_minor": 1
585 | }
586 | 


--------------------------------------------------------------------------------
/doc/notebooks/pybroom-example-multi-datasets.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyBroom Example - Multiple Datasets\n",
  8 |     "\n",
  9 |     "*This notebook is part of* [pybroom](https://github.com/tritemio/pybroom).\n",
 10 |     "\n",
 11 |     ">This notebook demonstrate using *pybroom* when fitting **a set of curves** (curve fitting) using lmfit.Model.\n",
 12 |     ">We will show that *pybroom* greatly simplifies comparing, filtering and plotting fit results \n",
 13 |     ">from multiple datasets."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": null,
 19 |    "metadata": {
 20 |     "collapsed": false
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "%matplotlib inline\n",
 25 |     "%config InlineBackend.figure_format='retina'  # for hi-dpi displays\n",
 26 |     "import numpy as np\n",
 27 |     "import pandas as pd\n",
 28 |     "import matplotlib.pyplot as plt\n",
 29 |     "import seaborn as sns\n",
 30 |     "from lmfit import Model\n",
 31 |     "import lmfit\n",
 32 |     "print('lmfit: %s' % lmfit.__version__)"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": null,
 38 |    "metadata": {
 39 |     "collapsed": true
 40 |    },
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "sns.set_style('whitegrid')"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {
 50 |     "collapsed": true
 51 |    },
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "import pybroom as br"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "## Create Noisy Data\n",
 62 |     "\n",
 63 |     "We start simulating *N* datasets which are identical except for the additive noise."
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {
 70 |     "collapsed": true
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "N = 20"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": false
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "x = np.linspace(-10, 10, 101)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "peak1 = lmfit.models.GaussianModel(prefix='p1_')\n",
 97 |     "peak2 = lmfit.models.GaussianModel(prefix='p2_')\n",
 98 |     "model = peak1 + peak2"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "metadata": {
105 |     "collapsed": false
106 |    },
107 |    "outputs": [],
108 |    "source": [
109 |     "#params = model.make_params(p1_amplitude=1.5, p2_amplitude=1, \n",
110 |     "#                           p1_sigma=1, p2_sigma=1)"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": null,
116 |    "metadata": {
117 |     "collapsed": false
118 |    },
119 |    "outputs": [],
120 |    "source": [
121 |     "Y_data = np.zeros((N, x.size))\n",
122 |     "Y_data.shape"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "for i in range(Y_data.shape[0]):\n",
134 |     "    Y_data[i] = model.eval(x=x, p1_center=-1, p2_center=2, \n",
135 |     "                           p1_sigma=0.5, p2_sigma=1.5, \n",
136 |     "                           p1_height=1, p2_height=0.5)\n",
137 |     "Y_data += np.random.randn(*Y_data.shape)/10"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": null,
143 |    "metadata": {
144 |     "collapsed": false
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "plt.plot(x, Y_data.T, '-k', alpha=0.1);"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "metadata": {},
154 |    "source": [
155 |     "## Model Fitting\n",
156 |     "\n",
157 |     "### Single-peak model\n",
158 |     "\n",
159 |     "Define and fit a single Gaussian model to the $N$ datasets:"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "metadata": {
166 |     "collapsed": true
167 |    },
168 |    "outputs": [],
169 |    "source": [
170 |     "model1 = lmfit.models.GaussianModel()"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": null,
176 |    "metadata": {
177 |     "collapsed": false
178 |    },
179 |    "outputs": [],
180 |    "source": [
181 |     "Results1 = [model1.fit(y, x=x) for y in Y_data]"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {},
187 |    "source": [
188 |     "### Two-peaks model\n",
189 |     "\n",
190 |     "Here, instead, we use a more appropriate Gaussian mixture model. \n",
191 |     "\n",
192 |     "To fit the noisy data, the residuals (the difference between model and data)\n",
193 |     "is minimized in the least-squares sense.\n"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": null,
199 |    "metadata": {
200 |     "collapsed": true
201 |    },
202 |    "outputs": [],
203 |    "source": [
204 |     "params = model.make_params(p1_center=0, p2_center=3, \n",
205 |     "                           p1_sigma=0.5, p2_sigma=1, \n",
206 |     "                           p1_amplitude=1, p2_amplitude=2)"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": null,
212 |    "metadata": {
213 |     "collapsed": false
214 |    },
215 |    "outputs": [],
216 |    "source": [
217 |     "Results = [model.fit(y, x=x, params=params) for y in Y_data]"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {},
223 |    "source": [
224 |     "Fit results from an lmfit `Model` can be inspected with\n",
225 |     "with `fit_report` or `params.pretty_print()`:"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "metadata": {
232 |     "collapsed": false
233 |    },
234 |    "outputs": [],
235 |    "source": [
236 |     "#print(Results[0].fit_report())\n",
237 |     "#Results[0].params.pretty_print()"
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "markdown",
242 |    "metadata": {},
243 |    "source": [
244 |     "This is good for peeking at the results. However,\n",
245 |     "extracting these data from lmfit objects is quite a chore\n",
246 |     "and requires good knowledge of lmfit objects structure.\n",
247 |     "\n",
248 |     "**pybroom** helps in this task: it extracts data from fit results and\n",
249 |     "returns familiar pandas DataFrame (in tidy format). \n",
250 |     "Thanks to the tidy format these data can be\n",
251 |     "much more easily manipulated, filtered and plotted."
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "metadata": {},
257 |    "source": [
258 |     "### Glance\n",
259 |     "\n",
260 |     "A summary of the two-peaks model fit:"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": null,
266 |    "metadata": {
267 |     "collapsed": false
268 |    },
269 |    "outputs": [],
270 |    "source": [
271 |     "dg = br.glance(Results, var_names='dataset')\n",
272 |     "\n",
273 |     "dg.drop('model', 1).drop('message', 1).head()"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "metadata": {},
279 |    "source": [
280 |     "A summary of the one-peak model fit:"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "code",
285 |    "execution_count": null,
286 |    "metadata": {
287 |     "collapsed": false
288 |    },
289 |    "outputs": [],
290 |    "source": [
291 |     "dg1 = br.glance(Results1, var_names='dataset')\n",
292 |     "\n",
293 |     "dg1.drop('model', 1).drop('message', 1).head()"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "markdown",
298 |    "metadata": {},
299 |    "source": [
300 |     "### Tidy\n",
301 |     "\n",
302 |     "Tidy fit results for all the parameters:"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": null,
308 |    "metadata": {
309 |     "collapsed": false
310 |    },
311 |    "outputs": [],
312 |    "source": [
313 |     "dt = br.tidy(Results, var_names='dataset')"
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "markdown",
318 |    "metadata": {},
319 |    "source": [
320 |     "Let's see the results for a single dataset:"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "code",
325 |    "execution_count": null,
326 |    "metadata": {
327 |     "collapsed": false
328 |    },
329 |    "outputs": [],
330 |    "source": [
331 |     "dt.query('dataset == 0')"
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "markdown",
336 |    "metadata": {},
337 |    "source": [
338 |     "or for a single parameter across datasets:"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": null,
344 |    "metadata": {
345 |     "collapsed": false
346 |    },
347 |    "outputs": [],
348 |    "source": [
349 |     "dt.query('name == \"p1_center\"').head()"
350 |    ]
351 |   },
352 |   {
353 |    "cell_type": "code",
354 |    "execution_count": null,
355 |    "metadata": {
356 |     "collapsed": false
357 |    },
358 |    "outputs": [],
359 |    "source": [
360 |     "dt.query('name == \"p1_center\"')['value'].std()"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "code",
365 |    "execution_count": null,
366 |    "metadata": {
367 |     "collapsed": false
368 |    },
369 |    "outputs": [],
370 |    "source": [
371 |     "dt.query('name == \"p2_center\"')['value'].std()"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "markdown",
376 |    "metadata": {},
377 |    "source": [
378 |     "Note that there is a much larger error in fitting `p2_center`\n",
379 |     "than `p1_center`."
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": null,
385 |    "metadata": {
386 |     "collapsed": false
387 |    },
388 |    "outputs": [],
389 |    "source": [
390 |     "dt.query('name == \"p1_center\"')['value'].hist()\n",
391 |     "dt.query('name == \"p2_center\"')['value'].hist(ax=plt.gca());"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "markdown",
396 |    "metadata": {},
397 |    "source": [
398 |     "### Augment\n",
399 |     "\n",
400 |     "Tidy dataframe with data function of the independent variable ('x'). Columns include\n",
401 |     "the data being fitted, best fit, best fit components, residuals, etc."
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": null,
407 |    "metadata": {
408 |     "collapsed": false
409 |    },
410 |    "outputs": [],
411 |    "source": [
412 |     "da = br.augment(Results, var_names='dataset')"
413 |    ]
414 |   },
415 |   {
416 |    "cell_type": "code",
417 |    "execution_count": null,
418 |    "metadata": {
419 |     "collapsed": true
420 |    },
421 |    "outputs": [],
422 |    "source": [
423 |     "da1 = br.augment(Results1, var_names='dataset')"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": null,
429 |    "metadata": {
430 |     "collapsed": true
431 |    },
432 |    "outputs": [],
433 |    "source": [
434 |     "r = Results[0]"
435 |    ]
436 |   },
437 |   {
438 |    "cell_type": "markdown",
439 |    "metadata": {},
440 |    "source": [
441 |     "Let's see the results for a single dataset:"
442 |    ]
443 |   },
444 |   {
445 |    "cell_type": "code",
446 |    "execution_count": null,
447 |    "metadata": {
448 |     "collapsed": false
449 |    },
450 |    "outputs": [],
451 |    "source": [
452 |     "da.query('dataset == 0').head()"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "markdown",
457 |    "metadata": {},
458 |    "source": [
459 |     "Plotting a single dataset is simplified compared to a manual plot:"
460 |    ]
461 |   },
462 |   {
463 |    "cell_type": "code",
464 |    "execution_count": null,
465 |    "metadata": {
466 |     "collapsed": false
467 |    },
468 |    "outputs": [],
469 |    "source": [
470 |     "da0 = da.query('dataset == 0')"
471 |    ]
472 |   },
473 |   {
474 |    "cell_type": "code",
475 |    "execution_count": null,
476 |    "metadata": {
477 |     "collapsed": false
478 |    },
479 |    "outputs": [],
480 |    "source": [
481 |     "plt.plot('x', 'data', data=da0, marker='o', ls='None')\n",
482 |     "plt.plot('x', \"Model(gaussian, prefix='p1_')\", data=da0, lw=2, ls='--')\n",
483 |     "plt.plot('x', \"Model(gaussian, prefix='p2_')\", data=da0, lw=2, ls='--')\n",
484 |     "plt.plot('x', 'best_fit', data=da0, lw=2);\n",
485 |     "plt.legend()"
486 |    ]
487 |   },
488 |   {
489 |    "cell_type": "markdown",
490 |    "metadata": {},
491 |    "source": [
492 |     "But keep in mind that, for a single dataset, we could\n",
493 |     "use the lmfit method as well (which is even simpler):"
494 |    ]
495 |   },
496 |   {
497 |    "cell_type": "code",
498 |    "execution_count": null,
499 |    "metadata": {
500 |     "collapsed": false
501 |    },
502 |    "outputs": [],
503 |    "source": [
504 |     "Results[0].plot_fit();"
505 |    ]
506 |   },
507 |   {
508 |    "cell_type": "markdown",
509 |    "metadata": {},
510 |    "source": [
511 |     "However, things become much more interesting when we want to plot multiple\n",
512 |     "datasets or models as in the next section."
513 |    ]
514 |   },
515 |   {
516 |    "cell_type": "markdown",
517 |    "metadata": {},
518 |    "source": [
519 |     "#### Comparison of different datasets"
520 |    ]
521 |   },
522 |   {
523 |    "cell_type": "code",
524 |    "execution_count": null,
525 |    "metadata": {
526 |     "collapsed": false
527 |    },
528 |    "outputs": [],
529 |    "source": [
530 |     "grid = sns.FacetGrid(da.query('dataset < 6'), col=\"dataset\", hue=\"dataset\", col_wrap=3)\n",
531 |     "grid.map(plt.plot, 'x', 'data', marker='o', ls='None', ms=3, color='k')\n",
532 |     "grid.map(plt.plot, 'x', \"Model(gaussian, prefix='p1_')\", ls='--')\n",
533 |     "grid.map(plt.plot, 'x', \"Model(gaussian, prefix='p2_')\", ls='--')\n",
534 |     "grid.map(plt.plot, \"x\", \"best_fit\");"
535 |    ]
536 |   },
537 |   {
538 |    "cell_type": "markdown",
539 |    "metadata": {},
540 |    "source": [
541 |     "#### Comparison of one- or two-peaks models"
542 |    ]
543 |   },
544 |   {
545 |    "cell_type": "markdown",
546 |    "metadata": {},
547 |    "source": [
548 |     "Here we plot a comparison of the two fitted models (one or two peaks)\n",
549 |     "for different datasets.\n",
550 |     "\n",
551 |     "First we create a single tidy DataFrame with data from the two models:"
552 |    ]
553 |   },
554 |   {
555 |    "cell_type": "code",
556 |    "execution_count": null,
557 |    "metadata": {
558 |     "collapsed": false
559 |    },
560 |    "outputs": [],
561 |    "source": [
562 |     "da['model'] = 'twopeaks'\n",
563 |     "da1['model'] = 'onepeak'\n",
564 |     "da_tot = pd.concat([da, da1], ignore_index=True)"
565 |    ]
566 |   },
567 |   {
568 |    "cell_type": "markdown",
569 |    "metadata": {},
570 |    "source": [
571 |     "Then we perfom a facet plot with seaborn:"
572 |    ]
573 |   },
574 |   {
575 |    "cell_type": "code",
576 |    "execution_count": null,
577 |    "metadata": {
578 |     "collapsed": false
579 |    },
580 |    "outputs": [],
581 |    "source": [
582 |     "grid = sns.FacetGrid(da_tot.query('dataset < 6'), col=\"dataset\", hue=\"model\", col_wrap=3)\n",
583 |     "grid.map(plt.plot, 'x', 'data', marker='o', ls='None', ms=3, color='k')\n",
584 |     "grid.map(plt.plot, \"x\", \"best_fit\")\n",
585 |     "grid.add_legend();"
586 |    ]
587 |   },
588 |   {
589 |    "cell_type": "markdown",
590 |    "metadata": {},
591 |    "source": [
592 |     "Note that the \"tidy\" organization of data allows plot libraries such as seaborn\n",
593 |     "to automatically infer most information to create complex plots with simple commands.\n",
594 |     "Without tidy data, instead, a manual creation of such plots becomes a daunting task."
595 |    ]
596 |   }
597 |  ],
598 |  "metadata": {
599 |   "kernelspec": {
600 |    "display_name": "Python 3",
601 |    "language": "python",
602 |    "name": "python3"
603 |   },
604 |   "language_info": {
605 |    "codemirror_mode": {
606 |     "name": "ipython",
607 |     "version": 3
608 |    },
609 |    "file_extension": ".py",
610 |    "mimetype": "text/x-python",
611 |    "name": "python",
612 |    "nbconvert_exporter": "python",
613 |    "pygments_lexer": "ipython3",
614 |    "version": "3.5.2"
615 |   }
616 |  },
617 |  "nbformat": 4,
618 |  "nbformat_minor": 0
619 | }
620 | 


--------------------------------------------------------------------------------
/doc/notebooks/pybroom-example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PyBroom Example - Simple"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {
 13 |     "raw_mimetype": "text/markdown"
 14 |    },
 15 |    "source": [
 16 |     "*This notebook is part of* [pybroom](https://github.com/tritemio/pybroom).\n",
 17 |     "\n",
 18 |     ">This notebook shows the simplest usage of *pybroom* when performing\n",
 19 |     ">a curve fit of a single dataset. Possible applications are only hinted.\n",
 20 |     ">For a more complex (and interesting!) example using multiple datasets see\n",
 21 |     ">[pybroom-example-multi-datasets](pybroom-example-multi-datasets.ipynb)."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {
 28 |     "collapsed": true
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import numpy as np\n",
 33 |     "from numpy import sqrt, pi, exp, linspace\n",
 34 |     "from lmfit import Model\n",
 35 |     "import matplotlib.pyplot as plt\n",
 36 |     "%matplotlib inline\n",
 37 |     "%config InlineBackend.figure_format='retina'  # for hi-dpi displays"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": null,
 43 |    "metadata": {
 44 |     "collapsed": false
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "import lmfit\n",
 49 |     "print('lmfit: %s' % lmfit.__version__)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": null,
 55 |    "metadata": {
 56 |     "collapsed": true
 57 |    },
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "import pybroom as br"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "## Create Noisy Data"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": false
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "x = np.linspace(-10, 10, 101)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {
 85 |     "collapsed": false
 86 |    },
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "peak1 = lmfit.models.GaussianModel(prefix='p1_')\n",
 90 |     "peak2 = lmfit.models.GaussianModel(prefix='p2_')\n",
 91 |     "model = peak1 + peak2"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": null,
 97 |    "metadata": {
 98 |     "collapsed": false
 99 |    },
100 |    "outputs": [],
101 |    "source": [
102 |     "params = model.make_params(p1_amplitude=1, p2_amplitude=1, \n",
103 |     "                           p1_sigma=1, p2_sigma=1)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {
110 |     "collapsed": false
111 |    },
112 |    "outputs": [],
113 |    "source": [
114 |     "y_data = model.eval(x=x, p1_center=-1, p2_center=2, p1_sigma=0.5, p2_sigma=1, p1_amplitude=1, p2_amplitude=2)\n",
115 |     "y_data.shape"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {
122 |     "collapsed": false
123 |    },
124 |    "outputs": [],
125 |    "source": [
126 |     "y_data += np.random.randn(*y_data.shape)/10"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": null,
132 |    "metadata": {
133 |     "collapsed": false
134 |    },
135 |    "outputs": [],
136 |    "source": [
137 |     "plt.plot(x, y_data)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "## Model Fitting"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "metadata": {
151 |     "collapsed": false
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "params = model.make_params(p1_center=0, p2_center=3, \n",
156 |     "                           p1_sigma=0.5, p2_sigma=1, \n",
157 |     "                           p1_amplitude=1, p2_amplitude=2)\n",
158 |     "result = model.fit(y_data, x=x, params=params)"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "markdown",
163 |    "metadata": {},
164 |    "source": [
165 |     "Fit result from an lmfit `Model` can be inspected with\n",
166 |     "with `fit_report` or `params.pretty_print()`:"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": null,
172 |    "metadata": {
173 |     "collapsed": false
174 |    },
175 |    "outputs": [],
176 |    "source": [
177 |     "print(result.fit_report())"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {
184 |     "collapsed": false
185 |    },
186 |    "outputs": [],
187 |    "source": [
188 |     "result.params.pretty_print()"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "markdown",
193 |    "metadata": {},
194 |    "source": [
195 |     "These methods a re convenient but extracting the data\n",
196 |     "from the lmfit object requires some work and the knowledge\n",
197 |     "of lmfit object structure.\n",
198 |     "\n",
199 |     "pybroom comes to help, extracting data from fit results and\n",
200 |     "returning  pandas DataFrame in tidy format that can be \n",
201 |     "much more easily manipulated, filtered and plotted."
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "markdown",
206 |    "metadata": {},
207 |    "source": [
208 |     "### Glance\n",
209 |     "\n",
210 |     "Glancing at the fit results (dropping some verbose columns):"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "code",
215 |    "execution_count": null,
216 |    "metadata": {
217 |     "collapsed": false
218 |    },
219 |    "outputs": [],
220 |    "source": [
221 |     "dg = br.glance(result)\n",
222 |     "dg.drop('model', 1).drop('message', 1)"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "markdown",
227 |    "metadata": {},
228 |    "source": [
229 |     "The [glance](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.glance) \n",
230 |     "function returns a DataFrame with one row per fit-result object."
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "markdown",
235 |    "metadata": {},
236 |    "source": [
237 |     "#### Application Idea\n",
238 |     "\n",
239 |     "If you fit *N* models to the same dataset\n",
240 |     "you can compare statistics such as reduced-$\\chi^2$\n",
241 |     "\n",
242 |     "Or, fitting several with several methods (and datasets) you\n",
243 |     "can study the convergence properties using reduced-$\\chi^2$,\n",
244 |     "number of function evaluation and success rate."
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "markdown",
249 |    "metadata": {},
250 |    "source": [
251 |     "### Tidy\n",
252 |     "\n",
253 |     "Tidy fit results for all the parameters:"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {
260 |     "collapsed": false
261 |    },
262 |    "outputs": [],
263 |    "source": [
264 |     "dt = br.tidy(result)\n",
265 |     "dt"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "metadata": {},
271 |    "source": [
272 |     "The [tidy](http://pybroom.readthedocs.io/en/latest/api.html#pybroom.tidy) function returns one row for each parameter."
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": null,
278 |    "metadata": {
279 |     "collapsed": false
280 |    },
281 |    "outputs": [],
282 |    "source": [
283 |     "dt.loc[dt.name == 'p1_center']"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "markdown",
288 |    "metadata": {},
289 |    "source": [
290 |     "### Augment\n",
291 |     "\n",
292 |     "Tidy dataframe with data function of the independent variable ('x'). Columns include\n",
293 |     "the data being fitted, best fit, best fit components, residuals, etc."
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": null,
299 |    "metadata": {
300 |     "collapsed": false
301 |    },
302 |    "outputs": [],
303 |    "source": [
304 |     "da = br.augment(result)\n",
305 |     "da.head()"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "The [augment]() function returns one row for each data point."
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": null,
318 |    "metadata": {
319 |     "collapsed": true
320 |    },
321 |    "outputs": [],
322 |    "source": [
323 |     "d = br.augment(result)"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": null,
329 |    "metadata": {
330 |     "collapsed": false
331 |    },
332 |    "outputs": [],
333 |    "source": [
334 |     "fig, ax = plt.subplots(2, 1, figsize=(7, 8))\n",
335 |     "ax[1].plot('x', 'data', data=d, marker='o', ls='None')\n",
336 |     "ax[1].plot('x', \"Model(gaussian, prefix='p1_')\", data=d, lw=2, ls='--')\n",
337 |     "ax[1].plot('x', \"Model(gaussian, prefix='p2_')\", data=d, lw=2, ls='--')\n",
338 |     "ax[1].plot('x', 'best_fit', data=d, lw=2)\n",
339 |     "ax[0].plot('x', 'residual', data=d);"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "markdown",
344 |    "metadata": {},
345 |    "source": [
346 |     "#### Application Idea\n",
347 |     "\n",
348 |     "Fitting *N* datasets with the same model (or *N* models with the same dataset) \n",
349 |     "you can automatically build a panel plot with `seaborn` using the dataset (or the model) \n",
350 |     "as categorical variable.\n",
351 |     "This example is illustrated in [pybroom-example-multi-datasets](pybroom-example-multi-datasets.ipynb)."
352 |    ]
353 |   }
354 |  ],
355 |  "metadata": {
356 |   "kernelspec": {
357 |    "display_name": "Python 3",
358 |    "language": "python",
359 |    "name": "python3"
360 |   },
361 |   "language_info": {
362 |    "codemirror_mode": {
363 |     "name": "ipython",
364 |     "version": 3
365 |    },
366 |    "file_extension": ".py",
367 |    "mimetype": "text/x-python",
368 |    "name": "python",
369 |    "nbconvert_exporter": "python",
370 |    "pygments_lexer": "ipython3",
371 |    "version": "3.5.2"
372 |   }
373 |  },
374 |  "nbformat": 4,
375 |  "nbformat_minor": 0
376 | }
377 | 


--------------------------------------------------------------------------------
/doc/rtd_requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx>=1.4
2 | pandas>=0.18.1
3 | lmfit
4 | nbsphinx
5 | ipykernel
6 | matplotlib>=1.5.1
7 | seaborn
8 | 


--------------------------------------------------------------------------------
/doc/whatsnew.rst:
--------------------------------------------------------------------------------
 1 | Pybroom's Release Notes
 2 | =======================
 3 | 
 4 | Version 0.2
 5 | -----------
 6 | 
 7 | - Improved support for `scipy.optimize` fit result.
 8 | - In addition to list of fit results, `pybroom` now supports:
 9 | 
10 |     - dict of fit results,
11 |     - dict of lists of fit results
12 |     - any other nested combination.
13 | 
14 | - When input contains a dict, pybroom adds "key" column of type
15 |   `pandas.Categorical`.
16 |   When input contains a list, pybroom adds a "key" column (i.e. list index)
17 |   of type `int64`.
18 | - Updated and expanded documentation and notebooks.
19 | 
20 | Version 0.1
21 | -----------
22 | 
23 | - Support `lmfit` and `scipy.optimize` fit results.
24 | - Support lists of fit results.
25 | 


--------------------------------------------------------------------------------
/pybroom.py:
--------------------------------------------------------------------------------
  1 | #
  2 | # Copyright (c) 2016 Antonino Ingargiola and contributors.
  3 | #
  4 | """
  5 | This module contains the 3 main pybroom's functions:
  6 | 
  7 | - :func:`glance`
  8 | - :func:`tidy`
  9 | - :func:`augment`
 10 | 
 11 | These functions take one or multiple fit results as input and return a
 12 | "tidy" (or long-form) DataFrame.
 13 | The `glance` function returns fit statistics, one for each
 14 | fit result (e.g. fit method, number of iterations, chi-square etc.).
 15 | The `tidy` function returns data for each fitted parameter
 16 | (e.g. fitted value, gradient, bounds, etc.).
 17 | The `augment` function returns data with the same size as the fitted
 18 | data points (evaluated best-fit model, residuals, etc.).
 19 | 
 20 | In the case of multiple fit results, pybroom functions accept a list, a
 21 | dict or a nested structure of dict and lists (for example a dict of lists
 22 | of fit results). The example below shows some use cases.
 23 | 
 24 | Note:
 25 |     pybroom functions are particularly convenient when tidying a
 26 |     collection of fit results. The following examples are valid for
 27 |     all the 3 pybroom functions. If `results` is a list
 28 |     of datasets (e.g. data replicates), the returned dataframe will
 29 |     have an additional "index" column containing the index of the
 30 |     dataset in the list. If `results` is a dict of fit results (e.g.
 31 |     results from different fit methods or models on the same dataset),
 32 |     then the "index" column contains the keys of the dict (each key
 33 |     identifies a fit result). In the previous two example, `var_names`
 34 |     should contains the name of the "index" column (a string).
 35 |     Nested structures are also possible. For example, when fitting
 36 |     a list of datasets with different methods, we can build a dict
 37 |     of lists of fit results where the dict keys are the method names
 38 |     and the items in the list are fit results for the different datasets.
 39 |     In this case the returned dataframe has two additional "index"
 40 |     columns: one with the dict keys and one with the list index.
 41 |     The tuple (key, list index) identifies each single fit result.
 42 |     In this case `var_names` should be a list of column names for
 43 |     the keys and index column respectively (list of strings)
 44 | 
 45 | 
 46 | Example:
 47 |     The following examples shows pybroom output when multiple fit results
 48 |     are used. The `glance` function is used as example but the same logic
 49 |     (and input arguments) can be also passsed to `tidy` and `augment`.
 50 | 
 51 |     Input is a list of fit results::
 52 | 
 53 |         >>> results = [fit_res1, fit_res2, fit_res3]
 54 |         >>> br.glance(results, var_names='dataset')
 55 | 
 56 |           num_params num_data_points      redchi      AIC  dataset
 57 |         0          6             101  0.00911793 -468.634        0
 58 |         1          6             101  0.00996431 -459.669        1
 59 |         2          6             101   0.0109456 -450.183        2
 60 | 
 61 |     Input is a dict of fit results::
 62 | 
 63 |         >>> results = {'A': fit_res1, 'B': fit_res2, 'C': fit_res3}
 64 |         >>> br.glance(results, var_names='function')
 65 | 
 66 |           num_params num_data_points      redchi      AIC function
 67 |         0          6             101  0.00911793 -468.634        A
 68 |         1          6             101  0.00996431 -459.669        B
 69 |         2          6             101   0.0109456 -450.183        C
 70 | 
 71 |     Input is a dict of lists of fit results::
 72 | 
 73 |         >>> results = {'A': [fit_res1, fit_res2], 'B': [fit_res3, fit_res4]}
 74 |         >>> br.glance(results, var_names=['function', 'dataset'])
 75 | 
 76 |           num_params num_data_points      redchi      AIC  dataset function
 77 |         0          6             101  0.00911793 -468.634        0        A
 78 |         1          6             101  0.00996431 -459.669        1        A
 79 |         2          6             101   0.0109456 -450.183        0        B
 80 |         3          6             101   0.0176529 -401.908        1        B
 81 | 
 82 | 
 83 | """
 84 | from collections import OrderedDict, namedtuple
 85 | import numpy as np
 86 | import pandas as pd
 87 | import scipy.optimize as so
 88 | import lmfit
 89 | 
 90 | __version__ = '0.3.dev0'
 91 | 
 92 | 
 93 | def tidy(result, var_names='key', **kwargs):
 94 |     """Tidy DataFrame containing fitted parameter data from `result`.
 95 | 
 96 |     A function to tidy any of the supported fit result
 97 |     (or a list of fit results). This function will identify input type
 98 |     and call the relative "specialized" tidying function. When the input
 99 |     is a list, the returned DataFrame contains data from all the fit
100 |     results.
101 |     Supported fit result objects are `lmfit.ModelResult`,
102 |     `lmfit.MinimizeResult` and `scipy.optimize.OptimizeResult`.
103 | 
104 |     Arguments:
105 |         result (fit result object or list): one of the supported fit result
106 |             objects or a list of supported fit result objects. When a list,
107 |             all the elements need to be of the same type.
108 |         var_names (string or list): name(s) of the column(s) containing
109 |             an "index" that is different for each element in the set of
110 |             fit results.
111 |         param_names (string or list of string): names of the fitted parameters
112 |             for fit results which don't include parameter's names
113 |             (such as scipy's OptimizeResult). It can either be a list of
114 |             strings or a single string with space-separated names.
115 |         **kwargs: additional arguments passed to the underlying specialized
116 |             tidying function.
117 | 
118 |     Returns:
119 |         A DataFrame with one row for each fitted parameter.
120 |         Columns include parameter properties such as best-fit value,
121 |         standard error, eventual bounds/constrains, etc.
122 |         When a list of fit-result objects is passed, the column `var_name`
123 |         (`'item'` by default) contains the index of the object
124 |         in the list.
125 | 
126 |     See also:
127 |         For more details on the returned DataFrame and on additional
128 |         arguments refer to the specialized tidying functions:
129 |         :func:`tidy_lmfit_result` and :func:`tidy_scipy_result`.
130 |     """
131 |     # Find out what result is and call the relevant function
132 |     if isinstance(result, so.OptimizeResult):
133 |         if 'param_names' not in kwargs:
134 |             msg = "The argument `param_names` is required for this input type."
135 |             raise ValueError(msg)
136 |         return tidy_scipy_result(result, **kwargs)
137 |     elif (isinstance(result, lmfit.model.ModelResult) or
138 |           isinstance(result, lmfit.minimizer.MinimizerResult)):
139 |         return tidy_lmfit_result(result)
140 |     elif isinstance(result, list) or isinstance(result, dict):
141 |         return _multi_dataframe(tidy, result, var_names, **kwargs)
142 |     else:
143 |         msg = 'Sorry, `tidy` does not support this object type (%s)'
144 |         raise NotImplementedError(msg % type(result))
145 | 
146 | 
147 | def glance(results, var_names='key', **kwargs):
148 |     """Tidy DataFrame containing fit summaries from`result`.
149 | 
150 |     A function to tidy any of the supported fit result
151 |     (or a list of fit results). This function will identify input type
152 |     and call the relative "specialized" tidying function. When the input
153 |     is a list, the returned DataFrame contains data from all the fit
154 |     results.
155 |     Supported fit result objects are `lmfit.ModelResult`,
156 |     `lmfit.MinimizeResult` and `scipy.optimize.OptimizeResult`.
157 | 
158 |     Arguments:
159 |         result (fit result object or list): one of the supported fit result
160 |             objects or a list of supported fit result objects. When a list,
161 |             all the elements need to be of the same type.
162 |         var_names (string or list): name(s) of the column(s) containing
163 |             an "index" that is different for each element in the set of
164 |             fit results.
165 |         **kwargs: additional arguments passed to the underlying specialized
166 |             tidying function.
167 | 
168 |     Returns:
169 |         A DataFrame with one row for each passed fit result.
170 |         Columns include fit summaries such as reduced chi-square,
171 |         number of evaluation, successful convergence, AIC, BIC, etc.
172 |         When a list of fit-result objects is passed, the column `var_name`
173 |         (`'item'` by default) contains the index of the object
174 |         in the list.
175 | 
176 |     See also:
177 |         For more details on the returned DataFrame and on additional
178 |         arguments refer to the specialized tidying functions:
179 |         :func:`glance_lmfit_result` and :func:`glance_scipy_result`.
180 |     """
181 |     if isinstance(results, so.OptimizeResult):
182 |         return glance_scipy_result(results, **kwargs)
183 |     elif (isinstance(results, lmfit.model.ModelResult) or
184 |           isinstance(results, lmfit.minimizer.MinimizerResult)):
185 |         return glance_lmfit_result(results)
186 |     elif isinstance(results, list) or isinstance(results, dict):
187 |         return _multi_dataframe(glance, results, var_names, **kwargs)
188 |     else:
189 |         msg = 'Sorry, `glance` does not support this object type (%s)'
190 |         raise NotImplementedError(msg % type(results))
191 | 
192 | 
193 | def augment(results, var_names='key', **kwargs):
194 |     """Tidy DataFrame containing fit data from `result`.
195 | 
196 |     A function to tidy any of the supported fit result
197 |     (or a list of fit results). This function will identify input type
198 |     and call the relative "specialized" tidying function. When the input
199 |     is a list or a dict of fit results, the returned DataFrame contains
200 |     data from all the fit results. In this case data from different fit
201 |     results is identified by the values in the additional "index"
202 |     (or categorical) column(s) whose name(s) are specified in `var_names`.
203 | 
204 |     Arguments:
205 |         results (fit result object or list): one of the supported fit result
206 |             objects or a list of supported fit result objects. When a list,
207 |             all the elements need to be of the same type.
208 |         var_names (string or list): name(s) of the column(s) containing
209 |             an "index" that is different for each element in the set of
210 |             fit results. See the example section below.
211 |         **kwargs: additional arguments passed to the underlying specialized
212 |             tidying function.
213 | 
214 |     Returns:
215 |         A DataFrame with one row for each data point used in the fit.
216 |         It contains the input data, the model evaluated at the data points
217 |         with best fitted parameters, error ranges, etc.
218 |         When a list of fit-result objects is passed, the column `var_name`
219 |         (`'item'` by default) contains the index of the object
220 |         in the list.
221 | 
222 |     """
223 |     if isinstance(results, lmfit.model.ModelResult):
224 |         return _augment_lmfit_modelresult(results)
225 |     elif isinstance(results, list) or isinstance(results, dict):
226 |         return _multi_dataframe(augment, results, var_names, **kwargs)
227 |     else:
228 |         msg = 'Sorry, `augment` does not support this object type (%s)'
229 |         raise NotImplementedError(msg % type(results))
230 | 
231 | 
232 | def _as_odict_copy(results):
233 |     """Transform input into a OrderedDict, if needed. Returns a copy.
234 |     """
235 |     iterator = enumerate(results)
236 |     if isinstance(results, dict):
237 |         iterator = results.items()
238 |     return OrderedDict((k, v) for k, v in iterator)
239 | 
240 | 
241 | def _as_list_of_strings_copy(var_names):
242 |     """Transform input into a list of strings, if needed. Returns a copy.
243 |     """
244 |     if isinstance(var_names, str):
245 |         var_names = [var_names]
246 |     return var_names.copy()
247 | 
248 | 
249 | def _multi_dataframe(func, results, var_names, **kwargs):
250 |     """Recursively call `func` on each item in `results` and concatenate output.
251 | 
252 |     Usually `func` is :func:`glance`, :func:`tidy` or :func:`augment`.
253 |     The function `func` is also the calling function, therefore this implements
254 |     a recursion which unpacks the nested `results` structure (a tree) and
255 |     builds a global tidy DataFrame with "key" columns corresponding to
256 |     the `results` structure.
257 | 
258 |     Arguments:
259 |         func (function): function of the called on each element of `results`.
260 |             Chose between `glance`, `tidy` or `augment`.
261 |         results (dict or list): collection of fit results. It can be a list,
262 |             a dict or a nested structure such as a dict of lists.
263 |         var_names (list or string): names of DataFrame columns used to index
264 |             the results. It can be a list of strings or single string in case
265 |             only one categorical "index" is needed (i.e. a string is equivalent
266 |             to a 1-element list of strings).
267 | 
268 |     Returns:
269 |         "Tidy" DataFrame merging data from all the items in `results`.
270 |         Necessary "key" columns are added to encode layout of fitting result
271 |         objects in `results`.
272 |     """
273 |     if isinstance(results, so.OptimizeResult):
274 |         raise ValueError('Input argument has wrong type: `OptimizeResult`.')
275 |     if len(var_names) == 0:
276 |         msg = ('The list `var_names` is too short. Its length should be equal '
277 |                'to the nesting levels in `results`.')
278 |         raise ValueError(msg)
279 |     d = _as_odict_copy(results)
280 |     var_names = _as_list_of_strings_copy(var_names)
281 |     var_name = var_names.pop(0)
282 |     for i, (key, res) in enumerate(d.items()):
283 |         d[key] = func(res, var_names, **kwargs)
284 |         d[key][var_name] = key
285 |     df = pd.concat(d, ignore_index=True)
286 |     # Convert "key" column to categorical only if input was dict-type
287 |     # not list/tuple.
288 |     if isinstance(results, dict):
289 |         kw = {var_name: lambda x: pd.Categorical(x[var_name], ordered=True)}
290 |         df = df.assign(**kw)
291 |     return df
292 | 
293 | 
294 | def tidy_lmfit_result(result):
295 |     """Tidy parameters from lmfit's  `ModelResult` or `MinimizerResult`.
296 | 
297 |     Normally this function is not called directly but invoked by the
298 |     general purpose function :func:`tidy`.
299 | 
300 |     Arguments:
301 |         result (`ModelResult` or `MinimizerResult`): the fit result object.
302 | 
303 |     Returns:
304 |         A DataFrame in tidy format with one row for each parameter.
305 | 
306 |     Note:
307 |         The (possible) columns of the returned DataFrame are:
308 | 
309 |         - `name` (string): name of the parameter.
310 |         - `value` (number): value of the parameter after the optimization.
311 |         - `init_value` (number): initial value of the parameter before the
312 |           optimization.
313 |         - `min`, `max` (numbers): bounds of the parameter
314 |         - `vary` (bool): whether the parameter has been varied during the
315 |           optimization.
316 |         - `expr` (string): constraint expression for the parameter.
317 |         - `stderr` (float): standard error for the parameter.
318 |     """
319 |     params_attrs = ['name', 'value', 'min', 'max', 'vary', 'expr', 'stderr']
320 |     columns = params_attrs + ['init_value']
321 |     d = pd.DataFrame(index=range(result.nvarys), columns=columns)
322 |     for i, (name, param) in enumerate(sorted(result.params.items())):
323 |         for p in params_attrs:
324 |             d.loc[i, p] = getattr(param, p)
325 |         # Derived parameters may not have init value
326 |         if name in result.init_values:
327 |             d.loc[i, 'init_value'] = result.init_values[name]
328 |     return d.apply(pd.to_numeric, errors='ignore')
329 | 
330 | 
331 | def tidy_scipy_result(result, param_names, **kwargs):
332 |     """Tidy parameters data from scipy's `OptimizeResult`.
333 | 
334 |     Normally this function is not called directly but invoked by the
335 |     general purpose function :func:`tidy`.
336 |     Since `OptimizeResult` has a raw array of fitted parameters
337 |     but no names, the parameters' names need to be passed in `param_names`.
338 | 
339 |     Arguments:
340 |         result (`OptimizeResult`): the fit result object.
341 |         param_names (string or list of string): names of the fitted parameters.
342 |             It can either be a list of strings or a single string with
343 |             space-separated names.
344 | 
345 |     Returns:
346 |         A DataFrame in tidy format with one row for each parameter.
347 | 
348 |     Note:
349 |         These two columns are always present in the returned DataFrame:
350 | 
351 |         - `name` (string): name of the parameter.
352 |         - `value` (number): value of the parameter after the optimization.
353 | 
354 |         Optional columns (depending on the type of result) are:
355 | 
356 |         - `grad` (float): gradient for each parameter
357 |         - `active_mask` (int)
358 |     """
359 |     Params = namedtuple('Params', param_names)
360 |     params = Params(*result.x)
361 |     df = dict_to_tidy(params._asdict(), **kwargs)
362 |     for var in ('grad', 'active_mask'):
363 |         if hasattr(result, var):
364 |             df[var] = result[var]
365 |     return df
366 | 
367 | 
368 | def glance_scipy_result(result):
369 |     """Tidy summary statistics from scipy's `OptimizeResult`.
370 | 
371 |     Normally this function is not called directly but invoked by the
372 |     general purpose function :func:`glance`.
373 | 
374 |     Arguments:
375 |         result (`OptimizeResult`): the fit result object.
376 | 
377 |     Returns:
378 |         A DataFrame in tidy format with one row and several summary statistics
379 |         as columns.
380 | 
381 |     Note:
382 |         Possible columns of the returned DataFrame include:
383 | 
384 |         - `success` (bool): whether the fit succeed
385 |         - `cost` (float): cost function
386 |         - `optimality` (float): optimality parameter as returned by
387 |           scipy.optimize.least_squares.
388 |         - `nfev` (int): number of objective function evaluations
389 |         - `njev` (int): number of jacobian function evaluations
390 |         - `nit` (int): number of iterations
391 |         - `status` (int): status returned by the fit routine
392 |         - `message` (string): message returned by the fit routine
393 |     """
394 |     attr_names_all = ['success', 'cost', 'optimality', 'nfev', 'njev', 'nit'
395 |                       'status', 'message']
396 |     attr_names = [a for a in attr_names_all if hasattr(result, a)]
397 |     if hasattr(result, 'fun') and np.size(result.fun) == 1:
398 |         attr_names.append('fun')
399 |     d = pd.DataFrame(index=range(1), columns=attr_names)
400 |     for attr_name in attr_names:
401 |         d.loc[0, attr_name] = getattr(result, attr_name)
402 |     return d.apply(pd.to_numeric, errors='ignore')
403 | 
404 | 
405 | def glance_lmfit_result(result):
406 |     """Tidy summary statistics from lmfit's `ModelResult` or `MinimizerResult`.
407 | 
408 |     Normally this function is not called directly but invoked by the
409 |     general purpose function :func:`glance`.
410 | 
411 |     Arguments:
412 |         result (`ModelResult` or `MinimizerResult`): the fit result object.
413 | 
414 |     Returns:
415 |         A DataFrame in tidy format with one row and several summary statistics
416 |         as columns.
417 | 
418 |     Note:
419 |         The columns of the returned DataFrame are:
420 | 
421 |         - `model` (string): model name (only for `ModelResult`)
422 |         - `method` (string): method used for the optimization (e.g. `leastsq`).
423 |         - `num_params` (int): number of varied parameters
424 |         - `ndata` (int):
425 |         - `chisqr` (float): chi-square statistics.
426 |         - `redchi` (float): reduced chi-square statistics.
427 |         - `AIC` (float): Akaike Information Criterion statistics.
428 |         - `BIC` (float): Bayes Information Criterion statistics.
429 |         - `num_func_eval` (int): number of evaluations of the objective
430 |           function during the fit.
431 |         - `num_data_points` (int): number of data points (e.g. samples) used
432 |           for the fit.
433 | 
434 |     """
435 |     def _is_modelresult(res):
436 |         return hasattr(res, 'model')
437 |     result_attrs = ['name', 'method', 'nvarys', 'ndata', 'chisqr', 'redchi',
438 |                     'aic', 'bic', 'nfev', 'success', 'message']
439 |     attrs_map = OrderedDict((n, n) for n in result_attrs)
440 |     attrs_map['name'] = 'model'
441 |     attrs_map['aic'] = 'AIC'
442 |     attrs_map['bic'] = 'BIC'
443 |     attrs_map['nvarys'] = 'num_params'
444 |     attrs_map['nfev'] = 'num_func_eval'
445 |     attrs_map['ndata'] = 'num_data_points'
446 |     # ModelResult has attribute `.model.name`, MinimizerResult does not
447 |     if not _is_modelresult(result):
448 |         attrs_map.pop('name')
449 |     d = pd.DataFrame(index=range(1), columns=attrs_map.values())
450 |     if _is_modelresult(result):
451 |         d.loc[0, attrs_map.pop('name')] = result.model.name
452 |     for attr_name, df_name in attrs_map.items():
453 |         d.loc[0, df_name] = getattr(result, attr_name)
454 |     #d.loc[0, 'num_components'] = len(result.components)
455 |     if hasattr(result, 'kws') and result.kws is not None:
456 |         for key, value in result.kws.items():
457 |             d['_'.join((result.method, key))] = value
458 |     return d.apply(pd.to_numeric, errors='ignore')
459 | 
460 | 
461 | def _augment_lmfit_modelresult(result):
462 |     """Tidy data values and fitted model from `lmfit.model.ModelResult`.
463 |     """
464 |     columns = ['x', 'data', 'best_fit', 'residual']
465 |     d = pd.DataFrame(index=range(result.ndata), columns=columns)
466 |     for col in columns[1:]:
467 |         d.loc[:, col] = getattr(result, col)
468 | 
469 |     independent_vars = result.model.independent_vars
470 |     if len(independent_vars) == 1:
471 |         independent_var = independent_vars[0]
472 |     else:
473 |         msg = ('Only 1 independent variable is currently supported.\n'
474 |                'Found independent variables: %s' % str(independent_vars))
475 |         raise NotImplementedError(msg)
476 | 
477 |     x_array = result.userkws[independent_var]
478 |     d.loc[:, 'x'] = x_array
479 | 
480 |     if len(result.components) > 1:
481 |         comp_names = [c.name for c in result.components]
482 |         for cname, comp in zip(comp_names, result.components):
483 |             d.loc[:, cname] = comp.eval(x=d.x, **result.values)
484 |     return d.apply(pd.to_numeric, errors='ignore')
485 | 
486 | 
487 | def tidy_to_dict(df, key='name', value='value', keys_exclude=None,
488 |                  cast_value=float):
489 |     """Convert a tidy DataFrame into a dictionary.
490 | 
491 |     This function converts two columns from an input tidy (or long-form)
492 |     DataFrame into a dictionary. A typical use-case is passing
493 |     parameters stored in tidy DataFrame to a python function. The arguments
494 |     `key` and `value` contain the name of the DataFrame columns containing
495 |     the keys and the values of the dictionary.
496 | 
497 |     Arguments:
498 |         df (pandas.DataFrame): the "tidy" DataFrame containing the data.
499 |             Two columns of this DataFrame should contain the keys and the
500 |             values to construct the dictionary.
501 |         key (string or scalar): name of the DataFrame column containing
502 |             the keys of the dictionary.
503 |         value (string or scalar ): name of the DataFrame column containing
504 |             the values of the dictionary.
505 |         keys_exclude (iterable or None): list of keys excluded when building
506 |             the returned dictionary.
507 |         cast_value (callable or None): callable used to cast
508 |             the value of each item in the dictionary. If None, no casting
509 |             is performed and the resulting values are 1-element
510 |             `pandas.Series`. Default is the python built-in `float`.
511 |             Other typical values may be `int` or `str`.
512 | 
513 |     Returns:
514 |         A dictionary with keys and values extracted from the input (tidy)
515 |         DataFrame.
516 | 
517 |     See also: :func:`dict_to_tidy`.
518 |     """
519 |     keys_list = set(df[key])
520 |     if keys_exclude is not None:
521 |         keys_list = keys_list - set(keys_exclude)
522 |     if cast_value is None:
523 |         cast_value = lambda x: x
524 |     return {var: cast_value(df.loc[df[key] == var, value])
525 |             for var in keys_list}
526 | 
527 | 
528 | def dict_to_tidy(dc, key='name', value='value', keys_exclude=None):
529 |     """Convert a dictionary into a tidy DataFrame.
530 | 
531 |     This function converts a dictionary into a "tidy" (or long-form)
532 |     DataFrame with two columns: one containing the keys and the other
533 |     containing the values from the dictionary. Names of the columns
534 |     can be specified with the `key` and `value` argument.
535 | 
536 |     Arguments:
537 |         dc (dict): the input dictionary used to build the DataFrame.
538 |         key (string or scalar): name of the DataFrame column containing
539 |             the keys of the dictionary.
540 |         value (string or scalar): name of the DataFrame column containing
541 |             the values of the dictionary.
542 |         keys_exclude (iterable or None): list of keys excluded when building
543 |             the returned DataFrame.
544 | 
545 |     Returns:
546 |         A two-columns tidy DataFrame containing the data in the dictionary.
547 | 
548 | 
549 |     See also: :func:`tidy_to_dict`.
550 |     """
551 |     keys = dc.keys()  # this is a set
552 |     if keys_exclude is not None:
553 |         keys -= keys_exclude
554 |     keys = sorted(keys)
555 |     df = pd.DataFrame(columns=(key, value), index=range(len(keys)))
556 |     df[key] = keys
557 |     df[value] = [dc[k] for k in keys]
558 |     return df
559 | 
560 | 
561 | def _test_dict_to_tidy(dc, key='name', value='value', keys_exclude=None,
562 |                        value_type=None):
563 |     # Alternative implementation
564 |     if keys_exclude is None:
565 |         keys_exclude = []
566 |     dc2 = {k: v for k, v in dc.items() if k not in keys_exclude}
567 |     df = pd.DataFrame(columns=(key, value), index=range(len(dc2)))
568 |     keys = sorted(dc2.keys())
569 |     df[key] = keys
570 |     df[value] = [dc2[k] for k in keys]
571 |     # Test compliance
572 |     assert all(df == dict_to_tidy(dc, key, value, keys_exclude, value_type))
573 |     return df
574 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=0.18.1
2 | lmfit>=0.9.5
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | #import versioneer
 3 | 
 4 | 
 5 | def get_version():
 6 |     # http://stackoverflow.com/questions/2058802/how-can-i-get-the-version-defined-in-setup-py-setuptools-in-my-package
 7 |     from ast import parse
 8 |     with open('pybroom.py') as f:
 9 |         version = parse(next(filter(
10 |             lambda line: line.startswith('__version__'), f))).body[0].value.s
11 |     return version
12 | 
13 | 
14 | long_description = r"""
15 | pybroom
16 | =======
17 | 
18 | **Pybroom** is a small python 3+ library for converting collections of
19 | fit results (curve fitting or other optimizations)
20 | to `Pandas <http://pandas.pydata.org/>`__
21 | `DataFrame <http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe>`__
22 | in tidy format (or long-form)
23 | `(Wickham 2014) <http://dx.doi.org/10.18637/jss.v059.i10>`__.
24 | Once fit results are in tidy DataFrames, it is possible to leverage
25 | `common patterns <http://tomaugspurger.github.io/modern-5-tidy.html>`__
26 | for tidy data analysis. Furthermore powerful visual
27 | explorations using multi-facet plots becomes easy thanks to libraries
28 | like `seaborn <https://pypi.python.org/pypi/seaborn/>`__ natively
29 | supporting tidy DataFrames.
30 | 
31 | See the `pybroom homepage <http://pybroom.readthedocs.io/>`__ for more info.
32 | """
33 | 
34 | setup(
35 |     name='pybroom',
36 |     version=get_version(),
37 |     py_modules=['pybroom'],
38 |     #version=versioneer.get_version(),
39 |     #cmdclass=versioneer.get_cmdclass(),
40 |     author='Antonino Ingargiola',
41 |     author_email='tritemio@gmail.com',
42 |     url='http://pybroom.readthedocs.io/',
43 |     download_url='https://github.com/tritemio/pybroom',
44 |     install_requires=['pandas', 'lmfit'],
45 |     include_package_data=True,
46 |     license='MIT',
47 |     description=("Make tidy DataFrames from messy fit/model results."),
48 |     long_description=long_description,
49 |     platforms=('Windows', 'Linux', 'Mac OS X'),
50 |     classifiers=['Intended Audience :: Science/Research',
51 |                  'Operating System :: OS Independent',
52 |                  'Programming Language :: Python',
53 |                  'Programming Language :: Python :: 3.4',
54 |                  'Programming Language :: Python :: 3.5',
55 |                  'Topic :: Scientific/Engineering',
56 |                  'License :: OSI Approved :: MIT License'],
57 |     keywords=('dataframe tidy-data long-form model fitting tidyverse'))
58 | 


--------------------------------------------------------------------------------