├── __init__.py
├── lib
│   ├── __init__.py
│   ├── yi_secform.py
│   ├── yi_matrix.py
│   ├── yi_stocks.py
│   ├── ys_mlearn.py
│   ├── ys_opt_holt.py
│   ├── yi_simulation.py
│   ├── yi_timeseries.py
│   ├── ys_prtf_boltzmann.py
│   ├── ys_optimize.py
│   ├── yi_0sys.py
│   └── yi_plot.py
├── VERSION
├── nb
│   ├── prtf-markowitz.gif
│   ├── holt-winters-equations.png
│   ├── FRED-SP500_1957-2014-ARC.csv.gz
│   ├── FRED-EURUSD_1971-2002-ARC.csv.gz
│   ├── FRED-XAUUSD_1968-2014-ARC.csv.gz
│   ├── boots_ndl_d4spx_1957-2018.csv.gz
│   ├── FRED-home-Case-Shiller_1987-2013.csv.gz
│   └── SIMU-mn0-sd1pc-d4spx_1957-2014.csv.gz
├── tests
│   ├── zdata-xau-13hj-c30.csv
│   ├── test_system.py
│   ├── 01-run-notebooks.sh
│   ├── test_matrix.py
│   ├── test_fred.py
│   ├── test_fecon235.py
│   ├── 10-check-modules.sh
│   ├── test_boltzmann.py
│   ├── smell
│   ├── test_1tools.py
│   ├── test_gauss_mix.py
│   ├── test_timeseries.py
│   └── test_optimize.py
├── docs
│   ├── wiki
│   │   ├── fecon235-00-wiki.md
│   │   ├── make-wiki-mirror.sh
│   │   └── mirror
│   │       └── Home.md
│   ├── others
│   │   └── Brown-2016-Data_worth.md
│   └── fecon235-08-sympy.ipynb
├── .github
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── ISSUE_TEMPLATE
│   │   └── custom.md
│   ├── CONTRIBUTING.md
│   └── CODE_OF_CONDUCT.md
├── .gitignore
├── bin
│   ├── update-yi_quandl_api
│   ├── ipnbrun
│   └── docker
│       └── rsvp_fecon235
│           └── Dockerfile
└── LICENSE.md
/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | v6.18.1203
2 |
--------------------------------------------------------------------------------
/nb/prtf-markowitz.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/prtf-markowitz.gif
--------------------------------------------------------------------------------
/nb/holt-winters-equations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/holt-winters-equations.png
--------------------------------------------------------------------------------
/nb/FRED-SP500_1957-2014-ARC.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/FRED-SP500_1957-2014-ARC.csv.gz
--------------------------------------------------------------------------------
/nb/FRED-EURUSD_1971-2002-ARC.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/FRED-EURUSD_1971-2002-ARC.csv.gz
--------------------------------------------------------------------------------
/nb/FRED-XAUUSD_1968-2014-ARC.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/FRED-XAUUSD_1968-2014-ARC.csv.gz
--------------------------------------------------------------------------------
/nb/boots_ndl_d4spx_1957-2018.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/boots_ndl_d4spx_1957-2018.csv.gz
--------------------------------------------------------------------------------
/nb/FRED-home-Case-Shiller_1987-2013.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/FRED-home-Case-Shiller_1987-2013.csv.gz
--------------------------------------------------------------------------------
/nb/SIMU-mn0-sd1pc-d4spx_1957-2014.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rsvp/fecon235/HEAD/nb/SIMU-mn0-sd1pc-d4spx_1957-2014.csv.gz
--------------------------------------------------------------------------------
/tests/zdata-xau-13hj-c30.csv:
--------------------------------------------------------------------------------
1 | T,XAU
2 | 2013-03-08,1581.75
3 | 2013-03-11,1579.0
4 | 2013-03-12,1594.0
5 | 2013-03-13,1589.25
6 | 2013-03-14,1586.0
7 | 2013-03-15,1595.5
8 | 2013-03-18,1603.75
9 | 2013-03-19,1610.75
10 | 2013-03-20,1607.5
11 | 2013-03-21,1613.75
12 | 2013-03-22,1607.75
13 | 2013-03-25,1599.25
14 | 2013-03-26,1598.0
15 | 2013-03-27,1603.0
16 | 2013-03-28,1598.25
17 | 2013-03-29,1598.25
18 | 2013-04-01,1598.25
19 | 2013-04-02,1583.5
20 | 2013-04-03,1574.75
21 | 2013-04-04,1546.5
22 | 2013-04-05,1568.0
23 | 2013-04-08,1575.0
24 | 2013-04-09,1577.25
25 | 2013-04-10,1575.0
26 | 2013-04-11,1565.0
27 | 2013-04-12,1535.5
28 | 2013-04-15,1395.0
29 | 2013-04-16,1380.0
30 | 2013-04-17,1392.0
31 | 2013-04-18,1393.75
32 |
--------------------------------------------------------------------------------
/docs/wiki/fecon235-00-wiki.md:
--------------------------------------------------------------------------------
1 | ## Wiki for fecon235
2 |
3 | GitHub renders wiki pages for **fecon235** at https://github.com/rsvp/fecon235/wiki
4 | which is also available at the shortened URL https://git.io/econ
5 |
6 | The content of the wiki is actually developed in a *separate* repository
7 | called `fecon235.wiki`, so for convenient reference we shall periodically
8 | reproduce it under `docs/wiki/mirror` (but not as a subrepository).
9 |
10 | The wiki is intended as user documentation, and may include FAQs, tips,
11 | as well as summaries of issues and best remedies.
12 |
13 | Please kindly consider contributing to our documentation via
14 | [pull request](https://github.com/rsvp/fecon235/blob/master/.github/CONTRIBUTING.md).
15 |
16 |
17 | ---
18 |
19 | Revision date : 2017-05-01
20 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ### Proposed changes in this pull request
2 |
3 | Fix #
4 |
5 | - [ ] Bug :bug:
6 | - [ ] Add tests :white_check_mark:
7 | - [ ] Performance boost :racehorse:
8 | - [ ] Documentation or notebook change :memo:
9 | - [ ] Removed code or file :fire:
10 | - [ ] Security issue :lock:
11 | - [ ] Dependency downgrade :arrow_down:
12 | - [ ] Dependency upgrade :arrow_up:
13 | - [ ] Enhancement :art:
14 |
15 |
16 |
17 |
18 |
19 | ### Additional helpful details
20 |
21 | - [ ] REQUIRE: PR rebased on *latest* master branch
22 | - [ ] This pull request needs to be manually tested
23 | - [ ] Work-in-progress: more changes will follow
24 | - [ ] Issue requires further discussion
25 | - [ ] Issue has been fully resolved
26 |
27 |
28 |
29 |
30 |
31 | ATTN mention: @rsvp
32 |
33 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/custom.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Custom issue template
3 | about: For fecon235 bugs and enhancements.
4 |
5 | ---
6 |
7 | ### Description of specific issue
8 |
9 |
10 |
11 |
12 | - [ ] Enhancement
13 | - [ ] Bug
14 |   - Issues for source code (non-notebook) should be opened at
15 | [fecon236](https://github.com/MathSci/fecon236/issues)
16 |
17 |
18 | ### Expected behavior
19 |
20 |
21 |
22 |
23 | ### Observed behavior
24 |
25 |
26 |
27 |
28 | ### Steps which reproduce or enhance behavior
29 |
30 | 1.
31 | 2.
32 |
33 |
34 | ```
35 | Optional: Markdown to include images...
36 | 
37 | ```
38 |
39 |
40 | ### Why would the improvement be useful to most users?
41 |
42 |
43 |
44 |
45 | ### Additional helpful details for bugs
46 |
47 | - [ ] Problem started recently, but not in older versions
48 | - [ ] Problem happens with all files, not only some files
49 | - [ ] Problem can be reliably reproduced
50 | - [ ] Problem happens randomly
51 |
52 | - **fecon235 version:**
53 | - **pandas version:**
54 | - **Python version:**
55 | - **Operating system:**
56 |
57 |
58 | ATTN mention: @rsvp
59 |
60 |
--------------------------------------------------------------------------------
/tests/test_system.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2015-12-21
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_system : Test fecon235 yi_0sys module.
5 |
6 | => As of fecon235 v4, we also favor pytest over nosetests, so e.g.
7 |
8 | $ py.test --doctest-modules
9 |
10 | REFERENCE:
11 | pytest: https://pytest.org/latest/getting-started.html
12 | or PDF at http://pytest.org/latest/pytest.pdf
13 |
14 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
15 | 2015-12-29 First edition for gitinfo(), Python and pandas versions.
16 | '''
17 |
18 | from __future__ import absolute_import, print_function
19 |
20 | from fecon235.lib import yi_0sys as system
21 | #
22 | # N.B. - in this tests directory without __init__.py,
23 | # we use absolute import as if outside the fecon235 package,
24 | # not relative import (cf. modules within lib).
25 |
26 |
27 | def test_minimumPython_yi_0sys_fecon235():
28 | '''Test minimum Python version for fecon235.'''
29 | # We hope to support Python 2.7 until 2019,
30 | # but the Jupyter project views it as "legacy."
31 | assert system.pythontup() >= system.minimumPython
32 |
33 |
34 | def test_minimumPandas_yi_0sys_fecon235_vSlow():
35 | '''Test minimum Pandas version for fecon235.'''
36 | s = system.versionstr("pandas")
37 | s = s.replace('.', '', 1)
38 | # ^only one replace: e.g. 0.17.1 -> 017.1
39 | assert float(s) >= system.minimumPandas
40 |
41 |
42 | def test_gitinfo_yi_0sys_fecon235():
43 | '''Test gitinfo() which obtains repo info by running git.'''
44 | repo, tag, bra = system.gitinfo()
45 | # Only repo has a response known here in advance:
46 | assert repo == 'fecon235'
47 |
48 |
49 | if __name__ == "__main__":
50 | system.endmodule()
51 |
--------------------------------------------------------------------------------
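
The yi_0sys module exercised above is not reproduced in this snapshot, but the
assertions imply its version-checking API. A minimal sketch of those semantics
(pythontup(), versionstr(), and the minimumPandas threshold here are assumptions
reconstructed from the tests, not the actual module):

```
import sys
import importlib

def pythontup():
    '''Python version as a comparable tuple, e.g. (2, 7, 10).'''
    return sys.version_info[:3]

def versionstr(pkg):
    '''Version string of an importable package, e.g. '0.17.1' for pandas.'''
    return importlib.import_module(pkg).__version__

#  The pandas test above deletes only the FIRST dot before comparing,
#  so '0.17.1' becomes float('017.1') == 17.1:
s = versionstr("pandas").replace('.', '', 1)
assert float(s) >= 17.1    # hypothetical minimumPandas value.
```
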
/.gitignore:
--------------------------------------------------------------------------------
1 | # .gitignore template Date : 2018-03-11
2 |
3 | authtoken.p
4 | # File generated by using Quandl API key for data access.
5 | # It should never be committed for security reasons.
6 |
7 | *-bak.*
8 | zmp-*
9 | # Almost tmp files.
10 | *tmp*
11 | # ANY filename containing tmp is temporary!
12 |
13 |
14 | # Committing images will add unnecessary binary bloat to the repo!
15 | *.png
16 | # charts generally produced from notebooks.
17 | *.jpg
18 | # images locally for notebooks.
19 | # Suggest hosting them externally, thus avoid committing them.
20 | # Use Image() to display them with embed=False.
21 |
22 |
23 | # Interim IPython notebooks:
24 | .ipynb_checkpoints/
25 | *-checkpoint.ipynb
26 |
27 | 00-tpl*.ipynb
28 | # template for notebooks with preamble and examples.
29 |
30 |
31 | # ================================================= PYTHON begin ===============
32 |
33 |
34 | # Generated by nosetests, Python nose
35 | .noseids
36 |
37 |
38 | # Byte-compiled / optimized / DLL files
39 | __pycache__/
40 | *.py[cod]
41 |
42 | # C extensions
43 | *.so
44 |
45 | # Distribution / packaging
46 | .Python
47 | env/
48 | build/
49 | develop-eggs/
50 | dist/
51 | downloads/
52 | eggs/
53 | lib/
54 | lib64/
55 | parts/
56 | sdist/
57 | var/
58 | *.egg-info/
59 | .installed.cfg
60 | *.egg
61 |
62 | # PyInstaller
63 | # Usually these files are written by a python script from a template
64 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
65 | *.manifest
66 | *.spec
67 |
68 | # Installer logs
69 | pip-log.txt
70 | pip-delete-this-directory.txt
71 |
72 | # Unit test / coverage reports
73 | htmlcov/
74 | .tox/
75 | .coverage
76 | .cache
77 | nosetests.xml
78 | coverage.xml
79 |
80 | # pytest
81 | lastfailed
82 | # Added 2018-03-10 for .../.pytest_cache/v/cache/...
83 |
84 | # Translations
85 | *.mo
86 | *.pot
87 |
88 | # Django stuff:
89 | *.log
90 |
91 | # Sphinx documentation
92 | docs/_build/
93 |
94 | # PyBuilder
95 | target/
96 |
97 | # ================================================= PYTHON end =================
98 |
99 |
100 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # CONTRIBUTING: Notes on forks and pull requests
2 |
3 | We are thrilled that you would like to collaborate on
4 | this project. Your help is essential.
5 |
6 | - Code revisions: please kindly follow [Github flow].
7 |
8 | - Running tests: details are in the `tests` directory.
9 | Python tests will run under both py.test and nosetests.
10 |
11 | - For integration testing, we run all notebooks in batch mode
12 | in a Linux environment. This also syncs temporary notebooks
13 | with current data.
14 |
15 | - If you have modified code in a Jupyter/IPython notebook where
16 | there are many embedded images, *please clear out all
17 | outputs before your commit*. (The only exception arises
18 | in the case of major releases where we want archival
19 |   samples of the generated images.)
20 |
21 |
22 | ## Submitting a pull request
23 |
24 | 0. [Fork][fork] and clone the repository.
25 | 0. Create a new branch: `git checkout -b my-branch-name`
26 | 0. Make your change, add tests, and make sure the tests still pass.
27 | 0. Be sure to ***pull origin/master and rebase*** before the next step.
28 | 0. Push to your fork and [submit a pull request][pr]
29 | 0. Kindly wait for your pull request to be reviewed.
30 | 0. Stay in touch with fellow developers at [Gitter].
31 |
32 |
33 | ## Tips regarding pull requests
34 |
35 | - Refine tests whenever possible.
36 |
37 | - Update documentation as necessary.
38 |
39 | - Keep your change focused. If there are multiple changes that are not
40 | dependent upon each other, please submit them as separate pull requests.
41 |
42 | - Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
43 |
44 |
45 | **Thank you very much for your consideration.
46 | Your contribution is greatly appreciated.**
47 |
48 |
49 | ## Resources
50 |
51 | - [Contributing to Open Source on GitHub](https://guides.github.com/activities/contributing-to-open-source/)
52 | - [Using Pull Requests](https://help.github.com/articles/using-pull-requests/)
53 |
54 |
55 | - - - -
56 |
57 | Revision date : 2016-01-23
58 |
59 | [fork]: https://github.com/rsvp/fecon235/fork "Fork fecon235"
60 | [Github flow]: http://scottchacon.com/2011/08/31/github-flow.html "Github Flow"
61 | [Gitter]: https://gitter.im/rsvp/fecon235 "Gitter fecon235"
62 | [pr]: https://github.com/rsvp/fecon235/compare "Pull request"
63 |
--------------------------------------------------------------------------------
/docs/wiki/make-wiki-mirror.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # bash 4.3.11(1) Linux 3.13.0 Ubuntu 14.04.3 Date : 2017-05-01
3 | #
4 | # _______________| make-wiki-mirror : for fecon235/docs/wiki/mirror
5 | #
6 | # Usage: $ ./make-wiki-mirror
7 | # # Execute from: ../fecon235/docs/wiki
8 | #
9 | # Dependencies: cp [files assumed to be local]
10 | #
11 | # CHANGE LOG LATEST version available: https://github.com/rsvp/fecon235
12 | # 2017-05-01 First version.
13 |
14 |
15 | # _____ PREAMBLE_v3: settings, variables, and error handling.
16 | #
17 | LC_ALL=POSIX
18 | # locale means "ASCII, US English, no special rules,
19 | # output per ISO and RFC standards."
20 | # Esp. use ASCII encoding for glob and sorting characters.
21 | shopt -s extglob
22 | # ^set extended glob for pattern matching.
23 | shopt -s failglob
24 | # ^failed pattern matching signals error.
25 | set -e
26 | # ^errors checked: immediate exit if a command has non-zero status.
27 | set -o pipefail
28 | # ^exit status on fail within pipe, not (default) last command.
29 | set -u
30 | # ^unassigned variables shall be errors.
31 | # Example of default VARIABLE ASSIGNMENT: arg1=${1:-'foo'}
32 |
33 |
34 | program=${0##*/} # similar to using basename
35 | errf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
36 |
37 |
38 | cleanup () {
39 | # Delete temporary files, then optionally exit given status.
40 | local status=${1:-'0'}
41 | rm -f $errf
42 | [ $status = '-1' ] || exit $status # thus -1 prevents exit.
43 | } #--------------------------------------------------------------------
44 | warn () {
45 | # Message with basename to stderr. Usage: warn "message"
46 | echo -e "\n !! ${program}: $1 " >&2
47 | } #--------------------------------------------------------------------
48 | die () {
49 | # Exit with status of most recent command or custom status, after
50 | # cleanup and warn. Usage: command || die "message" [status]
51 | local status=${2:-"$?"}
52 | cat $errf >&2
53 | cleanup -1 && warn "$1" && exit $status
54 | } #--------------------------------------------------------------------
55 | trap "die 'SIG disruption, but cleanup finished.' 114" 1 2 3 15
56 | # Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
57 | trap "die 'unhandled ERR via trap, but cleanup finished.' 116" ERR
58 | # Cleanup after command failure unless it's part of a test clause.
59 | #
60 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
61 |
62 |
63 | wiki='../../../fecon235.wiki'
64 | # ___ATTN___ relative location, and assumes wiki is in good state.
65 |
66 |
67 | [ -d $wiki ] || die "Cannot find separate wiki repository for fecon235." 113
68 |
69 |
70 | shopt -u dotglob
71 | # ^For * wildcard, DOT FILES, e.g. .git, WILL NOT BE INCLUDED.
72 | # We are not interested in creating a subrepository, just a mirror.
73 |
74 | cp -d -x -r -p -u $wiki/* mirror 2> $errf
75 | # ^recursive ^see dotglob
76 |
77 |
78 | cleanup # Instead of: trap arg EXIT
79 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
80 |
81 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
82 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, gender identity and expression, level of experience,
9 | nationality, personal appearance, race, religion, or sexual identity and
10 | orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * Use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team.
59 | All complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at [http://contributor-covenant.org/version/1/4][version]
72 |
73 | [homepage]: http://contributor-covenant.org
74 | [version]: http://contributor-covenant.org/version/1/4/
75 |
--------------------------------------------------------------------------------
/docs/wiki/mirror/Home.md:
--------------------------------------------------------------------------------
1 | ## Welcome to our wiki for Financial Economics!
2 |
3 | The main repository at GitHub is called **fecon235**
4 | (shortcut https://git.io/fecon235)
5 | where you will find README.md
6 | for setting up the computational data tools.
7 |
8 | The `lib` directory contains Python modules
9 | with the high-level commands used in the notebooks.
10 | If customization is required, these functions provide good templates
11 | to access the core packages such as pandas, numpy, and matplotlib.
12 |
13 | The essential modules are unified by the top-level
14 | module called **fecon235.py**. Please see
15 | https://git.io/fecon-intro where details are given regarding its import.
16 |
17 |
18 | ### Q: Where is the documentation for fecon235?
19 |
20 | The most current user documentation can be found in the `docs` directory;
21 | moreover, the source code is thoroughly documented with comments.
22 |
23 | The best way to learn about the user-friendly code is to
24 | pick a Jupyter notebook for a topic
25 | which interests you, and then to work interactively with it for analysis.
26 | Please check out the `nb` directory: https://git.io/fecon235nb
27 |
28 |
29 | ### Q: Are there online discussions?
30 |
31 | Chat with fellow users at Gitter: https://gitter.im/rsvp/fecon235
32 |
33 |
34 | ### Q: How do I report a bug, or suggest enhancements?
35 |
36 | For issues, please visit https://github.com/rsvp/fecon235/issues -- but
37 | better yet, please consider fixing module bugs yourself by
38 | making a pull request: https://git.io/fecon-pr
39 |
40 |
41 | ### Q: How do I retrieve economic FRED data series not listed in fecon235?
42 |
43 | We have defined *functions* to access data from the St. Louis Federal Reserve Bank.
44 | Each economic time series and its frequency has its own "fredcode", which
45 | is freely available at their site: https://fred.stlouisfed.org --
46 | so check there first.
47 |
48 |
49 | ```
50 | df = get( fredcode )
51 | # fredcode is entered as a string, or an
52 | # assigned variable named d4*, m4*, q4*.
53 | # E.g. q4gdpusr = 'GDPC1'
54 | # ^U.S. real GDP in 2009 USD billions, SA quarterly.
55 |
56 | plot( dataframe or fredcode )
57 | ```
58 |
59 | See the `lib/yi_fred.py` module for further details.
60 | Constructing your own pandas DataFrame is easy,
61 | see for example *m4eurusd* as to how a synthetic series can be created.
62 |
63 |
64 | ### Q: How do I retrieve data from Quandl not listed in fecon235?
65 |
66 | The same idea as FRED above applies. For example, d7xbtusd='BCHAIN/MKPRU'
67 | retrieves the Bitcoin price in USD (d7 indicates that the data covers 7 days
68 | per week). The quandlcodes can be found at https://www.quandl.com
69 | (however, searching Google with the keyword "quandl" gives better results).
70 | See our `lib/yi_quandl.py` module for further details.
71 |
72 |
73 | ### Q: How do I retrieve financial data series not listed in fecon235?
74 |
75 | We use a special string called "*stock slang*" in the format "s4symbol"
76 | where symbol is spelled out in all lower-case.
77 |
78 | Example: to retrieve SPY (the ETF for S&P500), use "s4spy"
79 |
80 | ```
81 | df = get( "s4spy" )
82 |
83 | plot( df )
84 | ```
85 |
86 | The source will be Yahoo Finance, falling back on Google Finance.
87 | The retrieved data will be in pandas DataFrame format.
88 | See our `lib/yi_stocks.py` module for further details.
89 |
90 |
91 | - - - -
92 |
93 | Revision date : 2017-04-24
94 |
--------------------------------------------------------------------------------
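
Pulling together the three retrieval answers in Home.md above, here is a hedged
sketch of a typical session, assuming the top-level import described at
https://git.io/fecon-intro (q4gdpusr, d7xbtusd, and s4spy are the examples
quoted in the wiki text):

```
from fecon235.fecon235 import *   # per https://git.io/fecon-intro

q4gdpusr = 'GDPC1'            # fredcode: U.S. real GDP, SA quarterly.
d7xbtusd = 'BCHAIN/MKPRU'     # quandlcode: Bitcoin price in USD.

gdp = get( q4gdpusr )         # FRED retrieval.
btc = get( d7xbtusd )         # Quandl retrieval.
spy = get( 's4spy' )          # "stock slang" for the SPY ETF.

plot( gdp )                   # each get() returns a pandas DataFrame.
```
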
/tests/01-run-notebooks.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # bash 4.3.11(1) Linux Ubuntu 14.04.1 Date : 2014-11-12
3 | #
4 | # _______________| 01-run-notebooks.sh : run all notebooks in nbdir.
5 | #
6 | # Usage: $ ./01-run-notebooks.sh [nbdir=nb]
7 |    #         Non-interactively update with current data,
8 | # # and also do integration testing.
9 | # #
10 | # # - ASSUMES execution from the tests directory.
11 | # # - nbdir can only be a top-level directory.
12 | # # - Look for tmp-* output files in nbdir.
13 | #
14 | # Dependencies: bin/ipnbrun (which uses runipy package)
15 |
16 |
17 | # CHANGE LOG
18 | # 2014-11-12 Exclude tmp*.ipynb from tests.
19 | # 2014-11-11 First version.
20 |
21 |
22 | # _____ PREAMBLE_v3: settings, variables, and error handling.
23 | #
24 | LC_ALL=POSIX
25 | # locale means "ASCII, US English, no special rules,
26 | # output per ISO and RFC standards."
27 | # Esp. use ASCII encoding for glob and sorting characters.
28 | shopt -s extglob
29 | # ^set extended glob for pattern matching.
30 | shopt -s failglob
31 | # ^failed pattern matching signals error.
32 | set -e
33 | # ^errors checked: immediate exit if a command has non-zero status.
34 | set -o pipefail
35 | # ^exit status on fail within pipe, not (default) last command.
36 | set -u
37 | # ^unassigned variables shall be errors.
38 | # Example of default VARIABLE ASSIGNMENT: arg1=${1:-'foo'}
39 |
40 | nbdir=${1:-'nb'}
41 |
42 |
43 | program=${0##*/} # similar to using basename
44 | errf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
45 |
46 |
47 | cleanup () {
48 | # Delete temporary files, then optionally exit given status.
49 | local status=${1:-'0'}
50 | rm -f $errf
51 | [ $status = '-1' ] || exit $status # thus -1 prevents exit.
52 | } #--------------------------------------------------------------------
53 | warn () {
54 | # Message with basename to stderr. Usage: warn "message"
55 | echo -e "\n !! ${program}: $1 " >&2
56 | } #--------------------------------------------------------------------
57 | die () {
58 | # Exit with status of most recent command or custom status, after
59 | # cleanup and warn. Usage: command || die "message" [status]
60 | local status=${2:-"$?"}
61 | cat $errf >&2
62 | cleanup -1 && warn "$1" && exit $status
63 | } #--------------------------------------------------------------------
64 | trap "die 'SIG disruption, but cleanup finished.' 114" 1 2 3 15
65 | # Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
66 | trap "die 'unhandled ERR via trap, but cleanup finished.' 116" ERR
67 | # Cleanup after command failure unless it's part of a test clause.
68 | #
69 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
70 |
71 |
72 | path="$(pwd)"
73 | dir="${path##*/}"
74 |
75 |
76 | instruct () {
77 | echo " !! Please go to the tests directory, "
78 | echo " !! and then execute this program, ./$program "
79 | }
80 |
81 |
82 |
83 | if [ "$dir" = 'tests' ] ; then
84 | cd ../$nbdir
85 | ../bin/ipnbrun !(tmp*).ipynb
86 | # Exclude tmp*.ipynb files -- thanks extglob.
87 | echo " :: At: $(pwd) "
88 | echo " :: SUCCESS! Notebooks passed integration tests."
89 | else
90 | instruct > $errf
91 | die "Current directory $dir yields incorrect relative path." 113
92 | fi
93 |
94 |
95 |
96 | cleanup # Instead of: trap arg EXIT
97 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
98 |
99 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
100 |
--------------------------------------------------------------------------------
/bin/update-yi_quandl_api:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # bash 4.3.11(1) Linux Ubuntu 14.04.1 Date : 2016-01-17
3 | #
4 | # _______________| update-yi_quandl_api : get Quandl.com API data module.
5 | #
6 | # Usage: $ ./update-yi_quandl_api
7 | # # ^MUST execute from bin due to relative dir target.
8 | #
9 | # Dependencies: curl [ Used to download the following Python script: ]
10 | # Quandl.py ( https://github.com/quandl/quandl-python )
11 | #
12 | # man: see yi_quandl.py for implementation details.
13 | # #
14 | # # Note: use of API key will generate a token file
15 | # # called authtoken.p which git should ignore.
16 | # # See setQuandlToken function in yi_quandl module.
17 |
18 | # CHANGE LOG ORIGIN: https://git.io/fecon235
19 | # 2016-01-17 topdir changed from nb to lib directory (per v3).
20 | # 2015-08-03 This script retrieves the latest dependent code.
21 |
22 |
23 | # _____ PREAMBLE_v3: settings, variables, and error handling.
24 | #
25 | LC_ALL=POSIX
26 | # locale means "ASCII, US English, no special rules,
27 | # output per ISO and RFC standards."
28 | # Esp. use ASCII encoding for glob and sorting characters.
29 | shopt -s extglob
30 | # ^set extended glob for pattern matching.
31 | shopt -s failglob
32 | # ^failed pattern matching signals error.
33 | set -e
34 | # ^errors checked: immediate exit if a command has non-zero status.
35 | set -o pipefail
36 | # ^exit status on fail within pipe, not (default) last command.
37 | set -u
38 | # ^unassigned variables shall be errors.
39 | # Example of default VARIABLE ASSIGNMENT: arg1=${1:-'foo'}
40 |
41 | topdir=${1:-'lib'}
42 | fname=${2:-'yi_quandl_api.py'}
43 |
44 |
45 | program=${0##*/} # similar to using basename
46 | memf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
47 | errf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
48 |
49 |
50 | cleanup () {
51 | # Delete temporary files, then optionally exit given status.
52 | local status=${1:-'0'}
53 | rm -f $memf $errf
54 | [ $status = '-1' ] || exit $status # thus -1 prevents exit.
55 | } #--------------------------------------------------------------------
56 | warn () {
57 | # Message with basename to stderr. Usage: warn "message"
58 | echo -e "\n !! ${program}: $1 " >&2
59 | } #--------------------------------------------------------------------
60 | die () {
61 | # Exit with status of most recent command or custom status, after
62 | # cleanup and warn. Usage: command || die "message" [status]
63 | local status=${2:-"$?"}
64 | cat $errf >&2
65 | cleanup -1 && warn "$1" && exit $status
66 | } #--------------------------------------------------------------------
67 | trap "die 'SIG disruption: but finish needs about one minute.' 114" 1 2 3 15
68 | # Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
69 | trap "die 'unhandled ERR via trap, but cleanup finished.' 116" ERR
70 | # Cleanup after command failure unless it's part of a test clause.
71 | #
72 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
73 |
74 |
75 | cd ../$topdir || die "cannot find $topdir directory" 113
76 |
77 |
78 | # DOWNLOAD LATEST API VERSION:
79 | source='https://raw.githubusercontent.com/quandl/quandl-python/master/Quandl/Quandl.py'
80 | curl -L --silent $source > $fname \
81 | || die "curl unable to retrieve $program source code." 115
82 | chmod 755 $fname
83 |
84 |
85 | echo " :: Successfully installed Quandl.py API as $fname in $topdir directory."
86 |
87 |
88 | cleanup # Instead of: trap arg EXIT
89 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
90 |
91 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
92 |
--------------------------------------------------------------------------------
/tests/test_matrix.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-06-19
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_matrix : Test fecon235 yi_matrix module.
5 |
6 | Doctests display at lower precision since the equality test becomes fuzzy across
7 | different systems if full floating point representation is used.
8 |
9 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
10 | $ py.test --doctest-modules
11 |
12 | REFERENCE
13 | pytest: https://pytest.org/latest/getting-started.html
14 | or PDF at http://pytest.org/latest/pytest.pdf
15 |
16 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
17 | 2017-06-19 Add test for cov2cor().
18 | 2017-06-17 First version.
19 | '''
20 |
21 | from __future__ import absolute_import, print_function, division
22 | import numpy as np
23 |
24 | from fecon235.lib import yi_0sys as system
25 | from fecon235.lib import yi_matrix as matrix
25 | #
26 | # N.B. - In this tests directory without __init__.py,
27 | # we use absolute import as if outside the fecon235 package,
28 | # not relative import (cf. modules within lib).
29 |
30 | # A is non-singular, hence invertible:
31 | A = np.array([[ 7, 2, 1 ],
32 | [ 0, 3, -1 ],
33 | [ -3, 4, -2 ]])
34 |
35 | # First column of Abad is thrice the second column.
36 | Abad = np.array([[ 6, 2, 1 ],
37 | [ 9, 3, -1 ],
38 | [ 12, 4, -2 ]])
39 |
40 | # Aiv is, in fact, A inverted.
41 | Aiv = np.array([[ -2, 8, -5 ],
42 | [ 3, -11, 7 ],
43 | [ 9, -34, 21 ]])
44 |
45 | # V is a covariance array:
46 | V = np.array([[1875.3209, 429.8712, 462.4775 ],
47 | [429.8712, 1306.9817, -262.8231 ],
48 | [462.4775, -262.8231, 755.5193 ]])
49 |
50 |
51 | def test_yi_matrix_fecon235_is_singular():
52 | '''Check if matrix is singular.'''
53 | assert matrix.is_singular(A) == False
54 | assert matrix.is_singular(Abad) == True
55 |
56 |
57 | def test_yi_matrix_fecon235_check_example_matrices():
58 |     '''Multiplying A by Aiv should return the identity matrix.
59 | >>> A.dot( Aiv )
60 | array([[1, 0, 0],
61 | [0, 1, 0],
62 | [0, 0, 1]])
63 |
64 | '''
65 | assert np.allclose( A.dot( Aiv ), np.identity(3) )
66 |
67 |
68 | def test_yi_matrix_fecon235_invert_caution():
69 | '''Compute inverse using numpy inv() should return float of Aiv.
70 | >>> matrix.invert_caution( A )
71 | array([[ -2., 8., -5.],
72 | [ 3., -11., 7.],
73 | [ 9., -34., 21.]])
74 | '''
75 | assert np.allclose( matrix.invert_caution(A), Aiv )
76 |
77 |
78 | def test_yi_matrix_fecon235_invert_pseudo():
79 | '''Compute inverse using numpy pinv() should return float of Aiv.
80 | >>> matrix.invert_pseudo( A )
81 | array([[ -2., 8., -5.],
82 | [ 3., -11., 7.],
83 | [ 9., -34., 21.]])
84 | '''
85 | assert np.allclose( matrix.invert_pseudo(A), Aiv )
86 |
87 |
88 | def test_yi_matrix_fecon235_Abad_invert():
89 | '''Compute inverse using invert() with singular Abad.
90 | Pseudo-inverse will handle ILL-CONDITION with NONSENSE.
91 | >>> np.round( matrix.invert( Abad ), 5 )
92 | array([[ 0.06774, 0.02903, 0.01935],
93 | [ 0.02258, 0.00968, 0.00645],
94 | [ 0.50538, -0.02151, -0.23656]])
95 | '''
96 | pass
97 |
98 |
99 | def test_yi_matrix_fecon235_cov2cor():
100 | '''Compute correlation array given covariance array.
101 | >>> matrix.cov2cor( V, n=6 )
102 | array([[ 1. , 0.274578, 0.388535],
103 | [ 0.274578, 1. , -0.264488],
104 | [ 0.388535, -0.264488, 1. ]])
105 | '''
106 | # R test: http://rfunction.com/archives/851
107 | pass
108 |
109 |
110 | if __name__ == "__main__":
111 | system.endmodule()
112 |
--------------------------------------------------------------------------------
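
The yi_matrix module under test is not included in this snapshot, but the
cov2cor() doctest above agrees with the standard covariance-to-correlation
identity cor_ij = V_ij / (s_i * s_j). A minimal sketch under that assumption:

```
import numpy as np

def cov2cor( V, n=6 ):
    '''Correlation array from covariance array V, rounded to n places.'''
    d = 1.0 / np.sqrt( np.diag(V) )    # reciprocal standard deviations.
    return np.round( V * np.outer(d, d), n )

V = np.array([[ 1875.3209,   429.8712,   462.4775 ],
              [  429.8712,  1306.9817,  -262.8231 ],
              [  462.4775,  -262.8231,   755.5193 ]])

print( cov2cor(V) )    # reproduces the doctest values above.
```
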
/bin/ipnbrun:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # bash 4.2.24(1) Linux Ubuntu 12.04 Date : 2014-09-29
3 | #
4 | # _______________| ipnbrun : run ipynb non-interactively and produce html.
5 | #
6 | # Usage: ipnbrun [IPython notebook file(s)]
7 | #
8 | # Examples: ipnbrun foo*.ipynb
9 | #
10 | # Dependencies: runipy, $ pip install runipy
11 | # see https://github.com/paulgb/runipy
12 | # but it is included in the Anaconda distribution.
13 | # (pandoc is used as fallback for "ipython nbconvert")
14 |
15 |
16 | # CHANGE LOG
17 | # 2014-09-29 Amend to Preamble_v3. Change status numbering.
18 | # 2014-09-26 First version also produces HTML output.
19 |
20 |
21 | # _____ PREAMBLE_v3: settings, variables, and error handling.
22 | #
23 | LC_ALL=POSIX
24 | # locale means "ASCII, US English, no special rules,
25 | # output per ISO and RFC standards."
26 | # Esp. use ASCII encoding for glob and sorting characters.
27 | shopt -s extglob
28 | # ^set extended glob for pattern matching.
29 | shopt -s failglob
30 | # ^failed pattern matching signals error.
31 | set -e
32 | # ^errors checked: immediate exit if a command has non-zero status.
33 | set -u
34 | # ^unassigned variables shall be errors.
35 | # Example of default VARIABLE ASSIGNMENT: arg1=${1:-'foo'}
36 |
37 |
38 | program=${0##*/} # similar to using basename
39 | memf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
40 | # ^used to collect interim results and errors.
41 |
42 |
43 | cleanup () {
44 | # Delete temporary files, then optionally exit given status.
45 | local status=${1:-'0'}
46 | rm -f $memf
47 | [ $status = '-1' ] || exit $status # thus -1 prevents exit.
48 | } #--------------------------------------------------------------------
49 | warn () {
50 | # Message with basename to stderr. Usage: warn "message"
51 | echo -e "\n !! ${program}: $1 " >&2
52 | } #--------------------------------------------------------------------
53 | die () {
54 | # Exit with status of most recent command or custom status, after
55 | # cleanup and warn. Usage: command || die "message" [status]
56 | local status=${2:-"$?"}
57 | cat $memf >&2
58 | cleanup -1 && warn "$1" && exit $status
59 | } #--------------------------------------------------------------------
60 | trap "die 'SIG disruption, but cleanup finished.' 114" 1 2 3 15
61 | # Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
62 | trap "die 'unhandled ERR via trap, but cleanup finished.' 116" ERR
63 | # Cleanup after command failure unless it's part of a test clause.
64 | #
65 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
66 |
67 |
68 | fileprefix='tmp'
69 |
70 | for file in "$@" ; do
71 | if [ -f "$file" ] ; then
72 | runf="${fileprefix}-$file"
73 | # ^alternative output name.
74 |
75 | # Keep the original notebook intact,
76 | # save the non-interactive execution as runf:
77 | echo " :: Executing notebook $file, output as $runf ..."
78 | # To overwrite instead: runipy -o "$file"
79 | runipy "$file" "$runf" 2> $memf \
80 | || die "FATAL error in $file" 115
81 |
82 | echo " :: ... starting HTML conversion..."
83 | ipython nbconvert --to html --template full "$runf" 2>> $memf \
84 | || die "failed HTML conversion: $runf" 117
85 | # or: basic
86 | # then produce runf HTML output as well.
87 | # runipy also can convert to html,
88 | # but not advisable since it will hang on error
89 |          #      (runipy relies on nbconvert anyway).
90 | echo " :: FINISHED conversion of $runf to HTML."
91 | echo " :: :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::"
92 |
93 | # DEBUG: show runipy cell-by-cell interim results,
94 | # nbconvert warnings about latest pandoc.
95 | # Note that "die" function will "cat $memf".
96 | # cat $memf >&2
97 |
98 | else
99 | die "file does not exist: $file" 113
100 | fi
101 | done
102 |
103 |
104 |
105 | cleanup
106 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
107 |
108 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
109 |
--------------------------------------------------------------------------------
/lib/yi_secform.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-02-22
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_secform.py : module for SEC forms.
5 |
6 | REFERENCES:
7 | - SEC form 13F, http://www.sec.gov/answers/form13f.htm
8 |
9 | - Jupyter notebook SEC-13F-parse.ipynb derives and debugs this module.
10 | For static view, see https://git.io/13F
11 |
12 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
13 | 2016-02-22 Replace .sort(columns=...) with .sort_values(by=...)
14 | per future deprecation warning in pandas 0.17.1
15 | 2015-12-20 python3 compatible: lib import fix.
16 | 2015-12-17 python3 compatible: fix with yi_0sys
17 | 2015-08-30 First revised version for form 13F.
18 | '''
19 |
20 | from __future__ import absolute_import, print_function
21 |
22 | import numpy as np # for numerical work.
23 | import pandas as pd # for data munging.
24 | # ^for best results, install lxml, and as fallback: bs4 & html5lib.
25 |
26 | from . import yi_0sys as system
27 |
28 |
29 | # For doctest, Stanley Druckenmiller's "Duquesne Family Office" on 2015-08-14:
30 | druck150814='http://www.sec.gov/Archives/edgar/data/1536411/000153641115000006/xslForm13F_X01/form13f_20150630.xml'
31 |
32 |
33 | def parse13f( url=druck150814 ):
34 | '''Parse SEC form 13F into a pandas dataframe.'''
35 |     # url should be for the so-called Information Table in html/xml format.
36 | url = url.replace('https://', 'http://')
37 | # https cannot be read by lxml, surprisingly!
38 | #
39 | # Use pandas to read in the xml page...
40 | # See http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_html.html
41 |     # It searches for <table> elements and only for <tr> and <th> rows
42 |     #      and <td> elements within each <tr> or <th> element in the table.
43 | page = pd.read_html( url )
44 |     # page is a list of length 4, but only the
45 |     # last element of page interests us, which turns out to be a dataframe!
46 | df = page[-1]
47 | #
48 | # Let's rename columns for our sanity:
49 | df.columns = [ 'stock', 'class', 'cusip', 'usd', 'size', 'sh_prin',
50 | 'putcall', 'discret', 'manager', 'vote1', 'vote2', 'vote3' ]
51 | # First three ROWS are SEC labels, not data, so delete them:
52 | df = df[3:]
53 | # reset_index to start from 0 instead of 3,
54 | # drop previous 'index', default is to retain:
55 |     df = df.reset_index( drop=True )
56 | #
57 | # df is the pandas DATAFRAME fully representing a 13F view.
58 | # Some columns may need numeric type conversion later.
59 | return df
60 |
61 |
62 |
63 | def pcent13f( url=druck150814, top=7654321 ):
64 | '''Prune, then sort SEC 13F by percentage allocation, showing top N.
65 | >>> pcent13f( top= 7 )
66 | stock cusip usd putcall pcent
67 | 27 SPDR Gold Trust 78463V907 323626 NaN 21.81
68 | 15 Facebook Inc 30303M102 160612 NaN 10.82
69 | 29 Wells Fargo & Co 949746101 94449 NaN 6.36
70 | 31 LyondellBasell Ind's NV N53745100 74219 NaN 5.00
71 | 18 Halliburton Co 406216101 66629 NaN 4.49
72 | 16 Freeport-McMoRan Inc 35671D857 66045 NaN 4.45
73 | 8 Citigroup Inc 172967424 64907 NaN 4.37
74 | '''
75 | df = parse13f( url )
76 |     # Drop irrelevant COLUMNS:
77 | df.drop( df.columns[[1, 4, 5, 7, 8, 9, 10, 11]], axis=1, inplace=True )
78 | # inplace=True available after pandas 0.13
79 | #
80 | # Convert usd to float type since it was read as string:
81 | df[['usd']] = df[['usd']].astype( float )
82 | # Gotcha: int as type will fail for NaN !
83 | # Also we need float anyways for Python2 division later.
84 | #
85 | # Sort holdings by dollar value:
86 | dfusd = df.sort_values( by=['usd'], ascending=[False] )
87 | # .sort(columns=...) to be deprecated per pandas 0.17.1
88 | # Sum total portfolio in USD:
89 | usdsum = sum( dfusd.usd )
90 | #
91 | # NEW COLUMN 'pcent' for percentage of total portfolio:
92 | dfusd['pcent'] = np.round(( dfusd.usd / usdsum ) * 100, 2)
93 | #
94 | # Selects top N positions from the portfolio:
95 | return dfusd.head( top )
96 |
97 |
98 |
99 | if __name__ == "__main__":
100 | system.endmodule()
101 |
--------------------------------------------------------------------------------
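
A brief usage sketch of the two functions above (requires network access to
the SEC; output for the default druck150814 URL is shown in the pcent13f()
doctest):

```
from fecon235.lib import yi_secform as secform

df = secform.parse13f()              # full Information Table as a DataFrame.
top10 = secform.pcent13f( top=10 )   # ten largest holdings by allocation.
print( top10[['stock', 'pcent']] )
```
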
/tests/test_fred.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-11-06
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_fred : Test fecon235 yi_fred module.
5 |
6 | - Include test of index_delta_secs()
7 | - Indirect test of resample_main() via rewritten functions:
8 | daily(), monthly(), and quarterly().
9 |
10 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
11 | $ py.test --doctest-modules
12 |
13 | REFERENCE
14 | pytest: https://pytest.org/latest/getting-started.html
15 | or PDF at http://pytest.org/latest/pytest.pdf
16 |
17 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
18 | 2016-11-06 First version to verify fix #6.
19 | '''
20 |
21 | from __future__ import absolute_import, print_function
22 |
23 | import numpy as np
24 | import pandas as pd
25 | from fecon235.lib import yi_0sys as system
26 | from fecon235.lib import yi_fred as fred
27 | from fecon235.lib import yi_1tools as tools
28 | #
29 | # N.B. - in this tests directory without __init__.py,
30 | # we use absolute import as if outside the fecon235 package,
31 | # not relative import (cf. modules within lib).
32 |
33 | # # Show the CSV file zdata-xau-13hj-c30.csv:
34 | # # ^created in Linux environment...
35 | #
36 | # T,XAU
37 | # 2013-03-08,1581.75
38 | # 2013-03-11,1579.0
39 | # 2013-03-12,1594.0
40 | # 2013-03-13,1589.25
41 | # 2013-03-14,1586.0
42 | # 2013-03-15,1595.5
43 | # 2013-03-18,1603.75
44 | # 2013-03-19,1610.75
45 | # 2013-03-20,1607.5
46 | # 2013-03-21,1613.75
47 | # 2013-03-22,1607.75
48 | # 2013-03-25,1599.25
49 | # 2013-03-26,1598.0
50 | # 2013-03-27,1603.0
51 | # 2013-03-28,1598.25
52 | # 2013-03-29,1598.25
53 | # 2013-04-01,1598.25
54 | # 2013-04-02,1583.5
55 | # 2013-04-03,1574.75
56 | # 2013-04-04,1546.5
57 | # 2013-04-05,1568.0
58 | # 2013-04-08,1575.0
59 | # 2013-04-09,1577.25
60 | # 2013-04-10,1575.0
61 | # 2013-04-11,1565.0
62 | # 2013-04-12,1535.5
63 | # 2013-04-15,1395.0
64 | # 2013-04-16,1380.0
65 | # 2013-04-17,1392.0
66 | # 2013-04-18,1393.75
67 |
68 |
69 | def test_yi_fred_fecon235_Read_CSV_file():
70 | '''Read CSV file then check values.'''
71 | df = fred.readfile('zdata-xau-13hj-c30.csv')
72 | # readfile disregards XAU column name:
73 | assert [ col for col in df.columns ] == ['Y']
74 | assert df.shape == (30, 1)
75 | return df
76 |
77 |
78 | xau = test_yi_fred_fecon235_Read_CSV_file()
79 | xau = tools.todf( xau, 'XAU' )
80 | # todf used to rename column.
81 |
82 |
83 | def test_yi_fred_fecon235_check_xau_DataFrame():
84 | '''Check xau dataframe.'''
85 | assert [ col for col in xau.columns ] == ['XAU']
86 | assert tools.tailvalue( xau ) == 1393.75
87 |
88 |
89 | def test_yi_fred_fecon235_check_xau_frequency():
90 | '''Check xau dataframe frequency.'''
91 | assert fred.index_delta_secs( xau ) == 86400.0
92 | # Expect min daily frequency in seconds.
93 |
94 |
95 | def test_yi_fred_fecon235_check_xau_resample_main():
96 | '''Check daily xau converted by monthly(), then daily().
97 | Demonstrates downsampling, then upsampling --
98 | thus validating fred.resample_main().
99 | Check dates produced from quarterly().
100 | >>> xaumon = fred.monthly( xau )
101 | >>> xaumon
102 | XAU
103 | T
104 | 2013-03-01 1598.25
105 | 2013-04-01 1566.50
106 | >>> xaumondaily = fred.daily( xaumon )
107 | >>> xaumondaily = xaumondaily.round(2) # for sys independence.
108 | >>> xaumondaily # expect linear interpolation.
109 | XAU
110 | T
111 | 2013-03-01 1598.25
112 | 2013-03-04 1596.74
113 | 2013-03-05 1595.23
114 | 2013-03-06 1593.71
115 | 2013-03-07 1592.20
116 | 2013-03-08 1590.69
117 | 2013-03-11 1589.18
118 | 2013-03-12 1587.67
119 | 2013-03-13 1586.15
120 | 2013-03-14 1584.64
121 | 2013-03-15 1583.13
122 | 2013-03-18 1581.62
123 | 2013-03-19 1580.11
124 | 2013-03-20 1578.60
125 | 2013-03-21 1577.08
126 | 2013-03-22 1575.57
127 | 2013-03-25 1574.06
128 | 2013-03-26 1572.55
129 | 2013-03-27 1571.04
130 | 2013-03-28 1569.52
131 | 2013-03-29 1568.01
132 | 2013-04-01 1566.50
133 | >>> xauq = fred.quarterly( xau )
134 | >>> xauq # verify if dates are quarterly.
135 | XAU
136 | T
137 | 2013-01-01 1598.25
138 | 2013-04-01 1566.50
139 | '''
140 | pass
141 |
142 |
143 |
144 | if __name__ == "__main__":
145 | system.endmodule()
146 |
--------------------------------------------------------------------------------
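
fred.monthly() and fred.daily() are not shown in this snapshot, but the doctest
values above are consistent with a month-start median downsample followed by
business-day linear interpolation. A pandas sketch of that round-trip, under
that assumption, using the CSV file in this tests directory:

```
import pandas as pd

xau = pd.read_csv( 'zdata-xau-13hj-c30.csv', index_col='T', parse_dates=True )
xaumon = xau.resample('MS').median()       # 1598.25 and 1566.50, as above.
xaudaily = xaumon.resample('B').interpolate( method='linear' )
print( xaudaily.round(2) )                 # matches the doctest values.
```
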
/tests/test_fecon235.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2018-03-10
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_fecon235 : Test and demonstrate fecon235 module.
5 |
6 | Jupyter notebooks serve indirectly as integration tests in interactive mode.
7 | However, non-interactive testing is desirable for the developer.
8 | Also, demonstration for the casual user is needed. Thus this module.
9 |
10 | This module has code snippets to be read, and then employed as follows:
11 |
12 | $ nosetests --with-doctest # Better from our tests directory: ./smell
13 |
14 | We prefer doctest over the use of assert (with ==) in nose test functions
15 | because of the ease and readability of ascertaining dataframe output.
16 |
17 | If nose is not installed, then this will work as fallback:
18 |
19 | $ python -m doctest fecon_DEMO.py # exit 0 indicates tests passed.
20 |
21 | => As of fecon235 v4, we also favor pytest over nosetests, so e.g.
22 |
23 | $ py.test --doctest-modules [optional dir/file argument]
24 |
25 | REFERENCE:
26 | nosetests: http://nose.readthedocs.org/en/latest/usage.html
27 | pytest: https://pytest.org/latest/getting-started.html
28 | or PDF at http://pytest.org/latest/pytest.pdf
29 |
30 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
31 | 2018-03-10 Add demo for foreholt(). Test forecast() with grids=25.
32 | 2015-12-21 python3 compatible: lib import fix.
33 | Mark very slow tests with "vSlow" suffix, so
34 | $ py.test -k 'not vSlow' # Excludes such tests.
35 | 2015-12-17 python3 compatible: fix with yi_0sys
36 | 2015-09-04 Add demo for forecast().
37 | 2015-09-02 First version for get().
38 | '''
39 |
40 | from __future__ import absolute_import, print_function
41 |
42 | from fecon235.fecon235 import *
43 | # ^in one line we get essential functions from the yi_* modules,
44 | # including yi_0sys as system.
45 | #
46 | # N.B. - in this tests directory without __init__.py,
47 | # we use absolute import as if outside the fecon235 package,
48 | # not relative import (cf. modules within lib).
49 |
50 |
51 | def demo_GET_d4xau_from_FRED_vSlow():
52 | '''Test get() in fecon which uses getfred() in yi_fred module.
53 | Here we get gold quotes from the FRED database.
54 |
55 | >>> xau = get( d4xau )
56 | >>> xau['2015-07-21':'2015-07-28']
57 | Y
58 | T
59 | 2015-07-21 1105.6
60 | 2015-07-22 1088.6
61 | 2015-07-23 1097.4
62 | 2015-07-24 1080.8
63 | 2015-07-27 1100.0
64 | 2015-07-28 1096.2
65 | '''
66 | pass
67 |
68 |
69 |
70 | def demo_GET_w4cotr_metals_from_QUANDL_vSlow():
71 | '''Test get() in fecon which uses getqdl() in yi_quandl module.
72 | Thus it is an indirect test of yi_quandl_api module.
73 | Here we get the CFTC Commitment of Traders Reports
74 | for gold and silver expressed as our position indicator.
75 |
76 | >>> metals = get( w4cotr_metals )
77 | >>> metals['2015-07-21':'2015-07-28']
78 | Y
79 | Date
80 | 2015-07-21 0.458814
81 | 2015-07-28 0.461077
82 | '''
83 | pass
84 |
85 |
86 |
87 | def demo_FORECAST_m4xau_from_FRED_vSlow():
88 | '''Test forecast() in fecon which uses Holt-Winters method.
89 | Values for alpha and beta are somewhat optimized by moderate grids:
90 | alpha, beta, losspc, loss: [0.9167, 0.125, 2.486, 28.45]
91 | We use monthly gold data, and type forecast as integers
92 | to avoid doctest with floats (almost equal problem).
93 |
94 | >>> xau = get( m4xau )
95 | >>> xaufc = forecast( xau['2005-07-28':'2015-07-28'], h=6, grids=25 )
96 | >>> xaufc.astype('int')
97 | Forecast
98 | 0 1144
99 | 1 1135
100 | 2 1123
101 | 3 1112
102 | 4 1100
103 | 5 1089
104 | 6 1078
105 | '''
106 | pass
107 |
108 |
109 |
110 | def demo_foreholt_m4xau_from_FRED_vSlow():
111 | '''Test foreholt() in fecon235 which uses Holt-Winters method.
112 | Default values for alpha and beta are assumed.
113 | We use monthly gold data, and type forecast as integers
114 | to avoid doctest with floats (almost equal problem).
115 |
116 | >>> xau = get( m4xau )
117 | >>> xaufh = foreholt( xau['2005-07-28':'2015-07-28'], h=6 )
118 | >>> xaufh.astype('int')
119 | Forecast
120 | 0 1144
121 | 1 1161
122 | 2 1154
123 | 3 1146
124 | 4 1138
125 | 5 1130
126 | 6 1122
127 | '''
128 | pass
129 |
130 |
131 |
132 | def demo_groupgeoret_test_georet_Geometric_Mean_vSlow():
133 | '''Test groupget, followed by groupgeoret which depends on georet.
134 | First create a group dictionary, then retrieve...
135 |
136 | >>> fxdic = { 'EURUSD' : d4eurusd, 'USDJPY' : d4usdjpy }
137 | >>> fxdf = groupget( fxdic )
138 | >>> groupgeoret( fxdf['2010':'2015'], 256 )
139 | [[4.19, 4.63, 9.3, 256, 1565, '2010-01-01', '2015-12-31', 'USDJPY'], [-4.54, -4.08, 9.64, 256, 1565, '2010-01-01', '2015-12-31', 'EURUSD']]
140 | '''
141 | pass
142 |
143 |
144 |
145 | if __name__ == "__main__":
146 | system.endmodule()
147 |
--------------------------------------------------------------------------------
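
forecast() and foreholt() above are built on the Holt-Winters method (cf.
nb/holt-winters-equations.png). A minimal sketch of the underlying
non-seasonal Holt recursion; the naive initialization here is a simplifying
assumption, not necessarily fecon235's own:

```
def holt_forecast( y, alpha, beta, h ):
    '''h-step forecasts from Holt's linear (level + trend) smoothing.'''
    level, trend = y[0], y[1] - y[0]       # naive initialization.
    for t in range(1, len(y)):
        prev = level
        level = alpha * y[t] + (1 - alpha) * (level + trend)
        trend = beta * (level - prev) + (1 - beta) * trend
    return [ level + k * trend for k in range(h + 1) ]

#  E.g. with the grid-optimized values quoted in the docstring above:
#       holt_forecast( series, alpha=0.9167, beta=0.125, h=6 )
```
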
/LICENSE.md:
--------------------------------------------------------------------------------
1 | ## BSD-License
2 |
3 | Copyright (c) 2014-2017, Adriano, http://rsvp.github.com. *All rights reserved.*
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | - Redistributions of source code must retain the above copyright notice,
9 | this list of conditions and the following disclaimer.
10 |
11 | - Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | - Neither the name of Github nor the names of its contributors
16 | may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 |
31 | ### BSD-License APPROVALS
32 |
33 | - FSF approval: Free Software Foundation
34 | - OSI approval: Open Source Initiative
35 | - DFSG approval: Debian Free Software Guidelines
36 |
37 | Code licensed under the BSD license can be relicensed under the GPL (is
38 | "GPL-compatible") without securing the consent of all original authors.
39 |
40 |
41 | ### Further TERMS and CONDITIONS
42 |
43 | The term "Site" below shall refer either to our web site or to the
44 | downloadable contents of the repository therein, including but not limited to
45 | software code, documentation, data, and forked versions which are accessible
46 | through the internet.
47 |
48 | We make no warranty, express or implied, concerning the service and material
49 | on our Site. The services provided by us and our third party providers are on
50 | an "AS IS" basis at your sole risk. We expressly disclaim any implied
51 | warranty of merchantability or fitness for any purpose with respect to
52 | quality, accuracy, completeness, reliability, performance, timeliness, or
53 | continued availability. We do not assume any responsibility to maintain the
54 | data and services or to supply any corrections and updates. Availability of
55 | data and services is subject to change without notice.
56 |
57 | We shall have no liability, contingent or otherwise, to you or to third
58 | parties, for the quality, accuracy, timeliness, reliability, or any other
59 | aspect of the performance of our service. In no event will we be liable for
60 | any special, indirect, incidental, or consequential damages which may be
61 | incurred or experienced on account of you using the data or our services.
62 | Furthermore, we make no warranty whatsoever to you, express or implied,
63 | regarding the security of our Site.
64 |
65 | Our service may provide external links, sponsored and maintained by third
66 | parties, solely as a convenience to you. We make no representations concerning
67 | external content. The fact that we have provided a web link does not
68 | constitute an endorsement, authorization, sponsorship, or affiliation by us
69 | with respect to the external site, its owners, or its providers. We have not
70 | completely tested information, software, or products found on other sites and
71 | therefore do not make any representations with respect to the suitability or
72 | appropriateness of their content.
73 |
74 |
75 | ### Content should not be construed as a solicitation or recommendation
76 |
77 | Our material has been prepared for informational and educational purposes
78 | only, without regard to any particular user's investment objectives, financial
79 | situation or means. We are not soliciting any action based upon it. Our
80 | material is not to be construed as a recommendation; or an offer to buy or
81 | sell; or the solicitation of an offer to buy or sell any security, financial
82 | product, or instrument.
83 |
84 | Financial instruments, such as futures and options, described on our Site may
85 | involve significant risks, and you should not enter into any transactions
86 | unless you have fully understood all such risks and independently determined
87 | that such transactions are appropriate for you. Trading strategies give rise
88 | to substantial risk and may not be suitable for you.
89 |
90 | Any discussion of risks should not be considered to be a disclosure of all
91 | risks. Although our material is based upon information that we consider
92 | reliable, we do not represent that our material is accurate, current, or
93 | complete and it should not be relied upon as such.
94 |
95 | You should neither construe any of our material as business, financial,
96 | investment, hedging, trading, legal, regulatory, tax, or accounting advice nor
97 | make our service the primary basis for any investment decisions made by or on
98 | behalf of you, your accountants, or your managed or fiduciary accounts.
99 |
100 |
101 |
--------------------------------------------------------------------------------
/tests/10-check-modules.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # bash 4.3.11(1) Linux Ubuntu 14.04.1 Date : 2016-01-06
3 | #
4 | # _______________| 10-check-modules.sh : fecon235 test.
5 | #
6 | # Usage: $ ./10-check-modules.sh
7 | # # Execute outside of package for reasons
8 | # # in the comments below.
9 | #
10 | # Dependencies: python (preferably Python 2.7 and 3 series)
11 | # Repository at https://git.io/fecon235
12 | #
13 | # CHANGE LOG
14 | # 2016-01-06 First version for fecon235 v4.
15 |
16 |
17 | # _____ PREAMBLE_v3: settings, variables, and error handling.
18 | #
19 | LC_ALL=POSIX
20 | # locale means "ASCII, US English, no special rules,
21 | # output per ISO and RFC standards."
22 | # Esp. use ASCII encoding for glob and sorting characters.
23 | shopt -s extglob
24 | # ^set extended glob for pattern matching.
25 | shopt -s failglob
26 | # ^failed pattern matching signals error.
27 | set -e
28 | # ^errors checked: immediate exit if a command has non-zero status.
29 | set -o pipefail
30 | # ^exit status on fail within pipe, not (default) last command.
31 | set -u
32 | # ^unassigned variables shall be errors.
33 | # Example of default VARIABLE ASSIGNMENT: arg1=${1:-'foo'}
34 |
35 |
36 | program=${0##*/} # similar to using basename
37 | errf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
38 |
39 |
40 | cleanup () {
41 | # Delete temporary files, then optionally exit given status.
42 | local status=${1:-'0'}
43 | rm -f $errf
44 | [ $status = '-1' ] || exit $status # thus -1 prevents exit.
45 | } #--------------------------------------------------------------------
46 | warn () {
47 | # Message with basename to stderr. Usage: warn "message"
48 | echo -e "\n !! ${program}: $1 " >&2
49 | } #--------------------------------------------------------------------
50 | die () {
51 | # Exit with status of most recent command or custom status, after
52 | # cleanup and warn. Usage: command || die "message" [status]
53 | local status=${2:-"$?"}
54 | cat $errf >&2
55 | cleanup -1 && warn "$1" && exit $status
56 | } #--------------------------------------------------------------------
57 | trap "die 'SIG disruption, but cleanup finished.' 114" 1 2 3 15
58 | # Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
59 | trap "die 'unhandled ERR via trap, but cleanup finished.' 116" ERR
60 | # Cleanup after command failure unless it's part of a test clause.
61 | #
62 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
63 |
64 |
65 | echo " :: Test integrity of fecon235.py as a module..."
66 | echo " :: all essential lib modules will also be imported in the process..."
67 | echo
68 |
69 | # This test will fail at the system level if PYTHONPATH is set incorrectly,
70 | # -m flag for module should be duly noted...
71 | python -m fecon235.fecon235 \
72 | || warn "PASSED if: FATAL 113: fecon235.py is a MODULE for import..."
73 |
74 | # This is a PERVERSE test since
75 | # running "$ python fecon235.py" WITHIN the fecon235 package
76 | # results in this peculiar Python traceback:
77 | # from .lib import yi_0sys as system
78 | # ValueError: Attempted relative import in non-package
79 | # Trying to fix any relative import is futile because
80 | # the fecon235.py module should be run OUTSIDE the package. <=!!
81 | # See full explanation below.
82 | # So our relative import style is in fact valid in both Python 2 and 3.
83 | #
84 | # Fortunately, the tests directory is NOT a fecon235 package.
85 | # So if we get the system.endmodule() message,
86 | # then there are no interpreter errors, and all is good.
87 |
88 | echo " :: Please check for unusual TRACEBACKS -- despite exit 0 signal."
89 | echo " :: See https://git.io/fecon-intro for introductory help."
90 |
91 |
92 | cleanup # Instead of: trap arg EXIT
93 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
94 |
95 |
96 | # PEP-3122 describes the PROBLEM: "Because of how name resolution works for
97 | # relative imports in a world where PEP 328 is implemented, the ABILITY TO
98 | # EXECUTE MODULES WITHIN A PACKAGE CEASES BEING POSSIBLE. This failing stems
99 | # from the fact that the module being executed as the "main" module replaces its
100 | # __name__ attribute with "__main__" instead of leaving it as the absolute name
101 | # of the module. This breaks import's ability to resolve relative imports from
102 | # the main module into absolute names."
103 | # https://www.python.org/dev/peps/pep-3122/
104 | #
105 | # In other words, __main__ doesn't contain any information about package
106 | # structure. And that is why python complains about relative import in
107 | # non-package error.
108 | #
109 | # But note that in PEP-328 Guido pronounced that relative imports shall use
110 | # leading dots, provided that relative imports always use "from <> import";
111 | # "import <>" shall always be absolute. https://www.python.org/dev/peps/pep-0328/
112 | # Thus the fecon235.py module conforms to official guidelines.
113 | #
114 | # SOLUTION: by using the -m switch you provide the package structure information
115 | # to Python so that it can resolve the relative imports successfully.
116 | #
117 | # Hope this helps someone who is fighting the relative imports problem, because
118 | # going through PEP is really not fun.
119 |
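# Editor's sketch (hypothetical shell session) contrasting the two modes:
#
#   $ cd fecon235                      # inside the package: FAILS
#   $ python fecon235.py
#   ValueError: Attempted relative import in non-package
#
#   $ cd ..                            # outside the package: WORKS
#   $ python -m fecon235.fecon235
#   # ... ends with the system.endmodule() message, exit 0.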
120 |
121 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
122 |
--------------------------------------------------------------------------------
/tests/test_boltzmann.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-07-09
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_boltzmann : Test fecon235 ys_prtf_boltzmann module.
5 |
6 | Doctests display at lower precision since equality test becomes fuzzy across
7 | different systems if full floating point representation is used.
8 |
9 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
10 | $ py.test --doctest-modules
11 |
12 | REFERENCE
13 | pytest: https://pytest.org/latest/getting-started.html
14 | or PDF at http://pytest.org/latest/pytest.pdf
15 |
16 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
17 | 2017-07-09 First version based on notebook nb/prtf-boltzmann-1.ipynb
18 | '''
19 |
20 | from __future__ import absolute_import, print_function, division
21 | import numpy as np
22 |
23 | from fecon235 import fecon235 as fe
24 | from fecon235.lib import yi_0sys as system
25 | from fecon235.lib import ys_prtf_boltzmann as boltz
26 | # N.B. - In this tests directory without __init__.py,
27 | # we use absolute import as if outside the fecon235 package,
28 | # not relative import (cf. modules within lib).
29 |
30 |
31 | # Covariance matrix (but we will not use np matrix class) via:
32 | # >>> prices = fe.groupget( fe.world4d, maxi=5650 )
33 | # :: Retrieved from Google Finance: SPY
34 | # :: Retrieved from Google Finance: EEM
35 | # :: Retrieved from Google Finance: EZU
36 | # :: Retrieved from Google Finance: GLD
37 | # :: Retrieved from Google Finance: EWJ
38 | # >>> V = fe.covdiflog( prices['2011-01-01':'2017-06-26'], lags=1 )
39 | V = np.array([[ 8.48312099e-05, 1.02917158e-04, 1.13943470e-04,
40 | -2.79505009e-06, 7.46173310e-05],
41 | [ 1.02917158e-04, 1.87393335e-04, 1.63235156e-04,
42 | 1.80039246e-05, 1.06210719e-04],
43 | [ 1.13943470e-04, 1.63235156e-04, 2.26817214e-04,
44 | 8.18776853e-06, 1.15561266e-04],
45 | [ -2.79505009e-06, 1.80039246e-05, 8.18776853e-06,
46 | 1.13760941e-04, -2.73835320e-08],
47 | [ 7.46173310e-05, 1.06210719e-04, 1.15561266e-04,
48 | -2.73835320e-08, 1.34652717e-04]])
49 |
50 |
51 | # Correlation matrix derived by cov2cor(V), but tested elsewhere:
52 | corr = np.array([[ 1., 0.82, 0.82, -0.03, 0.7 ],
53 | [ 0.82, 1., 0.79, 0.12, 0.67],
54 | [ 0.82, 0.79, 1., 0.05, 0.66],
55 | [-0.03, 0.12, 0.05, 1., 0. ],
56 | [ 0.7, 0.67, 0.66, 0., 1. ]])
57 |
58 |
59 | # Sample weights from a Global Minimum Variance Portfolio:
60 | globalw = np.array([[ 0.87034542],
61 | [-0.2267291 ],
62 | [-0.19612603],
63 | [ 0.40540278],
64 | [ 0.14710693]])
65 |
66 |
67 | def test_ys_prtf_boltzmann_fecon235_weighcov():
68 | '''Check whether weighcov() function produces global weights.'''
69 | assert np.allclose( boltz.weighcov(V), globalw )
70 |
71 |
72 | def test_ys_prtf_boltzmann_fecon235_weighcov_LESSON():
73 | '''Check weighcov() function using related, but wrong, input.
74 | Only for testing purposes, we input the correlation matrix instead
75 | of the covariance matrix V which officially should be the argument.
76 | LESSON: weighcov(corr) != weighcov(V)
77 | i.e. global weights CANNOT be computed from correlation,
78 | we need the messy numbers from the covariance matrix.
79 | (Compare globalw above with globalw_corr here.)
80 | >>> globalw_corr = boltz.weighcov( corr )
81 | >>> np.round( globalw_corr, 2 )
82 | array([[ 0.26],
83 | [-0.06],
84 | [ 0.1 ],
85 | [ 0.46],
86 | [ 0.24]])
87 | '''
88 | pass
89 |
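# Editor's sketch, assuming weighcov() implements the textbook global
# minimum-variance solution w = inv(V).1 / (1'.inv(V).1). If so, with
# V above it should reproduce globalw (note the weights sum to 1):
#
#   >>> ones = np.ones( (V.shape[0], 1) )
#   >>> iv = np.linalg.inv( V )
#   >>> w = iv.dot(ones) / float( ones.T.dot(iv).dot(ones) )
#   >>> np.allclose( w, globalw )
#   True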
90 |
91 | def test_ys_prtf_boltzmann_fecon235_trimit():
92 | '''Check trimit() function.
93 | >>> weights = boltz.trimit( globalw, floor=0.01, level=0 )
94 | >>> np.round( weights, 4 )
95 | array([[ 0.8703],
96 | [ 0. ],
97 | [ 0. ],
98 | [ 0.4054],
99 | [ 0.1471]])
100 | '''
101 | pass
102 |
103 |
104 | def test_ys_prtf_boltzmann_fecon235_rentrim():
105 | '''Check rentrim() function, but also renormalize() indirectly.
106 | >>> weights = boltz.rentrim( globalw, floor=0.01, level=0 )
107 | >>> np.round( weights, 4 )
108 | array([[ 0.6117],
109 | [ 0. ],
110 | [ 0. ],
111 | [ 0.2849],
112 | [ 0.1034]])
113 | '''
114 | pass
115 |
116 |
117 | def test_ys_prtf_boltzmann_fecon235_boltzportfolio_vSlow():
118 | '''Check weighcov() indirectly through boltzportfolio() function.
119 | ___ATTN___ Very Slow test due to data download, and
120 | could fail years later due to data vendor; esp. check maxi and dates.
121 | >>> prices = fe.groupget( fe.world4d, maxi=5650 )
122 | :: Retrieved from Google Finance: SPY
123 | :: Retrieved from Google Finance: EEM
124 | :: Retrieved from Google Finance: EZU
125 | :: Retrieved from Google Finance: GLD
126 | :: Retrieved from Google Finance: EWJ
127 | >>> prtf = boltz.boltzportfolio( prices['2011-01-01':'2017-06-26'], yearly=256, temp=55, floor=0.01, level=0, n=2 )
128 | >>> prtf
129 | [8.59, [[0.95, 9.16, 'America'], [0.0, -4.96, 'Emerging'], [0.0, -1.41, 'Europe'], [0.03, -4.09, 'Gold'], [0.02, 1.33, 'Japan']]]
130 | '''
131 | pass
132 |
133 |
134 | if __name__ == "__main__":
135 | system.endmodule()
136 |
--------------------------------------------------------------------------------
/bin/docker/rsvp_fecon235/Dockerfile:
--------------------------------------------------------------------------------
1 | # Dockerfile Date : 2016-10-25
2 | #
3 | # _______________| Jupyter notebooks from fecon235 using Docker containers
4 | # Computational data tools for FINANCIAL ECONOMICS
5 | # https://hub.docker.com/r/rsvp/fecon235
6 | #
7 | # Usage: To BUILD this Dockerfile:
8 | # $ docker build -t IMAGENAME -f Dockerfile .
9 | # where the period implies current directory.
10 | #
11 | # Examples: To RUN the image:
12 | # $ docker run -p 8888:8888 -it IMAGENAME
13 | # which should yield a prompt, then as root:
14 | # % nbstart
15 | # and interact with browser at http://localhost:8888
16 | #
17 | # Tips: - Pull the latest git commits from /opt/rsvp/fecon235
18 | # - Learn about data volumes to persist data.
19 | #
20 | # Dependencies: Docker images: continuumio/{anaconda,anaconda3}
21 | # https://hub.docker.com/r/continuumio/anaconda
22 | # https://hub.docker.com/r/continuumio/anaconda3
23 | # The "3" indicates python3, else it is python2 based;
24 | # see FROM line at Begin Script.
25 | # Python code: https://github.com/rsvp/fecon235
26 | #
27 | # CHANGE LOG
28 | # 2016-10-25 Use own tagged base: rsvp/ana2-pd0181 for replication and tests.
29 | # Improve entry UI. Change SHELL from sh to bash.
30 | # Introduce REBUILD line to induce new clone of fecon235.
31 | # Reorder commands to optimize for Docker's copy-on-write.
32 | # 2016-10-09 First stable version produced rsvp/fecon:v4.16.0525
33 |
34 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
35 |
36 | # Tip: Obtain the base image in itself by, for example:
37 | # $ docker pull continuumio/anaconda:4.2.0
38 | FROM rsvp/ana2-pd0181
39 | # The size of this image alone is just over 2 GB -- so patience.
40 | # We may switch to miniconda and install only needed packages.
41 |
42 | MAINTAINER Adriano rsvp.github.com
43 | # Lead developer for fecon235
44 |
45 |
46 | # __________ SET-UP
47 |
48 | SHELL ["/bin/bash", "-c"]
49 | # Otherwise RUN would use /bin/sh as its default shell (ugh!)
50 |
51 | # Update the repository sources:
52 | # RUN apt-get update
53 | # ... advisable before any apt-get installs for the Linux environment.
54 |
55 | # Make DIRECTORIES: [Hassle of setting up an USER can be avoided.]
56 | RUN mkdir -p /opt/rsvp/bin /opt/rsvp/dock
57 | # /opt is designed for third-party software.
58 |
59 | # Set ENVIRONMENT variables for Bash and Python:
60 | ENV PATH /opt/rsvp/bin:$PATH
61 | ENV PYTHONPATH /opt/rsvp/bin:/opt/rsvp:$PYTHONPATH
62 |
63 | # conda is like pip for Python, but for anaconda* environments.
64 | # Use conda to INSTALL jupyter and other Python dependencies:
65 | RUN /opt/conda/bin/conda install \
66 | jupyter \
67 | pandas-datareader \
68 | -y --quiet
69 |
70 |
71 | RUN echo " :: REBUILD BELOW, marker: CHANGE THIS LINE: 2016-10-17 ::::::::"
72 | # Editing this line will induce new clone of fecon235.
73 |
74 | # Install fecon235 REPOSITORY from GitHub:
75 | RUN cd /opt/rsvp && git clone https://github.com/rsvp/fecon235.git
76 |
77 |
78 | # Expose the traditional port for Jupyter notebook:
79 | # EXPOSE 8888
80 | # ... we shall use "-p 8888:8888" at the command line instead.
81 |
82 |
83 | # Write NOTEBOOK START COMMAND as script: [Using "alias" NOT advisable!]
84 | #
85 | RUN echo '/opt/conda/bin/jupyter notebook --notebook-dir=/opt/rsvp/fecon235/nb \
86 | --ip="0.0.0.0" --port=8888 --no-browser' > /opt/rsvp/bin/nbstart \
87 | && chmod 755 /opt/rsvp/bin/nbstart
88 | #
89 | # DOES NOT WORK: --ip="127.0.0.1" OR --ip="localhost"
90 | # Jupyter docs use: --ip="0.0.0.0" which means all IPv4 addresses on local machine.
91 | # Using http://localhost:8888 on your browser should still work.
92 | #
93 | # Anaconda uses: --ip="*" which generates stern security warnings, but it's for
94 | # a Docker Machine VM which will interface at its own IP on port 8888.
95 |
96 |
97 | # Set the MAIN command for this image: [For TESTING command directly.]
98 | # ENTRYPOINT /opt/conda/bin/jupyter notebook --notebook-dir=/opt/rsvp/fecon235/nb \
99 | # --ip="0.0.0.0" --port=8888 --no-browser
100 |
101 |
102 | # Set the MAIN command for this image, with user instructions:
103 | ENTRYPOINT cd /opt/rsvp/fecon235 \
104 | && echo ' PORT required: $ docker run -p 8888:8888 -it rsvp/fecon235' \
105 | && echo ' ' \
106 | && echo ' :: Welcome! Git repository cloned at: /opt/rsvp/fecon235' \
107 | && echo ' :: Run "nbstart" for Jupyter notebooks, then interact with' \
108 | && echo ' :: host browser at http://localhost:8888' \
109 | && echo ' ' \
110 | && /bin/bash
111 | # ... should yield root prompt, all set for action, e.g. nbstart
112 | # (Using WORKDIR only scopes within a Dockerfile, not in a container.)
113 |
114 | # Specify default argument to Entrypoint:
115 | # CMD ["-c", "nbstart"]   (exec form requires double-quoted JSON)
116 | # ... fallback when an argument is not given on command line by user,
117 | # however, the container's Bash prompt will not be accessible
118 | # after the execution of 'nbstart'.
119 | # Note that only one CMD allowed per Dockerfile, and it can contain an
120 | # absolute command (not taking an argument from the command line).
121 |
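# Editor's sketch of the two CMD forms (general Docker syntax, not
# specific to this image):
#   CMD ["-c", "nbstart"]        # exec form: JSON array, double quotes only
#   CMD /bin/bash -c nbstart     # shell form: Docker wraps it in "/bin/sh -c"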
122 |
123 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
124 |
125 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=conf :
126 |
--------------------------------------------------------------------------------
/tests/smell:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # bash 4.3.11(1) Linux Ubuntu 14.04.1 Date : 2015-09-04
3 | #
4 | # _______________| smell : run nosetests for fecon235.
5 | #
6 | # Usage: $ ./smell [optional: file with path from project top]
7 | # # ^ ASSUMES execution from the tests directory.
8 | #
9 | # Dependencies: nosetests (Python nose)
10 | #
11 | # Reference: http://nose.readthedocs.org/en/latest/usage.html
12 | #
13 | # CHANGE LOG
14 | # 2015-09-04 Fix bug when nosetests generates exit 1.
15 | # Generate our own exit code 114 when tests fail.
16 | # 2015-09-03 First version.
17 |
18 |
19 | # _____ PREAMBLE_v3: settings, variables, and error handling.
20 | #
21 | LC_ALL=POSIX
22 | # locale means "ASCII, US English, no special rules,
23 | # output per ISO and RFC standards."
24 | # Esp. use ASCII encoding for glob and sorting characters.
25 | shopt -s extglob
26 | # ^set extended glob for pattern matching.
27 | shopt -s failglob
28 | # ^failed pattern matching signals error.
29 | set -e
30 | # ^errors checked: immediate exit if a command has non-zero status.
31 | set -o pipefail
32 | # ^exit status on fail within pipe, not (default) last command.
33 | set -u
34 | # ^unassigned variables shall be errors.
35 | # Example of default VARIABLE ASSIGNMENT: arg1=${1:-'foo'}
36 |
37 | arg1=${1:-''}
38 |
39 |
40 | program=${0##*/} # similar to using basename
41 | memf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
42 | errf=$( mktemp /dev/shm/88_${program}_tmp.XXXXXXXXXX )
43 |
44 |
45 | cleanup () {
46 | # Delete temporary files, then optionally exit given status.
47 | local status=${1:-'0'}
48 | rm -f $memf $errf
49 | [ $status = '-1' ] || exit $status # thus -1 prevents exit.
50 | } #--------------------------------------------------------------------
51 | warn () {
52 | # Message with basename to stderr. Usage: warn "message"
53 | echo -e "\n !! ${program}: $1 " >&2
54 | } #--------------------------------------------------------------------
55 | die () {
56 | # Exit with status of most recent command or custom status, after
57 | # cleanup and warn. Usage: command || die "message" [status]
58 | local status=${2:-"$?"}
59 | cat $errf >&2
60 | cleanup -1 && warn "$1" && exit $status
61 | } #--------------------------------------------------------------------
62 | trap "die 'SIG disruption, but cleanup finished.' 114" 1 2 3 15
63 | # Cleanup after INTERRUPT: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
64 | trap "die 'unhandled ERR via trap, but cleanup finished.' 116" ERR
65 | # Cleanup after command failure unless it's part of a test clause.
66 | #
67 | # _______________ :: BEGIN Script ::::::::::::::::::::::::::::::::::::::::
68 |
69 |
70 | logf='tests/smell-tmp.log'
71 | SECONDS=0
72 | path="$(pwd)"
73 | dir="${path##*/}"
74 |
75 |
76 | instruct () {
77 | echo " !! Please go to the tests directory, "
78 | echo " !! and then execute this program, ./$program "
79 | }
80 |
81 |
82 |
83 | if [ "$dir" = 'tests' ] ; then
84 | cd ..
85 | # ^^ Now at PROJECT TOP DIRECTORY.
86 | warn "STAND-BY, testing may take some time..."
87 | #
88 | nosetests --with-doctest --doctest-tests $arg1 > $memf 2>&1 \
89 | || true
90 | # Use options, per below, instead of config file.
91 | # nosetests outputs to stderr!
92 | # nosetests generates exit 1 if a test fails,
93 | # so || will ignore that for now, but exit 114 later.
94 | timed=$SECONDS
95 | less $memf
96 | # ^page log, else possibly too many error messages.
97 | echo " :: $program took $timed SECONDS." >> $memf
98 | cp $memf $logf
99 | echo
100 | echo " :: $program took $timed SECONDS, results at: $logf "
101 | else
102 | instruct > $errf
103 | die "Current directory $dir yields incorrect relative path." 113
104 | fi
105 |
106 |
107 | # FINALLY, generate our own exit code if some test failed:
108 | grep '^FAILED' $memf > /dev/null && die "Found FAILED test(s)." 114
109 |
110 |
111 | cleanup # Instead of: trap arg EXIT
112 | # _______________ EOS :: END of Script ::::::::::::::::::::::::::::::::::::::::
113 |
114 |
115 | # __________ NOTES on nose with doctest [ARCHIVE]
116 | #
117 | # nose outputs to stderr, so e.g. to page:
118 | #
119 | # $ nosetests --with-doctest --doctest-tests 2>&1 | more
120 | #
121 | # $ nosetests --plugins
122 | #
123 | # Plugin doctest
124 | # score: 100
125 | # Activate doctest plugin to find and run doctests in non-test
126 | # modules.
127 | #
128 | # Options:
129 | # --with-doctest
130 | # Enable plugin Doctest: Activate doctest plugin to find and run
131 | # doctests in non-test modules. [NOSE_WITH_DOCTEST]
132 | # --doctest-tests
133 | # Also look for doctests in test modules. Note that classes, methods
134 | # and functions should have either doctests or non-doctest tests,
135 | # not both. [NOSE_DOCTEST_TESTS]
136 | # --doctest-extension
137 | # Also look for doctests in files with this extension
138 | # [NOSE_DOCTEST_EXTENSION]
139 | # --doctest-result-variable
140 | # Change the variable name set to the result of the last interpreter
141 | # command from the default '_'. Can be used to avoid conflicts with
142 | # the _() function used for text translation.
143 | # [NOSE_DOCTEST_RESULT_VAR]
144 | # --doctest-fixtures
145 | # Find fixtures for a doctest file in module with this name appended
146 | # to the base name of the doctest file
147 | # --doctest-options
148 | # Specify options to pass to doctest. Eg.
149 | # '+ELLIPSIS,+NORMALIZE_WHITESPACE'
150 |
151 |
152 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
153 |
--------------------------------------------------------------------------------
/lib/yi_matrix.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-06-19
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_matrix.py : Linear algebra module
5 |
6 | Usage: To easily invert a matrix, use invert() which includes
7 | testing for ill-conditioned matrix and fallback to
8 | computing the Moore-Penrose pseudo-inverse.
9 |
10 | !!=> IRONY: For numpy work, we want to DISCOURAGE the use of np.matrix,
11 | which is a subclass of np.ndarray, since their
12 | interoperations may produce unexpected results.
13 |
14 | - The np.matrix subclass is confined to 2-D matrices.
15 | - Sticking with array constructs:
16 | Operator "*" means element-wise multiplication.
17 | For matrix multiplication, using .dot() is best,
18 | since operator "@" originates from Python 3.5.
19 | - For our arguments, "mat" is mathematically a matrix,
20 | but not necessarily designed for subclass np.matrix.
21 | - We explicitly avoid np.matrix.I to calculate inverse.
22 | - We will assume matrices are of type np.ndarray.
23 |
24 | Tests: see tests/test_matrix.py, esp. for output examples.
25 |
26 | REFERENCES
27 | - Numpy, https://docs.scipy.org/doc/numpy-dev/user/quickstart.html
28 | - Gilbert Strang, 1980, Linear Algebra and Its Applications, 2nd ed.
29 |
30 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
31 | 2017-06-19 Add cov2cor().
32 | 2017-06-17 First version to numerically understand numpy inverse methods.
33 | '''
34 |
35 | from __future__ import absolute_import, print_function, division
36 | import numpy as np
37 | from . import yi_0sys as system
38 |
39 |
40 | RCOND = 1e-15
41 | # Cutoff for small singular values.
42 |
43 |
44 | def is_singular( mat ):
45 | '''Just test whether matrix is singular (thus not invertible).
46 | Mathematically singular iff det(mat)==0, a test NOT recommended numerically.
47 | If the condition number is very large, then the matrix is said to
48 | be "ill-conditioned." Practically such a matrix is almost singular,
49 | and the computation of its inverse, or solution of a linear system
50 | of equations is prone to large numerical errors.
51 | A matrix that is not invertible has condition number equal to
52 | infinity mathematically, but here for numerical purposes,
53 | "ill-conditioned" shall mean condition number exceeds 1/epsilon.
54 | '''
55 | # Ref: https://en.wikipedia.org/wiki/Condition_number
56 | # We shall use epsilon for np.float64 data type
57 | # since Python’s floating-point numbers are usually 64-bit.
58 | # >>> np.finfo(np.float32).eps
59 | # 1.1920929e-07
60 | # >>> sys.float_info.epsilon
61 | # 2.220446049250313e-16
62 | # >>> np.finfo(np.float64).eps
63 | # 2.2204460492503131e-16
64 | # >>> 1/np.finfo(np.float64).eps
65 | # 4503599627370496.0
66 | if np.linalg.cond(mat) < 1/ np.finfo(np.float64).eps:
67 | # ^2-norm, computed directly using the SVD.
68 | return False
69 | else:
70 | # Intentionally, no error handling here.
71 | return True
72 |
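# Editor's sketch: an exactly collinear matrix has infinite condition
# number, so is_singular() should return True:
#   >>> is_singular( np.array([[1., 2.], [2., 4.]]) )
#   True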
73 |
74 | def invert_caution( mat ):
75 | '''Compute the multiplicative inverse of a matrix.
76 | Numerically np.linalg.inv() is generally NOT suitable,
77 | especially if the matrix is ill-conditioned,
78 | but it executes faster than invert_pseudo():
79 | np.linalg.inv() calls numpy.linalg.solve(mat, I)
80 | where I is identity and uses LAPACK LU FACTORIZATION.
81 | '''
82 | # Curiously np.linalg.inv() does not test this beforehand:
83 | if is_singular(mat):
84 | system.die("invert_caution(): matrix is SINGULAR.")
85 | else:
86 | # LinAlgError if mat is not square.
87 | return np.linalg.inv( mat )
88 |
89 |
90 | def invert_pseudo( mat, rcond=RCOND ):
91 | '''Compute the pseudo-inverse of a matrix.
92 | If a matrix is invertible, its pseudo-inverse will be its inverse.
93 | Moore-Penrose algorithm here uses SINGULAR-VALUE DECOMPOSITION (SVD).
94 | '''
95 | # Ref: https://en.wikipedia.org/wiki/Moore–Penrose_pseudoinverse
96 | # Mathematically, pseudo-inverse (a.k.a. generalized inverse) is defined
97 | # and unique for all matrices whose entries are real or complex numbers.
98 | # LinAlgError if SVD computation does not converge.
99 | return np.linalg.pinv( mat, rcond )
100 |
101 |
102 | def invert( mat, rcond=RCOND ):
103 | '''Compute the inverse, or pseudo-inverse as fallback, of a matrix.'''
104 | try:
105 | # Faster version first, with is_singular() test...
106 | return invert_caution( mat )
107 | except:
108 | # ... so mat is probably singular:
109 | system.warn("ILL-CONDITION: invert() may output pseudo-nonsense.")
110 | # How did we get here? The problem is most likely collinearity.
111 | return invert_pseudo( mat, rcond )
112 |
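# Editor's sketch of the fallback path on the singular matrix above;
# for this rank-1 example pinv(a.a') = a.a'/|a|**4 = A/25 exactly:
#   >>> invert( np.array([[1., 2.], [2., 4.]]) )   # warns, then pseudo
#   array([[ 0.04,  0.08],
#          [ 0.08,  0.16]])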
113 |
114 | def cov2cor( cov, n=6 ):
115 | '''Covariance array to correlation array, n-decimal places.
116 | Outputs "Pearson product-moment CORRELATION COEFFICIENTS."
117 | '''
118 | # https://en.wikipedia.org/wiki/Covariance_matrix
119 | darr = np.diagonal( cov )
120 | # ^get diagonal elements of cov into a pure array.
121 | # Numpy docs says darr is not writeable, OK.
122 | D = np.diag( 1.0/np.sqrt(darr) )
123 | # ^creates diagonal square "matrix" but not of subclass np.matrix.
124 | cor = D.dot(cov).dot(D)
125 | return np.round( cor, n )
126 |
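# Editor's sketch: for covariance [[4, 2], [2, 9]], the correlation
# off-diagonal is 2/(sqrt(4)*sqrt(9)) = 1/3:
#   >>> cov2cor( np.array([[4., 2.], [2., 9.]]), n=3 )
#   array([[ 1.   ,  0.333],
#          [ 0.333,  1.   ]])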
127 |
128 | if __name__ == "__main__":
129 | system.endmodule()
130 |
--------------------------------------------------------------------------------
/lib/yi_stocks.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-02-06
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_stocks.py : Access stock quotes.
5 |
6 | We define procedures to access stock quotes FREELY and directly from
7 | Yahoo Finance or Google Finance. (Quandl's free service uses
8 | these sources indirectly, and since we want to avoid paying for
9 | its premium pro vendors, we shall use the convenient pandas API.)
10 |
11 |
12 | Usage: df = getstock( 's4code', 7 )
13 | # ^one week.
14 | # ^begin with s4,
15 | # code is SYMBOL in lower case.
16 |
17 |
18 | Dependencies: pandas-datareader (Another package for pandas >= 0.17)
19 | # Our code still works for older pandas
20 | # by importing deprecated pandas.io
21 | # instead of pandas_datareader.
22 |
23 | REFERENCES:
24 |
25 | - pandas Remote Data Access (also for World Bank data)
26 | http://pandas.pydata.org/pandas-docs/stable/remote_data.html
27 |
28 | - Computational tools for pandas
29 | http://pandas.pydata.org/pandas-docs/stable/computation.html
30 |
31 | - Wes McKinney, 2013, _Python for Data Analysis_.
32 |
33 |
34 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
35 | 2017-02-06 Use names() within getstocks() to standardize names.
36 | 2015-12-20 python3 compatible: lib import fix.
37 | 2015-12-17 python3 compatible: fix with yi_0sys
38 | 2015-11-22 Test on python 2.7.10, IPython 4.0.0, pandas_datareader 0.2.0
39 | As of pandas v0.17, pandas.io is deprecated and
40 | moved to a new package "pandas-datareader",
41 | but imported as "pandas_datareader".
42 | 2015-09-13 First version based on yi_quandl module.
43 | '''
44 |
45 | from __future__ import absolute_import, print_function
46 |
47 | import datetime # pddata necessity.
48 | try:
49 | import pandas_datareader.data as pddata
50 | # for pandas 0.17 and above
51 | except:
52 | import pandas.io.data as pddata
53 | # For old deprecated pandas
54 |
55 | # In pandas 0.17.0, the sub-package pandas.io.data will be removed
56 | # in favor of a separately installable pandas-datareader package.
57 | # This will allow the data modules to be independently updated
58 | # to your pandas installation. The API for pandas-datareader v0.1.1
59 | # is the same as in pandas v0.16.1. (GH8961)
60 |
61 | from . import yi_0sys as system
62 | from . import yi_1tools as tools
63 |
64 |
65 | # __________ Convenient ABBREVIATIONS for less typing of quotes:
66 | T = 'T' # Generic time index.
67 | Y = 'Y' # GENERIC FIRST COLUMN name herein.
68 | y = 'Y' # GENERIC FIRST COLUMN name herein.
69 |
70 |
71 | # __________ Favorite ABBREVIATIONS as variables:
72 | s4spx = 's4spy' # Largest S&P500 ETF.
73 |
74 |
75 |
76 | def stock_decode( slang ):
77 | '''Validate and translate slang string into vendor stock code.
78 | Our short slang must be in all lower case starting with s4,
79 | e.g. 's4spy' with SYMBOL in lower case.
80 |
81 | Using slang helps to avoid collision in our larger namespace.
82 | '''
83 | if slang.isupper() or slang[:2] != 's4':
84 | # So if given argument is in all CAPS,
85 | # or does not begin with 's4'
86 | raise ValueError('Stock slang argument is invalid.')
87 | else:
88 | try:
89 | symbol = slang[2:].upper()
90 | except:
91 | raise ValueError('Stock slang argument is invalid.')
92 | return symbol
93 |
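# Editor's sketch of stock_decode() behavior, per its docstring:
#   >>> stock_decode( 's4spy' )
#   'SPY'
#   >>> stock_decode( 'S4SPY' )    # all caps is invalid slang
#   Traceback (most recent call last):
#   ...
#   ValueError: Stock slang argument is invalid.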
94 |
95 | def stock_all( slang, maxi=3650 ):
96 | '''slang string retrieves ALL columns for single stock.
97 |
98 | The slang string consists of 's4' + symbol, all in lower case,
99 | e.g. 's4spy' for SPY.
100 |
101 | maxi is set to default of ten years past data.
102 | '''
103 | # Typical: start = datetime.datetime(2013, 1, 20)
104 | # but we just want the most current window of data.
105 | now = datetime.datetime.now()
106 | end = now + datetime.timedelta( days=1 )
107 | # ^add just to be safe about timezones.
108 | start = end - datetime.timedelta( days=maxi )
109 | # Date offsets are chronological days,
110 | # NOT trading days.
111 | symbol = stock_decode( slang )
112 | #
113 | # MAIN: use Yahoo Finance before Google Finance:
114 | try:
115 | df = pddata.DataReader( symbol, 'yahoo', start, end )
116 | print(" :: Retrieved from Yahoo Finance: " + symbol )
117 | except:
118 | df = pddata.DataReader( symbol, 'google', start, end )
119 | print(" :: Retrieved from Google Finance: " + symbol)
120 | return df
121 |
122 |
123 | def stock_one( slang, maxi=3650, col='Close' ):
124 | '''slang string retrieves SINGLE column for said stock.
125 | Available col include: Open, High, Low, Close, Volume
126 | '''
127 | df = stock_all( slang, maxi )
128 | # return just a single column dataframe:
129 | return tools.todf( df[[ col ]] )
130 |
131 |
132 | def getstock( slang, maxi=3650 ):
133 | '''Retrieve stock data from Yahoo Finance or Google Finance.
134 | maxi is the number of chronological, not trading, days.
135 | We can SYNTHESIZE a s4 slang by use of string equivalent arg.
136 | '''
137 | # (A branch to synthesize slangs would go here; see docstring.)
138 | df = stock_one( slang, maxi, 'Close' )
143 | #
144 | # _Give default fecon235 names to column and index:
145 | df = tools.names( df )
146 | # Finally NO NULLS, esp. for synthetics derived from
147 | # overlapping indexes (note that readfile does
148 | # fillna with pad beforehand):
149 | return df.dropna()
150 |
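# Editor's usage sketch (live network call, hence not a doctest):
#   >>> spy = getstock( 's4spy', 7 )      # about one week of closes
#   :: Retrieved from Yahoo Finance: SPY
#   >>> list( spy.columns )               # names() standardizes the column
#   ['Y']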
151 |
152 | if __name__ == "__main__":
153 | system.endmodule()
154 |
--------------------------------------------------------------------------------
/lib/ys_mlearn.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-06-09
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| ys_mlearn.py : Machine learning tools
5 |
6 | - softmax() for cross-entropy, MLE, neural networks, Boltzmann portfolio.
7 | softmax_sort() for ordering and filtering info on the probabilities.
8 |
9 |
10 | REFERENCES
11 | - David J.C. MacKay (2008), Information theory, Inference, and Learning
12 | Algorithms, 7th printing from Cambridge U. Press.
13 |
14 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
15 | 2017-06-09 Revise tau dependence on arrstable mean, not second max.
16 | 2017-06-07 Add softmax_sort() with filter and renormalization.
17 | 2017-06-06 First fecon235 version.
18 | '''
19 |
20 | from __future__ import absolute_import, print_function, division
21 |
22 | import numpy as np
23 | from operator import itemgetter
24 | from . import yi_0sys as system
25 | from . import yi_1tools as tools
26 |
27 | def softmax( it, temp=55, n=4 ):
28 | '''Softmax probabilities for iterable where temp sets temperature tau.
29 | Temperature tau is set as a temp percent of ensemble mean so that
30 | the scaling of tau works well across many different scenarios.
31 | Experiment with temp around 40 to 70; higher temp (100+)
32 | will make it-scores more equi-probable, whereas probabilities
33 | can be sharpened by decreasing temp towards 1.
34 | Setting temp to 0 results in generic softmax without temperature.
35 | Results are rounded to n decimal places.
36 | >>> softmax( [ 16, 8, 4, 0, -8, -16 ], temp=200, n=4 )
37 | [0, 16, 0.2598, 200, [0.2598, 0.2001, 0.1757, 0.1542, 0.1188, 0.0915]]
38 | >>> softmax( [ 16, 8, 4, 0, -8, -16 ], temp=50, n=4 )
39 | [0, 16, 0.5733, 50, [0.5733, 0.2019, 0.1198, 0.0711, 0.0251, 0.0088]]
40 | >>> softmax( [ 16, 8, 4, 0, -8, -16 ], temp=30, n=4 )
41 | [0, 16, 0.7773, 30, [0.7773, 0.1365, 0.0572, 0.024, 0.0042, 0.0007]]
42 | >>> softmax( [ 16, 8, 4, 0, -8, -16 ], temp=1, n=4 )
43 | [0, 16, 1.0, 1, [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]]
44 | >>> softmax( [ 16, 8, 4, 0, -8, -16 ], temp=0, n=4 )
45 | [0, 16, 0.9997, 0, [0.9997, 0.0003, 0.0, 0.0, 0.0, 0.0]]
46 | >>> softmax( [ 16, 16 ], temp=200, n=4 )
47 | [0, 16, 0.5, 200, [0.5, 0.5]]
48 | >>> softmax( [ 16, 15, -8 ], temp=50, n=4 )
49 | [0, 16, 0.5587, 50, [0.5587, 0.4395, 0.0018]]
50 | '''
51 | # Reference: https://compute.quora.com/What-is-softmax
52 | # Convert iterable to numpy array, then find index of its maximum value:
53 | arr = tools.toar( it )
54 | idmax = np.argmax( arr )
55 | hardmax = arr[idmax]
56 | # Without loss of generality for mathematically defined softmax,
57 | # subtracting an arbitrary constant from each it-element
58 | # always helps NUMERICAL STABILITY, so let it be hardmax:
59 | arrstable = arr - hardmax
60 | # Important to note arrstable will consist of maximum(s) represented
61 | # by zero, and all other values will be necessarily negative.
62 | if temp > 0:
63 | avg = np.mean(arrstable)
64 | if avg:
65 | tau = abs( avg * (temp/100.0) )
66 | # Let temperature be POSITIVE and temp percent of ensemble mean.
67 | else:
68 | # Edge case: avg will be zero if it-scores are all equal,
69 | # which implies they are equi-probable, so any tau should do,
70 | # but tau must be NON-ZERO to avoid division error next.
71 | tau = 1.0
72 | else:
73 | # Whenever temp is set to 0, False, or None => GENERIC softmax.
74 | # Also negative temp will be treated as generic softmax.
75 | temp = 0 # Prefer the numerical equivalent for return below.
76 | tau = 1.0
77 | # MATHEMATICALLY, (Boltzmann) softmax is defined as follows:
78 | expit = np.exp( arrstable / tau )
79 | sum_expit = np.sum( expit )
80 | softmax_exact = expit / sum_expit
81 | # roundit will output a list, not an array:
82 | softmax_approx = tools.roundit( softmax_exact, n, echo=False )
83 | hardprob = softmax_approx[idmax]
84 | return [ idmax, hardmax, hardprob, temp, softmax_approx ]
85 |
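# Editor's worked example of the tau scaling, using the temp=50 doctest:
#   arr = [16, 8, 4, 0, -8, -16], hardmax = 16
#   arrstable = [0, -8, -12, -16, -24, -32], mean = -92/6 ~ -15.33
#   tau = |-15.33| * 50/100 ~ 7.67
#   exp(0/7.67) / sum_expit ~ 0.5733, matching the doctest above.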
86 |
87 | # __________ SOFTMAX USAGE NOTES
88 | # softmax_sort() is obviously slower to compute than softmax().
89 | # They serve different purposes, for example,
90 | # softmax()[-1][i] can track a particular i-th class of it, whereas
91 | # softmax_sort()[:3] will give information on the top 3 classes.
92 | #
93 | # The TEMPERATURE is a proxy for the degree of uncertainty
94 | # in the relative estimation of it-scores, but can also serve
95 | # to diffuse errors, i.e. a diversification technique with
96 | # mathematical reasoning rooted in statistical mechanics,
97 | # information theory, and maximum likelihood statistics.
98 | # To test temperature variations, softmax() will be much faster.
99 |
100 |
101 | def softmax_sort( it, temp=55, n=4, drop=0.00, renorm=False ):
102 | '''Softmax results sorted, include index; option to drop and renormalize.
103 | Probabilities less than drop are ignored.
104 | Setting renorm=True will make probabilities sum to 1.
105 | >>> softmax_sort([-16, -8, 0, 4, 8, 16], temp=50, drop=0.05, renorm=False)
106 | [(0.5733, 5, 16.0), (0.2019, 4, 8.0), (0.1198, 3, 4.0), (0.0711, 2, 0.0)]
107 | >>> softmax_sort([-16, -8, 0, 4, 8, 16], temp=50, drop=0.05, renorm=True)
108 | [(0.5934, 5, 16.0), (0.209, 4, 8.0), (0.124, 3, 4.0), (0.0736, 2, 0.0)]
109 | '''
110 | arr = tools.toar( it )
111 | softmax_approx = softmax(arr, temp, n)[-1]
112 | tups = [ (p, i, float(arr[i])) for i, p in enumerate(softmax_approx)
113 | if p >= drop ]
114 | # ^so tuples are formatted as (probability, index, it-value).
115 | if renorm:
116 | subtotal = sum([p for p, i, v in tups])
117 | tups = [(round(p/subtotal, n), i, v) for p, i, v in tups]
118 | # Want softmax_sort()[0] to yield the maximum candidate:
119 | return sorted( tups, key=itemgetter(2), reverse=True )
120 |
121 |
122 | if __name__ == "__main__":
123 | system.endmodule()
124 |
--------------------------------------------------------------------------------
/tests/test_1tools.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-04-18
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_1tools : Test fecon235 yi_1tools module.
5 |
6 | Using a CSV file offline, we construct and test two dataframes:
7 | xau and foo, then paste(), and finally test lagdf().
8 |
9 |
10 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
11 | $ py.test --doctest-modules
12 |
13 | REFERENCE
14 | pytest: https://pytest.org/latest/getting-started.html
15 | or PDF at http://pytest.org/latest/pytest.pdf
16 |
17 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
18 | 2016-04-18 First version tests lagdf().
19 | '''
20 |
21 | from __future__ import absolute_import, print_function
22 |
23 | import pandas as pd
24 | from fecon235.lib import yi_0sys as system
25 | from fecon235.lib import yi_fred as fred
26 | from fecon235.lib import yi_1tools as tools
27 | #
28 | # N.B. - in this tests directory without __init__.py,
29 | # we use absolute import as if outside the fecon235 package,
30 | # not relative import (cf. modules within lib).
31 |
32 | # # Show the CSV file zdata-xau-13hj-c30.csv:
33 | # # ^created in Linux environment...
34 | #
35 | # T,XAU
36 | # 2013-03-08,1581.75
37 | # 2013-03-11,1579.0
38 | # 2013-03-12,1594.0
39 | # 2013-03-13,1589.25
40 | # 2013-03-14,1586.0
41 | # 2013-03-15,1595.5
42 | # 2013-03-18,1603.75
43 | # 2013-03-19,1610.75
44 | # 2013-03-20,1607.5
45 | # 2013-03-21,1613.75
46 | # 2013-03-22,1607.75
47 | # 2013-03-25,1599.25
48 | # 2013-03-26,1598.0
49 | # 2013-03-27,1603.0
50 | # 2013-03-28,1598.25
51 | # 2013-03-29,1598.25
52 | # 2013-04-01,1598.25
53 | # 2013-04-02,1583.5
54 | # 2013-04-03,1574.75
55 | # 2013-04-04,1546.5
56 | # 2013-04-05,1568.0
57 | # 2013-04-08,1575.0
58 | # 2013-04-09,1577.25
59 | # 2013-04-10,1575.0
60 | # 2013-04-11,1565.0
61 | # 2013-04-12,1535.5
62 | # 2013-04-15,1395.0
63 | # 2013-04-16,1380.0
64 | # 2013-04-17,1392.0
65 | # 2013-04-18,1393.75
66 |
67 |
68 | def test_yi_1tools_fecon235_Read_CSV_file():
69 | '''Read CSV file then check values.'''
70 | df = fred.readfile('zdata-xau-13hj-c30.csv')
71 | # readfile disregards XAU column name:
72 | assert [ col for col in df.columns ] == ['Y']
73 | assert df.shape == (30, 1)
74 | return df
75 |
76 |
77 | xau = test_yi_1tools_fecon235_Read_CSV_file()
78 | xau = tools.todf( xau, 'XAU' )
79 | # todf used to rename column.
80 |
81 |
82 | def test_yi_1tools_fecon235_check_xau_DataFrame():
83 | '''Check xau dataframe.'''
84 | assert [ col for col in xau.columns ] == ['XAU']
85 | assert tools.tailvalue( xau ) == 1393.75
86 |
87 |
88 | foo = tools.todf( xau + 5000.00, 'FOO' )
89 |
90 |
91 | def test_yi_1tools_fecon235_check_foo_DataFrame():
92 | '''Check foo dataframe which is just xau + 5000.00 increase.'''
93 | assert [ col for col in foo.columns ] == ['FOO']
94 | assert tools.tailvalue( foo ) == 6393.75
95 |
96 |
97 | xaufoo = tools.paste([ xau, foo ])
98 |
99 |
100 | def test_yi_1tools_fecon235_paste_function():
101 | '''Test xau and foo pasted together as xaufoo dataframe.'''
102 | assert [ col for col in xaufoo.columns ] == ['XAU', 'FOO']
103 | assert xaufoo.shape == (30, 2)
104 | assert tools.tailvalue( xaufoo, pos=0 ) == 1393.75
105 | assert tools.tailvalue( xaufoo, pos=1 ) == 6393.75
106 | assert xaufoo.index[0] == pd.Timestamp('2013-03-08 00:00:00')
107 | assert xaufoo.index[-1] == pd.Timestamp('2013-04-18 00:00:00')
108 | # Timestamp is yet another pandas type.
109 | # Default time is midnight.
110 |
111 |
112 | xaufoolag = tools.lagdf( xaufoo, lags=3 )
113 |
114 |
115 | def test_yi_1tools_fecon235_lagdf_function():
116 | '''Test xaufoolag dataframe created by lagdf on xaufoo with lags=3.'''
117 | assert [ col for col in xaufoolag.columns ] == [ 'XAU_0', 'FOO_0',
118 | 'XAU_1', 'FOO_1', 'XAU_2', 'FOO_2', 'XAU_3', 'FOO_3' ]
119 | # Number after underscore indicates lag.
120 | assert xaufoolag.shape == (27, 8)
121 | # lags will introduce NaN, which are then dropped,
122 | # so rows are reduced from 30 to 27.
123 |
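# Editor's note on the layout verified below: for the last row
# (2013-04-18), XAU_0 is that day's 1393.75, XAU_1 the prior day's
# 1392.0, XAU_2 is 1380.0, XAU_3 is 1395.0 -- column suffix k holds
# the value lagged k rows.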
124 |
125 | # Making sure LAGGED VALUES are correctly placed...
126 | assert tools.tailvalue( xaufoolag, pos=0, row=1 ) == 1393.75
127 | assert tools.tailvalue( xaufoolag, pos=1, row=1 ) == 6393.75
128 | assert tools.tailvalue( xaufoolag, pos=2, row=1 ) == 1392.0
129 | assert tools.tailvalue( xaufoolag, pos=3, row=1 ) == 6392.0
130 | assert tools.tailvalue( xaufoolag, pos=4, row=1 ) == 1380.0
131 | assert tools.tailvalue( xaufoolag, pos=5, row=1 ) == 6380.0
132 | assert tools.tailvalue( xaufoolag, pos=6, row=1 ) == 1395.0
133 | assert tools.tailvalue( xaufoolag, pos=7, row=1 ) == 6395.0
134 |
135 | assert tools.tailvalue( xaufoolag, pos=0, row=2 ) == 1392.0
136 | assert tools.tailvalue( xaufoolag, pos=1, row=2 ) == 6392.0
137 | assert tools.tailvalue( xaufoolag, pos=2, row=2 ) == 1380.0
138 | assert tools.tailvalue( xaufoolag, pos=3, row=2 ) == 6380.0
139 | assert tools.tailvalue( xaufoolag, pos=4, row=2 ) == 1395.0
140 | assert tools.tailvalue( xaufoolag, pos=5, row=2 ) == 6395.0
141 |
142 | assert tools.tailvalue( xaufoolag, pos=0, row=3 ) == 1380.0
143 | assert tools.tailvalue( xaufoolag, pos=1, row=3 ) == 6380.0
144 | assert tools.tailvalue( xaufoolag, pos=2, row=3 ) == 1395.0
145 | assert tools.tailvalue( xaufoolag, pos=3, row=3 ) == 6395.0
146 |
147 | assert tools.tailvalue( xaufoolag, pos=0, row=4 ) == 1395.0
148 | assert tools.tailvalue( xaufoolag, pos=1, row=4 ) == 6395.0
149 |
150 | assert xaufoolag.index[0] == pd.Timestamp('2013-03-13 00:00:00')
151 | assert xaufoolag.index[-1] == pd.Timestamp('2013-04-18 00:00:00')
152 | # Timestamp is yet another pandas type.
153 | # Default time is midnight.
154 |
155 |
156 |
157 | if __name__ == "__main__":
158 | system.endmodule()
159 |
--------------------------------------------------------------------------------
/lib/ys_opt_holt.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-12-29
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| ys_opt_holt.py : optimize Holt-Winters parameters/forecast
5 |
6 | Here we rely on a single method to find optimal alpha and beta:
7 |
8 | - minBrute() from ys_optimize module: non-convex problem: GLOBAL optimizers:
9 | If your problem does NOT admit a unique local minimum (which can be hard
10 | to test unless the function is convex), and you do not have prior
11 | information to initialize the optimization close to the solution: Brute
12 | force uses a grid search: scipy.optimize.brute() evaluates the function on
13 | a given grid of parameters and returns the parameters corresponding to the
14 | minimum value.
15 |
16 | See lib/ys_optimize.py for implementation details and references.
17 | Also tests/test_optimize.py is intended as a TUTORIAL for USAGE.
18 |
19 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
20 | 2016-12-29 Include percentage loss in alphabetaloss list.
21 | 2016-12-28 Add optimize_holtforecast() for forecasting.
22 | Noted: 2*sigma approximated by 3*(median_absolute_error)
23 | 2016-12-19 First version.
24 | '''
25 |
26 | from __future__ import absolute_import, print_function
27 |
28 | import numpy as np
29 | from fecon235.lib import yi_0sys as system
30 | from fecon235.lib import yi_1tools as tools
31 | from fecon235.lib import yi_timeseries as ts
32 | from fecon235.lib import ys_optimize as yop
33 | # Assuming that DISPLAY=0 at ys_optimize module.
34 |
35 |
36 | # GOAL: given some data and a MODEL, we want to MINIMIZE the LOSS FUNCTION
37 | # over possible values of the model's PARAMETERS.
38 | # Parameters which satisfy that goal are called BEST estimates
39 | # for the specified functional form.
40 |
41 | # Loss function should DISTINGUISH between parameters to be optimized,
42 | # and other supplemental arguments. The latter is introduced
43 | # via a tuple called funarg, frequently used to inject data.
44 | #
45 | # We forego using RMSE (root mean squared errors) in favor of a more
46 | # robust loss function since the squaring magnifies large errors.
47 |
48 |
49 | def loss_holt(params, *args):
50 | '''Loss function for holt() using np.median of absolute errors.
51 | This is much more robust than using np.sum or np.mean
52 | (and perhaps better than editing "outliers" out of data).
53 | The error array will consist of 1-step ahead prediction errors.
54 | '''
55 | # Specify arguments:
56 | alpha, beta = params
57 | data = args[0] # Primary data assumed to be single column.
58 |
59 | # Information from the Holt-Winters filter is distilled
60 | # to the holt() multi-column workout dataframe;
61 | # see tests/test_optimize.py for numerical examples.
62 | holtdf = ts.holt( data, alpha, beta )
63 | # Henceforth use numpy arrays, rather than dataframes:
64 | y = holtdf['Y'].values # Actual data, without index
65 | l = holtdf['Level'].values
66 | b = holtdf['Growth'].values
67 |
68 | error = y[1:] - (l[:-1] + b[:-1])
69 | # # Equivalent, but more expensive, version of previous line...
70 | # # Compute error array, taking one lag into account:
71 | # N = y.size
72 | # error = np.zeros(( N-1, )) # Fill level array with zeros.
73 | # for i in range(N-1):
74 | # error[i] = y[i+1] - (l[i] + b[i])
75 | # # ^Actual ^Prediction MODEL
76 | # Ignore the first ten errors due to initialization warm-up:
77 | return np.median( np.absolute(error[10:]) )
78 |
79 | # STATISTICAL NOTE: if L is the median absolute error, then by definition,
80 | # prob(-L <= error <= L) = 0.5
81 | # If we assume the errors are Gaussian centered around zero,
82 | # then L = 0.675*sigma (by table look-up), thus sigma = 1.48*L.
83 | #
84 | # Rule of thumb: Two sigma confidence can be approximated by 3*L.
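# Editor's arithmetic check: sigma = L/0.675 ~ 1.48*L, so
# 2*sigma ~ 2.96*L ~ 3*L, the rule of thumb stated above.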
85 |
86 |
87 |
88 | # NOTICE: TUPLE "funarg" is used to specify arguments to function "fun"
89 | # which are NOT the parameters to be optimized (e.g. data).
90 | # Gotcha: Remember a single-element tuple must include
91 | # that mandatory comma: ( alone, )
92 |
93 |
94 | def optimize_holt(dataframe, grids=50, alphas=(0.0, 1.0), betas=(0.0, 1.0)):
95 | '''Optimize Holt-Winters parameters alpha and beta for given data.
96 | The alphas and betas are boundaries of respective explored regions.
97 | Function interpolates "grids" from its low bound to its high bound,
98 | inclusive. Final output: [alpha, beta, losspc, median absolute loss]
99 | TIP: narrow down alphas and betas using optimize_holt iteratively.
100 | '''
101 | if grids > 49:
102 | system.warn("Optimizing Holt-Winters alphabetaloss may take TIME!")
103 | # Exploring loss at all the grids is COMPUTATIONALLY INTENSE
104 | # due to holt(), especially if the primary data is very large.
105 | # Tip: truncate dataframe to recent data.
106 | result = yop.minBrute(fun=loss_holt, funarg=( dataframe, ),
107 | boundpairs=[alphas, betas], grids=grids)
108 | # result is a numpy array, so convert to list:
109 | alpha, beta = list(result)
110 | # Compute loss, given optimal parameters:
111 | loss = loss_holt((alpha, beta), dataframe)
112 | # Compute percentage loss relative to absolute tailvalue:
113 | losspc = (float(loss) / abs(tools.tailvalue(dataframe))) * 100
114 | # Since np.round and np.around print ugly, use Python round() to
115 | # display alpha and beta. Also include losspc and median absolute loss:
116 | return [round(alpha, 4), round(beta, 4), round(losspc, 4), loss]
117 |
118 |
119 | def optimize_holtforecast( dataframe, h=12, grids=50 ):
120 | '''Forecast ahead h periods using optimized Holt-Winters parameters.'''
121 | # Note: default hw_alpha and hw_beta from yi_timeseries module
122 | # are NOT necessarily optimal given specific data.
123 | alphabetaloss = optimize_holt(dataframe, grids=grids)
124 | # alphabetaloss will be a list: [alpha, beta, losspc, loss]
125 | # computed from default boundpairs: alphas=(0.0, 1.0), betas=(0.0, 1.0)
126 | holtdf = ts.holt(dataframe, alpha=alphabetaloss[0], beta=alphabetaloss[1])
127 | # holtdf is our Holt-Winters "workout" dataframe.
128 | forecasts_df = ts.holtforecast(holtdf, h)
129 | # forecasts_df is a dataframe containing h period forecasts.
130 | #
131 | # Unlike ts.holtforecast(), here we append alphabetaloss to the output
132 | # so that the model parameters and median absolute loss are available.
133 | # Forecasts alone can be obtained directly by func(...)[0]
134 | return [ forecasts_df, alphabetaloss ]
135 |
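# Editor's usage sketch, tying the pieces together (dataframe assumed
# to be a single-column series as elsewhere in fecon235):
#   >>> forecasts_df, alphabetaloss = optimize_holtforecast( dataframe, h=12 )
#   >>> alpha, beta, losspc, loss = alphabetaloss
#   >>> band = 3 * loss   # approximates 2-sigma, per the note above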
136 |
137 |
138 | if __name__ == "__main__":
139 | system.endmodule()
140 |
--------------------------------------------------------------------------------
/tests/test_gauss_mix.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-06-05
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_gauss_mix : Test fecon235 ys_gauss_mix module.
5 |
6 | Assuming testing will occur in tests directory, to locate small data file.
7 |
8 | Doctests display at lower precision since equality test becomes fuzzy across
9 | different systems if full floating point representation is used.
10 |
11 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
12 | $ py.test --doctest-modules
13 |
14 | REFERENCE
15 | pytest: https://pytest.org/latest/getting-started.html
16 | or PDF at http://pytest.org/latest/pytest.pdf
17 |
18 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
19 | 2017-06-05 Add test for gm2gem().
20 | 2017-05-21 Add tests for gemrate() and gemrat(). Note the deprecation of
21 | gm2_georet() and georet_gm2() due to math proof.
22 | 2017-05-19 First version.
23 | '''
24 |
25 | from __future__ import absolute_import, print_function
26 |
27 | from fecon235.lib import yi_0sys as system
28 | from fecon235.lib import yi_fred as fred
29 | from fecon235.lib import yi_1tools as tools
30 | from fecon235.lib import ys_gauss_mix as gmix
31 | #
32 | # N.B. - In this tests directory without __init__.py,
33 | # we use absolute import as if outside the fecon235 package,
34 | # not relative import (cf. modules within lib).
35 |
36 |
37 | # # Show the CSV file zdata-xau-13hj-c30.csv:
38 | # # ^created in Linux environment...
39 | # # Warning: last four points may look like outliers, but they are actual.
40 | #
41 | # T,XAU
42 | # 2013-03-08,1581.75
43 | # 2013-03-11,1579.0
44 | # 2013-03-12,1594.0
45 | # 2013-03-13,1589.25
46 | # 2013-03-14,1586.0
47 | # 2013-03-15,1595.5
48 | # 2013-03-18,1603.75
49 | # 2013-03-19,1610.75
50 | # 2013-03-20,1607.5
51 | # 2013-03-21,1613.75
52 | # 2013-03-22,1607.75
53 | # 2013-03-25,1599.25
54 | # 2013-03-26,1598.0
55 | # 2013-03-27,1603.0
56 | # 2013-03-28,1598.25
57 | # 2013-03-29,1598.25
58 | # 2013-04-01,1598.25
59 | # 2013-04-02,1583.5
60 | # 2013-04-03,1574.75
61 | # 2013-04-04,1546.5
62 | # 2013-04-05,1568.0
63 | # 2013-04-08,1575.0
64 | # 2013-04-09,1577.25
65 | # 2013-04-10,1575.0
66 | # 2013-04-11,1565.0
67 | # 2013-04-12,1535.5
68 | # 2013-04-15,1395.0
69 | # 2013-04-16,1380.0
70 | # 2013-04-17,1392.0
71 | # 2013-04-18,1393.75
72 |
73 |
74 | def test_ys_gauss_mix_fecon235_Read_CSV_file():
75 | '''Read CSV file then check values.'''
76 | df = fred.readfile('zdata-xau-13hj-c30.csv')
77 | # readfile disregards XAU column name:
78 | assert [ col for col in df.columns ] == ['Y']
79 | assert df.shape == (30, 1)
80 | return df
81 |
82 |
83 | # Establish REFERENCE dataframe for tests below:
84 | xau = test_ys_gauss_mix_fecon235_Read_CSV_file()
85 |
86 |
87 | def test_ys_gauss_mix_fecon235_check_xau_DataFrame():
88 | '''Check xau dataframe.'''
89 | assert tools.tailvalue( xau ) == 1393.75
90 |
91 |
92 | def test_ys_gauss_mix_fecon235_check_gm2_strategy_feasible():
93 | '''Test sympy solving for "a" in Proposition 2 numerically.'''
94 | a_feasible = round( gmix.gm2_strategy(kurtosis=7, b=2), 4 )
95 | assert a_feasible == 0.7454
96 |
97 |
98 | def test_ys_gauss_mix_fecon235_check_gm2_strategy_infeasible():
99 | '''Destroy sympy solving for "a" in Proposition 2 numerically.'''
100 | try:
101 | a_feasible = round( gmix.gm2_strategy(kurtosis=13, b=2), 4 )
102 | # INTENTIONAL FAIL: That b is too low for high kurtosis.
103 | # Previous test shows feasible when kurtosis=7.
104 | # sympy actually fails correctly, and will raise its exception.
105 | except:
106 | a_feasible = "Intentionally_FATAL_since_INFEASIBLE"
107 | # Avoids reproducing the traceback to assert next:
108 | assert a_feasible == "Intentionally_FATAL_since_INFEASIBLE"
109 |
110 |
111 | def test_ys_gauss_mix_fecon235_check_gm2_vols():
112 | '''Check the annualized version of gm2_vols_fit() on test data.'''
113 | xauvols = gmix.gm2_vols( xau[:'2013-04-12'], b=2.5, yearly=256 )
114 | # ^else severe drop in price for small sample.
115 | mu, sigma1, sigma2, q, k_Pearson, sigma, b, yearly, N = xauvols
116 | assert round(mu, 4) == -30.3880 # mu annualized
117 | assert round(sigma1, 4) == 11.1829 # sigma1 annualized
118 | assert round(sigma2, 4) == 28.7713 # sigma2 annualized
119 | assert round(q, 4) == 0.0105 # q
120 | assert round(k_Pearson, 4) == 3.8787 # kurtosis
121 | assert round(sigma, 4) == 11.5085 # sigma
122 | assert b == 2.5 # b
123 | assert yearly == 256 # yearly
124 | assert N == 25 # N, sample size
125 |
126 |
127 | def test_ys_gauss_mix_fecon235_check_gemrate():
128 | '''Check on geometric mean rate gemrate() based on gemreturn_Jean().'''
129 | assert 0.05 - ((0.20*0.20)/2.) == 0.03
130 | # ^most well-known approx. for mu=0.05 and sigma=0.20
131 | assert round(gmix.gemrate(0.05, 0.20, kurtosis=3, yearly=1), 7) == 0.0301066
132 | # Jean (1983) adds just 1 bp for Gaussian over usual approximation.
133 | assert round(gmix.gemrate(0.05, 0.20, kurtosis=13, yearly=1), 7) == 0.0267223
134 | # So increase in kurtosis lowered geometric mean rate by 34 bp.
135 | assert round(gmix.gemrate(0.05, 0.20, kurtosis=3, yearly=10), 7) == 0.3453084
136 | # OK, compounding works as intended.
137 |
138 |
139 | def test_ys_gauss_mix_fecon235_check_gemrat():
140 | '''Check on geometric mean rate of data, gemrat() in percentage form.'''
141 | xaugem = gmix.gemrat( xau[:'2013-04-12'], yearly=256 )
142 | # ^else severe drop in price for small sample.
143 | grate, mu, sigma, k_Pearson, yearly, N = xaugem
144 | assert round(grate, 4) == -31.3826 # gemrat annualized
145 | assert round(mu, 4) == -30.388 # arithmetic mean annualized
146 | assert round(sigma, 4) == 11.5085 # sigma
147 | assert round(k_Pearson, 4) == 3.8787 # kurtosis
148 | assert yearly == 256 # yearly
149 | assert N == 25 # N, sample size
150 |
151 |
152 | def test_ys_gauss_mix_fecon235_check_gm2gem():
153 | '''Check on geometric mean rate of data and GM(2) model: print gm2gemrat().
154 | >>> gmix.gm2gem( xau[:'2013-04-12'], yearly=256, b=2.5, pc=True, n=4 )
155 | Geometric mean rate: -31.3826
156 | Arithmetic mean rate: -30.388
157 | sigma: 11.5085
158 | kurtosis (Pearson): 3.8787
159 | GM(2), sigma1: 11.1829
160 | GM(2), sigma2: 28.7713
161 | GM(2), q: 0.0105
162 | GM(2), b: 2.5
163 | yearly: 256
164 | N: 25
165 | '''
166 | pass
167 |
168 |
169 | if __name__ == "__main__":
170 | system.endmodule()
171 |
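172 | #==============================================================================
173 | # WORKED ARITHMETIC behind test_ys_gauss_mix_fecon235_check_gemrate() above,
174 | # a sketch using only the classic approximation (no fecon235 calls assumed):
175 | #
176 | #     mu, sigma = 0.05, 0.20
177 | #     g_classic = mu - (sigma*sigma)/2.0    # = 0.05 - 0.02 = 0.03
178 | #     # gemrate(0.05, 0.20, kurtosis=3, yearly=1) = 0.0301066 above, so
179 | #     # Jean (1983) adds about 1 bp to this classic Gaussian estimate,
180 | #     # while raising kurtosis to 13 lowers it by about 34 bp: 0.0267223.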
--------------------------------------------------------------------------------
/lib/yi_simulation.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-05-15
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_simulation.py : simulation module for financial economics.
5 |
6 | - Essential probabilistic functions for simulations.
7 | - Simulate Gaussian mixture model GM(2).
8 | - Pre-compute pool of asset returns.
9 | - SPX 1957-2014
10 | - Normalize, but include fat tails, so that mean and volatility can be specified.
11 | - Design bootstrap to study alternate histories and small-sample statistics.
12 | - Visualize price paths.
13 |
14 |
15 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
16 | 2017-05-15 Rewrite simug_mix() in terms of prob(second Gaussian).
17 | Let N generally be the count := sample size.
18 | 2017-05-06 Add uniform randou(). Add maybe() random indicator function.
19 | Add Gaussian randog(), simug(), and simug_mix().
20 | 2015-12-20 python3 compatible: lib import fix.
21 | 2015-12-17 python3 compatible: fix with yi_0sys
22 | 2014-12-12 First version adapted from yi_fred.py
23 | '''
24 |
25 | from __future__ import absolute_import, print_function, division
26 |
27 | import numpy as np
28 |
29 | from . import yi_0sys as system
30 | from .yi_1tools import todf, georet
31 | from .yi_fred import readfile
32 | from .yi_plot import plotn
33 |
34 |
35 | # ACTUAL SPX mean and volatility from 1957-01-03 to 2014-12-11 in percent.
36 | # N = 15116
37 | MEAN_PC_SPX = 7.6306
38 | STD_PC_SPX = 15.5742
39 | N_PC_SPX = 15116
40 |
41 |
42 | def randou( upper=1.0 ):
43 | '''Single random float, not integer, from Uniform[0.0, upper).'''
44 | # Closed lower bound of zero, and argument for open upper bound.
45 | # To generate arrays, please use np.random.random().
46 | return np.random.uniform(low=0.0, high=upper, size=None)
47 |
48 |
49 | def maybe( p=0.50 ):
50 | '''Uniformly random indicator function such that prob(I=1=True) = p.'''
51 | # Nice to have for random "if" conditional branching.
52 | # Fun note: Python's boolean True is actually mapped to int 1.
53 | if randou() <= p:
54 | return 1
55 | else:
56 | return 0
57 |
58 |
59 | def randog( sigma=1.0 ):
60 | '''Single random float from Gaussian N(0.0, sigma^2).'''
61 | # Argument sigma is the standard deviation, NOT the variance!
62 | # For non-zero mean, just add it to randog later.
63 | # To generate arrays, please use simug().
64 | return np.random.normal(loc=0.0, scale=sigma, size=None)
65 |
66 |
67 | def simug( sigma, N=256 ):
68 | '''Simulate array of shape (N,) from Gaussian Normal(0.0, sigma^2).'''
69 | # Argument sigma is the standard deviation, NOT the variance!
70 | arr = sigma * np.random.randn( N )
71 | # For non-zero mean, simply add it later: mu + simug(sigma)
72 | return arr
73 |
74 |
75 | def simug_mix( sigma1, sigma2, q=0.10, N=256 ):
76 | '''Simulate array from zero-mean Gaussian mixture GM(2).'''
77 | # Mathematical details in nb/gauss-mix-kurtosis.ipynb
78 | # Pre-populate an array of shape (N,) with the FIRST Gaussian,
 79 |     # so that most of the work is done quickly and memory-efficiently...
80 | arr = simug( sigma1, N )
81 | # ... except for some random replacements:
82 | for i in range(N):
83 | # p = 1-q = probability drawing from FIRST Gaussian.
84 | # So with probability q, replace an element of arr
85 | # with a float from the SECOND Gaussian:
86 | if maybe( q ):
87 | arr[i] = randog( sigma2 )
88 | return arr
89 |
90 |
91 | #==============================================================================
92 |
93 |
94 | def GET_simu_spx_pcent():
95 | '''Retrieve normalized SPX daily percent change 1957-2014.'''
96 | # NORMALIZED s.t. sample mean=0 and std=1%.
97 | datafile = 'SIMU-mn0-sd1pc-d4spx_1957-2014.csv.gz'
98 | try:
99 | df = readfile( datafile, compress='gzip' )
100 | # print(' :: Import success: ' + datafile)
101 | except:
102 | df = 0
103 | print(' !! Failed to find: ' + datafile)
104 | return df
105 |
106 |
107 | def SHAPE_simu_spx_pcent( mean=MEAN_PC_SPX, std=STD_PC_SPX ):
108 | '''Generate SPX percent change (defaults are ACTUAL annualized numbers).'''
109 | # Thus the default arguments can replicate actual time series
110 | # given initial value: 1957-01-02 46.20
111 | # Volatility is std := standard deviation.
112 | spxpc = GET_simu_spx_pcent()
113 | mean_offset = mean / 256.0
114 |     #      ^256 assumed trading days per year.
115 | std_multiple = std / 16.0
116 |     #      ^sqrt(256) = 16 converts annualized std to daily.
117 | return (spxpc * std_multiple) + mean_offset
118 |
119 |
120 | def SHAPE_simu_spx_returns( mean=MEAN_PC_SPX, std=STD_PC_SPX ):
121 | '''Convert percent form to return form.'''
122 | # So e.g. 2% gain is converted to 1.02.
123 | spxpc = SHAPE_simu_spx_pcent( mean, std )
124 | return 1 + (spxpc / 100.0)
125 |
126 |
127 | def array_spx_returns( mean=MEAN_PC_SPX, std=STD_PC_SPX ):
128 | '''Array of SPX in return form.'''
129 | # Array far better than list because of numpy efficiency.
130 | # But if needed, use .tolist()
131 | spxret = SHAPE_simu_spx_returns( mean, std )
132 | # Use array to conveniently bootstrap sample later.
133 | # The date index will no longer matter.
134 | return spxret['Y'].values
135 |
136 |
137 | def bootstrap( N, yarray ):
138 |     '''Randomly pick N values without replacement from yarray.'''
139 | # In repeated simulations, yarray should be pre-computed,
140 | # using array_spx_returns( ... ).
141 | # http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.choice.html
142 | return np.random.choice( yarray, size=N, replace=False )
143 |
144 |
145 | def simu_prices( N, yarray ):
146 |     '''Convert bootstrap returns into a price time-series pandas DATAFRAME.'''
147 | # Initial price implicitly starts at 1.
148 | # Realize that its history is just the products of the returns.
149 | ret = bootstrap( N, yarray )
150 | # Cumulative product of array elements:
151 | # cumprod is very fast, and keeps interim results!
152 | # http://docs.scipy.org/doc/numpy/reference/generated/numpy.cumprod.html
153 | return todf( np.cumprod( ret ) )
154 |
155 |
156 | def simu_plots_spx( charts=1, N=N_PC_SPX, mean=MEAN_PC_SPX, std=STD_PC_SPX ):
157 | '''Display simulated SPX price charts of N days, given mean and std.'''
158 | yarray = array_spx_returns( mean, std )
159 | # Read in the data only once BEFORE the loop...
160 | for i in range( charts ):
161 | px = simu_prices( N, yarray )
162 | plotn( px )
163 | # Plot, then for the given prices, compute annualized:
164 | # geometric mean, arithmetic mean, volatility.
165 | print(' georet: ' + str( georet(px) ))
166 | print(' ____________________________________')
167 | print('')
168 | return
169 |
170 |
171 | if __name__ == "__main__":
172 | system.endmodule()
173 |
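174 | #==============================================================================
175 | # USAGE SKETCH, hedged: draw one fat-tailed GM(2) sample, then bootstrap a
176 | # single price path from the SPX return pool (assumes the SIMU-*.csv.gz
177 | # data file is locatable by GET_simu_spx_pcent(); sigmas are illustrative):
178 | #
179 | #     arr = simug_mix( sigma1=0.8, sigma2=4.0, q=0.10, N=256 )
180 | #     #  ^zero-mean draws, mostly from sigma1, with probability q from sigma2.
181 | #     yarray = array_spx_returns()              # pool in return form, e.g. 1.02
182 | #     px = simu_prices( N=256, yarray=yarray )  # one bootstrap price path
183 | #     print( georet(px) )                       # geo mean, arith mean, volatility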
--------------------------------------------------------------------------------
/lib/yi_timeseries.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-04-11
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_timeseries : essential time series functions.
5 |
6 | TESTS for this module are carried out in tests/test_timeseries.py
7 | and the doctests there show numerical examples of how some of our
8 | time-series algorithms are built.
9 |
10 | OPTIMIZATION will be carried out in separate modules to avoid clutter here.
11 | For example, to optimize Holt-Winters parameters alpha and beta,
12 | conditional on a particular dataset, for forecasting purposes (rather than
13 | smoothing), please kindly see our module lib/ys_opt_holt.py
14 |
15 | USAGE of the code for the Holt-Winters time-series model is illustrated
16 | in the Jupyter notebook at https://git.io/gdpspx which is a rendering of
17 | nb/fred-gdp-spx.ipynb in the fecon235 repository.
18 |
19 |
20 | REFERENCES:
21 |
22 | - Holt-Winters two-parameter linear growth exponential smoothing model:
23 |
24 | - Spyros Makridakis, 1978, _FORECASTING_, pp. 64-66.
25 | H-W does extremely well against ARIMA models.
26 | - Rob Hyndman, 2008, _Forecasting with Exponential Smoothing_,
27 | discusses level, growth (linear), and seasonal variants.
28 | - Sarah Gelper, 2007, _Robust Forecasting with Exponential
29 | and Holt-Winters smoothing_, useful for parameter values.
30 |
31 | - Computational tools for pandas
32 | http://pandas.pydata.org/pandas-docs/stable/computation.html
33 |
34 | N.B. - rolling_* methods, including rolling_apply, only work on one-dimensional
35 | array, thus we may work outside pandas in numpy, then bring back the results.
36 | See holt_winters_growth() vs. holt().
37 |
38 |
39 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
40 | 2016-12-20 Update introduction: tests and optimization.
41 | 2016-12-14 Fix initial guess of b[0] for holt_winters_growth(),
42 | especially critical when beta=0 e.g. in new ema().
43 | 2016-10-29 Per issue #5, ema() moved here from yi_1tools module.
44 | 2015-12-20 python3 compatible: lib import fix.
45 | 2015-12-17 python3 compatible: fix with yi_0sys
46 | 2015-02-21 Add holtgrow and holtpc functions.
47 | Fix holtlevel to truly include alpha and beta.
48 | 2014-09-21 Improve holt() by eliminating paste operation,
49 | and using todf from yi_1tools.
50 | 2014-09-15 Change HW alpha and beta defaults based on Gelper 2007.
51 | 2014-08-09 Clean-up graveyard and comments.
52 | 2014-08-08 First version covers Holt-Winters linear model.
53 | '''
54 |
55 | from __future__ import absolute_import, print_function
56 |
57 | import matplotlib.pyplot as plt
58 | import pandas as pd
59 | import numpy as np
60 |
61 | from . import yi_0sys as system
62 | from .yi_1tools import todf
63 |
64 |
65 | # Holt-Winters default parameters
66 | hw_alpha = 0.26 # Based on robust optimization in Gelper 2007,
67 | hw_beta = 0.19 # for Gaussian, fat tail, and outlier data.
68 |
69 |
70 | def holt_winters_growth( y, alpha=hw_alpha, beta=hw_beta ):
71 | '''Helper for Holt-Winters growth (linear) model using numpy arrays.'''
72 | # N.B. - SEASONAL variant of Holt-Winters is omitted.
73 | N = y.size # y should be a numpy array.
74 | # 0 < alpha and beta < 1
75 | alphac = 1 - alpha # Complements of alpha and beta
76 | betac = 1 - beta # pre-computed before the loop.
77 | # Create ndarrays filled with zeros to be updated
78 | # as the y data comes in:
79 | l = np.zeros(( N, )) # Fill level array with zeros.
80 | l[0] = y[0] # Initialize level.
81 | b = np.zeros(( N, )) # Smoothed one-step growths.
82 | # b[0] = y[1] - y[0] # Propagates errors if beta=0; fixed 2016-12-14:
83 | b[0] = 0 # Algorithmically the correct guess if beta=0.
84 | for i in range( 1, N ):
85 | l[i] = (alpha * y[i]) + (alphac * (l[i-1] + b[i-1]))
86 | ldelta = l[i] - l[i-1]
87 | # ^change in smoothed data = proxy for implicit growth.
88 | b[i] = (beta * ldelta) + (betac * b[i-1])
89 | # ^not ydelta !!
90 | return [ l, b ]
91 | # ^^^^ these are arrays.
92 |
93 |
94 | def holt( data, alpha=hw_alpha, beta=hw_beta ):
95 | '''Holt-Winters growth (linear) model outputs workout dataframe.'''
96 | # holt is an EXPENSIVE function, so retain its output for later.
97 | holtdf = todf( data ).dropna()
98 | # 'Y' ^else:
99 | # "ValueError: Length of values does not match length of index"
100 | y = holtdf.values # Convert to array.
101 | l, b = holt_winters_growth( y, alpha, beta )
102 | holtdf['Level'] = l
103 | holtdf['Growth'] = b
104 | # In effect, additional columns 'Level' and 'Growth'
105 | # for smoothed data and local slope,
106 | # along side the original index and given data:
107 | return holtdf
108 |
109 |
110 | def holtlevel( data, alpha=hw_alpha, beta=hw_beta ):
111 | '''Just smoothed Level dataframe from Holt-Winters growth model.'''
112 | # Useful to filter out seasonals, e.g. see X-11 method:
113 | # http://www.sa-elearning.eu/basic-algorithm-x-11
114 | return todf( holt( data, alpha, beta )['Level'] )
115 |
116 |
117 | def holtgrow( data, alpha=hw_alpha, beta=hw_beta ):
118 | '''Just the Growth dataframe from Holt-Winters growth model.'''
119 | # In terms of units expressed in data.
120 | return todf( holt( data, alpha, beta )['Growth'] )
121 |
122 |
123 | def holtpc( data, yearly=256, alpha=hw_alpha, beta=hw_beta ):
124 | '''Annualized percentage growth dataframe from H-W growth model.'''
125 | # yearly is the multiplier to annualize Growth.
126 | #
127 | # MOST VALUABLE H-W function <= !!
128 | # It contains the HISTORY of FORECASTED RATES!
129 | #
130 | holtdf = holt( data, alpha, beta )
131 | level = todf( holtdf['Level'] )
132 | grow = todf( holtdf['Growth'] )
133 | growan = todf( grow * yearly )
134 | return todf( 100 * ( growan / level ) )
135 |
136 |
137 | def holtforecast( holtdf, h=12 ):
138 | '''Given a dataframe from holt, forecast ahead h periods.'''
139 | # N.B. - holt forecasts by multiplying latest growth
140 | # by the number of periods ahead. Somewhat naive...
141 | # notice that the growth is based on smoothed levels.
142 | last = holtdf[-1:]
143 | y, l, b = last.values.tolist()[0]
144 | # df to array to list, but extract first element :-(
145 | forecasts = [y] + [ l + (b*(i+1)) for i in range(h) ]
146 | # ^last actual point
147 | return todf( forecasts, 'Forecast' )
148 |
149 |
150 | def plotholt( holtdf, h=12 ):
151 | '''Given a dataframe from holt, plot forecasts h periods ahead.'''
152 | # plotdf will not work since index there is assumed to be dates.
153 | holtforecast( holtdf, h ).plot( title='Holt-Winters linear forecast')
154 | return
155 |
156 |
157 | def ema( y, alpha=0.20 ):
158 | '''EXPONENTIAL MOVING AVERAGE using traditional weight arg.'''
159 | # y could be a dataframe.
160 | # ema is mathematically equivalent to holtlevel with beta=0,
161 | # thus issue #5 can be easily resolved for all pandas versions.
162 | return holtlevel( y, alpha, beta=0 )
163 |
164 |
165 | if __name__ == "__main__":
166 | system.endmodule()
167 |
168 |
169 | # ====================================== GRAVEYARD =============================
170 |
171 | # # Table 3-8 from Makridakis 1978:
172 | # makridakis_p65 = np.array( [ 143, 152, 161, 139, 137, 174, 142, 141, 162,
173 | # 180, 164, 171, 206, 193, 207, 218, 229, 225, 204, 227,
174 | # 223, 242, 239, 266 ] )
175 |
176 |
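177 | # # Hand-check of the holt_winters_growth() recursion with alpha=0.26 and
178 | # # beta=0.19, a sketch against the first two XAU points (1581.75, 1579.00)
179 | # # shown in the doctests of tests/test_timeseries.py:
180 | # #     l[0] = 1581.75 ;  b[0] = 0
181 | # #     l[1] = 0.26*1579.00 + 0.74*(1581.75 + 0)    = 1581.035  ~ 1581.04
182 | # #     b[1] = 0.19*(1581.035 - 1581.75) + 0.81*0   = -0.13585  ~ -0.14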
--------------------------------------------------------------------------------
/docs/others/Brown-2016-Data_worth.md:
--------------------------------------------------------------------------------
1 | ## How Much Is Your Data Worth? (edited excerpt)
2 |
3 | by Rich Brown, 24 Oct 2016, [Original version](https://www.linkedin.com/pulse/how-much-your-data-worth-rich-brown)
4 |
5 | Big banks and hedge funds make billions from trading and investing with data,
6 | and you’ve got some of the most unique alternative data on the market.
7 | So why shouldn’t you get millions of dollars per customer for your data?
8 | Well, the answer may not be that straightforward, but
9 | here are some factors that may help you in developing your case.
10 |
11 | ### Predictive Properties
12 |
13 | The million-dollar question and the one that trumps just about everything else
14 | is *whether your data set has predictive powers for the market*.
15 | You’ll need to know if it can predict something specific about financial markets
16 | (volume, volatility, returns) – and ideally back that up with proof.
17 | You will want to understand how to use it, what it does, when it works, when it doesn’t,
18 | what it is correlated with and what it contradicts, its Sharpe ratio, and
19 | its alpha (excess return over the market and other factors).
20 | As you look at the potential use cases, consider the following:
21 |
22 | - **Trading vs Investing** - As you approach the exercise, it is important to
23 | put your data in the context of how it is likely to be used.
24 | More precisely, is this a fast-moving data set likely to drive trading behavior
25 | (sub-second to days of position holding), or is it an infrequently updated data set
26 | likely to be used to understand trends over time and drive investment use cases
27 | (weeks to months or longer of position holding)?
28 |
29 | - **Historical Depth of Data** - The simple rule here is that the
30 | longer one expects the data to influence investment decisions, the longer the history you need.
31 | Trading use cases can usually be served with two years or less of data (assuming there are ample
32 | tradable events within that history to validate the data’s value, consistency, frequency, etc.).
33 | Investment data sets will often require far more than two years of history,
34 | sometimes needing to span multiple economic cycles.
35 |
36 | - **Macro vs Micro** - Similar to the trading vs investing use case,
 37 | is your data likely to drive macro or microstructure value/decisions?
38 | For example, is your data stock or instrument specific (microstructure in nature)
39 | or is it telling of a larger macro trend like US GDP or consumer confidence?
40 | In the former, there are many ways to both trade and invest and the
41 | timings of such plays can vary. In the latter case, there are also many
42 | ways to play this out, but because of the interconnected markets,
43 | once the primary trigger begins to move, the second-degree effects begin
44 | to take hold and your data may not be needed for those opportunities to be realized.
45 |
46 | - **Liquidity Considerations** - If you get past the predictive argument in your data,
47 | you still need to understand if there is enough liquidity in those trades –
48 | both to enter the position, and more importantly due to higher risk, to exit the position.
 49 | If one cannot capture the alpha your data shows it can generate,
 50 | it is of significantly less practical use.
51 |
52 | Aside from direct market correlations, consider also whether it can predict
53 | something else that quant firms are already using in their models
54 | (e.g. consumer confidence, interest rate movements).
55 | Predicting these events/signals may provide a quicker way to capitalize on the
56 | opportunities in front of you. While your original studies in this realm
 57 | may be somewhat limited, they can guide your sales team on who to target,
58 | how to position the data, and how to help clients test the data.
59 |
60 | ### Exclusivity
61 |
62 | Exclusivity tends to be a double-edged sword when it comes to how broadly
63 | you offer your data set and the premium you may be able to command.
 64 | Too many clients and your data’s value may erode.
65 | Too few clients and you may attract regulatory and media scrutiny,
66 | neither of which your prospects are keen on facing.
67 |
68 | Your data is not likely the only game in town.
69 | Direct substitutes may include data from other geolocation services,
70 | and may be numerous in nature (consider all of the apps on your phone that
71 | likely track your location). Also, while you may have the largest collection
72 | of consumer location data on the planet, a competitor offering a much
 73 | smaller sampling may be able to generate the same value at a lower cost.
74 | This will decrease the premium you may command for your service, so
75 | consider focusing on making the product more usable as a differentiator.
76 |
77 | Indirect substitutes can also undermine the value of your data set.
78 | Satellite imagery, credit/debit card purchase history, and even *aggregations*
79 | of your raw data may provide additional proxies for same store sales and thus
80 | reasonable alternatives to your offering. So don’t be too boastful about
81 | how special your data is or too greedy when it comes to pricing.
82 |
83 | ### Complexity and Product Variants
84 |
 85 | Complicated data sets that cannot be easily explained or consumed
86 | without significant subject matter expertise make it harder for your prospects to test.
87 | It will consume more resources on their side, lengthen your sales cycle,
88 | and limit the scale of your reach. In the case of location-based intelligence on consumers
89 | (a potential proxy for same store sales), billions of location data points per day
 90 | in latitude/longitude format might be hard for the average hedge fund to consume,
 91 | even for those with geospatial analytic expertise. Instead, consider mapping the
92 | lat/lon coordinates to store locations and offer your service as a number of
93 | visits per store per day/week/month or normalize data in several other ways.
94 |
95 | While you may be able to capture premiums from those firms that have the
96 | ability and willingness to pay for your “fire hose” of data, you may be missing
97 | quite a large segment of the market and should consider offering variants
98 | of your product which could include store chain reports, sector or sub-sector
99 | level offerings. You may also want to account for your audience and
100 | adjust your data snapshots to suit specific behaviors.
101 | For instance, futures traders are more interested in underlying commodity behavior
102 | and may look at all meat-serving restaurant sales to predict cattle futures, while an
103 | equities trader may be more interested in a specific publicly traded food service company.
104 |
105 | ### Content Consistency
106 |
107 | Does the data look the same and act the same over time?
108 | If your analytic models have evolved over time or
109 | if you’ve added significant features such as new fields and ontologies,
110 | it is important that you have a true Point-In-Time representation of the data.
111 | Put another way: at the time the data was published, what did you know about it, or
112 | what would you have known about it if your models in their current state existed back then?
113 | This may often mean rescoring the entire history so the historical data looks like
114 | how it would have been scored had those advancements been available at the
115 | time of content creation. You will also need versioning of your data set
116 | along with a robust data dictionary of how those changes occurred over time.
117 | This also helps minimize *look-ahead bias* when evaluating your data set.
118 |
119 | Do not underestimate the importance of good documentation!
120 | Well-documented data includes any relevant indexing by time, descriptions of how the data has
121 | changed over time, what the metadata fields mean, how it is assembled, any adjustments
122 | that are made, frequently asked questions, back-testing results showcasing
123 | how you performed the studies, etc. This will help ensure your data is
124 | interpreted correctly, speed up the evaluation, and reduce your overall support burden.
125 |
126 |
--------------------------------------------------------------------------------
/tests/test_timeseries.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-12-18
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_timeseries : Test fecon235 yi_timeseries module.
5 |
6 | - Include test of holt() and its workout dataframe.
7 | - Include test of ema() which is a special case of Holt-Winters.
8 |
9 | Doctests display at lower precision since equality test becomes fuzzy across
10 | different systems if full floating point representation is used.
11 |
12 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
13 | $ py.test --doctest-modules
14 |
15 | REFERENCE
16 | pytest: https://pytest.org/latest/getting-started.html
17 | or PDF at http://pytest.org/latest/pytest.pdf
18 |
19 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
20 | 2016-12-18 First version to verify fix #5 which revises ema():
21 | https://github.com/rsvp/fecon235/issues/5
22 | '''
23 |
24 | from __future__ import absolute_import, print_function
25 |
26 | from fecon235.lib import yi_0sys as system
27 | from fecon235.lib import yi_fred as fred
28 | from fecon235.lib import yi_1tools as tools
29 | from fecon235.lib import yi_timeseries as ts
30 | #
31 | # N.B. - In this tests directory without __init__.py,
32 | # we use absolute import as if outside the fecon235 package,
33 | # not relative import (cf. modules within lib).
34 |
35 |
36 | # # Show the CSV file zdata-xau-13hj-c30.csv:
37 | # # ^created in Linux environment...
38 | #
39 | # T,XAU
40 | # 2013-03-08,1581.75
41 | # 2013-03-11,1579.0
42 | # 2013-03-12,1594.0
43 | # 2013-03-13,1589.25
44 | # 2013-03-14,1586.0
45 | # 2013-03-15,1595.5
46 | # 2013-03-18,1603.75
47 | # 2013-03-19,1610.75
48 | # 2013-03-20,1607.5
49 | # 2013-03-21,1613.75
50 | # 2013-03-22,1607.75
51 | # 2013-03-25,1599.25
52 | # 2013-03-26,1598.0
53 | # 2013-03-27,1603.0
54 | # 2013-03-28,1598.25
55 | # 2013-03-29,1598.25
56 | # 2013-04-01,1598.25
57 | # 2013-04-02,1583.5
58 | # 2013-04-03,1574.75
59 | # 2013-04-04,1546.5
60 | # 2013-04-05,1568.0
61 | # 2013-04-08,1575.0
62 | # 2013-04-09,1577.25
63 | # 2013-04-10,1575.0
64 | # 2013-04-11,1565.0
65 | # 2013-04-12,1535.5
66 | # 2013-04-15,1395.0
67 | # 2013-04-16,1380.0
68 | # 2013-04-17,1392.0
69 | # 2013-04-18,1393.75
70 |
71 |
72 | def test_yi_timeseries_fecon235_Read_CSV_file():
73 | '''Read CSV file then check values.'''
74 | df = fred.readfile('zdata-xau-13hj-c30.csv')
75 | # readfile disregards XAU column name:
76 | assert [ col for col in df.columns ] == ['Y']
77 | assert df.shape == (30, 1)
78 | return df
79 |
80 |
81 | # Establish REFERENCE dataframe for tests below:
82 | xau = test_yi_timeseries_fecon235_Read_CSV_file()
83 |
84 |
85 | def test_yi_timeseries_fecon235_check_xau_DataFrame():
86 | '''Check xau dataframe.'''
87 | assert tools.tailvalue( xau ) == 1393.75
88 |
89 |
90 | def test_yi_timeseries_fecon235_check_workout_dataframe_from_holt():
91 | '''Get workout dataframe from holt(), then display in low precision.
92 | Arguments alpha and beta are explicitly set to default values.
93 | >>> xauholt = ts.holt( xau, alpha=0.26, beta=0.19 )
94 | >>> xauholt2 = xauholt.round(2)
95 | >>> xauholt2
96 | Y Level Growth
97 | T
98 | 2013-03-08 1581.75 1581.75 0.00
99 | 2013-03-11 1579.00 1581.04 -0.14
100 | 2013-03-12 1594.00 1584.31 0.51
101 | 2013-03-13 1589.25 1585.97 0.73
102 | 2013-03-14 1586.00 1586.52 0.70
103 | 2013-03-15 1595.50 1589.37 1.11
104 | 2013-03-18 1603.75 1593.93 1.76
105 | 2013-03-19 1610.75 1599.60 2.51
106 | 2013-03-20 1607.50 1603.51 2.77
107 | 2013-03-21 1613.75 1608.22 3.14
108 | 2013-03-22 1607.75 1610.42 2.96
109 | 2013-03-25 1599.25 1609.71 2.26
110 | 2013-03-26 1598.00 1608.34 1.57
111 | 2013-03-27 1603.00 1608.12 1.23
112 | 2013-03-28 1598.25 1606.46 0.68
113 | 2013-03-29 1598.25 1604.83 0.24
114 | 2013-04-01 1598.25 1603.30 -0.09
115 | 2013-04-02 1583.50 1598.08 -1.07
116 | 2013-04-03 1574.75 1591.23 -2.17
117 | 2013-04-04 1546.50 1578.00 -4.27
118 | 2013-04-05 1568.00 1572.24 -4.55
119 | 2013-04-08 1575.00 1569.59 -4.19
120 | 2013-04-09 1577.25 1568.48 -3.61
121 | 2013-04-10 1575.00 1567.51 -3.11
122 | 2013-04-11 1565.00 1564.56 -3.08
123 | 2013-04-12 1535.50 1554.73 -4.36
124 | 2013-04-15 1395.00 1509.97 -12.03
125 | 2013-04-16 1380.00 1467.27 -17.86
126 | 2013-04-17 1392.00 1434.49 -20.70
127 | 2013-04-18 1393.75 1408.58 -21.69
128 | '''
129 | # This output can be used to verify the initialization
130 | # and subsequent recursive computation by hand (with precision).
131 | pass
132 |
133 |
134 |
135 | def test_yi_timeseries_fecon235_check_workout_beta0_from_holt():
136 | '''Get workout dataframe from holt(), then display in low precision.
137 | Argument beta=0 esp. for ema() check, where its alpha defaults to 0.20.
138 | >>> xauholt_b0 = ts.holt( xau, alpha=0.20, beta=0 )
139 | >>> xauholt2_b0 = xauholt_b0.round(2)
140 | >>> xauholt2_b0
141 | Y Level Growth
142 | T
143 | 2013-03-08 1581.75 1581.75 0.0
144 | 2013-03-11 1579.00 1581.20 0.0
145 | 2013-03-12 1594.00 1583.76 0.0
146 | 2013-03-13 1589.25 1584.86 0.0
147 | 2013-03-14 1586.00 1585.09 0.0
148 | 2013-03-15 1595.50 1587.17 0.0
149 | 2013-03-18 1603.75 1590.49 0.0
150 | 2013-03-19 1610.75 1594.54 0.0
151 | 2013-03-20 1607.50 1597.13 0.0
152 | 2013-03-21 1613.75 1600.45 0.0
153 | 2013-03-22 1607.75 1601.91 0.0
154 | 2013-03-25 1599.25 1601.38 0.0
155 | 2013-03-26 1598.00 1600.70 0.0
156 | 2013-03-27 1603.00 1601.16 0.0
157 | 2013-03-28 1598.25 1600.58 0.0
158 | 2013-03-29 1598.25 1600.11 0.0
159 | 2013-04-01 1598.25 1599.74 0.0
160 | 2013-04-02 1583.50 1596.49 0.0
161 | 2013-04-03 1574.75 1592.14 0.0
162 | 2013-04-04 1546.50 1583.02 0.0
163 | 2013-04-05 1568.00 1580.01 0.0
164 | 2013-04-08 1575.00 1579.01 0.0
165 | 2013-04-09 1577.25 1578.66 0.0
166 | 2013-04-10 1575.00 1577.93 0.0
167 | 2013-04-11 1565.00 1575.34 0.0
168 | 2013-04-12 1535.50 1567.37 0.0
169 | 2013-04-15 1395.00 1532.90 0.0
170 | 2013-04-16 1380.00 1502.32 0.0
171 | 2013-04-17 1392.00 1480.25 0.0
172 | 2013-04-18 1393.75 1462.95 0.0
173 | '''
174 | # This test helped to fix the bug described in #5:
175 | # https://github.com/rsvp/fecon235/issues/5
176 | # Growth column must be all zeros when beta=0.
177 | pass
178 |
179 |
180 |
181 | def test_yi_timeseries_fecon235_check_ema():
182 | '''Function ema() reads off the Level column via holtlevel(),
183 | given beta fixed at 0. Its alpha defaults to 0.20.
184 | >>> xauema = ts.ema( xau, alpha=0.20 )
185 | >>> xauema2 = xauema.round(2)
186 | >>> xauema2
187 | Y
188 | T
189 | 2013-03-08 1581.75
190 | 2013-03-11 1581.20
191 | 2013-03-12 1583.76
192 | 2013-03-13 1584.86
193 | 2013-03-14 1585.09
194 | 2013-03-15 1587.17
195 | 2013-03-18 1590.49
196 | 2013-03-19 1594.54
197 | 2013-03-20 1597.13
198 | 2013-03-21 1600.45
199 | 2013-03-22 1601.91
200 | 2013-03-25 1601.38
201 | 2013-03-26 1600.70
202 | 2013-03-27 1601.16
203 | 2013-03-28 1600.58
204 | 2013-03-29 1600.11
205 | 2013-04-01 1599.74
206 | 2013-04-02 1596.49
207 | 2013-04-03 1592.14
208 | 2013-04-04 1583.02
209 | 2013-04-05 1580.01
210 | 2013-04-08 1579.01
211 | 2013-04-09 1578.66
212 | 2013-04-10 1577.93
213 | 2013-04-11 1575.34
214 | 2013-04-12 1567.37
215 | 2013-04-15 1532.90
216 | 2013-04-16 1502.32
217 | 2013-04-17 1480.25
218 | 2013-04-18 1462.95
219 | '''
220 | # Our revised exponential moving average function was recently
221 | # written as a special case of our Holt-Winters routines,
222 | # instead of the rolling average function offered by pandas.
223 | pass
224 |
225 |
226 |
227 | if __name__ == "__main__":
228 | system.endmodule()
229 |
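230 | #==============================================================================
231 | # WORKED ARITHMETIC behind the beta=0 doctests above (a sketch, not itself
232 | # a test): with beta=0 the Growth column stays at zero, so Level reduces to
233 | # a plain exponential moving average:
234 | #
235 | #     Level[1] = 0.20*1579.00 + 0.80*(1581.75 + 0) = 1581.20
236 | #     # which matches both xauholt2_b0 and xauema2 on 2013-03-11.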
--------------------------------------------------------------------------------
/lib/ys_prtf_boltzmann.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-07-08
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| ys_prtf_boltzmann.py : Boltzmann portfolio
5 |
6 | Alternative to Markowitz portfolio. Usage demonstrated in notebook, see
7 | nb/prtf-boltzmann-1.ipynb for explicit details and derivation.
8 |
9 | The softmax() function is in lib/ys_mlearn.py since it applies more widely
10 | in machine learning.
11 |
 12 | We have virtually no control over how the assets perform and interact. Only
13 | the portfolio allocation over time is in our decision set. Let's recast the
14 | underlying assets as agents which supposedly will help increase our wealth.
15 | Our task will be to select the expert(s) among the agents and to allocate
16 | portions of our current wealth.
17 |
18 | To discriminate among the agents we need their performance metrics. Since our
19 | objective is to maximize future wealth, the optimal metric is the geometric
20 | mean rate of each agent. From our research we know how to include risks,
21 | including leptokurtotic events ("fat tails"), into that single metric.
22 |
 23 | There is evidence that the performances of some agents are inter-correlated.
24 | Therefore, rather than select a single expert, we choose to diversify our bets
25 | among a few agents, and call that our "portfolio." To maximize the geometric
26 | mean rate of the portfolio, the second order condition is to minimize its
27 | variance. That problem is easily solved by borrowing the weights of what is
28 | known as the "Markowitz global minimum variance portfolio."
29 |
30 | Those weights depend on the covariance structure of the agents' performance
31 | which is unfortunately not stable over time. There may be some information
32 | which can be exploited to tilt our bets favorably.
33 |
34 |
35 | prices ---> cov ---> globalw
36 | | |
37 | | trimit <-- floor
38 | | renormalize
39 | | |
40 | v v
41 | | |
42 | gemrat weights
43 | | |
44 | |________scores______|
45 | |
46 | | Boltzmann
47 | temp --> softmax --> probs --> pweights
48 |
49 |
50 | The Markowitz weights may suggest that we bet against the consistently poor
51 | performance of some agents. We shall generally regard the weights as
52 | advisory, taking what suits us and renormalizing.
53 |
54 | To summarize the information set so far, we cast the agents in a game, each
55 | with some score. When the game consists of multiple rounds, we can use tools
56 | from reinforcement learning to help us make the best sequential decisions.
57 |
58 | The softmax function is fed the scores to compute the probability of a
59 | particular agent being the expert. This function takes temperature as a
 60 | diffusion parameter, that is, a control over how widely we diversify our bets across
61 | possible experts. The theory here is due to Ludwig Boltzmann and his work
62 | on statistical mechanics and entropy. But the temperature setting can also be
63 | seen as a Bayesian way to express the overall uncertainty involved with
64 | estimating the various measures.
65 |
66 | Finally, those probabilities are combined with our renormalized weights to
67 | arrive at "pweights," our portfolio weights.
68 |
69 |
70 | REFERENCES
71 |
72 | - John H. Cochrane, 2005, Asset Pricing, Princeton U. Press.
73 |
74 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
75 | 2017-07-08 Add narrative.
76 | 2017-06-30 Revise boltzportfolio() as list, not print.
77 | Increase default precision when using groupgemrat().
78 | 2017-06-28 Condense functions described in Part 1 notebook.
79 | 2017-06-26 First version.
80 | '''
81 |
82 | from __future__ import absolute_import, print_function, division
83 | import numpy as np
84 | import fecon235.fecon235
85 | # ^SOLE circular import style which works for Python 2 & 3.
86 | from . import yi_0sys as system
87 | from . import yi_1tools as tools
88 | from . import yi_matrix as matrix
89 | # ^avoiding the np matrix type, stick with arrays!
90 | from . import ys_mlearn as mlearn
91 |
92 |
93 | def weighcov( cov ):
94 | '''WEIGHT array (N,1) for Global Min Var Portfolio, given cov.'''
95 | # Derived in Cochrane (2005), chp. 5, p.83.
96 | Viv = matrix.invert_pseudo( cov )
97 | # ^in case covariance matrix is ill-conditioned.
98 | one = np.ones( (cov.shape[0], 1) )
99 | top = Viv.dot(one)
100 | bot = one.T.dot(Viv).dot(one)
101 | return top / bot
102 |
103 |
104 | def weighcovdata( dataframe ):
105 | '''WEIGHT array (N,1) for Global Min Var Portfolio, given data.'''
106 | V = fecon235.fecon235.covdiflog( dataframe )
107 | return weighcov(V)
108 |
109 |
110 | def trimit( it, floor, level ):
111 | '''For an iterable, accept values > floor, else set to level.'''
112 | try:
113 | # ... in case "it" array elements are integers,
114 | # else we cannot assign floats later when enumerating:
115 | it = it.astype(np.float64)
116 | except:
117 | pass
118 | cpit = it[:]
119 | for i, x in enumerate(it):
120 | cpit[i] = x if x > floor else level
121 | # Output should be of same type as it:
122 | return cpit
123 |
124 |
125 | def renormalize( it ):
126 | '''Let elements of an iterable proportionally abs(sum) to 1.
127 | Renormalization of portfolio weights is treated differently
128 | than probabilities which cannot be negative.
129 | '''
130 | # Remember that a list is an iterable, too.
131 | arr = np.array([ float(x) for x in it ])
132 | sumit = float(np.sum(arr))
133 | try:
134 | # ... in case "it" array elements are integers,
135 | # else we cannot assign floats later when enumerating:
136 | it = it.astype(np.float64)
137 | except:
138 | pass
139 | cpit = it[:]
140 | for i, x in enumerate(it):
141 | cpit[i] = x / abs(sumit)
142 | # ^preserves signs within it.
143 | # Output should be of same type as it:
144 | return cpit
145 |
146 |
147 | def rentrim( weights, floor, level ):
148 | '''Accept weight > floor, else set to level, then renormalize.'''
149 | trimmed = trimit( weights, floor, level )
150 | return renormalize(trimmed)
151 |
152 |
153 | def gemratarr( dataframe, yearly=256 ):
154 | '''Extract geometric mean rate of each column into an array.'''
155 | gems = fecon235.fecon235.groupgemrat( dataframe, yearly, order=False, n=8 )
156 | return np.array([item[0] for item in gems]).reshape(len(gems), 1)
157 |
158 |
159 | def weighsoft( weights, rates, temp, floor, level ):
160 | '''Given weights, compute pweights as array by softmax transform.'''
161 | scores = weights * rates
162 | problist = mlearn.softmax( scores, temp )[-1]
163 | probs = np.array( problist ).reshape(len(problist), 1)
164 | # Revise weights based on softmax probabilities:
165 | pweights = probs * weights
166 | # Then appropriately adjust:
167 | return rentrim(renormalize(pweights), floor, level)
168 |
169 |
170 | def boltzweigh(dataframe, yearly=256, temp=55, floor=0.01, level=0):
171 | '''Given data, compute pweights as array by softmax transform.'''
172 | rates = gemratarr(dataframe, yearly)
173 | globalw = weighcovdata(dataframe)
174 | weights = rentrim(globalw, floor, level)
175 | pweights = weighsoft(weights, rates, temp, floor, level)
176 | return pweights
177 |
178 |
179 | def boltzportfolio(dataframe, yearly=256, temp=55, floor=0.01, level=0, n=4):
180 | '''MAIN: SUMMARY of Boltzmann portfolio, rounded to n-decimal places.
181 | Return list where computed values are Python floats, not array type, e.g.
182 | [2.7833,
183 | [[0.6423, 2.05, 'America'],
184 | [0.0, -11.17, 'Emerging'],
185 | [0.0, -10.47, 'Europe'],
186 | [0.3577, 4.1, 'Gold'],
187 | [0.0, -4.99, 'Japan']]]
188 | The portfolio's geometric mean rate is included first.
189 | Each sub-sublist will consist of weight, rate, and key.
190 | The order of keys from the dataframe is preserved.
191 | '''
192 | rates = gemratarr(dataframe, yearly)
193 | globalw = weighcovdata(dataframe)
194 | weights = rentrim(globalw, floor, level)
195 | pweights = weighsoft(weights, rates, temp, floor, level)
196 | # ---- so far should be the same as boltzweigh()
197 | scores = pweights * rates
198 | grat = round(float(np.sum(scores)), n)
199 | keys = list(dataframe.columns)
200 | # wrk, i.e. "weight, rate, key", is a list of lists:
201 | wrk = [tools.roundit([float(w), float(rates[i]), keys[i]], n, echo=False)
202 | for i, w in enumerate(pweights)]
203 | return [ grat, wrk ]
204 |
205 |
206 | if __name__ == "__main__":
207 | system.endmodule()
208 |
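209 | #==============================================================================
210 | # NUMERICAL SKETCH, hedged, of weighcov() on a toy 2x2 diagonal covariance
211 | # (uncorrelated assets), where the Global Min Var weights reduce to
212 | # inverse-variance weighting; the numbers below are easily checked by hand:
213 | #
214 | #     cov = np.array([[ 0.04, 0.00 ],
215 | #                     [ 0.00, 0.01 ]])
216 | #     w = weighcov( cov )
217 | #     # Viv = diag(25, 100);  top = [25, 100]';  bot = 125
218 | #     # so w = [[0.2], [0.8]]: the less volatile asset gets the larger weight.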
--------------------------------------------------------------------------------
/lib/ys_optimize.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-04-08
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| ys_optimize.py : Convex optimization given noisy data.
5 |
6 | We smooth some of the rough edges among the "scipy.optimize" algorithms. Our
7 | "optimize" algorithm first begins by a coarse grid search, then unconstrained
8 | Nelder-Mead simplex method, and finally the refined L-BFGS-B method which
9 | approximates a low-rank Hessian so that we can work in high (>250) dimensions.
10 |
11 | USAGE: please see tests/test_optimize.py which also serves as a TUTORIAL for
12 | optimization of loss functions, given data and model.
13 |
14 | Our selected methods feature the following and their unification:
15 |
16 | Suitability: non-convex problem: GLOBAL optimizers:
17 | If your problem does NOT admit a unique local minimum (which can be hard
18 | to test unless the function is convex), and you do not have prior
19 | information to initialize the optimization close to the solution, you may
20 | need a global optimizer. Note that Simulated Annealing has been deprecated
21 | as of scipy version 0.14.0. Brute force uses a grid search:
22 | scipy.optimize.brute() evaluates the function on a given grid of
23 | parameters and returns the parameters corresponding to the minimum value.
24 |
25 | Suitability: if data is NOISY:
26 | Nelder-Mead simplex method (scipy.optimize.fmin()) has a long history of
27 | successful use in applications, but it will usually be slower than an
28 | algorithm that uses first or second derivative information. In practice it
29 | can have poor performance in high-dimensional problems and is not robust
30 | to minimizing complicated functions. Currently there is no complete
31 | theory describing when the algorithm will successfully converge to the
32 | minimum, or how fast it will if it does.
33 |
34 | Suitability: WITH knowledge of the gradient: quasi-Newton methods:
35 | BFGS (scipy.optimize.fmin_bfgs()), or
36 | L-BFGS-B (scipy.optimize.fmin_l_bfgs_b())
37 | where the former has larger computational overhead.
38 | Knowledge here means analytical representation.
39 | BFGS abbreviates Broyden-Fletcher-Goldfarb-Shanno.
40 |
41 | Suitability: WITHOUT knowledge of the gradient:
42 | L-BFGS-B (scipy.optimize.fmin_l_bfgs_b())
43 | where gradient need not be provided analytically.
44 | Constraints are optional, so this method is excellent
45 | if you have a specific strategy.
46 |
47 | General strategy:
48 | For scipy.optimize.minimize():
49 | http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
 50 |     a single method must be selected. However, our optimize() is a sequence of
51 | methods which starts from brute to refined, in above order.
52 |
53 | References:
54 | - Mathematical optimization using scipy
55 | http://www.scipy-lectures.org/advanced/mathematical_optimization
56 |
57 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
58 | 2016-04-08 Clarify comments.
59 | 2016-04-06 Semantic change of names to avoid misunderstanding.
60 | minimize() -> optimize()
61 | For optimize(): boundpairs -> initialpairs
62 | Make funarg=() as default.
63 | 2016-04-04 First fecon235 version.
64 | '''
65 |
66 | from __future__ import absolute_import, print_function, division
67 |
68 | import numpy as np # for numerical work.
69 | import scipy.optimize as sop # optimization routines.
70 | from . import yi_0sys as system
71 |
72 |
73 | DISPLAY = 0
74 | # 0 suppresses display, 1 for stdout, 2 for logging to iterate.dat
75 | # Non-zero for debugging which could change output format of "result" below.
76 | # Some routines offer "full_output" if you want messy iterative evaluations.
77 |
78 |
79 | # NOTICE: TUPLE "funarg" is used to specify arguments to function "fun"
80 | # which are NOT the parameters to be optimized (e.g. data).
81 | # Gotcha: Remember a single-element tuple must include
82 | # that mandatory comma: ( alone, )
83 | #
84 | # Please see tests/test_optimize.py which also serves as a TUTORIAL.
85 |
86 |
87 | def minBrute( fun, boundpairs, funarg=(), grids=20 ):
88 | '''Minimization by brute force grid search.
89 | fun is our function to minimize, given parameters for optimization.
90 | boundpairs is a list of (min, max) pairs for fun parameters.
91 | funarg is a tuple of supplemental arguments for fun.
 92 |        grids is the number of steps taken in each direction.
93 | '''
94 | # http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.brute.html
95 | boundpairs = tuple( boundpairs )
96 | # boundpairs actually must be a tuple consisting of (min,max) tuples.
97 | if DISPLAY:
98 | print(" :: Display for minBrute() ... ")
99 | result = sop.brute( func=fun, args=funarg, ranges=boundpairs, Ns=grids,
100 | finish=None, full_output=DISPLAY )
101 | # finish default is "fmin" (Nelder-Mead),
102 | # which may not respect boundpairs !!!
103 | # https://github.com/scipy/scipy/issues/1613
104 | # Estimated minimum is returned as ndarray if DISPLAY=0,
105 | # otherwise we see all grid evaluations inside a tuple
106 | # but the minimum in ndarray format is available as result[0].
107 | return result
108 |
109 |
110 | def minNelder( fun, initial, funarg=() ):
111 | '''Nelder-Mead simplex algorithm.
112 | fun is our function to minimize, given parameters for optimization.
113 | initial parameter guesses must be an ndarray, i.e. np.array([...])
114 | funarg is a tuple of supplemental arguments for fun.
115 | '''
116 | # http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin.html
117 | # Nelder, J.A. and Mead, R. (1965), "A simplex method for function
118 | # minimization", The Computer Journal, 7, pp. 308-313
119 | if DISPLAY:
120 | print(" :: Display for minNelder() ... ")
121 | result = sop.fmin( func=fun, args=funarg, x0=initial, disp=DISPLAY)
122 | # Estimated minimum is returned as ndarray:
123 | return result
124 |
125 |
126 | def minBroyden( fun, initial, funarg=(), boundpairs=None ):
127 | '''Broyden-Fletcher-Goldfarb-Shanno L-BFGS-B algorithm with box boundaries.
128 | At each step an approximate low-rank Hessian is refined,
129 | so this should work in high (>250) dimensions.
130 | fun is our function to minimize, given parameters for optimization.
131 | initial parameter guesses must be an ndarray, i.e. np.array([...])
132 | funarg is a tuple of supplemental arguments for fun.
133 | boundpairs is an OPTIONAL list of (min, max) pairs for fun parameters,
134 | where None can be used for either min or max to indicate no bound.
135 | '''
136 | # http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_l_bfgs_b.html
137 | # Ref: C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
138 | # FORTRAN routines for large scale bound constrained optimization (1997),
139 | # ACM Transactions on Mathematical Software, 23, 4, pp. 550-560.
140 | # scipy function is actually a Python wrapper around Fortran code.
141 | if DISPLAY:
142 | print(" :: Display for minBroyden() ... ")
143 | result = sop.fmin_l_bfgs_b( func=fun, args=funarg, x0=initial, bounds=boundpairs,
144 | approx_grad=True, disp=DISPLAY )
145 | # MUST set approx_grad=True unless you want to compute the gradient analytically
146 |     # and provide it via the fprime argument.
147 | #
148 | # Sample result which is a tuple:
149 | # (array([ 88.79999999, 77.70000008]), 1.639480801226924e-13,
150 | # {'warnflag': 0, 'task': 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH',
151 | # 'grad': array([ 4.17384459e-05, 6.34588833e-06]), 'nit': 6, 'funcalls': 30})
152 | #
153 | # So just the estimated minimum as ndarray:
154 | return result[0]
155 |
156 |
157 | def optimize( fun, initialpairs, funarg=(), grids=20 ):
158 | '''Optimize by grid search, Nelder-Mead simplex, and L-BFGS-B methods.
159 | First a broad global search, followed by coarse non-gradient method,
160 | then refined quasi-Newton method by approximate low-rank Hessian.
161 | fun is our function to minimize, given parameters for optimization.
162 | funarg is a tuple of supplemental arguments for fun.
163 | initialpairs is a list of (min, max) pairs for fun parameters.
164 |        grids is the number of steps taken in each direction.
165 | However, here we are intentionally NOT CONSTRAINED by initialpairs.
166 | '''
167 | # The argument initialpairs can be just our preliminary wild guess.
168 | # minBrute will respect initialpairs as strict boundpairs using grids,
169 | # however, better and better initial point estimates are passed
170 | # along to other algorithms which will ignore any strict bounds
171 | # if the minimization can be improved.
172 | brute = minBrute(fun=fun, funarg=funarg, boundpairs=initialpairs, grids=grids)
173 | if DISPLAY:
174 | print( brute )
175 | brute = brute[0]
176 | # ^but just the ndarray part for next initial:
177 | nelder = minNelder( fun=fun, funarg=funarg, initial=brute )
178 | if DISPLAY:
179 | print( nelder )
180 | broyden = minBroyden(fun=fun, funarg=funarg, initial=nelder, boundpairs=None)
181 | # broyden should NOT set boundpairs=initialpairs because
182 | # nelder may have found something better outside initialpairs.
183 | # Thus nelder and broyden are both unconstrained results.
184 | if DISPLAY:
185 | print( broyden )
186 | # broyden is our final estimated minimum as ndarray:
187 | return broyden
188 |
189 |
190 | if __name__ == "__main__":
191 | system.endmodule()
192 |
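193 | #==============================================================================
194 | # USAGE SKETCH, hedged: least-squares fit of two parameters via the
195 | # brute -> Nelder-Mead -> L-BFGS-B sequence. The data and names below are
196 | # illustrative only; tests/test_optimize.py remains the real TUTORIAL.
197 | #
198 | #     def sqloss( params, data ):
199 | #         a, b = params
200 | #         return np.sum( (data - (a + b*np.arange(len(data))))**2 )
201 | #     data = np.array([ 1.0, 3.1, 4.9, 7.2 ])
202 | #     best = optimize( sqloss, initialpairs=[(0., 10.), (0., 10.)],
203 | #                      funarg=(data,), grids=20 )
204 | #     # best is an ndarray, approximately [intercept, slope] ~ [0.99, 2.04]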
--------------------------------------------------------------------------------
/lib/yi_0sys.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-05-15
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_0sys.py : system and date functions including specs.
5 |
6 | Code in this module must be compatible with both Python 2 and 3.
7 | It is a bridge and a guardian between the two Pythons.
8 |
9 | For example, it is used in the preamble of fecon235 Jupyter notebooks.
10 |
11 |
12 | REFERENCES:
13 | - Compatible IDIOMS: http://python-future.org/compatible_idioms.html
14 | Nice presentation.
15 |
16 | - SIX module is exhaustive: https://pythonhosted.org/six/
17 | Single file source: https://bitbucket.org/gutworth/six
18 |
19 |
20 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
21 | 2017-05-15 Add timestamp() per strict RFC-3339 standard.
22 | Also include scipy and sympy in specs().
23 | 2017-03-16 Revise: minimumPandas = 18.0
24 | 2016-04-28 Use version('jupyter_core') instead of version('jupyter').
25 | Update to PREAMBLE-p6.16.0428
26 | 2015-12-29 For errf in gitinfo(), our dev_null instead of os.devnull
27 | Add minimumPandas variable esp. for tests.
28 | 2015-12-27 Get jupyter version among specs().
29 | Fix run command to accept errf argument.
30 | For specs(), move gitinfo try clause therein.
31 | 2015-12-23 Add run command and gitinfo functions.
32 | Update to PREAMBLE-p6.15.1223
33 | 2015-12-19 python3 compatible: absolute_import
34 | 2015-12-03 First version.
35 | '''
36 |
37 | from __future__ import absolute_import, print_function
38 | # __future__ for Python 2 and 3 compatibility; must be first in file.
39 | import sys
40 | import os
41 | import time
42 | from subprocess import check_output, STDOUT
43 | # ^for Python 2.7 and 3+
44 |
45 |
46 | minimumPython = ( 2, 7, 0 )
47 | # ... else a warning is generated in specs().
48 | minimumPandas = 18.0
49 | # ^after replacing the first dot, then float.
50 |
51 |
52 | # Open /dev/null equivalent file for Unix and Windows:
53 | dev_null = os.open(os.devnull, os.O_RDWR)
54 | # ^Cross-platform read and write.
55 | # os.devnull is just a string: "/dev/null" or "nul"
56 | # thus redirecting to os.devnull is insufficient
57 | # and that alone will cause a fileno error.
58 | # We could later close it by: os.close(dev_null). Leave open.
59 | # See gitinfo() for example of usage.
60 |
61 |
62 | def getpwd():
63 | '''Get present working directory (Linux command is pwd).
64 | Works cross-platform, giving absolute path.
65 | '''
66 | return os.getcwd()
67 |
68 |
69 | def program():
70 | '''Get name of present script; works cross-platform.'''
71 | # Note: __file__ can get only the name of this module.
72 | return os.path.basename(sys.argv[0])
73 |
74 |
75 | def warn( message, stub="WARNING:", prefix=" !. "):
76 | '''Write warning solely to standard error.'''
77 | print(prefix, stub, program(), message, sep=' ', file=sys.stderr)
78 |
79 |
80 | def die( message, errcode=1, prefix=" !! "):
81 | '''Gracefully KILL script, optionally specifying error code.'''
82 | stub = "FATAL " + str(errcode) + ":"
83 | warn( message, stub, prefix )
84 | sys.exit( errcode )
85 | # ^interpretation is system dependent;
86 | # generally non-zero is considered as some error.
87 | # Note: "os._exit" exits without calling cleanup handlers,
88 | # flushing stdio buffers, etc. Thus, it is not a standard way.
89 |
90 |
91 | def date( hour=True, utc=True, localstr=' Local' ):
92 | '''Get date, and optionally time, as ISO string representation.
93 | Boolean hour variable also gives minutes and seconds.
94 | Setting utc to False will give local time instead of UTC,
95 | then localstr can be used to indicate location.
96 | '''
97 | if hour:
98 | form = "%Y-%m-%d, %H:%M:%S"
99 | else:
100 | form = "%Y-%m-%d"
101 | if utc:
102 | form += ' UTC'
103 | tup = time.gmtime()
104 | else:
105 | form += localstr
106 | tup = time.localtime()
107 | return time.strftime( form, tup )
108 |
109 |
110 | def timestamp():
111 | '''Timestamp per strict RFC-3339 standard where timezone Z:=UTC.'''
112 | form = "%Y-%m-%dT%H:%M:%SZ"
113 | tup = time.gmtime()
114 | return time.strftime( form, tup )
115 |
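# Usage sketch; outputs shown are illustrative, not actual:
#     date()              # -> '2017-05-15, 17:00:00 UTC'
#     date(hour=False)    # -> '2017-05-15 UTC'
#     date(utc=False)     # -> '2017-05-15, 10:00:00 Local'
#     timestamp()         # -> '2017-05-15T17:00:00Z'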
116 |
117 | def pythontup():
118 | '''Represent invoked Python version as an integer 3-tuple.'''
119 | # Using sys.version is overly verbose.
120 | # Here we get something like (2, 7, 10) which can be compared.
121 | return sys.version_info[:3]
122 |
123 |
124 | def versionstr( module="IPython" ):
125 | '''Represent version as a string, or None if not installed.'''
126 | # Unfortunately must treat Python vs its modules differently...
127 | if module=="Python" or module=="python":
128 | ver = pythontup()
129 | return str(ver[0]) + '.' + str(ver[1]) + '.' + str(ver[2])
130 |     else:
131 |         try:
132 |             import importlib
133 |             # importlib, since exec() cannot bind function locals in Python 3:
134 |             return importlib.import_module( module ).__version__
135 |         except (ImportError, AttributeError):
136 |             return None
137 |
138 |
139 | def versiontup( module="IPython" ):
140 | '''Parse version string into some integer 3-tuple.'''
141 | s = versionstr(module)
142 | try:
143 | v = [ int(k) for k in s.split('.') ]
144 | return tuple(v)
145 |     except (AttributeError, ValueError):
146 |         # e.g. if not installed or not convertible to integers...
147 |         if s is None:
148 |             return ( 0, 0, 0)
149 |         else:
150 |             return (-9, -9, -9)
151 |
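# Usage sketch, e.g. to guard version-sensitive code
# (assuming pandas is importable, else versiontup() gives (0, 0, 0)):
#     if versiontup("pandas") < (0, 18, 0):
#         warn("pandas may be too old for some fecon235 modules.")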
152 |
153 | def version( module="IPython" ):
154 | '''Pretty print Python or module version info.'''
155 | print(" :: ", module, versionstr(module))
156 |
157 |
158 | def utf( immigrant, xnl=True ):
159 |     '''Decode bytes as utf-8 string, and possibly delete newline character.
160 | xnl means "delete new line"
161 | '''
162 | if xnl:
163 | # Decodes to utf-8, plus deletes new line.
164 | return immigrant.decode('utf-8').strip('\n')
165 | else:
166 | # Decode for compliance to utf-8:
167 | return immigrant.decode('utf-8')
168 |
169 |
170 | def run( command, xnl=True, errf=None ):
171 | '''RUN **quote and space insensitive** SYSTEM-LEVEL command.
172 | OTHERWISE: use check_output directly and list component
173 | parts of the command, e.g.
174 | check_output(["git", "describe", "--abbrev=0"])
175 | then generally use our utf() since check_output
176 | usually does not return utf-8, so be prepared to
177 | receive bytes and also new line.
178 | '''
179 | # N.B. - errf=None means the usual error transmittal.
180 | # Cross-platform /dev/stdout is STDOUT
181 | # Cross-platform /dev/null is our dev_null above.
182 | # https://docs.python.org/2/library/subprocess.html
183 | return utf( check_output(command.split(), stderr=errf), xnl )
184 |
185 |
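# Equivalence sketch (assumes git is installed and on the PATH):
#     run("git describe --abbrev=0")
# behaves like:
#     utf( check_output(["git", "describe", "--abbrev=0"]) )
# both returning a clean str instead of raw bytes with a trailing newline.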
186 |
187 | def gitinfo():
188 | '''From git, get repo name, current branch and annotated tag.'''
189 | # Suppressing error messages by os.devnull seems cross-platform,
190 | # but it is just a string, so use our open file dev_null instead.
191 | try:
192 | repopath = run("git rev-parse --show-toplevel", errf=dev_null)
193 | # ^returns the dir path plus working repo name.
194 | repo = os.path.basename(repopath)
195 | tag = run("git describe --abbrev=0", errf=dev_null)
196 | # ^no --tags because we want annotated tags.
197 | bra = run("git symbolic-ref --short HEAD", errf=dev_null)
198 | # ^returns the current working branch name.
199 | return [repo, tag, bra]
200 | except CalledProcessError:
201 | # Probably outside git boundaries...
202 | return ['git_repo_None', 'tag_None', 'branch_None']
203 |
204 |
205 | def specs():
206 | '''Show ecosystem specifications, including execution timestamp.'''
207 | # APIs are subject to change, so versions are critical for debugging:
208 | version("Python")
209 | if pythontup() < minimumPython:
210 | warn("may need newer Python version.")
211 | version("IPython")
212 | version("jupyter_core")
213 | version("notebook")
214 | # ^worked for Jupyter notebook 4.0.6
215 | version("matplotlib")
216 | version("numpy")
217 | # ^dependency for pandas
218 | version("scipy")
219 | version("sympy")
220 | version("pandas")
221 | version("pandas_datareader")
222 | # ^but package is "pandas-datareader" esp. for financial quotes.
223 | repo, tag, bra = gitinfo()
224 | print(" :: Repository:", repo, tag, bra )
225 | print(" :: Timestamp:", timestamp() )
226 |
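# Illustrative specs() output sketch (all versions hypothetical):
#      ::  Python 2.7.13
#      ::  IPython 5.1.0
#      ::  pandas 0.19.2
#      ::  Repository: fecon235 v5.17.0603 master
#      ::  Timestamp: 2017-05-15T17:00:00Z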
227 |
228 | if pythontup() < (3, 0, 0):
229 | '''ROSETTA STONE FUNCTIONS approximately bridging Python 2 and 3.
230 | e.g. answer = get_input("Favorite animal? ")
231 | print(answer)
232 | '''
233 | get_input = raw_input
234 | else:
235 | get_input = input
236 | # ^beware of untrustworthy arguments!
237 |
238 |
239 | def endmodule():
240 | '''Procedure after __main__ conditional in modules.'''
241 | die("is a MODULE for import, not for direct execution.", 113)
242 |
243 |
244 | if __name__ == "__main__":
245 | endmodule()
246 |
247 |
248 |
249 | '''
250 | _______________ Appendix 1: PREAMBLE for Jupyter NOTEBOOKS
251 | Input cell for settings and system details:
252 |
253 | CHANGE LOG
254 | 2016-04-28 Remove old LaTeX warnings:
255 | # Beware, for MATH display, use %%latex, NOT the following:
256 | # from IPython.display import Math
257 | # from IPython.display import Latex
258 |
259 |
260 | # PREAMBLE-p6.16.0428 :: Settings and system details
261 | from __future__ import absolute_import, print_function
262 | system.specs()
263 | pwd = system.getpwd() # present working directory as variable.
264 | print(" :: $pwd:", pwd)
265 | # If a module is modified, automatically reload it:
266 | %load_ext autoreload
267 | %autoreload 2
268 | # Use 0 to disable this feature.
269 |
270 | # Notebook DISPLAY options:
271 | # Represent pandas DataFrames as text; not HTML representation:
272 | import pandas as pd
273 | pd.set_option( 'display.notebook_repr_html', False )
274 | from IPython.display import HTML # useful for snippets
275 | # e.g. HTML('')
276 | from IPython.display import Image
277 | # e.g. Image(filename='holt-winters-equations.png', embed=True) # url= also works
278 | from IPython.display import YouTubeVideo
279 | # e.g. YouTubeVideo('1j_HxD4iLn8', start='43', width=600, height=400)
280 | from IPython.core import page
281 | get_ipython().set_hook('show_in_pager', page.as_hook(page.display_page), 0)
282 | # Or equivalently in config file: "InteractiveShell.display_page = True",
283 | # which will display results in secondary notebook pager frame in a cell.
284 |
285 | # Generate PLOTS inside notebook, "inline" generates static png:
286 | %matplotlib inline
287 | # "notebook" argument allows interactive zoom and resize.
288 |
289 |
290 | '''
291 |
292 |
--------------------------------------------------------------------------------
/lib/yi_plot.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2017-05-15
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| yi_plot.py : essential plot functions.
5 |
6 | References:
7 | - http://matplotlib.org/api/pyplot_api.html
8 |
9 | - Computational tools for pandas
10 | http://pandas.pydata.org/pandas-docs/stable/computation.html
11 |
12 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
13 | 2017-05-15 Add plotqq() for quantile-quantile Q-Q probability plot.
14 | 2016-01-21 plotn(): Replace its "dataframe = dataframe.dropna()" with todf.
15 | 2016-01-20 Receive plotdf(), versions 2014-15, from yi_fred module.
16 | plotdf was actually the very first plot routine written.
17 | Replace its "dataframe = dataframe.dropna()" with todf.
18 | 2015-12-20 python3 compatible: lib import fix.
19 | 2015-12-17 python3 compatible: fix with yi_0sys
20 | 2015-11-19 Add scatter, scats, and scat for rainbow scatter plots.
21 | 2014-12-13 Add plotn where index are not dates (cf. plotdf and plotfred).
22 | 2014-08-08 Add dpi for saving image files.
23 | 2014-08-06 For boxplot, remove y_time.yymmdd_t() as fid,
24 | use title instead of fid, and add grid.
25 | 2014-08-05 Revise from yip_plot.py for boxplot to handle dataframe.
26 | '''
27 |
28 | from __future__ import absolute_import, print_function
29 |
30 | import matplotlib.pyplot as plt
31 | import matplotlib.cm as colormap
32 | import pandas as pd
33 | import scipy
34 | from . import yi_0sys as system
35 | from . import yi_1tools as tools
36 |
37 | dotsperinch = 140 # DPI resolution for plot.
38 |
39 |
40 | # The function to plot data looks routine, but in actuality specifying the
41 | # details can be such a hassle involving lots of trial and error.
42 |
43 | def plotdf( dataframe, title='tmp' ):
44 | '''Plot dataframe where its index are dates.'''
45 | dataframe = tools.todf(dataframe)
46 | # ^todf must dropna(),
47 | # otherwise index of last point plotted may be wrong.
48 | # Also helps if dataframe resulted from synthetic operations,
49 | # or if a Series was incorrectly submitted as Dataframe.
50 | fig, ax = plt.subplots()
51 | ax.xaxis_date()
52 | # ^interpret x-axis values as dates.
53 | plt.xticks( rotation='vertical' )
54 | # show x labels vertically.
55 |
56 | ax.plot( dataframe.index, dataframe, 'b-' )
57 | # ^x ^y blue line
58 | # k is black.
59 | ax.set_title( title + ' / last ' + str(dataframe.index[-1]) )
60 | # ^timestamp of last data point
61 | plt.grid(True)
62 | plt.show()
63 |
64 | # Now prepare the image FILE to save,
65 | # but ONLY if the title is not the default
66 | # (since this operation can be very slow):
67 | if title != 'tmp':
68 | title = title.replace( ' ', '_' )
69 | imgf = 'plotdf-' + title + '.png'
70 | fig.set_size_inches(11.5, 8.5)
71 | fig.savefig( imgf, dpi=dotsperinch )
72 | print(" :: Finished: " + imgf)
73 | return
74 |
75 |
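# Usage sketch (hypothetical data; plotdf expects a date index):
#     import numpy as np
#     idx = pd.date_range('2014-01-01', periods=90, freq='B')
#     px  = pd.DataFrame(np.random.randn(90).cumsum(), index=idx)
#     plotdf( px )                   # quick look, no image file saved.
#     plotdf( px, title='demo px' )  # also writes plotdf-demo_px.png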
76 |
77 | def plotn( dataframe, title='tmp' ):
78 | '''Plot dataframe (or list) where the index is numbered (not dates).'''
79 | # 2014-12-13 Adapted from plotdf which uses date index.
80 | # 2016-01-21 With todf pre-filter, list type will be converted.
81 | dataframe = tools.todf(dataframe)
82 | # ^todf must dropna(),
83 | # otherwise index of last point plotted may be wrong.
84 | # Also helps if dataframe resulted from synthetic operations,
85 | # or if a Series was incorrectly submitted as Dataframe.
86 | fig, ax = plt.subplots()
87 | # ax.xaxis_date()
88 | # # ^interpret x-axis values as dates.
89 | plt.xticks( rotation='vertical' )
90 | # show x labels vertically.
91 |
92 | ax.plot( dataframe.index, dataframe, 'b-' )
93 | # ^x ^y blue line
94 | # k is black.
95 | ax.set_title( title + ' / last ' + str( dataframe.index[-1]) )
96 | # ^index on last data point
97 | plt.grid(True)
98 | plt.show()
99 |
100 | # Now prepare the image FILE to save,
101 | # but ONLY if the title is not the default
102 | # (since this operation can be very slow):
103 | if title != 'tmp':
104 | title = title.replace( ' ', '_' )
105 | imgf = 'plotn-' + title + '.png'
106 | fig.set_size_inches(11.5, 8.5)
107 | fig.savefig( imgf, dpi=dotsperinch )
108 | print(" :: Finished: " + imgf)
109 | return
110 |
111 |
112 |
113 | # # Test data for boxplot:
114 | # import numpy as np
115 | #
116 | # np.random.seed(10)
117 | #
118 | # data = np.random.randn(30, 4)
119 | # labels = ['A', 'B', 'C', 'D']
120 |
121 |
122 | def boxplot( data, title='tmp', labels=[] ):
123 | '''Make boxplot from data which could be a dataframe.'''
124 | # - Use list of strings for labels,
125 | # since we presume data has no column names,
126 | # unless data is a dataframe.
127 | #
128 | # - Directly entering a dataframe as data will fail,
129 | # but dataframe.values will work, so:
130 | lastidx = 'NA'
131 | # ^for part of the plot's title...
132 | # If data is a dataframe, extract some info
133 | # before conversion to values:
134 | if isinstance( data, pd.DataFrame ):
135 | lastidx = str( data.index[-1] )
136 | colnames = list( data.columns )
137 | labels = colnames
138 | data = data.values
139 |
140 | fig, ax = plt.subplots()
141 | ax.boxplot( data )
142 | ax.set_xticklabels( labels )
143 | # HACK to show points of last row as a red dot:
144 | ax.plot( [list(data[-1])[0]] + list(data[-1]), 'or' )
145 | # ^need a dummy first point in the neighborhood
146 | # for autoscale to work properly.
147 | ax.set_title( title + ' / last ' + lastidx )
148 | plt.grid(True)
149 | plt.show()
150 |
151 | # Now prepare the image file to save:
152 | title = title.replace( ' ', '_' )
153 | imgf = 'boxplot-' + title + '.png'
154 | fig.set_size_inches(11.5, 8.5)
155 | fig.savefig( imgf, dpi=dotsperinch )
156 | print(" :: Finished: " + imgf)
157 | return
158 |
159 |
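# Usage sketch, continuing the commented test data above:
#     boxplot( data, title='random-data', labels=labels )
# A DataFrame works directly too: its column names then become the labels.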
160 |
161 | def scatter( dataframe, title='tmp', col=[0, 1] ):
162 | '''Scatter plot for dataframe by zero-based column positions.'''
163 | # First in col is x-axis, second is y-axis.
164 | # Index itself is excluded from position numbering.
165 | dataframe = dataframe.dropna()
166 | # ^esp. if it resulted from synthetic operations,
167 | # else timestamp of last point plotted may be wrong.
168 | count = len( dataframe )
169 | countf = float( count )
170 | colorseq = [ i / countf for i in range(count) ]
171 | # Default colorseq uses rainbow, same as MATLAB.
172 | # So sequentially: blue, green, yellow, red.
173 | # We could change colormap by cmap below.
174 | fig, ax = plt.subplots()
175 | plt.xticks( rotation='vertical' )
176 | # Show x labels vertically.
177 | ax.scatter( dataframe.iloc[:, col[0]], dataframe.iloc[:, col[1]],
178 | c=colorseq )
179 | # First arg for x-axis, second for y-axis, then
180 | # c is for color sequence. For another type of
181 | # sequential color shading, we could append argument:
182 | # cmap=colormap.coolwarm
183 | # cmap=colormap.Spectral
184 | # cmap=colormap.viridis [perceptual uniform]
185 | # but we leave cmap arg out since viridis will be the
186 | # default soon: http://matplotlib.org/users/colormaps.html
187 | colstr = '_' + str(col[0]) + '-' + str(col[1])
188 | ax.set_title(title + colstr + ' / last ' + str(dataframe.index[-1]))
189 | # ^index on last data point
190 | plt.grid(True)
191 | plt.show()
192 |
193 | # Now prepare the image FILE to save,
194 | # but ONLY if the title is not the default
195 | # (since this operation can be very slow):
196 | if title != 'tmp':
197 | title = title.replace( ' ', '_' ) + colstr
198 | imgf = 'scat-' + title + '.png'
199 | fig.set_size_inches(11.5, 8.5)
200 | fig.savefig( imgf, dpi=dotsperinch )
201 | print(" :: Finished: " + imgf)
202 | return
203 |
204 |
205 |
206 | def scats( dataframe, title='tmp' ):
207 | '''All pair-wise scatter plots for dataframe.'''
208 |     # A non-default title will also save each plot to an image file.
209 | ncol = dataframe.shape[1]
210 | # ^number of columns
211 | pairs = [ [i, j] for i in range(ncol) for j in range(ncol) if i < j ]
212 |     npairs = (ncol**2 - ncol) // 2    # // keeps the count an integer in Python 3.
213 | # e.g. ncol==5 implies npairs==10
214 | # ncol==10 implies npairs==45
215 | # ncol==20 implies npairs==190
216 | print(" :: Number of pair-wise plots: " + str(npairs))
217 | for pair in pairs:
218 | print(" :: Show column pair: " + str(pair))
219 | scatter( dataframe, title, pair )
220 | print("----------------------")
221 | return
222 |
223 |
224 |
225 | def scat( dfx, dfy, title='tmp', col=[0, 1] ):
226 | '''Scatter plot between two pasted dataframes.'''
227 |     # A non-default title will also save the plot to an image file.
228 | scatter( tools.paste([ dfx, dfy ]), title, col )
229 | return
230 |
231 |
232 |
233 | # Note: Leptokurtosis ("fat tails") is much more distinctive in the
234 | # Q-Q plots than P-P plots. Bi-modality and skewness are more distinctive
235 | # in P-P plots (discriminating in regions of high probability density)
236 | # than Q-Q plots (better for regions of low probability density).
237 | # See https://en.wikipedia.org/wiki/P–P_plot
238 | # and http://v8doc.sas.com/sashtml/qc/chap8/sect9.htm
239 |
240 |
241 | def plotqq( data, title='tmp', dist='norm', fitLS=True ):
242 | '''Display/save quantile-quantile Q-Q probability plot.
243 | Q–Q plot here is used to compare data to a theoretical distribution.
244 | Ref: https://en.wikipedia.org/wiki/Q–Q_plot
245 | '''
246 | # Assume "data" to be np.ndarray or single-column DataFrame.
247 | # Theoretical quantiles on horizontal x-axis estimated by Filliben method.
248 |     # Green line in plot depicts theoretical distribution; fitLS computes R^2:
249 |     # The axes are purposely transformed in order to make the specified
250 |     # distribution "dist" appear as a linear green line.
251 |     #     'norm' is a Gaussian distribution.
252 |     # The "data" is plotted along the vertical y-axis.
253 | fig, ax = plt.subplots()
254 | arr = tools.toar( data )
255 | # ^Roundabout way guarantees a pure array needed for MAIN probplot:
256 | _ = scipy.stats.probplot( arr, dist=dist, fit=fitLS, plot=plt )
257 | # Ignore numerical output, just give plot object to matplotlib.
258 | # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.probplot.html
259 | # Prefer scipy version over statsmodels.graphics.gofplots.qqplot()
260 | ax.get_lines()[0].set_marker('.')
261 | ax.get_lines()[0].set_markersize(7.0)
262 | ax.get_lines()[0].set_markerfacecolor('r')
263 | # [0] strangely refers to data points, set to red.
264 | ax.get_lines()[1].set_color('g')
265 | # [1] refers to the straight theoretic line, set to green.
266 | # But points in common should be blue, rather than brown.
267 | plt.title( title + " / plotqq " + dist + ", count=" + str(len(arr)) )
268 | plt.grid(True)
269 | plt.show()
270 | # Prepare image FILE to save, but ONLY if the title is not the default:
271 | if title != 'tmp':
272 | title = title.replace( ' ', '_' )
273 | imgf = 'plotqq-' + title + '.png'
274 | fig.set_size_inches(11.5, 8.5)
275 | fig.savefig( imgf, dpi=dotsperinch )
276 | print(" :: Finished: " + imgf)
277 | return
278 |
279 |
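# Usage sketch (dailyreturns is hypothetical; dist='norm' is the default):
#     plotqq( dailyreturns, title='SPX daily returns' )
# Leptokurtosis shows up as points curving away from the green line
# at both tails.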
280 |
281 | if __name__ == "__main__":
282 | system.endmodule()
283 |
--------------------------------------------------------------------------------
/tests/test_optimize.py:
--------------------------------------------------------------------------------
1 | # Python Module for import Date : 2016-04-08
2 | # vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=python : per Python PEP 0263
3 | '''
4 | _______________| test_optimize : Test fecon235 ys_optimize module.
5 |
6 | MUST SEE lib/ys_optimize.py for implementation details and references.
7 | This test file is also intended as a TUTORIAL for USAGE,
8 | see especially the section on Robust Estimation for a quick example.
9 |
10 | Our selected methods feature the following, with optimize() as their unification:
11 |
12 |   - minBrute(): non-convex problems, GLOBAL optimization:
13 |     If your problem does NOT admit a unique local minimum (which can be hard
14 |     to test unless the function is convex), and you do not have prior
15 |     information to initialize the optimization close to the solution,
16 |     then use brute force grid search: scipy.optimize.brute() evaluates the
17 |     function on a given grid of parameters and returns the parameters
18 |     corresponding to the minimum value.
19 |
20 | - minNelder(): if data is NOISY:
21 | Nelder-Mead simplex method (scipy.optimize.fmin()) has a long history of
22 | successful use in applications, but it will usually be slower than an
23 | algorithm that uses first or second derivative information.
24 |
25 | - minBroyden(): WITHOUT knowledge of the gradient:
26 | L-BFGS-B (scipy.optimize.fmin_l_bfgs_b())
27 | where gradient need not be provided analytically.
28 | Constraints are optional, so GREAT for very NARROW USE.
29 | BFGS abbreviates Broyden-Fletcher-Goldfarb-Shanno.
30 |
31 | - optimize():
32 | For scipy.optimize.minimize(): a single method must be selected.
33 | However, our optimize() is a sequence of methods which starts from brute
34 | to refined in above order. This is the MAIN FUNCTION for GENERAL USE.
35 |
36 | Here we test three types of LOSS FUNCTIONS: sum of squared errors,
37 | sum of absolute errors, and median of absolute errors.
38 |
39 |
40 | Testing: As of fecon235 v4, we favor pytest over nosetests, so e.g.
41 | $ py.test --doctest-modules
42 |
43 | REFERENCE
44 | pytest: https://pytest.org/latest/getting-started.html
45 | or PDF at http://pytest.org/latest/pytest.pdf
46 |
47 | CHANGE LOG For latest version, see https://github.com/rsvp/fecon235
48 | 2016-04-08 Clarify model specification and add median absolute error.
49 | 2016-04-06 Semantic change of names to avoid misunderstanding.
50 | minimize() -> optimize()
51 | For optimize(): boundpairs -> initialpairs
52 | 2016-04-04 First version. Thanks to so12311 (2011) for his example at
53 | Stack Overflow: http://stackoverflow.com/a/8672743
54 | '''
55 |
56 | from __future__ import absolute_import, print_function
57 |
58 | import numpy as np
59 | from fecon235.lib import yi_0sys as system
60 | from fecon235.lib import ys_optimize as yop
61 | #
62 | # N.B. - in this tests directory without __init__.py,
63 | # we use absolute import as if outside the fecon235 package,
64 | # not relative import (cf. modules within lib).
65 | #
66 | # Assuming that DISPLAY=0 at ys_optimize module.
67 |
68 |
69 | # Here are the TRUE PARAMETERS, which we will pretend to discover:
70 | m_true = 88.8
71 | b_true = 77.7
72 |
73 | # Source of data:
74 | x_true = np.arange(0, 10, 0.1)
75 |
76 | # Generate DATA for a LINE with slope m_true and intercept b_true:
77 | y_true = m_true*x_true + b_true
78 |
79 |
80 | # GOAL: given some data and a MODEL, we want to MINIMIZE the LOSS FUNCTION
81 | # over possible values of the model's PARAMETERS.
82 | # Parameters which satisfy that goal are called BEST estimates
83 | # for the specified functional form.
84 |
85 | # Loss function should DISTINGUISH between parameters to be optimized,
86 | # and other supplemental arguments. The latter is introduced
87 | # via a tuple called funarg, frequently used to inject data.
88 | # (Compare to classical optimization, see Rosenbrock test below.)
89 |
90 |
91 | def sqerror(params, *args):
92 | '''LOSS FUNCTION: sum of squared errors for our model.'''
93 | # Notice how params works like a tuple:
94 | m, b = params
95 | # Assignment for non-parameter arguments (see funarg below):
96 | y = args[0]
97 | x = args[1]
98 | # Functional form of our MODEL:
99 | y_model = m*x + b
100 | # Generated ERRORS:
101 | error = y - y_model
102 | # L2 metric:
103 | # Minimizing the sum of squared errors by implication minimizes RMSE,
104 |     # the so-called "Root Mean Squared Error,"
105 |     # since taking the mean and then the square root preserves the minimizer,
106 | # but unnecessarily increases computing time:
107 | return np.sum( np.square(error) )
108 |
109 | # Loss function could also have used np.sum(np.absolute(error)), L1 metric.
110 |
111 | def aberror(params, *args):
112 | '''LOSS FUNCTION: sum of absolute errors for our model.'''
113 | m, b = params
114 | y = args[0]
115 | x = args[1]
116 | # Functional form of our MODEL:
117 | y_model = m*x + b
118 | # Generated ERRORS:
119 | error = y - y_model
120 | return np.sum( np.absolute(error) )
121 |
122 |
123 | # NOTICE: TUPLE "funarg" is used to specify arguments to function "fun"
124 | # which are NOT the parameters to be optimized (e.g. data).
125 | # Gotcha: Remember a single-element tuple must include
126 | # that mandatory comma: ( alone, )
127 |
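# Sketch of that gotcha (y_data is hypothetical):
#     funarg = ( y_data, )      # correct: single-element tuple
#     funarg = ( y_data )       # WRONG: merely parenthesized, not a tuple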
128 | # ============================================= Test helper functions ==========
129 |
130 |
131 | def test_minBrute_ys_optimize_fecon235_Inadequate_boundpairs():
132 | '''Test minBrute using intentionally inadequate boundpairs.'''
133 | # Brute force works with range of guesses to start parameter estimation.
134 | result = yop.minBrute(fun=sqerror, funarg=(y_true, x_true),
135 | boundpairs=[(10.0,50.0),(10.0,30.0)], grids=20)
136 | # We know in advance that the result should NOT fit
137 | # our true parameters. In fact, the result
138 | # should give the upper bounds of boundpairs.
139 | assert result[0] == 50.0
140 | assert result[0] != m_true
141 | assert result[1] == 30.0
142 | assert result[1] != b_true
143 |
144 |
145 | def test_minBrute_ys_optimize_fecon235_Adequate_boundpairs():
146 | '''Test minBrute using adequate boundpairs.'''
147 | result = yop.minBrute(fun=sqerror, funarg=(y_true, x_true),
148 | boundpairs=[(70.0,90.0),(70.0,90.0)], grids=20)
149 | # We know in advance that the result should FIT,
150 | # though approximately since the grid search is coarse.
151 | # We shall accept +/- 1.0 of true values:
152 | assert abs(result[0] - m_true) < 1.0
153 | assert abs(result[1] - b_true) < 1.0
154 |
155 |
156 | def test_minNelder_ys_optimize_fecon235_wild_startparms():
157 | '''Test minNelder using wild starting parameter guesses.'''
158 | startparms = np.array([1000.0, 1000.0])
159 | result = yop.minNelder(fun=sqerror, funarg=(y_true, x_true),
160 | initial=startparms)
161 | # We shall accept +/- 0.01 of true values:
162 | assert abs(result[0] - m_true) < 0.01
163 | assert abs(result[1] - b_true) < 0.01
164 |
165 |
166 | def test_minBroyden_ys_optimize_fecon235_wild_startparms():
167 | '''Test minBroyden using wild starting parameter guesses.'''
168 | startparms = np.array([1000.0, 1000.0])
169 | result = yop.minBroyden(fun=sqerror, funarg=(y_true, x_true),
170 | initial=startparms)
171 | # We shall accept +/- 0.01 of true values:
172 | assert abs(result[0] - m_true) < 0.01
173 | assert abs(result[1] - b_true) < 0.01
174 |
175 |
176 | # ============================================= MAIN FUNCTION: optimize() ======
177 |
178 | # SUMMARY: yop.optimize() accurately integrates all of the helper functions
179 | # while being tolerant of wild guesses for initialpairs!
180 |
181 | def test_optimize_ys_optimize_fecon235_Inadequate_initialpairs():
182 | '''Test optimize() using intentionally inadequate initialpairs.
183 | However, we expect very accurate estimates since we
184 | minimize by grid search, Nelder-Mead simplex, and L-BFGS-B methods.
185 | First a broad global search, followed by coarse non-gradient method,
186 | then refined quasi-Newton method by approximate low-rank Hessian.
187 | initialpairs is a list of (min, max) pairs for fun arguments.
188 | By design, we are intentionally NOT CONSTRAINED by initialpairs.
189 | '''
190 | result = yop.optimize(fun=sqerror, funarg=(y_true, x_true),
191 | initialpairs=[(10.0,50.0),(10.0,30.0)], grids=20)
192 | # We shall accept +/- 0.0001 of true values:
193 | assert abs(result[0] - m_true) < 0.0001
194 | assert abs(result[1] - b_true) < 0.0001
195 |
196 |
197 | def test_optimize_ys_optimize_fecon235_aberror_loss_function():
198 | '''Test optimize() using sum of absolute errors loss function,
199 | instead of sum of squared errors loss function,
200 | and intentionally inadequate initialpairs.
201 | By design, we are intentionally NOT CONSTRAINED by initialpairs.
202 | '''
203 | result = yop.optimize(fun=aberror, funarg=(y_true, x_true),
204 | initialpairs=[(10.0,50.0),(10.0,30.0)], grids=20)
205 | # We shall accept +/- 0.0001 of true values:
206 | assert abs(result[0] - m_true) < 0.0001
207 | assert abs(result[1] - b_true) < 0.0001
208 |
209 |
210 | # =================================================== ROBUST Estimation ========
211 | # We revisit the fitting of the sloped line example,
212 | # but this time more generalized for templating in other applications.
213 | # Usage with other data structures becomes more apparent, e.g. DataFrame.
214 |
215 | # Let's first WRITE THE MODEL in terms of tuple p for parameters.
216 | # This conceptually separates the model specifications from the loss function.
217 | # Also it can output the fitted values for the model given optimal parameters.
218 |
219 | def model_slope( p, X ):
220 | '''Model of sloped line: given parameters p and data X.'''
221 | # Big X could be a pandas DataFrame with interesting columns.
222 | # Note that we are not limited to just a simple equation here.
223 | # This section could have also been a complicated procedure
224 | # with constraints, masks, etc.
225 | return p[0]*X + p[1]
226 |
227 | # For good practice, let the last element of p be the
228 | # estimated constant INTERCEPT. Rewriting the model is thus easier
229 | # because no shifting of locations is involved
230 | # when you want to remove that slack variable.
231 |
232 |
233 | def medaberror( p, *args ):
234 | '''Loss function: np.median of absolute errors for our model.
235 | This is much more robust than using np.sum or np.mean.
236 | Perhaps better than editing "outliers" out of data.
237 | This illustrates a LOSS FUNCTION in its SIMPLICITY.
238 | '''
239 |     #  y represents the dependent variable, while
240 |     #  X represents the independent variable(s).
241 | # Here the model is introduced via *args.
242 | y = args[0]
243 | model = args[1]
244 | X = args[2]
245 | error = y - model(p, X)
246 | return np.median( np.absolute(error) )
247 |
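# Sketch of obtaining fitted values, given this model and loss function
# (mirrors the test below; estimates approximate the true parameters):
#     p_opt = yop.optimize( fun=medaberror, funarg=(y_true, model_slope, x_true),
#                           initialpairs=[(10.0, 50.0), (10.0, 30.0)], grids=20 )
#     y_fit = model_slope( p_opt, x_true )
#     residuals = y_true - y_fit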
248 |
249 | # SUMMARY: optimize() computes the model parameters
250 | # which minimizes a given loss function. There are many types
251 | # of loss functions which can be used to estimate a model.
252 |
253 | def test_optimize_ys_optimize_fecon235_medaberror_loss_function():
254 | '''Test optimize() using median of absolute errors loss function,
255 | instead of sum of squared errors loss function,
256 | and intentionally inadequate initialpairs.
257 | By design, we are intentionally NOT CONSTRAINED by initialpairs.
258 | '''
259 | # We have y_true and x_true as ndarrays to serve as data.
260 | # Note that funarg here also include model specification.
261 | result = yop.optimize(fun=medaberror, funarg=(y_true, model_slope, x_true),
262 | initialpairs=[(10.0,50.0),(10.0,30.0)], grids=20)
263 | # We shall accept +/- 0.0001 of true values:
264 | assert abs(result[0] - m_true) < 0.0001
265 | assert abs(result[1] - b_true) < 0.0001
266 | #
267 | # What exactly was the LEAST ERROR with optimal parameters?
268 | #
269 | least_error = medaberror( result, y_true, model_slope, x_true )
270 | assert abs(least_error - 0.0) < 0.0001
271 |
272 |
273 | # REMARKS: if the loss function is squared error, then theoretically
274 | # Ordinary Least Squares method will directly provide unique unbiased
275 | # linear estimates in closed form. Now if the distribution of
276 | # the error terms is Gaussian then maximum likelihood estimates
277 | # are the same as the OLS estimates asymptotically.
278 | #
279 | #  Loss functions based on absolute error do require iterative
280 | #  solutions for estimates that are generally neither unique nor
281 | #  available in closed form. They can be computationally expensive.
282 | #  This is the case for our optimize() algorithm. But the estimates
283 | #  are more robust in practice, in the face of outliers and corrupt data.
284 | #
285 | # Reference:
286 | # https://www.quora.com/How-would-a-model-change-if-we-minimized-absolute-error-instead-of-squared-error-What-about-the-other-way-around/answer/Ben-Packer-1
287 |
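# Sketch of that OLS closed form for the sloped-line example
# (np.linalg.lstsq solves least squares; rcond=None per newer numpy):
#     A = np.column_stack([ x_true, np.ones_like(x_true) ])
#     m_hat, b_hat = np.linalg.lstsq( A, y_true, rcond=None )[0]
#     # For our noiseless data: m_hat ~ m_true, b_hat ~ b_true.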
288 |
289 |
290 | # =================================================== ROSENBROCK test ==========
291 | # This is a classic test in convex optimization.
292 | # Note how simply the parameters to be optimized are expressed in Python:
293 |
294 | def rosenbrock( z ):
295 | '''Famous Rosenbrock test function.'''
296 | # Optimize on two variables z[0] and z[1] by just writing them out:
297 | return 0.5*(1 - z[0])**2 + (z[1] - z[0]**2)**2
298 |
299 |
300 | def test_optimize_ys_optimize_fecon235_rosenbrock_function():
301 | '''Test optimize() using Rosenbrock function.'''
302 | # Test multivariate function without supplemental arguments, so funarg=().
303 | result = yop.optimize(fun=rosenbrock, funarg=(),
304 | initialpairs=[(-98.0,98.0),(-98.0,98.0)], grids=20)
305 | # We shall accept +/- 0.0001 of true values:
306 | assert abs(result[0] - 1.0) < 0.0001
307 | assert abs(result[1] - 1.0) < 0.0001
308 |
309 |
310 | if __name__ == "__main__":
311 | system.endmodule()
312 |
--------------------------------------------------------------------------------
/docs/fecon235-08-sympy.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# SymPy tutorial\n",
8 | "\n",
9 | "**SymPy** is a Python package for performing **symbolic mathematics**\n",
10 | "which can perform algebra, integrate and differentiate equations, \n",
11 | "find solutions to differential equations, and *numerically solve\n",
12 | "messy equations* -- along other uses.\n",
13 | "\n",
14 | "CHANGE LOG\n",
15 | " \n",
16 | " 2017-06-12 First revision since 2015-12-26.\n",
17 | "\n",
18 | "Let's import sympy and initialize its pretty print functionality \n",
19 | "which will print equations using LaTeX.\n",
20 | "Jupyter notebooks uses Mathjax to render equations\n",
21 | "so we specify that option."
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 1,
27 | "metadata": {
28 | "collapsed": false
29 | },
30 | "outputs": [],
31 | "source": [
32 | "import sympy as sym\n",
33 | "sym.init_printing(use_latex='mathjax')\n",
34 | "\n",
35 | "# If you were not in a notebook environment,\n",
36 | "# but working within a terminal, use:\n",
37 | "#\n",
38 | "# sym.init_printing(use_unicode=True)"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "## Usage\n",
46 | "\n",
47 | "These sections are illustrated with examples drawn from\n",
48 | "[rlabbe](https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python/blob/master/Appendix-A-Installation.ipynb) from his appendix for Kalman Filters.\n",
49 | "\n",
50 | "It is important to distinguish a Python variable\n",
51 | "from a **declared symbol** in sympy."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 2,
57 | "metadata": {
58 | "collapsed": false
59 | },
60 | "outputs": [
61 | {
62 | "data": {
63 | "text/latex": [
64 | "$$\\left [ \\phi, \\quad x\\right ]$$"
65 | ],
66 | "text/plain": [
67 | "[\\phi, x]"
68 | ]
69 | },
70 | "execution_count": 2,
71 | "metadata": {},
72 | "output_type": "execute_result"
73 | }
74 | ],
75 | "source": [
76 | "phi, x = sym.symbols('\\phi, x')\n",
77 | "\n",
78 | "# x here is a sympy symbol, and we form a list:\n",
79 | "[ phi, x ]"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "Notice how we used a LaTeX expression for the symbol `phi`.\n",
87 | "This is not necessary, but if you do the output will render nicely as LaTeX.\n",
88 | "\n",
89 | "Also notice how $x$ did not have a numerical value for the list to evaluate.\n",
90 | "\n",
91 | "So what is the **derivative** of $\\sqrt{\\phi}$ ?"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 3,
97 | "metadata": {
98 | "collapsed": false
99 | },
100 | "outputs": [
101 | {
102 | "data": {
103 | "text/latex": [
104 | "$$\\frac{1}{2 \\sqrt{\\phi}}$$"
105 | ],
106 | "text/plain": [
107 | " 1 \n",
108 | "────\n",
109 | "2⋅√φ"
110 | ]
111 | },
112 | "execution_count": 3,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "sym.diff('sqrt(phi)')"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "We can **factor** equations:"
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 4,
131 | "metadata": {
132 | "collapsed": false
133 | },
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/latex": [
138 | "$$\\left(\\phi - 1\\right) \\left(\\phi^{2} + 1\\right)$$"
139 | ],
140 | "text/plain": [
141 | " ⎛ 2 ⎞\n",
142 | "(\\phi - 1)⋅⎝\\phi + 1⎠"
143 | ]
144 | },
145 | "execution_count": 4,
146 | "metadata": {},
147 | "output_type": "execute_result"
148 | }
149 | ],
150 | "source": [
151 | "sym.factor( phi**3 - phi**2 + phi - 1 )"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "and we can **expand** them:"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 5,
164 | "metadata": {
165 | "collapsed": false
166 | },
167 | "outputs": [
168 | {
169 | "data": {
170 | "text/latex": [
171 | "$$\\phi^{2} - 3 \\phi - 4$$"
172 | ],
173 | "text/plain": [
174 | " 2 \n",
175 | "\\phi - 3⋅\\phi - 4"
176 | ]
177 | },
178 | "execution_count": 5,
179 | "metadata": {},
180 | "output_type": "execute_result"
181 | }
182 | ],
183 | "source": [
184 | "((phi+1)*(phi-4)).expand()"
185 | ]
186 | },
187 | {
188 | "cell_type": "markdown",
189 | "metadata": {},
190 | "source": [
191 | "You can also use strings for equations that use symbols that you have not defined:"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 6,
197 | "metadata": {
198 | "collapsed": false
199 | },
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/latex": [
204 | "$$2 t + 2$$"
205 | ],
206 | "text/plain": [
207 | "2⋅t + 2"
208 | ]
209 | },
210 | "execution_count": 6,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "x = sym.expand('(t+1)*2')\n",
217 | "x"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "metadata": {},
223 | "source": [
224 | "## Symbolic solution\n",
225 | "\n",
226 | "Now let's use sympy to compute the **Jacobian** of a matrix. \n",
227 | "Suppose we have a function,\n",
228 | "\n",
229 | "$$h=\\sqrt{(x^2 + z^2)}$$\n",
230 | "\n",
231 | "for which we want to find the Jacobian with respect to x, y, and z."
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": 7,
237 | "metadata": {
238 | "collapsed": false
239 | },
240 | "outputs": [
241 | {
242 | "data": {
243 | "text/latex": [
244 | "$$\\left[\\begin{matrix}\\frac{x}{\\sqrt{x^{2} + z^{2}}} & 0 & \\frac{z}{\\sqrt{x^{2} + z^{2}}}\\end{matrix}\\right]$$"
245 | ],
246 | "text/plain": [
247 | "⎡ x z ⎤\n",
248 | "⎢──────────── 0 ────────────⎥\n",
249 | "⎢ _________ _________⎥\n",
250 | "⎢ ╱ 2 2 ╱ 2 2 ⎥\n",
251 | "⎣╲╱ x + z ╲╱ x + z ⎦"
252 | ]
253 | },
254 | "execution_count": 7,
255 | "metadata": {},
256 | "output_type": "execute_result"
257 | }
258 | ],
259 | "source": [
260 | "x, y, z = sym.symbols('x y z')\n",
261 | "\n",
262 | "H = sym.Matrix([sym.sqrt(x**2 + z**2)])\n",
263 | "\n",
264 | "state = sym.Matrix([x, y, z])\n",
265 | "\n",
266 | "H.jacobian(state)"
267 | ]
268 | },
269 | {
270 | "cell_type": "markdown",
271 | "metadata": {},
272 | "source": [
273 | "Now let's compute the discrete process noise matrix $\\mathbf{Q}_k$ given the continuous process noise matrix \n",
274 | "$$\\mathbf{Q} = \\Phi_s \\begin{bmatrix}0&0&0\\\\0&0&0\\\\0&0&1\\end{bmatrix}$$\n",
275 | "\n",
276 | "and the equation\n",
277 | "\n",
278 | "$$\\mathbf{Q} = \\int_0^{\\Delta t} \\Phi(t)\\mathbf{Q}\\Phi^T(t) dt$$\n",
279 | "\n",
280 | "where \n",
281 | "$$\\Phi(t) = \\begin{bmatrix}1 & \\Delta t & {\\Delta t}^2/2 \\\\ 0 & 1 & \\Delta t\\\\ 0& 0& 1\\end{bmatrix}$$"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": 8,
287 | "metadata": {
288 | "collapsed": false
289 | },
290 | "outputs": [
291 | {
292 | "data": {
293 | "text/latex": [
294 | "$$\\left[\\begin{matrix}\\frac{\\Delta{t}^{5}}{20} & \\frac{\\Delta{t}^{4}}{8} & \\frac{\\Delta{t}^{3}}{6}\\\\\\frac{\\Delta{t}^{4}}{8} & \\frac{\\Delta{t}^{3}}{3} & \\frac{\\Delta{t}^{2}}{2}\\\\\\frac{\\Delta{t}^{3}}{6} & \\frac{\\Delta{t}^{2}}{2} & \\Delta{t}\\end{matrix}\\right]$$"
295 | ],
296 | "text/plain": [
297 | "⎡ 5 4 3⎤\n",
298 | "⎢\\Delta{t} \\Delta{t} \\Delta{t} ⎥\n",
299 | "⎢────────── ────────── ──────────⎥\n",
300 | "⎢ 20 8 6 ⎥\n",
301 | "⎢ ⎥\n",
302 | "⎢ 4 3 2⎥\n",
303 | "⎢\\Delta{t} \\Delta{t} \\Delta{t} ⎥\n",
304 | "⎢────────── ────────── ──────────⎥\n",
305 | "⎢ 8 3 2 ⎥\n",
306 | "⎢ ⎥\n",
307 | "⎢ 3 2 ⎥\n",
308 | "⎢\\Delta{t} \\Delta{t} ⎥\n",
309 | "⎢────────── ────────── \\Delta{t} ⎥\n",
310 | "⎣ 6 2 ⎦"
311 | ]
312 | },
313 | "execution_count": 8,
314 | "metadata": {},
315 | "output_type": "execute_result"
316 | }
317 | ],
318 | "source": [
319 | "dt = sym.symbols('\\Delta{t}')\n",
320 | "\n",
321 | "F_k = sym.Matrix([[1, dt, dt**2/2],\n",
322 | " [0, 1, dt],\n",
323 | " [0, 0, 1]])\n",
324 | "\n",
325 | "Q = sym.Matrix([[0,0,0],\n",
326 | " [0,0,0],\n",
327 | " [0,0,1]])\n",
328 | "\n",
329 | "sym.integrate(F_k*Q*F_k.T,(dt, 0, dt))"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "## Numerical solution\n",
337 | "\n",
338 | "You can find the *numerical value* of an equation by substituting in a value for a variable:"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": 9,
344 | "metadata": {
345 | "collapsed": false
346 | },
347 | "outputs": [
348 | {
349 | "data": {
350 | "text/latex": [
351 | "$$8$$"
352 | ],
353 | "text/plain": [
354 | "8"
355 | ]
356 | },
357 | "execution_count": 9,
358 | "metadata": {},
359 | "output_type": "execute_result"
360 | }
361 | ],
362 | "source": [
363 | "x = sym.symbols('x')\n",
364 | "\n",
365 | "w = (x**2) - (3*x) + 4\n",
366 | "w.subs(x, 4)"
367 | ]
368 | },
369 | {
370 | "cell_type": "markdown",
371 | "metadata": {
372 | "collapsed": true
373 | },
374 | "source": [
375 | "Typically we want a numerical solution where the analytic solution is messy,\n",
376 | "that is, we want a **solver**.\n",
377 | "This is done by specifying a sympy equation, for example:"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 10,
383 | "metadata": {
384 | "collapsed": false
385 | },
386 | "outputs": [
387 | {
388 | "data": {
389 | "text/latex": [
390 | "$$\\left\\{3, 5\\right\\}$$"
391 | ],
392 | "text/plain": [
393 | "{3, 5}"
394 | ]
395 | },
396 | "execution_count": 10,
397 | "metadata": {},
398 | "output_type": "execute_result"
399 | }
400 | ],
401 | "source": [
402 | "LHS = (x**2) - (8*x) + 15\n",
403 | "RHS = 0\n",
404 | "# where both RHS and LHS can be complicated expressions.\n",
405 | "\n",
406 | "solved = sym.solveset( sym.Eq(LHS, RHS), x, domain=sym.S.Reals )\n",
407 | "# Notice how the domain solution can be specified.\n",
408 | "\n",
409 | "solved\n",
410 | "# A set of solution(s) is returned."
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 11,
416 | "metadata": {
417 | "collapsed": false
418 | },
419 | "outputs": [
420 | {
421 | "name": "stdout",
422 | "output_type": "stream",
423 | "text": [
424 | "Solution set was not empty.\n"
425 | ]
426 | }
427 | ],
428 | "source": [
429 | "# Testing whether any solution(s) were found:\n",
430 | "if solved != sym.S.EmptySet:\n",
431 | " print(\"Solution set was not empty.\")"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": 12,
437 | "metadata": {
438 | "collapsed": false
439 | },
440 | "outputs": [
441 | {
442 | "data": {
443 | "text/plain": [
444 | "sympy.sets.sets.FiniteSet"
445 | ]
446 | },
447 | "execution_count": 12,
448 | "metadata": {},
449 | "output_type": "execute_result"
450 | }
451 | ],
452 | "source": [
453 | "# sympy sets are not like the usual Python sets...\n",
454 | "type(solved)"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": 13,
460 | "metadata": {
461 | "collapsed": false
462 | },
463 | "outputs": [
464 | {
465 | "name": "stdout",
466 | "output_type": "stream",
467 | "text": [
468 | "([3, 5], )\n"
469 | ]
470 | }
471 | ],
472 | "source": [
473 | "# ... but can easily to converted to a Python list:\n",
474 | "l = list(solved)\n",
475 | "print( l, type(l) )"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": 14,
481 | "metadata": {
482 | "collapsed": false
483 | },
484 | "outputs": [
485 | {
486 | "data": {
487 | "text/latex": [
488 | "$$\\left\\{- 2 i, 2 i\\right\\}$$"
489 | ],
490 | "text/plain": [
491 | "{-2⋅ⅈ, 2⋅ⅈ}"
492 | ]
493 | },
494 | "execution_count": 14,
495 | "metadata": {},
496 | "output_type": "execute_result"
497 | }
498 | ],
499 | "source": [
500 | "LHS = (x**2)\n",
501 | "RHS = -4\n",
502 | "# where both RHS and LHS can be complicated expressions.\n",
503 | "\n",
504 | "solved = sym.solveset( sym.Eq(LHS, RHS), x )\n",
505 | "# Leaving out the domain will include the complex domain.\n",
506 | "\n",
507 | "solved"
508 | ]
509 | },
510 | {
511 | "cell_type": "markdown",
512 | "metadata": {},
513 | "source": [
514 | "## Application to financial economics\n",
515 | "\n",
516 | "We used sympy to deduce parameters of Gaussian mixtures\n",
517 | "in module `lib/ys_gauss_mix.py` and the explanatory notebook\n",
518 | "is rendered at https://git.io/gmix "
519 | ]
520 | }
521 | ],
522 | "metadata": {
523 | "kernelspec": {
524 | "display_name": "Python 2",
525 | "language": "python",
526 | "name": "python2"
527 | },
528 | "language_info": {
529 | "codemirror_mode": {
530 | "name": "ipython",
531 | "version": 2
532 | },
533 | "file_extension": ".py",
534 | "mimetype": "text/x-python",
535 | "name": "python",
536 | "nbconvert_exporter": "python",
537 | "pygments_lexer": "ipython2",
538 | "version": "2.7.13"
539 | }
540 | },
541 | "nbformat": 4,
542 | "nbformat_minor": 0
543 | }
544 |
--------------------------------------------------------------------------------