├── .gitignore
├── LICENSE
├── README.md
├── SECURITY.md
├── cookiecutter.json
├── hooks
    └── post_gen_project.py
├── stockvalues.csv
└── {{cookiecutter.app_name}}
    ├── environment.yml
    ├── regression.py
    └── {{cookiecutter.app_name}}.pyproj


/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | ##
  4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
  5 | 
  6 | # User-specific files
  7 | *.suo
  8 | *.user
  9 | *.userosscache
 10 | *.sln.docstates
 11 | 
 12 | # User-specific files (MonoDevelop/Xamarin Studio)
 13 | *.userprefs
 14 | 
 15 | # Build results
 16 | [Dd]ebug/
 17 | [Dd]ebugPublic/
 18 | [Rr]elease/
 19 | [Rr]eleases/
 20 | x64/
 21 | x86/
 22 | bld/
 23 | [Bb]in/
 24 | [Oo]bj/
 25 | [Ll]og/
 26 | 
 27 | # Visual Studio 2015 cache/options directory
 28 | .vs/
 29 | # Uncomment if you have tasks that create the project's static files in wwwroot
 30 | #wwwroot/
 31 | 
 32 | # MSTest test Results
 33 | [Tt]est[Rr]esult*/
 34 | [Bb]uild[Ll]og.*
 35 | 
 36 | # NUNIT
 37 | *.VisualState.xml
 38 | TestResult.xml
 39 | 
 40 | # Build Results of an ATL Project
 41 | [Dd]ebugPS/
 42 | [Rr]eleasePS/
 43 | dlldata.c
 44 | 
 45 | # .NET Core
 46 | project.lock.json
 47 | project.fragment.lock.json
 48 | artifacts/
 49 | **/Properties/launchSettings.json
 50 | 
 51 | *_i.c
 52 | *_p.c
 53 | *_i.h
 54 | *.ilk
 55 | *.meta
 56 | *.obj
 57 | *.pch
 58 | *.pdb
 59 | *.pgc
 60 | *.pgd
 61 | *.rsp
 62 | *.sbr
 63 | *.tlb
 64 | *.tli
 65 | *.tlh
 66 | *.tmp
 67 | *.tmp_proj
 68 | *.log
 69 | *.vspscc
 70 | *.vssscc
 71 | .builds
 72 | *.pidb
 73 | *.svclog
 74 | *.scc
 75 | 
 76 | # Chutzpah Test files
 77 | _Chutzpah*
 78 | 
 79 | # Visual C++ cache files
 80 | ipch/
 81 | *.aps
 82 | *.ncb
 83 | *.opendb
 84 | *.opensdf
 85 | *.sdf
 86 | *.cachefile
 87 | *.VC.db
 88 | *.VC.VC.opendb
 89 | 
 90 | # Visual Studio profiler
 91 | *.psess
 92 | *.vsp
 93 | *.vspx
 94 | *.sap
 95 | 
 96 | # TFS 2012 Local Workspace
 97 | $tf/
 98 | 
 99 | # Guidance Automation Toolkit
100 | *.gpState
101 | 
102 | # ReSharper is a .NET coding add-in
103 | _ReSharper*/
104 | *.[Rr]e[Ss]harper
105 | *.DotSettings.user
106 | 
107 | # JustCode is a .NET coding add-in
108 | .JustCode
109 | 
110 | # TeamCity is a build add-in
111 | _TeamCity*
112 | 
113 | # DotCover is a Code Coverage Tool
114 | *.dotCover
115 | 
116 | # Visual Studio code coverage results
117 | *.coverage
118 | *.coveragexml
119 | 
120 | # NCrunch
121 | _NCrunch_*
122 | .*crunch*.local.xml
123 | nCrunchTemp_*
124 | 
125 | # MightyMoose
126 | *.mm.*
127 | AutoTest.Net/
128 | 
129 | # Web workbench (sass)
130 | .sass-cache/
131 | 
132 | # Installshield output folder
133 | [Ee]xpress/
134 | 
135 | # DocProject is a documentation generator add-in
136 | DocProject/buildhelp/
137 | DocProject/Help/*.HxT
138 | DocProject/Help/*.HxC
139 | DocProject/Help/*.hhc
140 | DocProject/Help/*.hhk
141 | DocProject/Help/*.hhp
142 | DocProject/Help/Html2
143 | DocProject/Help/html
144 | 
145 | # Click-Once directory
146 | publish/
147 | 
148 | # Publish Web Output
149 | *.[Pp]ublish.xml
150 | *.azurePubxml
151 | # TODO: Comment the next line if you want to checkin your web deploy settings
152 | # but database connection strings (with potential passwords) will be unencrypted
153 | *.pubxml
154 | *.publishproj
155 | 
156 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
157 | # checkin your Azure Web App publish settings, but sensitive information contained
158 | # in these scripts will be unencrypted
159 | PublishScripts/
160 | 
161 | # NuGet Packages
162 | *.nupkg
163 | # The packages folder can be ignored because of Package Restore
164 | **/packages/*
165 | # except build/, which is used as an MSBuild target.
166 | !**/packages/build/
167 | # Uncomment if necessary however generally it will be regenerated when needed
168 | #!**/packages/repositories.config
169 | # NuGet v3's project.json files produces more ignorable files
170 | *.nuget.props
171 | *.nuget.targets
172 | 
173 | # Microsoft Azure Build Output
174 | csx/
175 | *.build.csdef
176 | 
177 | # Microsoft Azure Emulator
178 | ecf/
179 | rcf/
180 | 
181 | # Windows Store app package directories and files
182 | AppPackages/
183 | BundleArtifacts/
184 | Package.StoreAssociation.xml
185 | _pkginfo.txt
186 | 
187 | # Visual Studio cache files
188 | # files ending in .cache can be ignored
189 | *.[Cc]ache
190 | # but keep track of directories ending in .cache
191 | !*.[Cc]ache/
192 | 
193 | # Others
194 | ClientBin/
195 | ~$*
196 | *~
197 | *.dbmdl
198 | *.dbproj.schemaview
199 | *.jfm
200 | *.pfx
201 | *.publishsettings
202 | orleans.codegen.cs
203 | 
204 | # Since there are multiple workflows, uncomment next line to ignore bower_components
205 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
206 | #bower_components/
207 | 
208 | # RIA/Silverlight projects
209 | Generated_Code/
210 | 
211 | # Backup & report files from converting an old project file
212 | # to a newer Visual Studio version. Backup files are not needed,
213 | # because we have git ;-)
214 | _UpgradeReport_Files/
215 | Backup*/
216 | UpgradeLog*.XML
217 | UpgradeLog*.htm
218 | 
219 | # SQL Server files
220 | *.mdf
221 | *.ldf
222 | *.ndf
223 | 
224 | # Business Intelligence projects
225 | *.rdl.data
226 | *.bim.layout
227 | *.bim_*.settings
228 | 
229 | # Microsoft Fakes
230 | FakesAssemblies/
231 | 
232 | # GhostDoc plugin setting file
233 | *.GhostDoc.xml
234 | 
235 | # Node.js Tools for Visual Studio
236 | .ntvs_analysis.dat
237 | node_modules/
238 | 
239 | # Typescript v1 declaration files
240 | typings/
241 | 
242 | # Visual Studio 6 build log
243 | *.plg
244 | 
245 | # Visual Studio 6 workspace options file
246 | *.opt
247 | 
248 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
249 | *.vbw
250 | 
251 | # Visual Studio LightSwitch build output
252 | **/*.HTMLClient/GeneratedArtifacts
253 | **/*.DesktopClient/GeneratedArtifacts
254 | **/*.DesktopClient/ModelManifest.xml
255 | **/*.Server/GeneratedArtifacts
256 | **/*.Server/ModelManifest.xml
257 | _Pvt_Extensions
258 | 
259 | # Paket dependency manager
260 | .paket/paket.exe
261 | paket-files/
262 | 
263 | # FAKE - F# Make
264 | .fake/
265 | 
266 | # JetBrains Rider
267 | .idea/
268 | *.sln.iml
269 | 
270 | # CodeRush
271 | .cr/
272 | 
273 | # Python Tools for Visual Studio (PTVS)
274 | __pycache__/
275 | *.pyc
276 | 
277 | # Cake - Uncomment if you are using it
278 | # tools/**
279 | # !tools/packages.config
280 | 
281 | # Telerik's JustMock configuration file
282 | *.jmconfig
283 | 
284 | # BizTalk build output
285 | *.btp.cs
286 | *.btm.cs
287 | *.odx.cs
288 | *.xsd.cs
289 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |     MIT License
 2 | 
 3 |     Copyright (c) Microsoft Corporation. All rights reserved.
 4 | 
 5 |     Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |     of this software and associated documentation files (the "Software"), to deal
 7 |     in the Software without restriction, including without limitation the rights
 8 |     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |     copies of the Software, and to permit persons to whom the Software is
10 |     furnished to do so, subject to the following conditions:
11 | 
12 |     The above copyright notice and this permission notice shall be included in all
13 |     copies or substantial portions of the Software.
14 | 
15 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |     SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # python-sklearn-regression-cookiecutter
 2 | 
 3 | A [Cookiecutter](http://cookiecutter.readthedocs.io/) template for a
 4 | [Python](https://www.python.org/) application that demonstrates the use of
 5 | [scikit-learn](http://scikit-learn.org/) regression learners.
 6 | 
 7 | ## Using this template
 8 | 
 9 | 1. [Install Cookiecutter](http://cookiecutter.readthedocs.io/en/latest/installation.html)
10 | 2. `cookiecutter gh:Microsoft/python-sklearn-regression-cookiecutter`
11 |    (or `cookiecutter https://github.com/Microsoft/python-sklearn-regression-cookiecutter.git`
12 |    if you prefer)
13 | 3. Fill in the Cookiecutter items (see below as to what each item
14 |    represents)
15 | 4. Install required Python packages as needed (these will vary based on which parts of the code you enable).
16 | 
17 | ### Cookiecutter items
18 | 
19 | - `app_name`: the name of the folder/project to create
20 | - `create_vs_project`: `y` to create a Visual Studio project file (.pyproj)
21 | 
22 | # Contributing
23 | 
24 | This project welcomes contributions and suggestions.  Most contributions require you to agree to a
25 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
26 | the rights to use your contribution. For details, visit https://cla.microsoft.com.
27 | 
28 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
29 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
30 | provided by the bot. You will only need to do this once across all repos using our CLA.
31 | 
32 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
33 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
34 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
35 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------
/cookiecutter.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "app_name": "regression",
 3 |   "create_vs_project": "y",
 4 |   "_visual_studio" : {
 5 |     "app_name": {
 6 |       "value_source": "ProjectName",
 7 |       "visible": false
 8 |     },
 9 |     "create_vs_project": {
10 |       "value_source": "IsNewProject",
11 |       "visible": false
12 |     }
13 |   },
14 |   "_visual_studio_post_cmds": [
15 |     {
16 |       "name": "File.OpenProject",
17 |       "args": "{{cookiecutter._output_folder_path}}\\{{cookiecutter.app_name}}.pyproj"
18 |     }
19 | ]}


--------------------------------------------------------------------------------
/hooks/post_gen_project.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | 
 4 | def delete_file(filepath):
 5 |     os.remove(os.path.join(os.path.realpath(os.path.curdir), filepath))
 6 | 
 7 | if __name__ == '__main__':
 8 |     if '{{cookiecutter.create_vs_project}}'.lower() != 'y':
 9 |         delete_file('{{cookiecutter.app_name}}.pyproj')
10 | 


--------------------------------------------------------------------------------
/{{cookiecutter.app_name}}/environment.yml:
--------------------------------------------------------------------------------
1 | name: scikit-env
2 | dependencies:
3 |   - python=3.6
4 |   - pandas
5 |   - numpy
6 |   - matplotlib
7 |   - scikit-learn
8 | 


--------------------------------------------------------------------------------
/{{cookiecutter.app_name}}/regression.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This script performs the basic process for applying a machine learning
  3 | algorithm to a dataset using Python libraries.
  4 | 
  5 | The four steps are:
  6 |    1. Download a dataset (using pandas)
  7 |    2. Process the numeric data (using numpy)
  8 |    3. Train and evaluate learners (using scikit-learn)
  9 |    4. Plot and compare results (using matplotlib)
 10 | 
 11 | 
 12 | The data is downloaded from URL, which is defined below. As is normal
 13 | for machine learning problems, the nature of the source data affects
 14 | the entire solution. When you change URL to refer to your own data, you
 15 | will need to review the data processing steps to ensure they remain
 16 | correct.
 17 | 
 18 | ============
 19 | Example Data
 20 | ============
 21 | The example is from https://web.archive.org/web/20180322001455/http://mldata.org/repository/data/viewslug/stockvalues/
 22 | It contains stock prices and the values of three indices for each day
 23 | over a five year period. See the linked page for more details about
 24 | this data set.
 25 | 
 26 | This script uses regression learners to predict the stock price for
 27 | the second half of this period based on the values of the indices. This
 28 | is a naive approach, and a more robust method would use each prediction
 29 | as an input for the next, and would predict relative rather than
 30 | absolute values.
 31 | '''
 32 | 
 33 | # Remember to update the script for the new data when you change this URL
 34 | URL = "https://raw.githubusercontent.com/microsoft/python-sklearn-regression-cookiecutter/master/stockvalues.csv"
 35 | 
 36 | # This is the column of the sample data to predict.
 37 | # Try changing it to other integers between 1 and 155.
 38 | TARGET_COLUMN = 32
 39 | 
 40 | # Uncomment this call when using matplotlib to generate images
 41 | # rather than displaying interactive UI.
 42 | #import matplotlib
 43 | #matplotlib.use('Agg')
 44 | 
 45 | from pandas import read_table
 46 | import numpy as np
 47 | import matplotlib.pyplot as plt
 48 | 
 49 | try:
 50 |     # [OPTIONAL] Seaborn makes plots nicer
 51 |     import seaborn
 52 | except ImportError:
 53 |     pass
 54 | 
 55 | # =====================================================================
 56 | 
 57 | def download_data():
 58 |     '''
 59 |     Downloads the data for this script into a pandas DataFrame.
 60 |     '''
 61 | 
 62 |     # If your data is in an Excel file, install 'xlrd' and use
 63 |     # pandas.read_excel instead of read_table
 64 |     #from pandas import read_excel
 65 |     #frame = read_excel(URL)
 66 | 
 67 |     # If your data is in a private Azure blob, install 'azure-storage' and use
 68 |     # BlockBlobService.get_blob_to_path() with read_table() or read_excel()
 69 |     #from azure.storage.blob import BlockBlobService
 70 |     #service = BlockBlobService(ACCOUNT_NAME, ACCOUNT_KEY)
 71 |     #service.get_blob_to_path(container_name, blob_name, 'my_data.csv')
 72 |     #frame = read_table('my_data.csv', ...
 73 | 
 74 |     frame = read_table(
 75 |         URL,
 76 |         
 77 |         # Uncomment if the file needs to be decompressed
 78 |         #compression='gzip',
 79 |         #compression='bz2',
 80 | 
 81 |         # Specify the file encoding
 82 |         # Latin-1 is common for data from US sources
 83 |         encoding='latin-1',
 84 |         #encoding='utf-8',  # UTF-8 is also common
 85 | 
 86 |         # Specify the separator in the data
 87 |         sep=',',            # comma separated values
 88 |         #sep='\t',          # tab separated values
 89 |         #sep=' ',           # space separated values
 90 | 
 91 |         # Ignore spaces after the separator
 92 |         skipinitialspace=True,
 93 | 
 94 |         # Generate row labels from each row number
 95 |         index_col=None,
 96 |         #index_col=0,       # use the first column as row labels
 97 |         #index_col=-1,      # use the last column as row labels
 98 | 
 99 |         # Generate column headers row from each column number
100 |         header=None,
101 |         #header=0,          # use the first line as headers
102 | 
103 |         # Use manual headers and skip the first row in the file
104 |         #header=0,
105 |         #names=['col1', 'col2', ...],
106 |     )
107 | 
108 |     # Return the entire frame
109 |     #return frame
110 | 
111 |     # Return a subset of the columns
112 |     return frame[[156, 157, 158, TARGET_COLUMN]]
113 | 
114 | 
115 | # =====================================================================
116 | 
117 | 
def get_features_and_labels(frame):
    '''
    Transforms and scales the input data and returns numpy arrays for
    training and testing inputs and targets.

    `frame` is a 2-D table of numeric values (for example the DataFrame
    returned by download_data()) whose last column is the target.

    Returns a tuple (X_train, X_test, y_train, y_test). The test arrays
    cover every row of the data; the training arrays are a 50% subset of
    those rows.
    '''

    # Replace missing values with 0.0
    # or we can use scikit-learn to calculate missing values below
    #frame[frame.isnull()] = 0.0

    # Convert values to floats. The builtin float is used because the
    # np.float alias was deprecated in NumPy 1.20 and removed in 1.24.
    arr = np.array(frame, dtype=float)

    # Normalize the entire data set (features and target) to [0, 1]
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    arr = MinMaxScaler().fit_transform(arr)

    # Use the last column as the target value
    X, y = arr[:, :-1], arr[:, -1]
    # To use the first column instead, change the index value
    #X, y = arr[:, 1:], arr[:, 0]

    # Use 50% of the data for training, but we will test against the
    # entire set
    # NOTE(review): train_test_split shuffles by default, so the training
    # rows are a random half of the data rather than the first half.
    # Pass shuffle=False if a chronological split is intended - confirm.
    from sklearn.model_selection import train_test_split
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.5)
    X_test, y_test = X, y

    # If values are missing we could impute them from the training data
    # (SimpleImputer replaces the Imputer class removed from
    # sklearn.preprocessing)
    #from sklearn.impute import SimpleImputer
    #imputer = SimpleImputer(strategy='mean')
    #imputer.fit(X_train)
    #X_train = imputer.transform(X_train)
    #X_test = imputer.transform(X_test)

    # Normalize the attribute values to mean=0 and variance=1
    scaler = StandardScaler()
    # To scale to a specified range, use MinMaxScaler
    #scaler = MinMaxScaler(feature_range=(0, 1))

    # Fit the scaler based on the training data, then apply the same
    # scaling to both training and test sets.
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Return the training and test sets
    return X_train, X_test, y_train, y_test
168 | 
169 | 
170 | # =====================================================================
171 | 
172 | 
def evaluate_learner(X_train, X_test, y_train, y_test):
    '''
    Trains several support vector regressors on the training data and
    scores each of them against the test data, to get an idea of the
    relative performance of each configuration.

    This is a generator. For each learner it yields a tuple of:
        (title, y_test, predictions for X_test)
    where the title includes the learner's R^2 score on the test data.
    '''

    # Use a support vector machine for regression
    from sklearn.svm import SVR

    # One entry per learner: the title template and the SVR settings
    configurations = (
        # Radial basis function kernel
        ('RBF Model ($R^2={:.3f}$)', dict(kernel='rbf', gamma=0.1)),
        # Linear kernel
        ('Linear Model ($R^2={:.3f}$)', dict(kernel='linear')),
        # Polynomial kernel
        ('Polynomial Model ($R^2={:.3f}$)', dict(kernel='poly', degree=2)),
    )

    for title_template, svr_options in configurations:
        learner = SVR(**svr_options)
        learner.fit(X_train, y_train)
        predictions = learner.predict(X_test)
        score = learner.score(X_test, y_test)
        yield title_template.format(score), y_test, predictions
206 | 
207 | 
208 | # =====================================================================
209 | 
210 | 
def plot(results):
    '''
    Create a plot comparing multiple learners.

    `results` is a list of tuples containing:
        (title, actual values, predicted values)

    All the elements in results will be plotted, one subplot per
    element. Nothing is drawn when `results` is empty.
    '''

    # plt.subplots() cannot create a grid with zero rows
    if not results:
        return

    # Using subplots to display the results on the same X axis.
    # squeeze=False makes plt.subplots() return a 2-D array of axes even
    # when there is a single result, so the loop below always works.
    fig, plts = plt.subplots(nrows=len(results), figsize=(8, 8), squeeze=False)

    # The window title is set through the figure manager because
    # FigureCanvas.set_window_title() was removed in Matplotlib 3.6.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Predicting data from ' + URL)

    # Show each element in the single column of subplots
    for subplot, (title, y, y_pred) in zip(plts[:, 0], results):
        # Configure each subplot to have no tick labels
        # (these are meaningless for the sample dataset)
        subplot.set_xticklabels(())
        subplot.set_yticklabels(())

        # Label the vertical axis
        subplot.set_ylabel('stock price')

        # Set the title for the subplot
        subplot.set_title(title)

        # Plot the actual data and the prediction
        subplot.plot(y, 'b', label='actual')
        subplot.plot(y_pred, 'r', label='predicted')

        # Shade the area between the predicted and the actual values
        subplot.fill_between(
            # Generate X values [0, 1, 2, ..., len(y)-2, len(y)-1]
            np.arange(0, len(y), 1),
            y,
            y_pred,
            color='r',
            alpha=0.2
        )

        # Mark the extent of the training data (drawn at the midpoint)
        subplot.axvline(len(y) // 2, linestyle='--', color='0', alpha=0.2)

        # Include a legend in each subplot
        subplot.legend()

    # Let matplotlib handle the subplot layout
    fig.tight_layout()

    # ==================================
    # Display the plot in interactive UI
    plt.show()

    # To save the plot to an image file, use savefig()
    #plt.savefig('plot.png')

    # Open the image file with the default image viewer
    #import subprocess
    #subprocess.Popen('plot.png', shell=True)

    # To save the plot to an image in memory, use BytesIO and savefig()
    # This can then be written to any stream-like object, such as a
    # file or HTTP response.
    #from io import BytesIO
    #img_stream = BytesIO()
    #plt.savefig(img_stream, format='png')
    #img_bytes = img_stream.getvalue()
    #print('Image is {} bytes - {!r}'.format(len(img_bytes), img_bytes[:8] + b'...'))

    # Closing the figure allows matplotlib to release the memory used.
    plt.close()
283 | 
284 | 
285 | # =====================================================================
286 | 
287 | 
if __name__ == '__main__':
    # Step 1: fetch the data set that URL points at
    print("Downloading data from {}".format(URL))
    frame = download_data()

    # Step 2: turn the frame into feature and label arrays
    sample_count = len(frame.index)
    attribute_count = len(frame.columns)
    print("Processing {} samples with {} attributes".format(sample_count, attribute_count))
    data_sets = get_features_and_labels(frame)

    # Step 3: run every regression learner; evaluate_learner is a
    # generator, so list() is what actually trains the learners
    print("Evaluating regression learners")
    results = list(evaluate_learner(*data_sets))

    # Step 4: show the comparison
    print("Plotting the results")
    plot(results)
304 | 


--------------------------------------------------------------------------------
/{{cookiecutter.app_name}}/{{cookiecutter.app_name}}.pyproj:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="4.0">
 3 |   <PropertyGroup>
 4 |     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
 5 |     <SchemaVersion>2.0</SchemaVersion>
 6 |     <ProjectTypeGuids>{6c0efafa-1a04-41b6-a6d7-511b90951b5b};{888888a0-9f3d-457c-b088-3a5042f75d52}</ProjectTypeGuids>
 7 |     <ProjectHome>.</ProjectHome>
 8 |     <StartupFile>regression.py</StartupFile>
 9 |     <SearchPath>
10 |     </SearchPath>
11 |     <WorkingDirectory>.</WorkingDirectory>
12 |     <OutputPath>.</OutputPath>
13 |   </PropertyGroup>
14 |   <PropertyGroup Condition=" '$(Configuration)' == 'Debug' ">
15 |     <DebugSymbols>true</DebugSymbols>
16 |     <EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
17 |   </PropertyGroup>
18 |   <PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
19 |     <DebugSymbols>true</DebugSymbols>
20 |     <EnableUnmanagedDebugging>false</EnableUnmanagedDebugging>
21 |   </PropertyGroup>
22 |   <ItemGroup>
23 |     <Compile Include="regression.py" />
24 |   </ItemGroup>
25 |   <ItemGroup>
26 |     <Content Include="environment.yml" />
27 |   </ItemGroup>
28 |   <Import Project="$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets" />
29 |   <!-- Uncomment the CoreCompile target to enable the Build command in
30 |        Visual Studio and specify your pre- and post-build commands in
31 |        the BeforeBuild and AfterBuild targets below. -->
32 |   <!--<Target Name="CoreCompile" />-->
33 |   <Target Name="BeforeBuild">
34 |   </Target>
35 |   <Target Name="AfterBuild">
36 |   </Target>
37 | </Project>
38 | 


--------------------------------------------------------------------------------