├── .gitignore ├── LICENSE ├── README.md ├── SECURITY.md ├── cookiecutter.json ├── hooks └── post_gen_project.py ├── stockvalues.csv └── {{cookiecutter.app_name}} ├── environment.yml ├── regression.py └── {{cookiecutter.app_name}}.pyproj /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # MSTest test Results 33 | [Tt]est[Rr]esult*/ 34 | [Bb]uild[Ll]og.* 35 | 36 | # NUNIT 37 | *.VisualState.xml 38 | TestResult.xml 39 | 40 | # Build Results of an ATL Project 41 | [Dd]ebugPS/ 42 | [Rr]eleasePS/ 43 | dlldata.c 44 | 45 | # .NET Core 46 | project.lock.json 47 | project.fragment.lock.json 48 | artifacts/ 49 | **/Properties/launchSettings.json 50 | 51 | *_i.c 52 | *_p.c 53 | *_i.h 54 | *.ilk 55 | *.meta 56 | *.obj 57 | *.pch 58 | *.pdb 59 | *.pgc 60 | *.pgd 61 | *.rsp 62 | *.sbr 63 | *.tlb 64 | *.tli 65 | *.tlh 66 | *.tmp 67 | *.tmp_proj 68 | *.log 69 | *.vspscc 70 | *.vssscc 71 | .builds 72 | *.pidb 73 | *.svclog 74 | *.scc 75 | 76 | # Chutzpah Test files 77 | _Chutzpah* 78 | 79 | # Visual C++ cache files 80 | ipch/ 81 | *.aps 82 | *.ncb 83 | *.opendb 84 | *.opensdf 85 | *.sdf 86 | *.cachefile 87 | *.VC.db 88 | *.VC.VC.opendb 89 | 90 | # Visual Studio profiler 91 | *.psess 92 | *.vsp 93 | *.vspx 94 | *.sap 95 | 
96 | # TFS 2012 Local Workspace 97 | $tf/ 98 | 99 | # Guidance Automation Toolkit 100 | *.gpState 101 | 102 | # ReSharper is a .NET coding add-in 103 | _ReSharper*/ 104 | *.[Rr]e[Ss]harper 105 | *.DotSettings.user 106 | 107 | # JustCode is a .NET coding add-in 108 | .JustCode 109 | 110 | # TeamCity is a build add-in 111 | _TeamCity* 112 | 113 | # DotCover is a Code Coverage Tool 114 | *.dotCover 115 | 116 | # Visual Studio code coverage results 117 | *.coverage 118 | *.coveragexml 119 | 120 | # NCrunch 121 | _NCrunch_* 122 | .*crunch*.local.xml 123 | nCrunchTemp_* 124 | 125 | # MightyMoose 126 | *.mm.* 127 | AutoTest.Net/ 128 | 129 | # Web workbench (sass) 130 | .sass-cache/ 131 | 132 | # Installshield output folder 133 | [Ee]xpress/ 134 | 135 | # DocProject is a documentation generator add-in 136 | DocProject/buildhelp/ 137 | DocProject/Help/*.HxT 138 | DocProject/Help/*.HxC 139 | DocProject/Help/*.hhc 140 | DocProject/Help/*.hhk 141 | DocProject/Help/*.hhp 142 | DocProject/Help/Html2 143 | DocProject/Help/html 144 | 145 | # Click-Once directory 146 | publish/ 147 | 148 | # Publish Web Output 149 | *.[Pp]ublish.xml 150 | *.azurePubxml 151 | # TODO: Comment the next line if you want to checkin your web deploy settings 152 | # but database connection strings (with potential passwords) will be unencrypted 153 | *.pubxml 154 | *.publishproj 155 | 156 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 157 | # checkin your Azure Web App publish settings, but sensitive information contained 158 | # in these scripts will be unencrypted 159 | PublishScripts/ 160 | 161 | # NuGet Packages 162 | *.nupkg 163 | # The packages folder can be ignored because of Package Restore 164 | **/packages/* 165 | # except build/, which is used as an MSBuild target. 
166 | !**/packages/build/ 167 | # Uncomment if necessary however generally it will be regenerated when needed 168 | #!**/packages/repositories.config 169 | # NuGet v3's project.json files produces more ignorable files 170 | *.nuget.props 171 | *.nuget.targets 172 | 173 | # Microsoft Azure Build Output 174 | csx/ 175 | *.build.csdef 176 | 177 | # Microsoft Azure Emulator 178 | ecf/ 179 | rcf/ 180 | 181 | # Windows Store app package directories and files 182 | AppPackages/ 183 | BundleArtifacts/ 184 | Package.StoreAssociation.xml 185 | _pkginfo.txt 186 | 187 | # Visual Studio cache files 188 | # files ending in .cache can be ignored 189 | *.[Cc]ache 190 | # but keep track of directories ending in .cache 191 | !*.[Cc]ache/ 192 | 193 | # Others 194 | ClientBin/ 195 | ~$* 196 | *~ 197 | *.dbmdl 198 | *.dbproj.schemaview 199 | *.jfm 200 | *.pfx 201 | *.publishsettings 202 | orleans.codegen.cs 203 | 204 | # Since there are multiple workflows, uncomment next line to ignore bower_components 205 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 206 | #bower_components/ 207 | 208 | # RIA/Silverlight projects 209 | Generated_Code/ 210 | 211 | # Backup & report files from converting an old project file 212 | # to a newer Visual Studio version. 
Backup files are not needed, 213 | # because we have git ;-) 214 | _UpgradeReport_Files/ 215 | Backup*/ 216 | UpgradeLog*.XML 217 | UpgradeLog*.htm 218 | 219 | # SQL Server files 220 | *.mdf 221 | *.ldf 222 | *.ndf 223 | 224 | # Business Intelligence projects 225 | *.rdl.data 226 | *.bim.layout 227 | *.bim_*.settings 228 | 229 | # Microsoft Fakes 230 | FakesAssemblies/ 231 | 232 | # GhostDoc plugin setting file 233 | *.GhostDoc.xml 234 | 235 | # Node.js Tools for Visual Studio 236 | .ntvs_analysis.dat 237 | node_modules/ 238 | 239 | # Typescript v1 declaration files 240 | typings/ 241 | 242 | # Visual Studio 6 build log 243 | *.plg 244 | 245 | # Visual Studio 6 workspace options file 246 | *.opt 247 | 248 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 249 | *.vbw 250 | 251 | # Visual Studio LightSwitch build output 252 | **/*.HTMLClient/GeneratedArtifacts 253 | **/*.DesktopClient/GeneratedArtifacts 254 | **/*.DesktopClient/ModelManifest.xml 255 | **/*.Server/GeneratedArtifacts 256 | **/*.Server/ModelManifest.xml 257 | _Pvt_Extensions 258 | 259 | # Paket dependency manager 260 | .paket/paket.exe 261 | paket-files/ 262 | 263 | # FAKE - F# Make 264 | .fake/ 265 | 266 | # JetBrains Rider 267 | .idea/ 268 | *.sln.iml 269 | 270 | # CodeRush 271 | .cr/ 272 | 273 | # Python Tools for Visual Studio (PTVS) 274 | __pycache__/ 275 | *.pyc 276 | 277 | # Cake - Uncomment if you are using it 278 | # tools/** 279 | # !tools/packages.config 280 | 281 | # Telerik's JustMock configuration file 282 | *.jmconfig 283 | 284 | # BizTalk build output 285 | *.btp.cs 286 | *.btm.cs 287 | *.odx.cs 288 | *.xsd.cs 289 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-sklearn-regression-cookiecutter 2 | 3 | A [Cookiecutter](http://cookiecutter.readthedocs.io/) template for a 4 | [Python](https://www.python.org/) application that demonstrates the use of 5 | [scikit-learn](http://scikit-learn.org/) regression learners. 6 | 7 | ## Using this template 8 | 9 | 1. [Install Cookiecutter](http://cookiecutter.readthedocs.io/en/latest/installation.html) 10 | 2. `cookiecutter gh:Microsoft/python-sklearn-regression-cookiecutter` 11 | (or `cookiecutter https://github.com/Microsoft/python-sklearn-regression-cookiecutter.git` 12 | if you prefer) 13 | 3. Fill in the Cookiecutter items (see below as to what each item 14 | represents) 15 | 4. 
Install required Python packages as needed (these will vary based on which parts of the code you enable). 16 | 17 | ### Cookiecutter items 18 | 19 | - `app_name`: the name of the folder/project to create 20 | - `create_vs_project`: `y` to create a Visual Studio project file (.pyproj) 21 | 22 | # Contributing 23 | 24 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 25 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 26 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 27 | 28 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 29 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 30 | provided by the bot. You will only need to do this once across all repos using our CLA. 31 | 32 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 33 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 34 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 35 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 
6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. 
#!/usr/bin/env python
import os


def delete_file(filepath):
    '''
    Remove `filepath`, interpreted relative to the resolved (symlink-free)
    current working directory.

    Any error raised by os.remove (for example when the file does not
    exist) propagates to the caller.
    '''
    base_dir = os.path.realpath(os.path.curdir)
    target = os.path.join(base_dir, filepath)
    os.remove(target)


if __name__ == '__main__':
    # The quoted values below are substituted by Cookiecutter before the
    # hook runs. The project file is kept only when the answer was "y"
    # (case-insensitive).
    keep_vs_project = '{{cookiecutter.create_vs_project}}'.lower() == 'y'
    if not keep_vs_project:
        delete_file('{{cookiecutter.app_name}}.pyproj')
'''
This script performs the basic process for applying a machine learning
algorithm to a dataset using Python libraries.

The four steps are:
   1. Download a dataset (using pandas)
   2. Process the numeric data (using numpy)
   3. Train and evaluate learners (using scikit-learn)
   4. Plot and compare results (using matplotlib)


The data is downloaded from URL, which is defined below. As is normal
for machine learning problems, the nature of the source data affects
the entire solution. When you change URL to refer to your own data, you
will need to review the data processing steps to ensure they remain
correct.

============
Example Data
============
The example is from https://web.archive.org/web/20180322001455/http://mldata.org/repository/data/viewslug/stockvalues/
It contains stock prices and the values of three indices for each day
over a five year period. See the linked page for more details about
this data set.

This script uses regression learners to predict the stock price for
the second half of this period based on the values of the indices. This
is a naive approach, and a more robust method would use each prediction
as an input for the next, and would predict relative rather than
absolute values.
'''

# Remember to update the script for the new data when you change this URL
URL = "https://raw.githubusercontent.com/microsoft/python-sklearn-regression-cookiecutter/master/stockvalues.csv"

# This is the column of the sample data to predict.
# Try changing it to other integers between 1 and 155.
TARGET_COLUMN = 32

# Uncomment this call when using matplotlib to generate images
# rather than displaying interactive UI.
#import matplotlib
#matplotlib.use('Agg')

from pandas import read_table
import numpy as np
import matplotlib.pyplot as plt

try:
    # [OPTIONAL] Seaborn makes plots nicer
    # (seaborn 0.8 and later only restyle plots after seaborn.set() is called)
    import seaborn
except ImportError:
    pass

# =====================================================================

def download_data():
    '''
    Downloads the data for this script into a pandas DataFrame.

    Reads the comma separated file at URL without a header row, so the
    columns are labelled by position, and returns only columns 156, 157
    and 158 followed by TARGET_COLUMN (the value to predict, last).
    '''

    # If your data is in an Excel file, install 'xlrd' and use
    # pandas.read_excel instead of read_table
    #from pandas import read_excel
    #frame = read_excel(URL)

    # If your data is in a private Azure blob, install 'azure-storage' and use
    # BlockBlobService.get_blob_to_path() with read_table() or read_excel()
    #from azure.storage.blob import BlockBlobService
    #service = BlockBlobService(ACCOUNT_NAME, ACCOUNT_KEY)
    #service.get_blob_to_path(container_name, blob_name, 'my_data.csv')
    #frame = read_table('my_data.csv', ...

    frame = read_table(
        URL,

        # Uncomment if the file needs to be decompressed
        #compression='gzip',
        #compression='bz2',

        # Specify the file encoding
        # Latin-1 is common for data from US sources
        encoding='latin-1',
        #encoding='utf-8',  # UTF-8 is also common

        # Specify the separator in the data
        sep=',',            # comma separated values
        #sep='\t',          # tab separated values
        #sep=' ',           # space separated values

        # Ignore spaces after the separator
        skipinitialspace=True,

        # Generate row labels from each row number
        index_col=None,
        #index_col=0,       # use the first column as row labels
        #index_col=-1,      # use the last column as row labels

        # Generate column headers row from each column number
        header=None,
        #header=0,          # use the first line as headers

        # Use manual headers and skip the first row in the file
        #header=0,
        #names=['col1', 'col2', ...],
    )

    # Return the entire frame
    #return frame

    # Return a subset of the columns
    return frame[[156, 157, 158, TARGET_COLUMN]]


# =====================================================================


def get_features_and_labels(frame):
    '''
    Transforms and scales the input data and returns numpy arrays for
    training and testing inputs and targets.

    `frame` must hold numeric values with the target in its last column.

    Returns the tuple (X_train, X_test, y_train, y_test). The training
    arrays are a 50% sample of the rows; the test arrays contain every
    row, including those used for training.
    '''

    # Replace missing values with 0.0
    # or we can use scikit-learn to calculate missing values below
    #frame[frame.isnull()] = 0.0

    # Convert values to floats
    # (np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24, so the explicit 64-bit type is used instead)
    arr = np.array(frame, dtype=np.float64)

    # Normalize the entire data set
    # NOTE(review): this scaler is fitted on every row, including rows that
    # are not part of the training sample selected below.
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    arr = MinMaxScaler().fit_transform(arr)

    # Use the last column as the target value
    X, y = arr[:, :-1], arr[:, -1]
    # To use the first column instead, change the index value
    #X, y = arr[:, 1:], arr[:, 0]

    # Use 50% of the data for training, but we will test against the
    # entire set
    # NOTE(review): train_test_split shuffles by default, so the training
    # rows are a random half rather than the first half of the period that
    # the module docstring and the marker drawn in plot() describe. Pass
    # shuffle=False if a chronological split is intended - confirm.
    from sklearn.model_selection import train_test_split
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.5)
    X_test, y_test = X, y

    # If values are missing we could impute them from the training data
    # (SimpleImputer replaced the Imputer class removed from
    # sklearn.preprocessing)
    #from sklearn.impute import SimpleImputer
    #imputer = SimpleImputer(strategy='mean')
    #imputer.fit(X_train)
    #X_train = imputer.transform(X_train)
    #X_test = imputer.transform(X_test)

    # Normalize the attribute values to mean=0 and variance=1
    scaler = StandardScaler()
    # To scale to a specified range, use MinMaxScaler
    #scaler = MinMaxScaler(feature_range=(0, 1))

    # Fit the scaler based on the training data, then apply the same
    # scaling to both training and test sets.
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Return the training and test sets
    return X_train, X_test, y_train, y_test


# =====================================================================


def evaluate_learner(X_train, X_test, y_train, y_test):
    '''
    Run multiple times with different algorithms to get an idea of the
    relative performance of each configuration.

    This is a generator. For each learner it yields a tuple containing:
        (title, expected values, actual values)
    where the title includes the R^2 score measured on the test set.
    '''

    # Use a support vector machine for regression
    from sklearn.svm import SVR

    learners = (
        # Train using a radial basis function
        ('RBF Model', SVR(kernel='rbf', gamma=0.1)),
        # Train using a linear kernel
        ('Linear Model', SVR(kernel='linear')),
        # Train using a polynomial kernel
        ('Polynomial Model', SVR(kernel='poly', degree=2)),
    )

    # Each learner is trained only when the caller asks for its result
    for name, svr in learners:
        svr.fit(X_train, y_train)
        y_pred = svr.predict(X_test)
        r_2 = svr.score(X_test, y_test)
        yield '{} ($R^2={:.3f}$)'.format(name, r_2), y_test, y_pred


# =====================================================================


def plot(results):
    '''
    Create a plot comparing multiple learners.

    `results` is an iterable of tuples containing:
        (title, expected values, actual values)

    All the elements in results will be plotted, one subplot per
    element. Nothing is drawn when `results` is empty.
    '''

    # Accept any iterable (such as the evaluate_learner generator)
    results = list(results)
    if not results:
        # plt.subplots() cannot create a figure with zero rows
        return

    # Using subplots to display the results in a single column.
    # squeeze=False always returns a 2D array of axes, so a single
    # result is handled the same way as several.
    fig, plts = plt.subplots(nrows=len(results), figsize=(8, 8), squeeze=False)

    # The window title belongs to the figure manager; the canvas-level
    # set_window_title() was removed in Matplotlib 3.6.
    manager = fig.canvas.manager
    if manager is not None:
        manager.set_window_title('Predicting data from ' + URL)

    # Show each element in the plots returned from plt.subplots()
    for subplot, (title, y, y_pred) in zip(plts[:, 0], results):
        # Configure each subplot to have no tick marks
        # (these are meaningless for the sample dataset)
        subplot.set_xticklabels(())
        subplot.set_yticklabels(())

        # Label the vertical axis
        subplot.set_ylabel('stock price')

        # Set the title for the subplot
        subplot.set_title(title)

        # Plot the actual data and the prediction
        subplot.plot(y, 'b', label='actual')
        subplot.plot(y_pred, 'r', label='predicted')

        # Shade the area between the predicted and the actual values
        subplot.fill_between(
            # Generate X values [0, 1, 2, ..., len(y)-2, len(y)-1]
            np.arange(0, len(y), 1),
            y,
            y_pred,
            color='r',
            alpha=0.2
        )

        # Mark the extent of the training data
        subplot.axvline(len(y) // 2, linestyle='--', color='0', alpha=0.2)

        # Include a legend in each subplot
        subplot.legend()

    # Let matplotlib handle the subplot layout
    fig.tight_layout()

    # ==================================
    # Display the plot in interactive UI
    plt.show()

    # To save the plot to an image file, use savefig()
    #plt.savefig('plot.png')

    # Open the image file with the default image viewer
    #import subprocess
    #subprocess.Popen('plot.png', shell=True)

    # To save the plot to an image in memory, use BytesIO and savefig()
    # This can then be written to any stream-like object, such as a
    # file or HTTP response.
    #from io import BytesIO
    #img_stream = BytesIO()
    #plt.savefig(img_stream, format='png')
    #img_bytes = img_stream.getvalue()
    #print('Image is {} bytes - {!r}'.format(len(img_bytes), img_bytes[:8] + b'...'))

    # Closing the figure allows matplotlib to release the memory used.
    plt.close(fig)


# =====================================================================


def main():
    '''
    Runs the four steps of the script: download, process, evaluate, plot.
    '''
    # Download the data set from URL
    print("Downloading data from {}".format(URL))
    frame = download_data()

    # Process data into feature and label arrays
    print("Processing {} samples with {} attributes".format(len(frame.index), len(frame.columns)))
    X_train, X_test, y_train, y_test = get_features_and_labels(frame)

    # Evaluate multiple regression learners on the data
    print("Evaluating regression learners")
    results = list(evaluate_learner(X_train, X_test, y_train, y_test))

    # Display the results
    print("Plotting the results")
    plot(results)


if __name__ == '__main__':
    main()