├── README.md
├── examples
    ├── boston
    │   ├── boston_model.pkl
    │   ├── boston_model.py
    │   ├── boston_model.pyc
    │   └── fit_boston.py
    ├── boston2
    │   ├── boston2_model.pkl
    │   ├── boston2_model.py
    │   ├── boston2_model.pyc
    │   └── fit_boston2.py
    ├── boston3
    │   ├── boston3_model.pkl
    │   ├── boston3_model.py
    │   ├── boston3_model.pyc
    │   └── fit_boston3.py
    ├── models
    │   ├── boston2_model.pkl
    │   ├── boston2_model.py
    │   ├── boston2_model.pyc
    │   ├── boston3_model.pkl
    │   ├── boston3_model.py
    │   ├── boston3_model.pyc
    │   ├── boston_model.pkl
    │   ├── boston_model.py
    │   ├── boston_model.pyc
    │   └── bostonmodel.pkl
    └── requests
    │   ├── curl_boston.sh
    │   ├── curl_boston2.sh
    │   ├── curl_load_boston3.sh
    │   ├── curl_stats.sh
    │   └── test_boston.sh
├── export_WEBSCIKITMODELSPATH.sh
├── server
    ├── requesthandler.py
    ├── requesthandler.pyc
    ├── webscikit.conf
    └── webserver.py
├── sketch.txt
└── tools
    ├── check_model.py
    └── create_project.py


/README.md:
--------------------------------------------------------------------------------
 1 | # webscikit
 2 | Webscikit is a set of tools for running an HTTP server as a JSON web service for scikit-learn predictions. It comes with example models: boston, boston2 and boston3.
 3 | 
 4 | It is a work in progress, so bug reports and feature requests are highly appreciated!
 5 | 
 6 | Features:
 7 | 
 8 |   * The server can handle multiple models. The models and URLs are registered in webscikit.conf.
 9 | 
10 |   * Multiple data scientists can work locally on their own models and later deploy them to the server.
11 |   
12 |   * The models can be deployed while the server is online.
13 |   
14 |   * Each model can save additional metadata needed to transform and predict new data.
15 |   
16 |   * You can easily start a new project with create_project.py newProjectName
17 |   
18 |   * The directory examples/ contains example models (boston, boston2, etc.) and example requests to the server.
19 | 
20 | How does it work:
21 |   * The model gets fitted by the data scientist, gzip-pickled and then uploaded to the server.
22 |   * HTTP clients make POST requests that send JSON to transform / predict new data, and they get a JSON response back.
23 | 
24 | 
25 | If you want to run the examples:
26 | 
27 |   * source export_WEBSCIKITMODELSPATH.sh
28 | 
29 |   * cd server/
30 | 
31 |   * ./webserver.py
32 |  
33 |   * cd ../examples/requests/
34 | 
35 |   * ./curl_boston.sh
36 | 
37 |   * ./curl_boston2.sh
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/examples/boston/boston_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/boston/boston_model.pkl


--------------------------------------------------------------------------------
/examples/boston/boston_model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
 4 | Instead import from PYTHONPATH.
 5 | """
 6 | import pandas as pd, cPickle as pickle, gzip
 7 | 
 8 | class BostonModel(object):
 9 |     """Example model wrapping a metadata dict. Please do not change the class name: save() derives the pickle file name from it."""
10 | 
11 |     def __init__(self,metadata):
12 |         """ Do not change this method, as it initializes the instance with metadata. Raises TypeError unless metadata is exactly a dict."""
13 |         if type(metadata) is dict:
14 |             self.metadata = metadata
15 |         else:
16 |             raise TypeError("metadata must be of type dict, but is of type %s" % type(metadata))
17 | 
18 |     def predict(self,new_data):
19 |         """ Please change this method. It is called after self.transform(new_data), so you can assume
20 |             that new_data is already transformed for prediction. new_data must be a pandas.DataFrame; anything else raises TypeError.
21 |             If you need an sklearn regressor / classifier, please supply it when the model is instantiated with metadata:
22 |                 In fit_boston.py:
23 |                        metadata = { 'mySkLearnRegressor' : myRandomForestRegressor,
24 |                                     'mySkLearnClassifier': myRandomForestClassifier,
25 |                                     'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
26 |                                     'someConstant' : 3.14
27 |                                   }
28 |                        boston_model = BostonModel(metadata)
29 |                 Then you can access them in this method with (the keys are case-sensitive):
30 |                         mySkLearnRegressor = self.metadata["mySkLearnRegressor"]
31 |                         someConstant = self.metadata["someConstant"]
32 |             Also make sure that you convert your prediction to a pandas.DataFrame, as this example does.
33 |         """
34 |         if not type(new_data) is pd.DataFrame:
35 |             raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %s" % type(new_data))
36 |         
37 |         # start implementing the prediction here
38 |         rf = self.metadata["mySkLearnRegressor"]       
39 |         prediction = rf.predict(new_data)
40 |         myConstant = self.metadata["someConstant"]
41 |         new_prediction = myConstant * prediction + 2.0
42 |         return pd.DataFrame(new_prediction)
43 | 
44 |     def transform(self,new_data):
45 |         """ You can override this method if the new_data passed per POST as JSON needs to be transformed before
46 |             self.predict can be called. By default it returns new_data unchanged. If new_data is already transformed by the client issuing the POST request,
47 |             then you may leave this method unchanged. If you need access to metadata, read it from self.metadata as in self.predict. Raises TypeError if new_data is not a pandas.DataFrame.
48 |         """
49 |         if not type(new_data) is pd.DataFrame:
50 |             raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %s" % type(new_data))
51 |         # start changing the method here, if you want:
52 |         return new_data
53 | 
54 |     def transform_predict(self,new_data):
55 |         """ Do not change this method, as it will be called on the server when a POST request is issued by a client. Returns self.predict(self.transform(new_data)).
56 |         """
57 |         return self.predict(self.transform(new_data))
58 | 
59 | 
60 |     def save(self):
61 |         """Do not change this method.
62 |            Gzip-pickles this instance to "<name>_model.pkl" in the current working directory,
63 |            where <name> is the lower-cased class name with "Model" removed (here: "boston_model.pkl").
64 |         """
65 |         model_name = self.__class__.__name__.replace("Model","").lower()
66 |         filename = model_name + "_model.pkl"
67 |         file = gzip.GzipFile(filename, 'wb')
68 |         pickle.dump(self, file, -1)  # -1 selects the highest pickle protocol available
69 |         file.close()  # NOTE(review): not reached if pickle.dump raises; consider try/finally
70 | 


--------------------------------------------------------------------------------
/examples/boston/boston_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/boston/boston_model.pyc


--------------------------------------------------------------------------------
/examples/boston/fit_boston.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | """
 3 | This is an example Python script that fits the boston model.
 4 | Please do not locally import anything other than BostonModel from boston_model,
 5 | otherwise pickling will not work and you cannot deploy the model.
 6 | """
 7 | import sys, datetime
 8 | sys.path.insert(1,"/usr/local/lib/python2.7/dist-packages/")  # hard-coded Python 2.7 dist-packages path; adjust for your environment
 9 | from boston_model import BostonModel
10 | from sklearn.datasets import load_boston
11 | 
12 | # Load scikit-learn's random forest regressor and classifier
13 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
14 | 
15 | # Load pandas
16 | import pandas as pd
17 | 
18 | import pickle, joblib  # NOTE(review): neither name is used in this script
19 | 
20 | # Load numpy
21 | import numpy as np
22 | 
23 | # Seed NumPy's global random generator
24 | np.random.seed(0)
25 | 
26 | boston = load_boston()  # NOTE(review): load_boston was removed in scikit-learn 1.2 - confirm the installed version
27 | df = pd.DataFrame(boston.data, columns=boston.feature_names)
28 | y = boston.target
29 | myRandomForestRegressor = RandomForestRegressor(random_state=0)
30 | myRandomForestRegressor.fit(df, y)
31 | 
32 | myRandomForestClassifier = RandomForestClassifier(random_state=0)  # never fitted in this script
33 | 
34 | myDataFrame = pd.DataFrame([1,2,3])
35 | 
36 | # Please replace metadata with the data and regressors / classifiers you need later to transform and predict new data:
37 | 
38 | metadata = { 
39 |              'version': 1,
40 |              'created_at' : datetime.datetime.now(),
41 |              'mySkLearnRegressor': myRandomForestRegressor,
42 |              'mySkLearnClassifier' : myRandomForestClassifier,
43 |              'someConstant' : 3.14,
44 |              'someAdditionalDataFrameNeededForPrediction': myDataFrame
45 |              }
46 | 
47 | # Please do not change this part of the code:
48 | boston_model = BostonModel(metadata)
49 | boston_model.save()  # writes boston_model.pkl into the current working directory
50 | 


--------------------------------------------------------------------------------
/examples/boston2/boston2_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/boston2/boston2_model.pkl


--------------------------------------------------------------------------------
/examples/boston2/boston2_model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
 4 | Instead import from PYTHONPATH.
 5 | """
 6 | import pandas as pd, cPickle as pickle, gzip
 7 | 
 8 | class Boston2Model(object):
 9 |     """Example model wrapping a metadata dict. Please do not change the class name: save() derives the pickle file name from it."""
10 | 
11 |     def __init__(self,metadata):
12 |         """ Do not change this method, as it initializes the instance with metadata. Raises TypeError unless metadata is exactly a dict."""
13 |         if type(metadata) is dict:
14 |             self.metadata = metadata
15 |         else:
16 |             raise TypeError("metadata must be of type dict, but is of type %s" % type(metadata))
17 | 
18 |     def predict(self,new_data):
19 |         """ Please change this method. It is called after self.transform(new_data), so you can assume
20 |             that new_data is already transformed for prediction. new_data must be a pandas.DataFrame; anything else raises TypeError.
21 |             If you need an sklearn regressor / classifier, please supply it when the model is instantiated with metadata:
22 |                 In fit_boston2.py:
23 |                        metadata = { 'mySkLearnRegressor' : myRandomForestRegressor,
24 |                                     'mySkLearnClassifier': myRandomForestClassifier,
25 |                                     'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
26 |                                     'someConstant' : 3.14
27 |                                   }
28 |                        boston2_model = Boston2Model(metadata)
29 |                 Then you can access them in this method with (the keys are case-sensitive):
30 |                         mySkLearnRegressor = self.metadata["mySkLearnRegressor"]
31 |                         someConstant = self.metadata["someConstant"]
32 |             Also make sure that you convert your prediction to a pandas.DataFrame, as this example does.
33 |         """
34 |         if not type(new_data) is pd.DataFrame:
35 |             raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %s" % type(new_data))
36 |         
37 |         # start implementing the prediction here
38 |         rf = self.metadata["mySkLearnRegressor"]       
39 |         prediction = rf.predict(new_data)
40 |         myConstant = self.metadata["someConstant"]
41 |         new_prediction = myConstant * prediction + 2.0
42 |         return pd.DataFrame(new_prediction)
43 | 
44 |     def transform(self,new_data):
45 |         """ You can override this method if the new_data passed per POST as JSON needs to be transformed before
46 |             self.predict can be called. By default it returns new_data unchanged. If new_data is already transformed by the client issuing the POST request,
47 |             then you may leave this method unchanged. If you need access to metadata, read it from self.metadata as in self.predict. Raises TypeError if new_data is not a pandas.DataFrame.
48 |         """
49 |         if not type(new_data) is pd.DataFrame:
50 |             raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %s" % type(new_data))
51 |         # start changing the method here, if you want:
52 |         return new_data
53 | 
54 |     def transform_predict(self,new_data):
55 |         """ Do not change this method, as it will be called on the server when a POST request is issued by a client. Returns self.predict(self.transform(new_data)).
56 |         """
57 |         return self.predict(self.transform(new_data))
58 | 
59 | 
60 |     def save(self):
61 |         """Do not change this method.
62 |            Gzip-pickles this instance to "<name>_model.pkl" in the current working directory,
63 |            where <name> is the lower-cased class name with "Model" removed (here: "boston2_model.pkl").
64 |         """
65 |         model_name = self.__class__.__name__.replace("Model","").lower()
66 |         filename = model_name + "_model.pkl"
67 |         file = gzip.GzipFile(filename, 'wb')
68 |         pickle.dump(self, file, -1)  # -1 selects the highest pickle protocol available
69 |         file.close()  # NOTE(review): not reached if pickle.dump raises; consider try/finally
70 | 


--------------------------------------------------------------------------------
/examples/boston2/boston2_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/boston2/boston2_model.pyc


--------------------------------------------------------------------------------
/examples/boston2/fit_boston2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | """
 3 | This is an example Python script that fits the boston2 model.
 4 | Please do not locally import anything other than Boston2Model from boston2_model,
 5 | otherwise pickling will not work and you cannot deploy the model.
 6 | """
 7 | import sys, datetime
 8 | sys.path.insert(1,"/usr/local/lib/python2.7/dist-packages/")  # hard-coded Python 2.7 dist-packages path; adjust for your environment
 9 | from boston2_model import Boston2Model
10 | from sklearn.datasets import load_boston
11 | 
12 | # Load scikit-learn's random forest regressor and classifier
13 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
14 | 
15 | # Load pandas
16 | import pandas as pd
17 | 
18 | import pickle, joblib  # NOTE(review): neither name is used in this script
19 | 
20 | # Load numpy
21 | import numpy as np
22 | 
23 | # Seed NumPy's global random generator
24 | np.random.seed(0)
25 | 
26 | boston = load_boston()  # NOTE(review): load_boston was removed in scikit-learn 1.2 - confirm the installed version
27 | df = pd.DataFrame(boston.data, columns=boston.feature_names)
28 | y = boston.target
29 | myRandomForestRegressor = RandomForestRegressor(random_state=0)
30 | myRandomForestRegressor.fit(df, y)
31 | 
32 | myRandomForestClassifier = RandomForestClassifier(random_state=0)  # never fitted in this script
33 | 
34 | myDataFrame = pd.DataFrame([1,2,3])
35 | 
36 | # Please replace metadata with the data and regressors / classifiers you need later to transform and predict new data:
37 | 
38 | metadata = { 
39 |              'version': 1,
40 |              'created_at' : datetime.datetime.now(),
41 |              'mySkLearnRegressor': myRandomForestRegressor,
42 |              'mySkLearnClassifier' : myRandomForestClassifier,
43 |              'someConstant' : 3.14,
44 |              'someAdditionalDataFrameNeededForPrediction': myDataFrame
45 |              }
46 | 
47 | # Please do not change this part of the code:
48 | boston2_model = Boston2Model(metadata)
49 | boston2_model.save()  # writes boston2_model.pkl into the current working directory
50 | 


--------------------------------------------------------------------------------
/examples/boston3/boston3_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/boston3/boston3_model.pkl


--------------------------------------------------------------------------------
/examples/boston3/boston3_model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
 4 | Instead import from PYTHONPATH.
 5 | """
 6 | import pandas as pd, cPickle as pickle, gzip
 7 | 
 8 | class Boston3Model(object):
 9 |     """Example model wrapping a metadata dict. Please do not change the class name: save() derives the pickle file name from it."""
10 | 
11 |     def __init__(self,metadata):
12 |         """ Do not change this method, as it initializes the instance with metadata. Raises TypeError unless metadata is exactly a dict."""
13 |         if type(metadata) is dict:
14 |             self.metadata = metadata
15 |         else:
16 |             raise TypeError("metadata must be of type dict, but is of type %s" % type(metadata))
17 | 
18 |     def predict(self,new_data):
19 |         """ Please change this method. It is called after self.transform(new_data), so you can assume
20 |             that new_data is already transformed for prediction. new_data must be a pandas.DataFrame; anything else raises TypeError.
21 |             If you need an sklearn regressor / classifier, please supply it when the model is instantiated with metadata:
22 |                 In fit_boston3.py:
23 |                        metadata = { 'mySkLearnRegressor' : myRandomForestRegressor,
24 |                                     'mySkLearnClassifier': myRandomForestClassifier,
25 |                                     'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
26 |                                     'someConstant' : 3.14
27 |                                   }
28 |                        boston3_model = Boston3Model(metadata)
29 |                 Then you can access them in this method with (the keys are case-sensitive):
30 |                         mySkLearnRegressor = self.metadata["mySkLearnRegressor"]
31 |                         someConstant = self.metadata["someConstant"]
32 |             Also make sure that you convert your prediction to a pandas.DataFrame, as this example does.
33 |         """
34 |         if not type(new_data) is pd.DataFrame:
35 |             raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %s" % type(new_data))
36 |         
37 |         # start implementing the prediction here
38 |         rf = self.metadata["mySkLearnRegressor"]       
39 |         prediction = rf.predict(new_data)
40 |         myConstant = self.metadata["someConstant"]
41 |         new_prediction = myConstant * prediction + 2.0
42 |         return pd.DataFrame(new_prediction)
43 | 
44 |     def transform(self,new_data):
45 |         """ You can override this method if the new_data passed per POST as JSON needs to be transformed before
46 |             self.predict can be called. By default it returns new_data unchanged. If new_data is already transformed by the client issuing the POST request,
47 |             then you may leave this method unchanged. If you need access to metadata, read it from self.metadata as in self.predict. Raises TypeError if new_data is not a pandas.DataFrame.
48 |         """
49 |         if not type(new_data) is pd.DataFrame:
50 |             raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %s" % type(new_data))
51 |         # start changing the method here, if you want:
52 |         return new_data
53 | 
54 |     def transform_predict(self,new_data):
55 |         """ Do not change this method, as it will be called on the server when a POST request is issued by a client. Returns self.predict(self.transform(new_data)).
56 |         """
57 |         return self.predict(self.transform(new_data))
58 | 
59 | 
60 |     def save(self):
61 |         """Do not change this method.
62 |            Gzip-pickles this instance to "<name>_model.pkl" in the current working directory,
63 |            where <name> is the lower-cased class name with "Model" removed (here: "boston3_model.pkl").
64 |         """
65 |         model_name = self.__class__.__name__.replace("Model","").lower()
66 |         filename = model_name + "_model.pkl"
67 |         file = gzip.GzipFile(filename, 'wb')
68 |         pickle.dump(self, file, -1)  # -1 selects the highest pickle protocol available
69 |         file.close()  # NOTE(review): not reached if pickle.dump raises; consider try/finally
70 | 


--------------------------------------------------------------------------------
/examples/boston3/boston3_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/boston3/boston3_model.pyc


--------------------------------------------------------------------------------
/examples/boston3/fit_boston3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | """
 3 | This is an example Python script that fits the boston3 model.
 4 | Please do not locally import anything other than Boston3Model from boston3_model,
 5 | otherwise pickling will not work and you cannot deploy the model.
 6 | """
 7 | import sys, datetime
 8 | sys.path.insert(1,"/usr/local/lib/python2.7/dist-packages/")  # hard-coded Python 2.7 dist-packages path; adjust for your environment
 9 | from boston3_model import Boston3Model
10 | from sklearn.datasets import load_boston
11 | 
12 | # Load scikit-learn's random forest regressor and classifier
13 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
14 | 
15 | # Load pandas
16 | import pandas as pd
17 | 
18 | import pickle, joblib  # NOTE(review): neither name is used in this script
19 | 
20 | # Load numpy
21 | import numpy as np
22 | 
23 | # Seed NumPy's global random generator
24 | np.random.seed(0)
25 | 
26 | boston = load_boston()  # NOTE(review): load_boston was removed in scikit-learn 1.2 - confirm the installed version
27 | df = pd.DataFrame(boston.data, columns=boston.feature_names)
28 | y = boston.target
29 | myRandomForestRegressor = RandomForestRegressor(random_state=0)
30 | myRandomForestRegressor.fit(df, y)
31 | 
32 | myRandomForestClassifier = RandomForestClassifier(random_state=0)  # never fitted in this script
33 | 
34 | myDataFrame = pd.DataFrame([1,2,3])
35 | 
36 | # Please replace metadata with the data and regressors / classifiers you need later to transform and predict new data:
37 | 
38 | metadata = { 
39 |              'version': 1,
40 |              'created_at' : datetime.datetime.now(),
41 |              'mySkLearnRegressor': myRandomForestRegressor,
42 |              'mySkLearnClassifier' : myRandomForestClassifier,
43 |              'someConstant' : 3.14,
44 |              'someAdditionalDataFrameNeededForPrediction': myDataFrame
45 |              }
46 | 
47 | # Please do not change this part of the code:
48 | boston3_model = Boston3Model(metadata)
49 | boston3_model.save()  # writes boston3_model.pkl into the current working directory
50 | 


--------------------------------------------------------------------------------
/examples/models/boston2_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/boston2_model.pkl


--------------------------------------------------------------------------------
/examples/models/boston2_model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
 4 | Instead import from PYTHONPATH.
 5 | """
 6 | import pandas as pd, cPickle as pickle, gzip
 7 | 
 8 | class Boston2Model(object):
 9 |     """Example model wrapping a metadata dict. Please do not change the class name: save() derives the pickle file name from it."""
10 | 
11 |     def __init__(self,metadata):
12 |         """ Do not change this method, as it initializes the instance with metadata. Raises TypeError unless metadata is exactly a dict."""
13 |         if type(metadata) is dict:
14 |             self.metadata = metadata
15 |         else:
16 |             raise TypeError("metadata must be of type dict, but is of type %s" % type(metadata))
17 | 
18 |     def predict(self,new_data):
19 |         """ Please change this method. It is called after self.transform(new_data), so you can assume
20 |             that new_data is already transformed for prediction. new_data must be a pandas.DataFrame; anything else raises TypeError.
21 |             If you need an sklearn regressor / classifier, please supply it when the model is instantiated with metadata:
22 |                 In fit_boston2.py:
23 |                        metadata = { 'mySkLearnRegressor' : myRandomForestRegressor,
24 |                                     'mySkLearnClassifier': myRandomForestClassifier,
25 |                                     'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
26 |                                     'someConstant' : 3.14
27 |                                   }
28 |                        boston2_model = Boston2Model(metadata)
29 |                 Then you can access them in this method with (the keys are case-sensitive):
30 |                         mySkLearnRegressor = self.metadata["mySkLearnRegressor"]
31 |                         someConstant = self.metadata["someConstant"]
32 |             Also make sure that you convert your prediction to a pandas.DataFrame, as this example does.
33 |         """
34 |         if not type(new_data) is pd.DataFrame:
35 |             raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %s" % type(new_data))
36 |         
37 |         # start implementing the prediction here
38 |         rf = self.metadata["mySkLearnRegressor"]       
39 |         prediction = rf.predict(new_data)
40 |         myConstant = self.metadata["someConstant"]
41 |         new_prediction = myConstant * prediction + 2.0
42 |         return pd.DataFrame(new_prediction)
43 | 
44 |     def transform(self,new_data):
45 |         """ You can override this method if the new_data passed per POST as JSON needs to be transformed before
46 |             self.predict can be called. By default it returns new_data unchanged. If new_data is already transformed by the client issuing the POST request,
47 |             then you may leave this method unchanged. If you need access to metadata, read it from self.metadata as in self.predict. Raises TypeError if new_data is not a pandas.DataFrame.
48 |         """
49 |         if not type(new_data) is pd.DataFrame:
50 |             raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %s" % type(new_data))
51 |         # start changing the method here, if you want:
52 |         return new_data
53 | 
54 |     def transform_predict(self,new_data):
55 |         """ Do not change this method, as it will be called on the server when a POST request is issued by a client. Returns self.predict(self.transform(new_data)).
56 |         """
57 |         return self.predict(self.transform(new_data))
58 | 
59 | 
60 |     def save(self):
61 |         """Do not change this method.
62 |            Gzip-pickles this instance to "<name>_model.pkl" in the current working directory,
63 |            where <name> is the lower-cased class name with "Model" removed (here: "boston2_model.pkl").
64 |         """
65 |         model_name = self.__class__.__name__.replace("Model","").lower()
66 |         filename = model_name + "_model.pkl"
67 |         file = gzip.GzipFile(filename, 'wb')
68 |         pickle.dump(self, file, -1)  # -1 selects the highest pickle protocol available
69 |         file.close()  # NOTE(review): not reached if pickle.dump raises; consider try/finally
70 | 


--------------------------------------------------------------------------------
/examples/models/boston2_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/boston2_model.pyc


--------------------------------------------------------------------------------
/examples/models/boston3_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/boston3_model.pkl


--------------------------------------------------------------------------------
/examples/models/boston3_model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
 4 | Instead import from PYTHONPATH.
 5 | """
 6 | import pandas as pd, cPickle as pickle, gzip
 7 | 
 8 | class Boston3Model(object):
 9 |     """Example model wrapping a metadata dict. Please do not change the class name: save() derives the pickle file name from it."""
10 | 
11 |     def __init__(self,metadata):
12 |         """ Do not change this method, as it initializes the instance with metadata. Raises TypeError unless metadata is exactly a dict."""
13 |         if type(metadata) is dict:
14 |             self.metadata = metadata
15 |         else:
16 |             raise TypeError("metadata must be of type dict, but is of type %s" % type(metadata))
17 | 
18 |     def predict(self,new_data):
19 |         """ Please change this method. It is called after self.transform(new_data), so you can assume
20 |             that new_data is already transformed for prediction. new_data must be a pandas.DataFrame; anything else raises TypeError.
21 |             If you need an sklearn regressor / classifier, please supply it when the model is instantiated with metadata:
22 |                 In fit_boston3.py:
23 |                        metadata = { 'mySkLearnRegressor' : myRandomForestRegressor,
24 |                                     'mySkLearnClassifier': myRandomForestClassifier,
25 |                                     'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
26 |                                     'someConstant' : 3.14
27 |                                   }
28 |                        boston3_model = Boston3Model(metadata)
29 |                 Then you can access them in this method with (the keys are case-sensitive):
30 |                         mySkLearnRegressor = self.metadata["mySkLearnRegressor"]
31 |                         someConstant = self.metadata["someConstant"]
32 |             Also make sure that you convert your prediction to a pandas.DataFrame, as this example does.
33 |         """
34 |         if not type(new_data) is pd.DataFrame:
35 |             raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %s" % type(new_data))
36 |         
37 |         # start implementing the prediction here
38 |         rf = self.metadata["mySkLearnRegressor"]       
39 |         prediction = rf.predict(new_data)
40 |         myConstant = self.metadata["someConstant"]
41 |         new_prediction = myConstant * prediction + 2.0
42 |         return pd.DataFrame(new_prediction)
43 | 
44 |     def transform(self,new_data):
45 |         """ You can override this method if the new_data passed per POST as JSON needs to be transformed before
46 |             self.predict can be called. By default it returns new_data unchanged. If new_data is already transformed by the client issuing the POST request,
47 |             then you may leave this method unchanged. If you need access to metadata, read it from self.metadata as in self.predict. Raises TypeError if new_data is not a pandas.DataFrame.
48 |         """
49 |         if not type(new_data) is pd.DataFrame:
50 |             raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %s" % type(new_data))
51 |         # start changing the method here, if you want:
52 |         return new_data
53 | 
54 |     def transform_predict(self,new_data):
55 |         """ Do not change this method, as it will be called on the server when a POST request is issued by a client. Returns self.predict(self.transform(new_data)).
56 |         """
57 |         return self.predict(self.transform(new_data))
58 | 
59 | 
60 |     def save(self):
61 |         """Do not change this method.
62 |            Gzip-pickles this instance to "<name>_model.pkl" in the current working directory,
63 |            where <name> is the lower-cased class name with "Model" removed (here: "boston3_model.pkl").
64 |         """
65 |         model_name = self.__class__.__name__.replace("Model","").lower()
66 |         filename = model_name + "_model.pkl"
67 |         file = gzip.GzipFile(filename, 'wb')
68 |         pickle.dump(self, file, -1)  # -1 selects the highest pickle protocol available
69 |         file.close()  # NOTE(review): not reached if pickle.dump raises; consider try/finally
70 | 


--------------------------------------------------------------------------------
/examples/models/boston3_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/boston3_model.pyc


--------------------------------------------------------------------------------
/examples/models/boston_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/boston_model.pkl


--------------------------------------------------------------------------------
/examples/models/boston_model.py:
--------------------------------------------------------------------------------
 1 | 
 2 | """
 3 | This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
 4 | Instead import from PYTHONPATH.
 5 | """
 6 | import pandas as pd, cPickle as pickle, gzip
 7 | 
 8 | class BostonModel(object):
 9 |     """ Please do not change the class name"""
10 | 
11 |     def __init__(self,metadata):
12 |         """ Do not change this method, as it initializes the instance with metadata"""
13 |         if type(metadata) is dict:
14 |             self.metadata = metadata
15 |         else:
16 |             raise TypeError("metadata must be of type dict, but is of type %s" % type(metadata))
17 | 
18 |     def predict(self,new_data):
19 |         """ Please change this method. It will be called after self.transform(new_data) is called, so you can assume,
20 |             that new_data is already transformed for prediction. You can also assume, that new_data is a pandas.DataFrame.
21 |             If you need an Sklearn-Regressor / Classifier, please supply this when the model is instantiated with metadata:
22 |                 In fit_boston.py:
23 |                        metadata = { 'mySkLearnRegressor' : myRandomForestRegressor, 
24 |                                     'mySkLearnClassifier': myRandomForestClassifier,
25 |                                     'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
26 |                                     'someConstant' : 3.14
27 |                                   }
28 |                        boston_model = BostonModel(metadata)
29 |                 Then you can access in this method with :
30 |                         mySKLearnRegressor = self.metadata["mySKLearnRegressor"]
31 |                         someConstant = self.metadata["someConstant"]
32 |             Also make sure, that you convert your prediction to a pandas.DataFrame, as in this example
33 |         """
34 |         if not type(new_data) is pd.DataFrame:
35 |             raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %s" % type(new_data))
36 |         
37 |         # start here implenting the prediction
38 |         rf = self.metadata["mySkLearnRegressor"]       
39 |         prediction = rf.predict(new_data)
40 |         myConstant = self.metadata["someConstant"]
41 |         new_prediction = myConstant * prediction + 2.0
42 |         return pd.DataFrame(new_prediction)
43 | 
44 |     def transform(self,new_data):
45 |         """ You can overwrite this method, if the new_data passed per POST as JSON needs to be transformed before one can
46 |             call self.predict. By default, it returns new_data. If new_data is already transformed by the client issuing the POST request, than
47 |             you might leave this method unchanged. If you need access to metadata, please read from self.metadata as in the self.predict method
48 |         """
49 |         if not type(new_data) is pd.DataFrame:
50 |             raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %s" % type(new_data))
51 |         # start here changing the method, if you want:
52 |         return new_data
53 | 
54 |     def transform_predict(self,new_data):
55 |         """ Do not change this method, as it will be called on the server when a POST request is issued by a client.
56 |         """
57 |         return self.predict(self.transform(new_data))
58 | 
59 | 
60 |     def save(self):
61 |         """Do not change this method.
62 |            Save an object to a compressed disk file.
63 |            Works well with huge objects.
64 |         """
65 |         model_name = self.__class__.__name__.replace("Model","").lower()
66 |         filename = model_name + "_model.pkl"
67 |         file = gzip.GzipFile(filename, 'wb')
68 |         pickle.dump(self, file, -1)
69 |         file.close()
70 | 


--------------------------------------------------------------------------------
/examples/models/boston_model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/boston_model.pyc


--------------------------------------------------------------------------------
/examples/models/bostonmodel.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/examples/models/bostonmodel.pkl


--------------------------------------------------------------------------------
/examples/requests/curl_boston.sh:
--------------------------------------------------------------------------------
1 | curl -s -w "%{time_total}\n"  -H "Content-Type: application/json" -X POST -d '{"CRIM":{"0":0.00632,"1":0.02731,"2":0.02729},"ZN":{"0":18.0,"1":0.0,"2":0.0},"INDUS":{"0":2.31,"1":7.07,"2":7.07},"CHAS":{"0":0.0,"1":0.0,"2":0.0},"NOX":{"0":0.538,"1":0.469,"2":0.469},"RM":{"0":6.575,"1":6.421,"2":7.185},"AGE":{"0":65.2,"1":78.9,"2":61.1},"DIS":{"0":4.09,"1":4.9671,"2":4.9671},"RAD":{"0":1.0,"1":2.0,"2":2.0},"TAX":{"0":296.0,"1":242.0,"2":242.0},"PTRATIO":{"0":15.3,"1":17.8,"2":17.8},"B":{"0":396.9,"1":396.9,"2":392.83},"LSTAT":{"0":4.98,"1":9.14,"2":4.03}}' http://localhost:8000/boston
2 | 


--------------------------------------------------------------------------------
/examples/requests/curl_boston2.sh:
--------------------------------------------------------------------------------
1 | curl -s -w "%{time_total}\n"  -H "Content-Type: application/json" -X POST -d '{"CRIM":{"0":0.00632,"1":0.02731,"2":0.02729},"ZN":{"0":18.0,"1":0.0,"2":0.0},"INDUS":{"0":2.31,"1":7.07,"2":7.07},"CHAS":{"0":0.0,"1":0.0,"2":0.0},"NOX":{"0":0.538,"1":0.469,"2":0.469},"RM":{"0":6.575,"1":6.421,"2":7.185},"AGE":{"0":65.2,"1":78.9,"2":61.1},"DIS":{"0":4.09,"1":4.9671,"2":4.9671},"RAD":{"0":1.0,"1":2.0,"2":2.0},"TAX":{"0":296.0,"1":242.0,"2":242.0},"PTRATIO":{"0":15.3,"1":17.8,"2":17.8},"B":{"0":396.9,"1":396.9,"2":392.83},"LSTAT":{"0":4.98,"1":9.14,"2":4.03}}' http://localhost:8000/boston2
2 | 


--------------------------------------------------------------------------------
/examples/requests/curl_load_boston3.sh:
--------------------------------------------------------------------------------
1 | curl -X GET "http://localhost:8000/loadModelAtURL?url=/boston3&model=boston3_model.pkl"
2 | 


--------------------------------------------------------------------------------
/examples/requests/curl_stats.sh:
--------------------------------------------------------------------------------
1 | curl -X GET "http://localhost:8000/stats"
2 | 


--------------------------------------------------------------------------------
/examples/requests/test_boston.sh:
--------------------------------------------------------------------------------
# Fire 200 sequential prediction requests at /boston as a crude smoke and
# timing test. curl_boston.sh is resolved relative to this script, so the
# test can be started from any working directory.
script_dir="$(cd "$(dirname "$0")" && pwd)"
for i in $(seq 1 200); do
    "$script_dir/curl_boston.sh"
done
2 | 


--------------------------------------------------------------------------------
/export_WEBSCIKITMODELSPATH.sh:
--------------------------------------------------------------------------------
# Source this file (". ./export_WEBSCIKITMODELSPATH.sh") to point
# WEBSCIKITMODELSPATH at the bundled example models and to put tools/ on PATH.
#
# When a file is sourced by bash, $0 names the shell and not this file, so the
# script location is taken from BASH_SOURCE when it is set and from $0 otherwise.
SCRIPTPATH="$( cd "$(dirname "${BASH_SOURCE:-$0}")" && pwd -P )"
WEBSCIKITMODELSPATH="$SCRIPTPATH/examples/models/"
PATH="$PATH:$SCRIPTPATH/tools"
export WEBSCIKITMODELSPATH
export PATH
6 | 


--------------------------------------------------------------------------------
/server/requesthandler.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(1,'/usr/local/lib/python2.7/dist-packages/')
 3 | 
 4 | from BaseHTTPServer import BaseHTTPRequestHandler
 5 | import logging, json
 6 | import pandas as pd, os, cPickle as pickle, gzip
 7 | from urlparse import parse_qs
 8 | 
 9 | def load(filename):
10 |     """Loads a compressed object from disk
11 |     """
12 |     file = gzip.GzipFile(filename, 'rb')
13 |     object = pickle.load(file)
14 |     file.close()
15 |     return object
16 | 
class RequestHandler(BaseHTTPRequestHandler):
    """Serve model predictions and a small admin API over HTTP.

    The handler reads the following attributes from the server object (self.server):
        models               dict: url -> (model, model file name)
        stats                dict: url -> number of POST requests received
        started_at           start time of the server
        webscikitmodelspath  directory the pickled models are loaded from

    Routes:
        GET  /stats                                      counters as JSON
        GET  /loadModelAtURL?url=/x&model=x_model.pkl    (re)load a model at a url
        POST to a registered model url                   JSON data in, JSON prediction out
    """

    def read_POST_data(self):
        """Return the request body as text.

        Reads exactly Content-Length bytes and decodes them as UTF-8. Raises if
        the header is missing or not a number, or if the body is not valid UTF-8.
        """
        content_length = int(self.headers['Content-Length'])
        return self.rfile.read(content_length).decode("utf-8")

    def _set_headers(self, httpstatus=200):
        """Send the status line and the JSON content-type header."""
        self.send_response(httpstatus)
        self.send_header('Content-type', 'application/json')
        self.end_headers()

    def _send_json(self, body, httpstatus=200):
        """Send body (already serialised JSON text) and finish the response."""
        self._set_headers(httpstatus)
        self.wfile.write(body.encode("utf-8"))
        self.wfile.close()

    def _fail(self, httpstatus, message=None):
        """Send an error response and finish it."""
        self.send_error(httpstatus, message)
        self.wfile.close()

    def _send_stats(self):
        """Answer /stats with the start time and, per url, (request count, model file)."""
        resp = {'started_at': str(self.server.started_at)}
        for url, entry in self.server.models.items():
            filename = entry[1]
            # models registered after start-up may not have a counter yet
            resp[url] = (self.server.stats.setdefault(url, 0), filename)
        self._send_json(json.dumps(resp))

    def _load_model(self, query):
        """Load the model file named in the query string and register it at the given url."""
        params = parse_qs(query)
        try:
            url = params["url"][0]
            model_file = params["model"][0]
        except KeyError:
            self._fail(400, "the parameters url and model are required")
            return

        # The file name comes straight from the request: refuse anything that
        # resolves to a location outside the models directory.
        models_dir = os.path.abspath(self.server.webscikitmodelspath)
        model_path = os.path.abspath(os.path.join(models_dir, model_file))
        if not model_path.startswith(os.path.join(models_dir, "")):
            self._fail(400, "model must be a file inside the models directory")
            return

        # NOTE(security): load() unpickles the file, which executes code stored in
        # it. This endpoint is unauthenticated, so the models directory must be
        # writable by trusted users only.
        try:
            model = load(model_path)
        except Exception as e:
            # request boundary: report the failure instead of dropping the connection
            self.log_error("could not load model %s: %s", model_path, e)
            self._fail(500, "model could not be loaded")
            return

        self.server.models[url] = (model, model_file)
        # do_POST counts requests per url, so make sure the counter exists
        self.server.stats.setdefault(url, 0)
        self._set_headers(200)
        self.wfile.close()

    def do_GET(self):
        """ GET is reserved for "/stats" and "/loadModelAtURL?url=url10;model=model10.pkl" """
        route, _, query = self.path.partition("?")
        if self.path == "/stats":
            self._send_stats()
        elif route == "/loadModelAtURL":
            self._load_model(query)
        else:
            self._fail(404)

    def do_POST(self):
        """ POST is reserved for doing only model predictions at urls defined in webscikit.conf
            or registered later through /loadModelAtURL.

            Responds 404 for an unknown url, 400 for an unreadable body and 500 if
            the model itself fails.
        """
        import io  # local import: only needed to hand the body to pandas as a buffer

        entry = self.server.models.get(self.path)
        if entry is None:
            # 404 model not found
            self._fail(404)
            return
        model = entry[0]

        # .get(): a model loaded at runtime may not have a counter yet
        self.server.stats[self.path] = self.server.stats.get(self.path, 0) + 1

        try:
            raw = self.read_POST_data()
        except (KeyError, TypeError, ValueError):
            self._fail(400, "a Content-Length header and a UTF-8 encoded body are required")
            return
        try:
            # a buffer is accepted by every pandas version; newer ones no longer
            # accept a literal JSON string
            data = pd.read_json(io.StringIO(raw))
        except ValueError:
            self._fail(400, "request body is not valid JSON for a DataFrame")
            return

        try:
            prediction = model.transform_predict(data)
        except Exception as e:
            # request boundary: model code is user supplied and may raise anything
            self.log_error("prediction at %s failed: %s", self.path, e)
            self._fail(500, "prediction failed")
            return

        # NOTE: to_json() already returns JSON text, so json.dumps wraps it in a
        # JSON string (clients have to decode twice). Kept as is, because existing
        # clients depend on this response format.
        self._send_json(json.dumps(prediction.to_json()))
79 | 


--------------------------------------------------------------------------------
/server/requesthandler.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/githubuser1983/webscikit/c5afb2dc4b190988f0a288c80c8d54c26e09178b/server/requesthandler.pyc


--------------------------------------------------------------------------------
/server/webscikit.conf:
--------------------------------------------------------------------------------
1 | {
2 |   "/boston": "boston_model.pkl",
3 |   "/boston2": "boston2_model.pkl"
4 | }
5 | 


--------------------------------------------------------------------------------
/server/webserver.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import sys, os
 3 | sys.path.insert(1,'/usr/local/lib/python2.7/dist-packages/')
 4 | 
 5 | import json, datetime
 6 | from BaseHTTPServer import HTTPServer
 7 | from requesthandler import RequestHandler
 8 | import cPickle as pickle, gzip
 9 | 
def load(filename):
    """Loads a compressed object from disk.

    filename: path of a gzip-compressed pickle file, as written by a model's save().
    Returns the unpickled object; errors from gzip or pickle are passed on to the caller.
    """
    # closing via with, so the file handle is released even if unpickling fails
    with gzip.GzipFile(filename, 'rb') as packed:
        return pickle.load(packed)
17 | 
def load_models(conf_file,webscikitmodelspath):
    """Load every model listed in the configuration file.

    conf_file: path of a JSON file mapping urls to model file names,
               e.g. {"/boston": "boston_model.pkl"}.
    webscikitmodelspath: directory that contains the model files.

    Returns a dict: url -> (unpickled model, model file name).
    """
    # the with-block closes the configuration file even if a model fails to load
    with open(conf_file, "r") as cf:
        urlmapping = json.load(cf)

    models = {}
    for url, model_file in urlmapping.items():
        model_path = os.path.join(webscikitmodelspath + "/", model_file)
        models[url] = (load(model_path), model_file)
    return models
28 | 
def runServerWithModels(models,server_class=HTTPServer, handler_class=RequestHandler, server_address=('',8000),webscikitmodelspath=None):
    """Create the HTTP server, attach the shared state to it and serve until interrupted.

    models: dict url -> (model, model file name), as returned by load_models.
    The request handler reaches models, stats, started_at and webscikitmodelspath
    through its self.server attribute.
    """
    server = server_class(server_address, handler_class)
    server.webscikitmodelspath = webscikitmodelspath
    # one request counter per configured url, all starting at zero
    server.stats = {url: 0 for url in models.keys()}
    server.started_at = datetime.datetime.now()
    server.models = models
    server.serve_forever()
36 | 
if __name__ == '__main__':
    # The directory with the pickled models is configured through the environment.
    if "WEBSCIKITMODELSPATH" not in os.environ:
        print("environment variable WEBSCIKITMODELSPATH is not set. Please set this path for example in ~/.bashrc and export it")
        sys.exit(-1)
    webscikitmodelspath = os.environ["WEBSCIKITMODELSPATH"]

    # Unpickling imports the model modules by name, so the models directory takes
    # the place of the script directory at the head of the module search path.
    sys.path[0] = webscikitmodelspath

    # webscikit.conf is looked up in the current working directory
    runServerWithModels(load_models("webscikit.conf", webscikitmodelspath),
                        webscikitmodelspath=webscikitmodelspath)
48 | 
49 |    
50 | 


--------------------------------------------------------------------------------
/sketch.txt:
--------------------------------------------------------------------------------
 1 | env vars:
 2 |   WEBSCIKITMODELSPATH = /home/orges/models
 3 | 
 4 | 
 5 | webscikit -create iris
 6 |   iris/
 7 |      irismodel.py class IrisModel(object):
 8 |      fit_iris.py
 9 | 
10 | webscikit -deploy iris -overwrite
11 |   copies irismodel.py to WEBSCIKITMODELSPATH
12 |   gzips-pickles IrisModel and copies irismodel.pkl to WEBSCIKITMODELSPATH
13 |   if iris exists in WEBSCIKITMODELSPATH then it is overwritten
14 |   uploads models iris at url /iris on webserver
15 | 
16 | /etc/init.d/webscikit.sh
17 |   start stop restart status
18 | 
19 | irismodel.py should import only from PYTHONPATH otherwise pickling will not work
20 |    check_model.py -> raises an ImportError if unpickling does not work
21 | 
22 | 


--------------------------------------------------------------------------------
/tools/check_model.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | import sys,os
 3 | sys.path.insert(1,'/usr/local/lib/python2.7/dist-packages/')
 4 | import cPickle as pickle
 5 | import hashlib, gzip,shutil
 6 | 
 7 | def save(object, filename, protocol = -1):
 8 |     """Save an object to a compressed disk file.
 9 |        Works well with huge objects.
10 |     """
11 |     file = gzip.GzipFile(filename, 'wb')
12 |     pickle.dump(object, file, protocol)
13 |     file.close()
14 | 
def load(filename):
    """Loads a compressed object from disk.

    filename: path of a gzip-compressed pickle file.
    Returns the unpickled object. An ImportError raised while unpickling is
    passed on to the caller, which uses it to detect models that import local modules.
    """
    # the with-block releases the file handle even if unpickling raises
    with gzip.GzipFile(filename, 'rb') as archive:
        return pickle.load(archive)
22 | 
def overwrite_tmp_dir(filename):
    """ Creates tmp-dir based on hash of file given through filename.
        If tmp-dir exists, it will be deleted and created again.

        Returns the path of the new, empty directory: /tmp/ followed by the md5
        hex digest of the file content.
        Raises IOError if filename cannot be read.
    """
    hasher = hashlib.md5()
    # binary mode, because the input is a gzip-compressed pickle; the file is
    # hashed in chunks so huge models do not have to fit into memory at once
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            hasher.update(chunk)
    dir_name = os.path.join("/tmp/", hasher.hexdigest())
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    return dir_name
36 | 
def copy(from_filename,to_filename):
    """ Copies from_file to to_file.

        Both files are handled as binary data; to_filename is overwritten if it exists.
        Raises IOError if the source cannot be read or the target cannot be written.
    """
    # with-blocks close both handles on every path; copyfileobj streams the
    # content instead of reading the whole file into memory
    with open(from_filename, "rb") as from_file:
        with open(to_filename, "wb") as to_file:
            shutil.copyfileobj(from_file, to_file)
45 | 
46 | 
if __name__=="__main__":
    # Usage: check_model.py model_name
    # Run it in the directory that holds model_name_model.pkl and model_name_model.py.
    # The check passes when the pickled model can be loaded from a scratch directory
    # that contains nothing but these two files.
    if len(sys.argv) < 2:
        print("Usage: %s model_name" % sys.argv[0])
        sys.exit(-1)
    model_name = sys.argv[1]
    pkl_file_name = model_name + "_model.pkl"
    py_file_name = model_name + "_model.py"
    try:
        tmp_dir = overwrite_tmp_dir(pkl_file_name)
    except IOError as e:
        print(e)
        print("Pickled file %s not found. Did you run fit_%s.py in this directory?" % ( pkl_file_name, model_name))
        sys.exit(-1)
    copy(pkl_file_name, os.path.join(tmp_dir,pkl_file_name))
    try:
        copy(py_file_name, os.path.join(tmp_dir,py_file_name))
    except IOError as e:
        print(e)
        print("Did not find %s in this directory." % py_file_name)
        sys.exit(-1)

    # Replace the script directory at the head of the module search path with the
    # scratch directory, so that python does not find additional modules next to
    # the model: unpickling has to work with the copied model module and
    # PYTHONPATH alone.
    sys.path[0] = tmp_dir

    try:
        model = load(os.path.join(tmp_dir,pkl_file_name))
    except ImportError as e:
        print(e)
        print("Looks like you have imported from another source as PYTHONPATH. Please do not do this as pickle will not work otherwise")
        sys.exit(-1)

    # the server relies on these attributes of a deployed model
    attributes = ["metadata", "predict", "transform","transform_predict"]
    for attr in attributes:
        if not hasattr(model,attr):
            print("%s does not have attribute '%s'" % (model_name, attr))
            sys.exit(-1)
    print(model.metadata)
85 | 


--------------------------------------------------------------------------------
/tools/create_project.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | import sys, os, shutil
  3 | 
# Source template for the generated NAME_model.py file. It is filled in with the
# %-operator, which is why every literal percent sign inside it is written as %%.
# The four %s placeholders receive, in order: the title-cased project name (class
# name prefix), the project name, the project name, the title-cased project name.
model = '''
"""
This is an example model. Please do not locally import anything in this model, otherwise pickling will not work and your model can not be deployed.
Instead import from PYTHONPATH.
"""
import pandas as pd, cPickle as pickle, gzip

class %sModel(object):
    """ Please do not change the class name"""

    def __init__(self,metadata):
        """ Do not change this method, as it initializes the instance with metadata"""
        if type(metadata) is dict:
            self.metadata = metadata
        else:
            raise TypeError("metadata must be of type dict, but is of type %%s" %% type(metadata))

    def predict(self,new_data):
        """ Please change this method. It will be called after self.transform(new_data) is called, so you can assume,
            that new_data is already transformed for prediction. You can also assume, that new_data is a pandas.DataFrame.
            If you need an Sklearn-Regressor / Classifier, please supply this when the model is instantiated with metadata:
                In fit_%s.py:
                       metadata = { 'mySkLearnRegressor' : myRandomForestRegressor, 
                                    'mySkLearnClassifier': myRandomForestClassifier,
                                    'someAdditionalDataFrameNeededForPrediction' : myDataFrame,
                                    'someConstant' : 3.14
                                  }
                       %s_model = %sModel(metadata)
                Then you can access in this method with :
                        mySKLearnRegressor = self.metadata["mySKLearnRegressor"]
                        someConstant = self.metadata["someConstant"]
            Also make sure, that you convert your prediction to a pandas.DataFrame, as in this example
        """
        if not type(new_data) is pd.DataFrame:
            raise TypeError("new_data in predict must be of type pandas.DataFrame but is of type %%s" %% type(new_data))
        
        # start here implenting the prediction
        rf = self.metadata["mySkLearnRegressor"]       
        prediction = rf.predict(new_data)
        myConstant = self.metadata["someConstant"]
        new_prediction = myConstant * prediction + 2.0
        return pd.DataFrame(new_prediction)

    def transform(self,new_data):
        """ You can overwrite this method, if the new_data passed per POST as JSON needs to be transformed before one can
            call self.predict. By default, it returns new_data. If new_data is already transformed by the client issuing the POST request, than
            you might leave this method unchanged. If you need access to metadata, please read from self.metadata as in the self.predict method
        """
        if not type(new_data) is pd.DataFrame:
            raise TypeError("new_data in transform must be of type pandas.DataFrame but is of type %%s" %% type(new_data))
        # start here changing the method, if you want:
        return new_data

    def transform_predict(self,new_data):
        """ Do not change this method, as it will be called on the server when a POST request is issued by a client.
        """
        return self.predict(self.transform(new_data))


    def save(self):
        """Do not change this method.
           Save an object to a compressed disk file.
           Works well with huge objects.
        """
        model_name = self.__class__.__name__.replace("Model","").lower()
        filename = model_name + "_model.pkl"
        file = gzip.GzipFile(filename, 'wb')
        pickle.dump(self, file, -1)
        file.close()
'''
 74 | 
# Source template for the generated fit_NAME.py script, filled in with the
# %-operator. The eight %s placeholders receive, in order: project name,
# title-cased name, project name (docstring); project name, title-cased name
# (import line); project name, title-cased name, project name (last two lines).
fit = '''#!/usr/bin/python
"""
This is an example python script to fit the model %s.
Please do not import locally anything other than %sModel from %s_model,
otherwise pickling will not work and you can not deploy the model.
"""
import sys, datetime
sys.path.insert(1,"/usr/local/lib/python2.7/dist-packages/")
from %s_model import %sModel
from sklearn.datasets import load_boston

# Load scikit's random forest classifier library
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

# Load pandas
import pandas as pd

import pickle, joblib

# Load numpy
import numpy as np

# Set random seed
np.random.seed(0)

boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
y = boston.target
myRandomForestRegressor = RandomForestRegressor(random_state=0)
myRandomForestRegressor.fit(df, y)

myRandomForestClassifier = RandomForestClassifier(random_state=0)

myDataFrame = pd.DataFrame([1,2,3])

# please overwrite metadata with the data and regressors / classifiers you need to later transform, predict new data:

metadata = { 
             'version': 1,
             'created_at' : datetime.datetime.now(),
             'mySkLearnRegressor': myRandomForestRegressor,
             'mySkLearnClassifier' : myRandomForestClassifier,
             'someConstant' : 3.14,
             'someAdditionalDataFrameNeededForPrediction': myDataFrame
             }

# please do not change this part of the code:
%s_model = %sModel(metadata)
%s_model.save()
'''
125 | 
if __name__ == "__main__":
    # Usage: create_project.py newProjectName
    # Creates the directory newProjectName with a model module and a fit script
    # generated from the two templates above.
    if len(sys.argv) < 2:
        print("Usage: %s newProjectName" % sys.argv[0])
        sys.exit(-1)
    model_name = sys.argv[1]

    # NOTE: an existing directory of the same name is deleted without asking,
    # including any work inside it.
    if os.path.exists(model_name):
        shutil.rmtree(model_name)
    os.makedirs(model_name)

    class_prefix = model_name.title()
    model_path = os.path.join(model_name, model_name + "_model.py")
    fit_path = os.path.join(model_name, "fit_" + model_name + ".py")

    # with-blocks make sure both files are flushed and closed even if writing fails
    with open(model_path, "wb") as model_file:
        model_file.write(model % (class_prefix, model_name, model_name, class_prefix))
    with open(fit_path, "wb") as fit_file:
        fit_file.write(fit % (model_name, class_prefix, model_name, model_name,
                              class_prefix, model_name, class_prefix, model_name))
    # the fit script is meant to be run directly, so it has to be executable
    os.chmod(fit_path, 0o777)
141 | 
142 | 


--------------------------------------------------------------------------------