├── submission
    ├── example_submission
    │   ├── 0.csv
    │   ├── 1.csv
    │   ├── 2.csv
    │   ├── 3.csv
    │   └── 4.csv
    └── bilge
    │   ├── 0.csv
    │   ├── 1.csv
    │   ├── 2.csv
    │   ├── 3.csv
    │   └── 4.csv
├── requirements.txt
├── .DS_Store
├── overview.png
├── result
    └── .DS_Store
├── config
    └── bilge20230301.json
├── src
    ├── price.txt
    ├── utils.py
    ├── example.py
    ├── pricefunction.py
    ├── buyer.py
    ├── seller.py
    ├── dam.py
    ├── evaluator.py
    ├── marketengine.py
    ├── marketengine_demo.ipynb
    ├── evaluator_submission.py
    ├── evaluator_acc_cost.py
    ├── visualize_acc_cost.py
    ├── helper.py
    └── visualizetools.py
├── CONTRIBUTING.md
├── CODE_OF_CONDUCT.md
├── README.md
└── LICENSE


/submission/example_submission/0.csv:
--------------------------------------------------------------------------------
1 | 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,10,10,10,10,15


--------------------------------------------------------------------------------
/submission/example_submission/1.csv:
--------------------------------------------------------------------------------
1 | 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,10,10,10,10,15


--------------------------------------------------------------------------------
/submission/example_submission/2.csv:
--------------------------------------------------------------------------------
1 | 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,10,10,10,10,15


--------------------------------------------------------------------------------
/submission/example_submission/3.csv:
--------------------------------------------------------------------------------
1 | 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,10,10,10,10,15


--------------------------------------------------------------------------------
/submission/example_submission/4.csv:
--------------------------------------------------------------------------------
1 | 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,10,10,10,10,15


--------------------------------------------------------------------------------
/submission/bilge/0.csv:
--------------------------------------------------------------------------------
1 | 32, 26, 4, 23, 2, 30, 24, 7, 12, 8, 19, 8, 17, 6, 12, 6, 14, 20, 10, 20
2 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.22.0
2 | pandas==1.5.3
3 | scikit_learn==1.2.0
4 | pyarrow==10.0.1
5 | 
6 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/Data_Acquisition_for_ML_Benchmark/HEAD/.DS_Store


--------------------------------------------------------------------------------
/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/Data_Acquisition_for_ML_Benchmark/HEAD/overview.png


--------------------------------------------------------------------------------
/result/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/Data_Acquisition_for_ML_Benchmark/HEAD/result/.DS_Store


--------------------------------------------------------------------------------
/submission/bilge/1.csv:
--------------------------------------------------------------------------------
1 | 228, 96, 216, 348, 48, 60, 108, 0, 108, 0, 180, 204, 252, 288, 108, 204, 204, 168, 120, 60
2 | 


--------------------------------------------------------------------------------
/submission/bilge/2.csv:
--------------------------------------------------------------------------------
1 | 408, 108, 324, 348, 24, 48, 120, 12, 0, 36, 156, 192, 300, 240, 72, 276, 0, 156, 72, 108
2 | 


--------------------------------------------------------------------------------
/submission/bilge/3.csv:
--------------------------------------------------------------------------------
1 | 408, 108, 324, 348, 24, 48, 120, 12, 0, 36, 156, 192, 300, 240, 72, 276, 0, 156, 72, 108
2 | 


--------------------------------------------------------------------------------
/submission/bilge/4.csv:
--------------------------------------------------------------------------------
1 | 420, 48, 204, 348, 36, 144, 192, 288, 24, 48, 240, 132, 132, 48, 168, 168, 0, 300, 12, 48
2 | 


--------------------------------------------------------------------------------
/config/bilge20230301.json:
--------------------------------------------------------------------------------
1 | {
2 | "instance_ids":["0","1","2","3","4"],
3 | "submission_path":"../submission/bilge/",
4 | "model_name":"knn",
5 | "save_path":"../result/bilge20230301_knn.json"
6 | }
7 | 


--------------------------------------------------------------------------------
/src/price.txt:
--------------------------------------------------------------------------------
 1 | Lin,100
 2 | Lin,100
 3 | Lin,100
 4 | Lin,100
 5 | Lin,100
 6 | Lin,100
 7 | Lin,100
 8 | Lin,100
 9 | Lin,100
10 | Lin,100
11 | Lin,100
12 | Lin,100
13 | Lin,100
14 | Lin,100
15 | Lin,100
16 | Lin,100
17 | Lin,100
18 | Lin,100
19 | Lin,100
20 | Lin,100
21 | Lin,100


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | #!/usr/bin/env python3
18 | # -*- coding: utf-8 -*-
19 | """
20 | Created on Tue Aug 16 18:39:21 2022
21 | 
22 | @author: lingjiao
23 | """
24 | 
def pricefunc_lin(frac=1, max_p=100):
    """Return the linear price for a fraction of a dataset.

    Parameters
    ----------
    frac :
        Multiplier applied to ``max_p`` (defaults to 1).
    max_p :
        Price scaled by ``frac`` (defaults to 100).

    Returns
    -------
    The product ``max_p * frac``.
    """
    return max_p * frac
29 |     
30 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to this repository
 2 | We want to make contributing to this project as easy and transparent as
 3 | possible.
 4 | 
 5 | ## Pull Requests
 6 | We actively welcome your pull requests.
 7 | 
 8 | 1. Fork the repo and create your branch from `main`.
 9 | 2. If you've added code that should be tested, add tests.
10 | 3. If you've changed APIs, update the documentation.
11 | 4. Ensure the test suite passes.
12 | 5. Make sure your code lints.
13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
14 | 
15 | ## Contributor License Agreement ("CLA")
16 | In order to accept your pull request, we need you to submit a CLA. You only need
17 | to do this once to work on any of Facebook's open source projects.
18 | 
19 | Complete your CLA here: <https://code.facebook.com/cla>
20 | 
21 | ## Issues
22 | We use GitHub issues to track public bugs. Please ensure your description is
23 | clear and has sufficient instructions to be able to reproduce the issue.
24 | 
25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
26 | disclosure of security bugs. In those cases, please go through the process
27 | outlined on that page and do not file a public issue.
28 | 
29 | ## License
30 | By contributing to this repository, you agree that your contributions will be licensed
31 | under the LICENSE file in the root directory of this source tree.
32 | 


--------------------------------------------------------------------------------
/src/example.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | from dam import Dam
print("Loading Dataset...")
instance = 2  # which market instance to open; valid ids are 0,1,2,3,4
MyDam = Dam(instance=instance)
print("Dataset loaded!")

# Budget for the selected instance.
budget = MyDam.getbudget()
print("budget is:", budget)

# Buyer-side data for the selected instance.
buyer_data = MyDam.getbuyerdata()
print("buyer data is:", buyer_data)

# Model object stored with the instance.
mlmodel = MyDam.getmlmodel()
print("mlmodel is", mlmodel)

# Walk over every seller and show its price for 100 samples, its summary
# and its sample rows.
sellers_id = MyDam.getsellerid()
print("seller ids are", sellers_id)
for i in sellers_id:
    seller_info = MyDam.getsellerinfo(seller_id=int(i))
    seller_i_price, seller_i_summary, seller_i_samples = seller_info
    print("seller ", i, " price: ", seller_i_price.get_price_samplesize(100))
    print("seller ", i, " summary: ", seller_i_summary)
    print("seller ", i, " samples: ", seller_i_samples)
39 | 
40 | 


--------------------------------------------------------------------------------
/src/pricefunction.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | #!/usr/bin/env python3
18 | # -*- coding: utf-8 -*-
19 | """
20 | Created on Tue Aug 16 18:35:29 2022
21 | 
22 | @author: lingjiao
23 | """
24 | 
25 | 
26 | 
class PriceFunction(object):
    """Pricing rule that maps a fraction of a dataset to a price.

    Only the linear rule (``"lin"``) is implemented: the price of a fraction
    ``frac`` of the data is ``max_p * frac``.
    """

    # Method names get_price can evaluate; compared after strip()/lower().
    _SUPPORTED_METHODS = ("lin",)

    def __init__(self):
        return

    def setup(self, max_p=100, method="lin", data_size=1):
        """Configure the pricing rule.

        Parameters
        ----------
        max_p :
            Price charged for the full dataset (``frac == 1``).
        method :
            Name of the pricing rule. Matching is case-insensitive, so both
            ``"lin"`` and ``"Lin"`` select the linear rule.
        data_size :
            Number of rows used by ``get_price_samplesize`` to turn a sample
            count into a fraction.

        Raises
        ------
        ValueError
            If ``method`` does not name a supported pricing rule.
        """
        # Previously the argument was ignored and "lin" was always stored;
        # honour it, but normalise the spelling so "Lin" keeps working.
        method_key = str(method).strip().lower()
        if method_key not in self._SUPPORTED_METHODS:
            raise ValueError(
                "Unsupported pricing method: {!r}".format(method))
        self.max_p = max_p
        self.method = method_key
        self.data_size = data_size

    def get_price(self, frac=1):
        """Return the price of buying the fraction ``frac`` of the data.

        Raises
        ------
        ValueError
            If ``frac`` is outside ``[0, 1]`` or the configured method is not
            supported.
        """
        if frac < 0 or frac > 1:
            raise ValueError("The fraction of samples must be within [0,1]!")
        if self.method == "lin":
            return self.max_p * frac
        # Fail loudly instead of silently returning None as a "price".
        raise ValueError(
            "Unsupported pricing method: {!r}".format(self.method))

    def get_price_samplesize(self, samplesize=10):
        """Return the price of ``samplesize`` rows out of ``data_size``."""
        frac = samplesize / self.data_size
        return self.get_price(frac)
56 | 
57 |         
58 | 
59 |     
def main():
    """Print the banner of the price-function smoke test."""
    banner = "test of the price func"
    print(banner)


if __name__ == "__main__":
    main()
66 |     
67 | 


--------------------------------------------------------------------------------
/src/buyer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | #!/usr/bin/env python3
18 | # -*- coding: utf-8 -*-
19 | """
20 | Created on Tue Aug 16 18:35:29 2022
21 | 
22 | @author: lingjiao
23 | """
24 | 
25 | from sklearn.linear_model import LogisticRegression
26 | 
27 | import numpy
28 | 
class Buyer(object):
    """Buyer that holds evaluation data, a strategy and an ML model."""

    def __init__(self):
        # Start with no strategy so get_stretagy() is safe to call before
        # load_stretagy().
        self.stretagy = None
        return

    def loaddata(self,
                 data=None,
                 datapath=None,):
        """Store the buyer's data, given directly or read from a CSV file.

        Parameters
        ----------
        data :
            Data to store as-is. Takes precedence over ``datapath``.
        datapath :
            Path to a comma-separated file; its first row is skipped.

        Raises
        ------
        ValueError
            If neither ``data`` nor ``datapath`` is provided.
        """
        if data is not None:
            self.data = data
            return
        if datapath is not None:
            # The context manager closes the file once numpy has read it.
            with open(datapath, "rb") as handle:
                self.data = numpy.loadtxt(handle,
                                          delimiter=",",
                                          skiprows=1)
            return
        raise ValueError("Not implemented load data of buyer")

    def load_stretagy(self,
                      stretagy=None):
        """Remember the strategy so get_stretagy() can return it."""
        self.stretagy = stretagy
        return

    def get_stretagy(self):
        """Return the strategy last passed to load_stretagy() (or None)."""
        return self.stretagy

    def load_mlmodel(self,
                     mlmodel):
        """Attach the model used by train_mlmodel(); returns 0."""
        self.mlmodel = mlmodel
        return 0

    def train_mlmodel(self,
                      train_data):
        """Fit the model on ``train_data`` and score it on the buyer's data.

        In both ``train_data`` and ``self.data`` the last column is used as
        the label and all preceding columns as features.

        Returns
        -------
        The value of ``self.mlmodel.score`` on the buyer's own data.
        """
        X = train_data[:, 0:-1]
        y = numpy.ravel(train_data[:, -1])
        self.mlmodel.fit(X, y)
        X_1 = self.data[:, 0:-1]
        y_1 = numpy.ravel(self.data[:, -1])
        eval_acc = self.mlmodel.score(X_1, y_1)
        return eval_acc
70 |     
71 |         
def main():
    """Smoke test: fit a logistic regression on toy rows and print the score."""
    print("test of the buyer")
    toy_buyer = Buyer()

    # Data the buyer evaluates on.
    toy_buyer.loaddata(data=numpy.asmatrix([[0,1,1,1],[1,0,1,0]]))

    toy_buyer.load_mlmodel(LogisticRegression(random_state=0))

    # Rows the model is fitted on.
    purchased_rows = numpy.asmatrix([[0,1,1,1],[1,0,1,0],[1,1,1,1]])
    score = toy_buyer.train_mlmodel(purchased_rows)

    print("eval acc",score)


if __name__ == "__main__":
    main()
92 | 


--------------------------------------------------------------------------------
/src/seller.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | #!/usr/bin/env python3
18 | # -*- coding: utf-8 -*-
19 | """
20 | Created on Tue Aug 16 18:35:29 2022
21 | 
22 | @author: lingjiao
23 | """
24 | 
25 | from pricefunction import PriceFunction
26 | import numpy
27 | numpy.random.seed(1111)
28 | 
class Seller(object):
    """Seller that owns a data matrix and sells random rows at a price."""

    def __init__(self):
        return

    def loaddata(self,
                 data=None,
                 datapath=None,):
        """Store the seller's data, given directly or read from a CSV file.

        Parameters
        ----------
        data :
            An m x n matrix, stored as-is. Takes precedence over ``datapath``.
        datapath :
            Path to a comma-separated file holding a matrix with column
            names; the first row is skipped.

        If neither argument is given, a message is printed and nothing is
        stored (kept for backward compatibility).
        """
        if data is not None:
            self.data = data
            return
        if datapath is not None:
            # The context manager closes the file once numpy has read it.
            with open(datapath, "rb") as handle:
                self.data = numpy.loadtxt(handle,
                                          delimiter=",",
                                          skiprows=1)
            return
        print("Not implemented load data of seller")
        return

    def setprice(self, pricefunc):
        """Attach the pricing object; it must provide ``get_price(frac)``."""
        self.pricefunc = pricefunc

    def getprice(self, data_size):
        """Return the price of ``data_size`` rows of this seller's data."""
        q1 = data_size / (len(self.data))
        return self.pricefunc.get_price(q1)

    def getdata(self, data_size, price):
        """Sell ``data_size`` randomly chosen rows for the offered ``price``.

        Rows are drawn with ``numpy.random.choice(..., replace=True)``, so
        the same row can appear more than once in the result.

        Raises
        ------
        ValueError
            If more rows are requested than the seller holds, or if the
            offered price is below the seller's price for that many rows.
        """
        data = self.data
        q1 = data_size / (len(data))
        if q1 > 1:
            raise ValueError("The required number of samples is too large!")

        if self.pricefunc.get_price(q1) > price:
            raise ValueError("The buyer's offer is too small!")

        number_of_rows = data.shape[0]
        random_indices = numpy.random.choice(number_of_rows,
                                             size=data_size,
                                             replace=True)
        return data[random_indices, :]
74 |         
75 | 
76 |     
def main():
    """Smoke test: buy one row from a two-row seller with linear pricing."""
    print("test of the seller")
    toy_seller = Seller()
    toy_seller.loaddata(data=numpy.asmatrix([[0,1,1],[1,0,1]]))

    linear_pricing = PriceFunction()
    linear_pricing.setup(max_p = 100, method="lin")
    toy_seller.setprice(linear_pricing)

    # Ask for 1 row while offering a price of 60.
    bought = toy_seller.getdata(1,60)
    print("get data is ",bought)


if __name__ == "__main__":
    main()
94 |     
95 | 


--------------------------------------------------------------------------------
/src/dam.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 | 
17 | import numpy
18 | import pickle
19 | import json
20 | from pricefunction import PriceFunction
21 | import pandas
class Dam(object):
    """File-backed accessor for one market instance under ``../marketinfo/``.

    Every getter reads from ``<marketpath>/<instance>/...`` each time it is
    called; nothing is cached.
    """

    def __init__(self,
                 instance=0,
                 ):
        """Select the market instance.

        Raises
        ------
        ValueError
            If ``instance`` is not one of 0, 1, 2, 3, 4.
        """
        self._instance = instance
        self._marketpath = "../marketinfo/"
        if instance not in [0, 1, 2, 3, 4]:
            raise ValueError("the instance id is incorrect. it must be 0, 1, 2, 3, or 4.")
        return

    def _instancepath(self):
        """Return the directory prefix of the selected instance."""
        return self._marketpath + str(self._instance)

    def getbudget(self,):
        """Return the number stored in ``price/budget.txt`` as a float."""
        budget = numpy.loadtxt(self._instancepath() + "/price/budget.txt")
        return float(budget)

    def getbuyerdata(self,):
        """Return ``data_buyer/20.csv`` (read without a header) as an array."""
        path = self._instancepath() + "/data_buyer/20.csv"
        buydata = pandas.read_csv(path, header=None, engine="pyarrow").to_numpy()
        return buydata

    def getmlmodel(self,):
        """Return the object unpickled from ``data_buyer/mlmodel.pickle``."""
        path = self._instancepath() + "/data_buyer/mlmodel.pickle"
        # NOTE(review): unpickling can execute arbitrary code; only point
        # _marketpath at market files you trust.
        with open(path, 'rb') as handle:
            model = pickle.load(handle)
        return model

    def getsellerid(self,):
        """Return the seller ids from ``sellerid.txt`` as a 1-D array.

        ``ndmin=1`` keeps the result iterable even when the file lists a
        single id.
        """
        path = self._instancepath() + "/sellerid.txt"
        ids = numpy.loadtxt(path, ndmin=1)
        return ids

    def getsellerinfo(self, seller_id):
        """Return ``(pricing, summary, samples)`` for one seller.

        Parameters
        ----------
        seller_id :
            Integer used both in the summary file names and as the row
            index into ``price/price.txt``.

        Returns
        -------
        pricing : PriceFunction
            Configured from the seller's row of ``price.txt`` (method name,
            maximum price) and the summary's ``row_number``.
        summary :
            Parsed content of ``summary/<seller_id>.csv.json``.
        samples :
            Array loaded from ``summary/<seller_id>.csvsamples.csv``.
        """
        summary_prefix = self._instancepath() + "/summary/" + str(seller_id)
        # Close the summary file as soon as it has been parsed.
        with open(summary_prefix + ".csv.json") as f:
            ids = json.load(f)

        # ndmin=2 keeps one row per seller even if the file has one line,
        # so price[seller_id] is always a (method, max_price) pair.
        price = numpy.loadtxt(self._instancepath() + "/price/price.txt",
                              delimiter=',', dtype=str, ndmin=2)
        price_i = price[seller_id]
        MyPricing1 = PriceFunction()
        MyPricing1.setup(max_p=float(price_i[1]), method=price_i[0],
                         data_size=ids['row_number'])

        samples = numpy.loadtxt(summary_prefix + ".csvsamples.csv",
                                delimiter=' ', dtype=float)

        return MyPricing1, ids, samples
70 | 
71 | 
def main():
    """Smoke test: call every getter of the default instance once."""
    market = Dam()
    market.getbudget()        # budget
    market.getbuyerdata()     # buyer data
    market.getmlmodel()       # ml model
    market.getsellerid()      # seller ids
    first_seller = 0
    market.getsellerinfo(seller_id=first_seller)
    return


if __name__ == "__main__":
    main()
85 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to make participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | This Code of Conduct also applies outside the project spaces when there is a
56 | reasonable belief that an individual's behavior may have a negative impact on
57 | the project or its community.
58 | 
59 | ## Enforcement
60 | 
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported by contacting the project team at <opensource-conduct@fb.com>. All
63 | complaints will be reviewed and investigated and will result in a response that
64 | is deemed necessary and appropriate to the circumstances. The project team is
65 | obligated to maintain confidentiality with regard to the reporter of an incident.
66 | Further details of specific enforcement policies may be posted separately.
67 | 
68 | Project maintainers who do not follow or enforce the Code of Conduct in good
69 | faith may face temporary or permanent repercussions as determined by other
70 | members of the project's leadership.
71 | 
72 | ## Attribution
73 | 
74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
76 | 
77 | [homepage]: https://www.contributor-covenant.org
78 | 
79 | For answers to common questions about this code of conduct, see
80 | https://www.contributor-covenant.org/faq
81 | 


--------------------------------------------------------------------------------
/src/evaluator.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | """
 16 | 
 17 | #!/usr/bin/env python3
 18 | # -*- coding: utf-8 -*-
 19 | """
 20 | Created on Tue Aug 16 18:36:30 2022
 21 | 
 22 | @author: lingjiao
 23 | """
 24 | 
 25 | 
 26 | from sklearn.linear_model import LogisticRegression
 27 | 
 28 | 
 29 | 
 30 | import numpy
 31 | from seller import Seller
 32 | from buyer import Buyer
 33 | from pricefunction import PriceFunction
 34 | from marketengine import MarketEngine
 35 | from helper import Helper
 36 | 
 37 | class Evaluator(object):
 38 |     def __init__(self):
 39 |         self.Helper = Helper()
 40 |         return
 41 |     def eval_submission(self, 
 42 |                         submission, 
 43 |                         seller_data,
 44 |                         buyer_data,
 45 |                         seller_price,
 46 |                         buyer_budget=100,
 47 |                         mlmodel=LogisticRegression(random_state=0),
 48 |                         ):
 49 |         '''
 50 |         
 51 | 
 52 |         Parameters
 53 |         ----------
 54 |         submission : TYPE
 55 |             DESCRIPTION.
 56 |         seller_data_path : TYPE
 57 |             DESCRIPTION.
 58 |         buyer_data_path : TYPE
 59 |             DESCRIPTION.
 60 |         price_data_path : TYPE
 61 |         mlmodel: TYPE
 62 |             DESCRIPTION.
 63 |          : TYPE
 64 |             DESCRIPTION.
 65 | 
 66 |         Returns
 67 |         -------
 68 |         None.
 69 | 
 70 |         ''' 
 71 |         
 72 |         MyMarketEngine = MarketEngine()
 73 |         MyHelper = self.Helper
 74 |     
 75 | 
 76 |         # set up the market
 77 |         MyMarketEngine.setup_market(seller_data=seller_data,
 78 |                                 seller_prices = seller_price,
 79 |                      buyer_data=buyer_data,
 80 |                      buyer_budget=buyer_budget,
 81 |                      mlmodel=mlmodel,
 82 |                      )
 83 |         
 84 |         # get train data
 85 |         traindata = MyHelper.load_data(submission, MyMarketEngine)
 86 |         # train the model
 87 |         model = MyHelper.train_model(mlmodel, traindata[:,0:-1],
 88 |                                  numpy.ravel(traindata[:,-1]))
 89 |         # eval the model
 90 |         acc1 = MyHelper.eval_model(model,test_X=buyer_data[:,0:-1],test_Y=buyer_data[:,-1])   
 91 |         return acc1
 92 |        
 93 | def main():
 94 |     print("test of the evaluator")
 95 |     submission = [[1,2],[50,50]]
 96 |     data_1 = numpy.asmatrix([[0,1,0],[1,0,0]])               
 97 |     data_2 = numpy.asmatrix([[0,1,1],[1,0,1],[1,1,1],[0,0,1]])
 98 |     seller_data = [data_1, data_2]   
 99 |     buyer_data = numpy.asmatrix([[0,1,0],[1,0,1],[0,1,1]])
100 |     MyPricing1 = PriceFunction()
101 |     MyPricing1.setup(max_p = 100, method="lin")
102 |     MyPricing2 = PriceFunction()
103 |     MyPricing2.setup(max_p = 100, method="lin")
104 |     seller_price = [MyPricing1, MyPricing2]
105 |     
106 |     MyEval = Evaluator()
107 |     acc1 = MyEval.eval_submission( 
108 |                         submission, 
109 |                         seller_data,
110 |                         buyer_data,
111 |                         seller_price,
112 |                         buyer_budget=100,
113 |                         mlmodel=LogisticRegression(random_state=0),
114 |                         )
115 |     print("acc is:", acc1)
116 | if __name__ == '__main__':
117 |     main()        
118 |     
119 |     
120 |     
121 |     
122 | 


--------------------------------------------------------------------------------
/src/marketengine.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | """
 16 | 
 17 | #!/usr/bin/env python3
 18 | # -*- coding: utf-8 -*-
 19 | """
 20 | Created on Tue Aug 16 18:36:30 2022
 21 | 
 22 | @author: lingjiao
 23 | """
 24 | 
 25 | 
 26 | from sklearn.linear_model import LogisticRegression
 27 | 
 28 | 
 29 | 
 30 | import numpy
 31 | from seller import Seller
 32 | from buyer import Buyer
 33 | from pricefunction import PriceFunction
 34 | 
 35 | 
 36 | class MarketEngine(object):
 37 |     def __init__(self):
 38 |         return
 39 |     
 40 |     def setup_market(self, 
 41 |                      seller_data=None,
 42 |                      seller_prices=None,
 43 |                      buyer_data=None,
 44 |                      buyer_budget=None,
 45 |                      mlmodel=None):
 46 |         sellers = list()
 47 |         for i in range(len(seller_data)):
 48 |             MySeller = Seller()
 49 |             MySeller.loaddata(data=seller_data[i])
 50 |             MySeller.setprice(seller_prices[i])
 51 |             sellers.append(MySeller)
 52 |         self.sellers = sellers
 53 |         
 54 |         MyBuyer = Buyer()    
 55 |         MyBuyer.loaddata(data=buyer_data)     
 56 |         mlmodel1 = mlmodel
 57 |         MyBuyer.load_mlmodel(mlmodel1)
 58 |         self.buyer = MyBuyer
 59 |         self.buyer_budget = buyer_budget
 60 |         #print("set up the market")
 61 |         return
 62 | 
 63 |     def load_stretagy(self,
 64 |                       stretagy=None,):
 65 |         self.stretagy = stretagy
 66 | 
 67 |         return
 68 |     
 69 |     def train_buyer_model(self):
 70 |         print(" train buyer model ")
 71 |         
 72 |         
 73 |         # check if the budget constraint is satisified.
 74 |         cost = sum(self.stretagy[1])
 75 |         if(cost>self.buyer_budget):
 76 |             raise ValueError("The budget constraint is not satisifed!")
 77 |             return
 78 |         
 79 |         traindata = None
 80 |         for i in range(len(self.sellers)):
 81 |             d1 = self.sellers[i].getdata(self.stretagy[0][i],self.stretagy[1][i])
 82 |             if(i==0):
 83 |                 traindata = d1
 84 |             else:
 85 |                 traindata = numpy.concatenate((traindata,d1))
 86 |             print(i,d1)
 87 | 
 88 |         print("budget checked! data loaded!")                
 89 |         #print("train data", traindata)   
 90 |         acc = self.buyer.train_mlmodel(traindata)    
 91 |         return acc
 92 |     
 93 |     
 94 | def main():
 95 |     print("test of the market engine")
 96 |     MyMarketEngine = MarketEngine()
 97 |     
 98 |     data_1 = numpy.asmatrix([[0,1,0],[1,0,0]])               
 99 |     data_2 = numpy.asmatrix([[0,1,1],[1,0,1],[1,1,1],[0,0,1]])
100 |     data_b = numpy.asmatrix([[0,1,0],[1,0,1],[0,1,1]])
101 |                      
102 |     buyer_budget = 100
103 |            
104 |     MyPricing1 = PriceFunction()
105 |     MyPricing1.setup(max_p = 100, method="lin")
106 |     MyPricing2 = PriceFunction()
107 |     MyPricing2.setup(max_p = 100, method="lin")
108 | 
109 | 
110 |     mlmodel1 = LogisticRegression(random_state=0)
111 | 
112 |              
113 |     MyMarketEngine.setup_market(seller_data=[data_1,data_2],
114 |                                 seller_prices = [MyPricing1,MyPricing2],
115 |                      buyer_data=data_b,
116 |                      buyer_budget=buyer_budget,
117 |                      mlmodel=mlmodel1,
118 |                      )
119 | 
120 |     stretagy = [[1,2],[50,50]]
121 |     MyMarketEngine.load_stretagy(stretagy)
122 |     
123 |     acc1 = MyMarketEngine.train_buyer_model()
124 |     print("acc is ",acc1)
125 |     
126 |     
# Script entry point: run the market-engine demo only when this file is
# executed directly, not when MarketEngine is imported by other modules.
if __name__ == '__main__':
    main()        
129 |     
130 |     
131 |     
132 |     
133 | 


--------------------------------------------------------------------------------
/src/marketengine_demo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Copyright (c) Meta Platforms, Inc. and affiliates.\n",
  8 |     "\n",
  9 |     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
 10 |     "you may not use this file except in compliance with the License.\n",
 11 |     "You may obtain a copy of the License at\n",
 12 |     "\n",
 13 |     "    http://www.apache.org/licenses/LICENSE-2.0\n",
 14 |     "\n",
 15 |     "Unless required by applicable law or agreed to in writing, software\n",
 16 |     "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
 17 |     "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
 18 |     "See the License for the specific language governing permissions and\n",
 19 |     "limitations under the License."
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 7,
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "# The market demo\n",
 29 |     "from marketengine import MarketEngine\n",
 30 |     "from pricefunction import PriceFunction\n",
 31 |     "from sklearn.linear_model import LogisticRegression\n",
 32 |     "import numpy\n",
 33 |     "MyMarketEngine = MarketEngine()"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 8,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stdout",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "set up the market\n"
 46 |      ]
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "# Set up the market\n",
 51 |     "MyMarketEngine = MarketEngine()\n",
 52 |     "    \n",
 53 |     "# load the dataset    \n",
 54 |     "data_1 = numpy.asmatrix([[0,1,0],[1,0,0]])               \n",
 55 |     "data_2 = numpy.asmatrix([[0,1,1],[1,0,1],[1,1,1],[0,0,1]])\n",
 56 |     "data_b = numpy.asmatrix([[0,1,0],[1,0,1],[0,1,1]])\n",
 57 |     "\n",
 58 |     "# buyer budget\n",
 59 |     "buyer_budget = 100\n",
 60 |     "           \n",
 61 |     "# seller price    \n",
 62 |     "MyPricing1 = PriceFunction()\n",
 63 |     "MyPricing1.setup(max_p = 100, method=\"lin\")\n",
 64 |     "MyPricing2 = PriceFunction()\n",
 65 |     "MyPricing2.setup(max_p = 100, method=\"lin\")\n",
 66 |     "\n",
 67 |     "\n",
 68 |     "mlmodel1 = LogisticRegression(random_state=0)\n",
 69 |     "\n",
 70 |     "             \n",
 71 |     "MyMarketEngine.setup_market(seller_data=[data_1,data_2],\n",
 72 |     "                                seller_prices = [MyPricing1,MyPricing2],\n",
 73 |     "                     buyer_data=data_b,\n",
 74 |     "                     buyer_budget=buyer_budget,\n",
 75 |     "                     mlmodel=mlmodel1,\n",
 76 |     "                     )"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 11,
 82 |    "metadata": {},
 83 |    "outputs": [
 84 |     {
 85 |      "name": "stdout",
 86 |      "output_type": "stream",
 87 |      "text": [
 88 |       " train buyer model \n",
 89 |       "0 [[0 1 0]]\n",
 90 |       "1 [[1 0 1]\n",
 91 |       " [0 1 1]]\n",
 92 |       "budget checked! data loaded!\n",
 93 |       "acc for the strategy is 0.6666666666666666\n"
 94 |      ]
 95 |     },
 96 |     {
 97 |      "name": "stderr",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "/Users/lingjiao/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:593: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\n",
101 |       "  warnings.warn(\n",
102 |       "/Users/lingjiao/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:593: FutureWarning: np.matrix usage is deprecated in 1.0 and will raise a TypeError in 1.2. Please convert to a numpy array with np.asarray. For more information see: https://numpy.org/doc/stable/reference/generated/numpy.matrix.html\n",
103 |       "  warnings.warn(\n"
104 |      ]
105 |     }
106 |    ],
107 |    "source": [
108 |     "# Eval a stretagy\n",
109 |     "stretagy = [[1,2],[50,50]]\n",
110 |     "MyMarketEngine.load_stretagy(stretagy)\n",
111 |     "acc1 = MyMarketEngine.train_buyer_model()\n",
112 |     "print(\"acc for the strategy is\", acc1)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": null,
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": []
121 |   }
122 |  ],
123 |  "metadata": {
124 |   "kernelspec": {
125 |    "display_name": "Python 3 (ipykernel)",
126 |    "language": "python",
127 |    "name": "python3"
128 |   },
129 |   "language_info": {
130 |    "codemirror_mode": {
131 |     "name": "ipython",
132 |     "version": 3
133 |    },
134 |    "file_extension": ".py",
135 |    "mimetype": "text/x-python",
136 |    "name": "python",
137 |    "nbconvert_exporter": "python",
138 |    "pygments_lexer": "ipython3",
139 |    "version": "3.9.12"
140 |   }
141 |  },
142 |  "nbformat": 4,
143 |  "nbformat_minor": 4
144 | }
145 | 


--------------------------------------------------------------------------------
/src/evaluator_submission.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | """
 16 | 
 17 | #!/usr/bin/env python3
 18 | # -*- coding: utf-8 -*-
 19 | """
 20 | Created on Tue Aug 16 18:36:30 2022
 21 | 
 22 | @author: lingjiao
 23 | """
 24 | from sklearn.linear_model import LogisticRegression
 25 | from sklearn.ensemble import GradientBoostingClassifier
 26 | import numpy
 27 | from marketengine import MarketEngine
 28 | from helper import Helper
 29 | from sklearn.neighbors import KNeighborsClassifier
 30 | import json
 31 | 
 32 | def evaluate_batch(data_config,
 33 |                    ):
 34 |     instance_ids = data_config['instance_ids']
 35 |     result = dict()
 36 |     for id1 in instance_ids:
 37 |         result[id1] = evaluate_multiple_trial(data_config,instance_id=id1)    
 38 |     return result
 39 | 
 40 | def evaluate_multiple_trial(data_config,
 41 |                             instance_id,
 42 |                             num_trial=10,
 43 |                             ):
 44 |     
 45 |     results = [evaluate_once(data_config=data_config,
 46 |                   instance_id=instance_id) for i in range(num_trial)]
 47 |     #print("results are:",results)
 48 |     results_avg = dict()
 49 |     results_avg['cost'] = 0
 50 |     results_avg['acc'] = 0
 51 |     for item in results:
 52 |         #print("item is:",item)
 53 |         results_avg['cost'] += item['cost']/len(results)
 54 |         results_avg['acc'] += item['acc']/len(results)
 55 |     return results_avg
 56 | 
 57 | def evaluate_once(data_config,
 58 |                   instance_id):
 59 |     # load submission
 60 |     submission = load_submission(path = data_config['submission_path']+str(instance_id)+".csv")
 61 |     
 62 |     # get the helper
 63 |     model_name = data_config['model_name']
 64 |     MarketHelper, MarketEngineObj, model, traindata, buyer_data = get_market_info(instance_id=instance_id,
 65 |                                                                                   model_name=model_name)
 66 |     
 67 |     # calculate the cost of the submission
 68 |     cost = MarketHelper.get_cost(submission,MarketEngineObj)
 69 |     
 70 |     # generate the accuracy of the submission
 71 |     traindata = MarketHelper.load_data(submission, MarketEngineObj)
 72 |     model = MarketHelper.train_model(model, traindata[:,0:-1],
 73 |                                  numpy.ravel(traindata[:,-1]))
 74 |     acc1 = MarketHelper.eval_model(model,test_X=buyer_data[:,0:-1],test_Y=buyer_data[:,-1])
 75 |     
 76 |     result = dict()
 77 |     result['cost'] = cost
 78 |     result['acc'] = acc1
 79 |     return result
 80 | 
 81 | def load_submission(path):
 82 |     data = numpy.loadtxt(path,delimiter=",",dtype=int)
 83 |     return data
 84 | 
 85 | def get_market_info(instance_id,
 86 |                     model_name="lr"):
 87 |     MyHelper = Helper()
 88 |     seller_data, seller_prices,  buyer_data, buyer_budget, data_size  = MyHelper.load_market_instance(
 89 |         feature_path="../features/"+str(instance_id)+"/",
 90 |         buyer_data_path="../marketinfo/"+str(instance_id)+"/data_buyer/20.csv",
 91 |         price_path="../marketinfo/"+str(instance_id)+"/price/price.txt",
 92 |         budget_path="../marketinfo/"+str(instance_id)+"/price/budget.txt",
 93 |         )
 94 |     MyMarketEngine = MarketEngine()
 95 |     mlmodel1 = LogisticRegression(random_state=0)
 96 |     if(model_name=="knn"):
 97 |         mlmodel1 = KNeighborsClassifier(n_neighbors=9)	
 98 |     if(model_name=='rf'):
 99 |         mlmodel1 = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
100 |                                    max_depth=1, random_state=0)
101 |     MyMarketEngine.setup_market(seller_data=seller_data,
102 |                                 seller_prices = seller_prices,
103 |                                 buyer_data=buyer_data,
104 |                                 buyer_budget=1e10,
105 |                                 mlmodel=mlmodel1,
106 |                                 ) 
107 |     
108 |     return MyHelper, MyMarketEngine, mlmodel1,seller_data, buyer_data
109 | 
def main():
    """Evaluate all configured submissions and save the results as JSON.

    Reads the run configuration from ``../config/bilge20230301_rf.json``,
    evaluates every instance with ``evaluate_batch``, writes the result
    dictionary to ``data_config['save_path']`` and prints it.
    """
    # Context manager so the config file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("../config/bilge20230301_rf.json") as config_file:
        data_config = json.load(config_file)

    result = evaluate_batch(data_config)
    json_object = json.dumps(result, indent=4)
    save_path = data_config['save_path']
    with open(save_path, "w") as outfile:
        outfile.write(json_object)
    print("The result is:",result)

    return
120 |  
# Script entry point: evaluate the configured submissions only when this file
# is executed directly.
if __name__ == '__main__':
    main()        
123 |     
124 |     
125 |     
126 |     
127 | 


--------------------------------------------------------------------------------
/src/evaluator_acc_cost.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | """
 16 | 
 17 | #!/usr/bin/env python3
 18 | # -*- coding: utf-8 -*-
 19 | """
 20 | Created on Tue Aug 16 18:36:30 2022
 21 | 
 22 | @author: lingjiao
 23 | """
 24 | 
 25 | 
 26 | from sklearn.linear_model import LogisticRegression
 27 | 
 28 | from sklearn.ensemble import GradientBoostingClassifier
 29 | 
 30 | import numpy
 31 | from seller import Seller
 32 | from buyer import Buyer
 33 | from pricefunction import PriceFunction
 34 | from marketengine import MarketEngine
 35 | from helper import Helper
 36 | import pandas
 37 | from sklearn.neighbors import KNeighborsClassifier
 38 | 
 39 | 
 40 | def evaluate(
 41 |         MarketHelper,
 42 |         MarketEngineObj,
 43 |         model,
 44 |         buyer_data,
 45 |         trial=100, # number of trials per budget
 46 |         seller_data_size_list = [100,200,300],
 47 |         cost_scale=0.1,
 48 |         method="single",
 49 |         full_price=100,
 50 |         ):
 51 |     trial_list = list(range(trial))
 52 |     acc_list = list()
 53 |     cost_list = list()    
 54 |     budget_list = list()
 55 |     for i in range(trial):
 56 |         print("trial:",i)
 57 |         # generate a submission
 58 |         submission = gen_submission(seller_data_size_list,cost_scale=cost_scale,
 59 |                                     method=method)
 60 |         # calculate the cost of the submission
 61 |         cost = MarketHelper.get_cost(submission,MarketEngineObj)
 62 |         # generate the accuracy of the submission
 63 |         traindata = MarketHelper.load_data(submission, MarketEngineObj)
 64 |         model = MarketHelper.train_model(model, traindata[:,0:-1],
 65 |                                  numpy.ravel(traindata[:,-1]))
 66 |         acc1 = MarketHelper.eval_model(model,test_X=buyer_data[:,0:-1],test_Y=buyer_data[:,-1])
 67 | 
 68 |         cost_list.append(cost)
 69 |         acc_list.append(acc1)
 70 |         budget_list.append(cost_scale*full_price)
 71 |     result = pandas.DataFrame()
 72 |     result['trial'] = trial_list
 73 |     result['acc'] = acc_list
 74 |     result['cost'] = cost_list
 75 |     result['budget'] = budget_list
 76 |     return result
 77 | 
 78 | ''' generate a pandas dataframe
 79 | 
 80 | trial,accuracy, cost
 81 | '''
 82 | 
 83 | def gen_submission(seller_data_size_list=[100,200,300],
 84 |                    cost_scale=1,
 85 |                    method="uniform"):
 86 |     if(method=="uniform"):
 87 |         d = len(seller_data_size_list)
 88 |         submission = [numpy.random.randint(0,int(a*cost_scale/d*2)) for a in seller_data_size_list]
 89 |     if(method=="single"):
 90 |         submission = [0]*len(seller_data_size_list)        
 91 |         index = numpy.random.randint(0,len(submission))
 92 |         submission[index] = int(seller_data_size_list[index]*cost_scale)                               
 93 |     return submission
 94 | 
 95 | def evaluate_budget(MarketHelper,
 96 |         MarketEngineObj,
 97 |         model,
 98 |         buyer_data,
 99 |         trial=100, # number of trials per budget
100 |         seller_data_size_list = [100,200,300],
101 |         cost_scale_list=[0.1],
102 |         method="single",
103 |         ):
104 |     results = [evaluate(
105 |             MarketHelper=MarketHelper,
106 |             MarketEngineObj=MarketEngineObj,
107 |             model=model,
108 |             buyer_data=buyer_data,
109 |             trial=trial, # number of trials per budget
110 |             seller_data_size_list = seller_data_size_list,
111 |             cost_scale=c1,
112 |             method=method,
113 |             ) for c1 in cost_scale_list]
114 |     full_result = pandas.concat(results, ignore_index=True,axis=0)
115 |     return full_result
116 | 
def evaluate_full(instance_id=0, method="single", model_name="knn"):
    """Produce the accuracy/cost trade-off log for one market instance.

    Loads the instance from ``../features/<id>/`` and ``../marketinfo/<id>/``,
    writes the seller dataset sizes to ``seller_datasize.csv``, sweeps a fixed
    list of cost scales with ``evaluate_budget`` and saves the sweep to
    ``../logs/<id>/acc_cost_tradeoffs_<method>_<model_name>.csv``.

    Args:
        instance_id: id of the market instance.
        method: submission sampling method used for the saved sweep.
        model_name: ``"knn"`` or ``'rf'`` (gradient boosting); any other
            value selects logistic regression.

    Note:
        A separate 10-trial ``evaluate`` run is only printed; it does not
        receive ``method`` and therefore uses ``evaluate``'s default.
    """
    print("evaluate acc and cost tradeoffs")
    info_dir = "../marketinfo/" + str(instance_id) + "/"
    size_file = info_dir + "seller_datasize.csv"

    helper = Helper()
    seller_data, seller_prices, buyer_data, buyer_budget, data_size = helper.load_market_instance(
        feature_path="../features/" + str(instance_id) + "/",
        buyer_data_path=info_dir + "data_buyer/20.csv",
        price_path=info_dir + "price/price.txt",
        budget_path=info_dir + "price/budget.txt",
    )
    numpy.savetxt(size_file, data_size, fmt="%d")

    engine = MarketEngine()
    if model_name == 'rf':
        classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                                max_depth=1, random_state=0)
    elif model_name == "knn":
        classifier = KNeighborsClassifier(n_neighbors=9)
    else:
        classifier = LogisticRegression(random_state=0)

    # The loaded budget is not enforced: the engine gets a budget of 1e10.
    engine.setup_market(seller_data=seller_data,
                        seller_prices=seller_prices,
                        buyer_data=buyer_data,
                        buyer_budget=1e10,
                        mlmodel=classifier,
                        )

    shared_arguments = dict(
        MarketHelper=helper,
        MarketEngineObj=engine,
        model=classifier,
        buyer_data=buyer_data,
    )
    # The size list is re-read from the file that was just written.
    result = evaluate(
        trial=10,
        seller_data_size_list=numpy.loadtxt(size_file),
        cost_scale=0.1,
        **shared_arguments
    )
    result2 = evaluate_budget(
        trial=100,
        seller_data_size_list=numpy.loadtxt(size_file),
        cost_scale_list=[0.01,0.025,0.05,0.1,0.2],
        method=method,
        **shared_arguments
    )

    log_dir = "../logs/" + str(instance_id) + "/"
    result2.to_csv(log_dir + "acc_cost_tradeoffs_" + method + "_" + model_name + ".csv")
    print("result is:", result)
    return
174 | 
def main():
    """Generate trade-off logs for instances 3 and 4.

    For every instance and every sampling method ('single', 'uniform') the
    models "knn", "logreg" and "rf" are evaluated, in that order.
    """
    for instance_id in (3, 4):
        for method in ('single', 'uniform'):
            for model_name in ("knn", "logreg", "rf"):
                evaluate_full(instance_id=instance_id,
                              method=method,
                              model_name=model_name)
    return
186 |  
# Script entry point: generate the accuracy/cost logs only when this file is
# executed directly.
if __name__ == '__main__':
    main()        
189 |     
190 |     
191 |     
192 |     
193 | 


--------------------------------------------------------------------------------
/src/visualize_acc_cost.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | """
 16 | 
 17 | #!/usr/bin/env python3
 18 | # -*- coding: utf-8 -*-
 19 | """
 20 | Created on Tue Aug 16 18:36:30 2022
 21 | 
 22 | @author: lingjiao
 23 | """
 24 | 
 25 | 
 26 | from sklearn.linear_model import LogisticRegression
 27 | 
 28 | import matplotlib.pyplot as plt
 29 | import matplotlib
 30 | 
 31 | import numpy
 32 | from seller import Seller
 33 | from buyer import Buyer
 34 | from pricefunction import PriceFunction
 35 | from marketengine import MarketEngine
 36 | from helper import Helper
 37 | import pandas
 38 | from sklearn.neighbors import KNeighborsClassifier
 39 | import seaborn as sns
 40 | 
 41 | def visualize_acc_cost(data_path="../logs/0/acc_cost_tradeoffs_uniform_logreg.csv",
 42 |                        savepath="../figures/",
 43 |                        ):
 44 |     plt.clf()
 45 |     data = pandas.read_csv(data_path)
 46 |     print("data",data)
 47 |     mean1 = data.groupby("budget").mean()
 48 |     var1 = data.groupby("budget").var()
 49 |     max1 = data.groupby("budget").max()
 50 |     min1 = data.groupby("budget").min()
 51 |     print("mean1 of acc",mean1['acc'])
 52 |     print("var",var1['acc'])
 53 |     print("diff, max, and min",max1['acc']-min1['acc'],max1['acc'],min1['acc'])
 54 |     sns.color_palette("tab10")
 55 |     swarm_plot  = sns.histplot(data=data, x="acc", hue="budget",palette=["C0", "C1", "C2","C3","C4"])
 56 |     #swarm_plot = sns.scatterplot(data=data, x= "cost",y="acc")
 57 |     plt.figure()
 58 |     fig = swarm_plot.get_figure()
 59 |     data_parse = data_path.split("/")
 60 |     method = data_parse[-1].split("_")[-2]
 61 |     instanceid = data_parse[-2]
 62 |     ml = data_parse[-1].split("_")[-1]    
 63 |     fig.savefig(savepath+str(instanceid)+"/"+method+ml+".pdf")
 64 | 
 65 |     plt.figure()
 66 | 
 67 |     swarm_plot  = sns.lineplot(data=data, y="acc", x="budget", err_style="band")
 68 |     fig2 = swarm_plot.get_figure()
 69 |     fig2.savefig(savepath+str(instanceid)+"/"+method+ml+"_line.pdf")
 70 | 
 71 | 
 72 |     return 
 73 | 
 74 | def evaluate(
 75 |         MarketHelper,
 76 |         MarketEngineObj,
 77 |         model,
 78 |         buyer_data,
 79 |         trial=100, # number of trials per budget
 80 |         seller_data_size_list = [100,200,300],
 81 |         cost_scale=0.1,
 82 |         method="single",
 83 |         ):
 84 |     trial_list = list(range(trial))
 85 |     acc_list = list()
 86 |     cost_list = list()    
 87 |     
 88 |     for i in range(trial):
 89 |         print("trial:",i)
 90 |         # generate a submission
 91 |         submission = gen_submission(seller_data_size_list,cost_scale=cost_scale,
 92 |                                     method=method)
 93 |         # calculate the cost of the submission
 94 |         cost = MarketHelper.get_cost(submission,MarketEngineObj)
 95 |         # generate the accuracy of the submission
 96 |         traindata = MarketHelper.load_data(submission, MarketEngineObj)
 97 |         model = MarketHelper.train_model(model, traindata[:,0:-1],
 98 |                                  numpy.ravel(traindata[:,-1]))
 99 |         acc1 = MarketHelper.eval_model(model,test_X=buyer_data[:,0:-1],test_Y=buyer_data[:,-1])
100 | 
101 |         cost_list.append(cost)
102 |         acc_list.append(acc1)
103 |     
104 |     result = pandas.DataFrame()
105 |     result['trial'] = trial_list
106 |     result['acc'] = acc_list
107 |     result['cost'] = cost_list
108 |     return result
109 | 
110 | ''' generate a pandas dataframe
111 | 
112 | trial,accuracy, cost
113 | '''
114 | 
def gen_submission(seller_data_size_list=[100,200,300],
                   cost_scale=1,
                   method="uniform"):
    """Draw a random submission (one purchase amount per seller).

    Args:
        seller_data_size_list: per-seller dataset sizes. The default list is
            only read, never modified.
        cost_scale: fraction used to scale each seller's size.
        method: ``"uniform"`` draws every entry independently from
            ``[0, int(size*cost_scale))``; ``"single"`` puts
            ``int(size*cost_scale)`` on one randomly chosen seller and 0 on
            all others.

    Returns:
        list with one integer per seller.

    Raises:
        ValueError: if ``method`` is neither ``"uniform"`` nor ``"single"``.
    """
    if method == "uniform":
        submission = []
        for size in seller_data_size_list:
            upper = int(size*cost_scale)
            # randint requires low < high; a bound that truncates to 0 (small
            # seller or small scale) can only mean "buy nothing".
            submission.append(numpy.random.randint(0, upper) if upper > 0 else 0)
        return submission

    if method == "single":
        seller_count = len(seller_data_size_list)
        submission = [0]*seller_count
        if seller_count == 0:
            # No seller to pick from; randint(0, 0) would raise.
            return submission
        index = numpy.random.randint(0, seller_count)
        submission[index] = int(seller_data_size_list[index]*cost_scale)
        return submission

    # Previously fell through to an UnboundLocalError on `submission`.
    raise ValueError("Unknown submission method: " + repr(method))
125 | 
def evaluate_budget(MarketHelper,
        MarketEngineObj,
        model,
        buyer_data,
        trial=100, # number of trials per budget
        seller_data_size_list = [100,200,300],
        cost_scale_list=[0.1],
        method="single",
        ):
    """Run ``evaluate`` once per cost scale and stack the results.

    Args:
        MarketHelper, MarketEngineObj, model, buyer_data, trial,
        seller_data_size_list, method: forwarded unchanged to ``evaluate``.
        cost_scale_list: cost scales to sweep; one ``evaluate`` call each.

    Returns:
        pandas.DataFrame with the per-scale frames concatenated row-wise
        under a fresh integer index.
    """
    shared_arguments = dict(
        MarketHelper=MarketHelper,
        MarketEngineObj=MarketEngineObj,
        model=model,
        buyer_data=buyer_data,
        trial=trial,
        seller_data_size_list=seller_data_size_list,
        method=method,
    )
    per_scale_frames = []
    for scale in cost_scale_list:
        per_scale_frames.append(evaluate(cost_scale=scale, **shared_arguments))
    return pandas.concat(per_scale_frames, ignore_index=True, axis=0)
147 | 
148 |        
def main(instance_ids=(0, 1, 2, 3, 4), methods=('uniform',), model_names=('rf',)):
    """Render the accuracy/cost figures for the selected trade-off logs.

    For every combination of instance, sampling method and model name, the
    log ``../logs/<instance>/acc_cost_tradeoffs_<method>_<model>.csv`` is
    passed to ``visualize_acc_cost``. The defaults reproduce the previous
    hard-coded run (all five instances, 'uniform' sampling, 'rf' model).

    Args:
        instance_ids: market instance ids to visualise.
        methods: sampling-method tokens of the log file names, e.g.
            'single' or 'uniform'.
        model_names: model tokens of the log file names, e.g. 'knn',
            'logreg' or 'rf'.
    """
    # Close figures left over from earlier runs in the same interpreter.
    matplotlib.pyplot.close('all')

    for instance_id in instance_ids:
        for method in methods:
            for model_name in model_names:
                visualize_acc_cost(
                    data_path="../logs/" + str(instance_id)
                    + "/acc_cost_tradeoffs_" + method + "_" + model_name + ".csv")
# Script entry point: render the figures only when this file is executed
# directly.
if __name__ == '__main__':
    main()        
211 |     
212 |     
213 |     
214 |     
215 | 


--------------------------------------------------------------------------------
/src/helper.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | """
 16 | 
 17 | #!/usr/bin/env python3
 18 | # -*- coding: utf-8 -*-
 19 | """
 20 | Created on Tue Aug 16 18:36:30 2022
 21 | 
 22 | @author: lingjiao
 23 | """
 24 | 
 25 | 
 26 | from sklearn.linear_model import LogisticRegression
 27 | from sklearn.neighbors import KNeighborsClassifier
 28 | from sklearn.ensemble import RandomForestClassifier
 29 | from sklearn.dummy import DummyClassifier
 30 | import numpy
 31 | from seller import Seller
 32 | from buyer import Buyer
 33 | from pricefunction import PriceFunction
 34 | from marketengine import MarketEngine
 35 | import glob
 36 | import pandas
 37 | 
 38 | def sub2stretagy(submission,MarketEngineObj):
 39 |     stretagy1 = list()
 40 |     cost1 = list()
 41 |     for i in range(len(submission)):
 42 |         stretagy1.append(submission[i])
 43 |         cost1.append(MarketEngineObj.sellers[i].getprice(submission[i]))
 44 |     stretagy = list()
 45 |     stretagy.append(stretagy1)
 46 |     stretagy.append(cost1)
 47 |     #print("stretagy is:",stretagy)
 48 |     return stretagy
 49 | 
 50 | class Helper(object):
 51 |     def __init__(self):
 52 |         return
 53 |     
 54 |     def get_cost(self,submission,MarketEngineObj):
 55 |         stretagy = sub2stretagy(submission,MarketEngineObj)
 56 |         cost = sum(stretagy[1])
 57 |         return cost
 58 | 
 59 |     def load_data(self, submission, MarketEngineObj):
 60 |         '''
 61 |         load submissions.
 62 |         return: train X and y 
 63 |         '''
 64 |         
 65 |         #print(" train buyer model ")
 66 |         
 67 |         stretagy = sub2stretagy(submission,MarketEngineObj)
 68 |         buyer_budget = MarketEngineObj.buyer_budget
 69 |         print("strategy is:",stretagy)
 70 |         # check if the budget constraint is satisified.
 71 |         cost = sum(stretagy[1])
 72 |         if(cost>buyer_budget):
 73 |             raise ValueError("The budget constraint is not satisifed!")
 74 |             return
 75 |         
 76 |         traindata = None
 77 |         for i in range(len(MarketEngineObj.sellers)):
 78 |             d1 = MarketEngineObj.sellers[i].getdata(stretagy[0][i],stretagy[1][i])
 79 |             if(i==0):
 80 |                 traindata = d1
 81 |             else:
 82 |                 traindata = numpy.concatenate((traindata,d1))
 83 |         return traindata
 84 |             
 85 |         
 86 |     def train_model(self, model, train_X, train_Y):
 87 |         model.fit(train_X,train_Y)
 88 |         return model 
 89 |     
 90 |     def eval_model(self, model, test_X, test_Y):
 91 |         eval_acc = model.score(test_X, test_Y)
 92 |         return eval_acc     
 93 |     
 94 |     def load_market_instance(self,
 95 |                     feature_path="features/0/",
 96 |                     buyer_data_path="buyerdata.csv",
 97 |                     price_path="price.txt",
 98 |                     budget_path="budget.txt",
 99 |                     ):
100 |         paths = glob.glob(feature_path+"*.csv")
101 |         print("paths:",paths)
102 |         # 1. load seller data
103 |         seller_data = list()
104 |         seller_prices = list()
105 |         buyer_budget = numpy.loadtxt(budget_path)
106 |         buyer_budget = float(buyer_budget)
107 |         #print('budget_ is', type(buyer_budget))
108 | #        datafull = [numpy.loadtxt(path,delimiter=',') for path in paths]
109 |         datafull = [pandas.read_csv(path,header=None,engine="pyarrow").to_numpy() for path in paths]
110 |         seller_datasize = [len(data1) for data1 in datafull]
111 |         pricefull = numpy.loadtxt(price_path,delimiter=',',dtype=str) 
112 |         for i in range(len(datafull)):
113 |             if(1):
114 |                 seller_data.append(datafull[i])
115 |                 #print(pricefull[i])
116 |                 MyPricing1 = PriceFunction()
117 |                 MyPricing1.setup(max_p = float(pricefull[i][1]), method=pricefull[i][0])
118 |                 seller_prices.append(MyPricing1)
119 | #        buyer_data =  numpy.loadtxt(buyer_data_path,delimiter=',')    
120 |         buyer_data =  pandas.read_csv(buyer_data_path,header=None,engine="pyarrow").to_numpy()  
121 |         return seller_data, seller_prices,  buyer_data, buyer_budget, seller_datasize 
def main():
    """Smoke-test ``Helper`` end to end on market instance 0.

    Sets up a toy market, then loads the real instance from ``../features/0/``
    and ``../marketinfo/0/``, writes the per-seller row counts to
    ``../marketinfo/0/seller_datasize.csv``, buys data with a fixed strategy
    and prints the buyer-side accuracy of a logistic regression next to a
    most-frequent-class baseline.
    """
    print("test of the helper")
    MyMarketEngine = MarketEngine()

    # Tiny hand-made market: exercises setup_market before the real data is
    # loaded. It is replaced by the second setup_market call below.
    data_1 = numpy.asmatrix([[0,1,0],[1,0,0]])
    data_2 = numpy.asmatrix([[0,1,1],[1,0,1],[1,1,1],[0,0,1]])
    data_b = numpy.asmatrix([[0,1,0],[1,0,1],[0,1,1]])

    MyPricing1 = PriceFunction()
    MyPricing1.setup(max_p = 100, method="lin")
    MyPricing2 = PriceFunction()
    MyPricing2.setup(max_p = 100, method="lin")

    mlmodel1 = LogisticRegression(random_state=0)

    MyMarketEngine.setup_market(seller_data=[data_1,data_2],
                                seller_prices = [MyPricing1,MyPricing2],
                                buyer_data=data_b,
                                buyer_budget=100,
                                mlmodel=mlmodel1,
                                )

    # Real market instance 0.
    MyHelper = Helper()
    seller_data, seller_prices,  buyer_data, buyer_budget, seller_datasize = MyHelper.load_market_instance(
        feature_path="../features/0/",
        buyer_data_path="../marketinfo/0/data_buyer/20.csv",
        price_path="../marketinfo/0/price/price.txt",
        budget_path="../marketinfo/0/price/budget.txt",
        )
    print("load data finished")
    print("seller data size:",seller_datasize)
    numpy.savetxt("../marketinfo/0/seller_datasize.csv",seller_datasize,fmt="%d")
    MyMarketEngine.setup_market(seller_data=seller_data,
                                seller_prices = seller_prices,
                                buyer_data=buyer_data,
                                buyer_budget=buyer_budget,
                                mlmodel=mlmodel1,
                                )
    print("set up market finished")

    # One purchase amount per seller; edit this list to try other strategies.
    strategy = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,0,0,0,0]
    traindata = MyHelper.load_data(strategy, MyMarketEngine)

    # The last column is used as the label, everything before it as features.
    model = LogisticRegression(random_state=0)
    model = MyHelper.train_model(model, traindata[:,0:-1],
                                 numpy.ravel(traindata[:,-1]))
    acc1 = MyHelper.eval_model(model,test_X=buyer_data[:,0:-1],test_Y=buyer_data[:,-1])
    print("acc is:", acc1)

    # Majority-class baseline for comparison.
    model2 = DummyClassifier(strategy="most_frequent")
    model2 = MyHelper.train_model(model2, traindata[:,0:-1],
                                  numpy.ravel(traindata[:,-1]))
    acc2 = MyHelper.eval_model(model2,test_X=buyer_data[:,0:-1],test_Y=buyer_data[:,-1])
    print("dummy acc is:", acc2)


if __name__ == '__main__':
    main()
200 |     
201 |     
202 |     
203 |     
204 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # dataperf-dam: A Data-centric Benchmark on Data Acquisition for Machine Learning
  2 | 
  3 | This GitHub repo serves as the starting point for submissions to, and evaluation of, the Data Acquisition for Machine Learning benchmark (DAM for short), which is part of the DataPerf benchmark suite [https://dataperf.org/](https://dataperf.org/).
  4 | 
  5 | 
  6 | ## 1. What is the DAM benchmark?
  7 | 
  8 | An increasingly large amount of data is purchased for AI-enabled data science applications. How to select the right set of datasets for AI tasks of interest is an important decision that has, however, received limited attention. A naive approach is to acquire all available datasets and then select which ones to use empirically. This requires expensive human supervision and incurs prohibitively high costs, posing unique challenges to budget-limited users. 
  9 | 
 10 | How can one decide which datasets to acquire before actually purchasing the data to optimize the performance quality of an ML model?  In the DAM (Data-Acquisition-for-Machine-learning) benchmark, the participants are asked to tackle the aforementioned problem. Participants need to provide a data purchase strategy for a data buyer in K (=5 in the beta version) separate data marketplaces. In each data marketplace, there are a few data sellers offering datasets for sale, and one data buyer interested in acquiring some of those datasets to train an ML model. Each seller provides a pricing function that depends on the number of purchased samples. The buyer first decides how many data points to purchase from each seller given a data acquisition budget b. Then those data points are compiled into one dataset to train an ML model f(). The buyer also has a dataset Db to evaluate the performance of the trained model. As in real-world data marketplaces, the buyer cannot observe the sellers’ datasets themselves, only some summary information provided by the sellers.
 11 | 
 12 | ## 2. How to participate in this challenge?
 13 | We suggest starting with the [colab notebook](https://colab.research.google.com/drive/1HYoFfKwd9Pr-Zg_e2uJxWF8yHqa9sRMn?usp=sharing). It is self-contained and shows how to (i) install the required library, (ii) access the buyer's observation, and (iii) create strategies ready to be submitted. The following sections explain these steps in more detail.
 14 | 
 15 | ## 3. How to access the buyer's observation?
 16 | 
 17 | We provide a simple python library to access the buyer’s observation in each data marketplace.
 18 | To use it, we recommend creating a virtual environment with
 19 | ```
 20 | conda create -n DAM python=3.8
 21 | conda activate DAM
 22 | ```
 23 | and then clone the github repo and install all libraries, and download the data by
 24 | ```
 25 | git clone https://github.com/facebookresearch/Data_Acquisition_for_ML_Benchmark
 26 | cd Data_Acquisition_for_ML_Benchmark
 27 | pip install -r requirements.txt
 28 | wget https://github.com/lchen001/Data_Acquisition_for_ML_Benchmark/releases/download/v0.0.1/marketinfo.zip
 29 | ! unzip marketinfo.zip
 30 | cd src
 31 | ```
 32 | 
 33 | Now, one is ready to use this library. For example,  to specify the marketplace id, one can use
 34 | 
 35 | ```
 36 | from dam import Dam
 37 | MyDam = Dam(instance=0)
 38 | ```
 39 | 
 40 | 
 41 | The following code lists the buyer’s budget, dataset, and ml model.
 42 | 
 43 | ```
 44 | budget = MyDam.getbudget()
 45 | buyer_data = MyDam.getbuyerdata()
 46 | mlmodel = MyDam.getmlmodel()
 47 | ```
 48 | 
 49 | 
 50 | To list all sellers’ ids, execute 
 51 | 
 52 | 
 53 | ```
 54 | sellers_id = MyDam.getsellerid()
 55 | ```
 56 | 
 57 | To get seller i’s information, run
 58 | 
 59 | ```
 60 | seller_i_price, seller_i_summary, seller_i_samples =  MyDam.getsellerinfo(seller_id=i)
 61 | ```
 62 | 
 63 | seller_i_price contains the pricing function. seller_i_summary includes (i) the number of rows, (ii) the number of columns, (iii) the histogram of each dimension, and (iv) the correlation between each column and the label. Seller_i_samples contains 5 samples from each dataset.  
 64 | 
 65 | Note: For simplicity, all sellers sell the same type of data; more formally, their data distributions share the same support. For example, all datasets have the same number of columns, and each column has the same semantic meaning.
 66 | 
 67 | More details on the price function: given a sample size, the price can be calculated by calling the get_price_samplesize function. For example, if the sample size is 100, then calling
 68 | 
 69 | ```
 70 | seller_i_price.get_price_samplesize(samplesize=100)
 71 | ```
 72 | gives the price.
 73 | 
 74 | More details on the seller summary: the seller_i_summary contains four fields as follows:
 75 | 
 76 | ```
 77 | seller_i_summary.keys()
 78 | >>> dict_keys(['row_number', 'column_number', 'hist', 'label_correlation'])
 79 | ```
 80 | Here, seller_i_summary['row_number'] encodes the number of data points. Similarly, seller_i_summary['column_number'] equals the number of features plus one (for the label). seller_i_summary['hist'] is a dictionary containing the histogram of each feature. seller_i_summary['label_correlation'] is a dictionary that holds the Pearson correlation between each feature and the label.
 81 | 
 82 | For example, one can print the histogram of the second feature by 
 83 | ```
 84 | print(seller_i_summary['hist']['2'])
 85 | >>> {'0_size': 3, '1_size': 35, '2_size': 198, '3_size': 821, '4_size': 2988, '5_size': 8496, '6_size': 11563, '7_size': 5155, '8_size': 704, '9_size': 37, '0_range': -0.7187578082084656, '1_range': -0.5989721298217774, '2_range': -0.4791864514350891, '3_range': -0.3594007730484009, '4_range': -0.23961509466171266, '5_range': -0.11982941627502441, '6_range': -4.373788833622605e-05, '7_range': 0.11974194049835207, '8_range': 0.23952761888504026, '9_range': 0.35931329727172856, '10_range': 0.47909897565841675}
 86 | ```
 87 | How to read this? The representation records (i) where the histogram bin edges lie (i_range) and (ii) how many points fall into each bin (i_size). For example, `'2_size': 198` means that 198 data points fall into bin 2, and `'2_range': -0.4791864514350891, '3_range': -0.3594007730484009` means that bin 2 covers the interval [-0.4791864514350891, -0.3594007730484009].
 88 | 
 89 | ```
 90 | print(seller_i_summary['label_correlation']['2'])
 91 | >>> 0.08490820825406746
 92 | ```
 93 | This means the correlation between the 2nd feature and the label is 0.08490820825406746.
 94 | 
 95 | Note that all features in the sellers and buyers' datasets are NOT in their raw form. In fact, we have extracted those features using a deep learning model (more specifically, a dist-bert model) from their original format.
 96 | 
 97 | ## 4. How to submit a solution?
 98 | 
 99 | The submission should contain K(=5) txt files. k.txt corresponds to the purchase strategy for the kth marketplace. The notebook will automatically generate txt files for submission under the folder ```\submission\my_submission```. For example, one submission may look like
100 | 
101 | 
102 | ```
103 | 
104 |  \submission\my_submission\0.txt 
105 | 
106 |  \submission\my_submission\1.txt 
107 | 
108 |  \submission\my_submission\2.txt 
109 | 
110 |  \submission\my_submission\3.txt 
111 | 
112 |  \submission\my_submission\4.txt
113 | 
114 | ```
115 | 
116 | Each txt file should contain one line of numbers, where the ith number indicates the number of data points to purchase from the ith seller. For example, 0.txt containing
117 | 
118 | ```
119 | 100,50,200,500
120 | ```
121 | 
122 | means buying 100, 50, 200, and 500 samples from seller 1, seller 2, seller 3, and seller 4, respectively. 
123 | 
124 | Once you are ready, upload the txt files to DynaBench for evaluation: https://dynabench.org/tasks/DAM/
125 | 
126 | 
127 | ## 5. How is a submission evaluated?
128 | 
129 | Once we receive a submission, we first check whether the strategy is valid (e.g., whether it satisfies the budget constraint). Then we train an ML model on the dataset generated by the submitted strategy and evaluate its performance (standard accuracy) on the buyer’s data Db. We will report the performance averaged over all K marketplace instances. 
130 | 
131 | What ML model to train? To focus on the data acquisition task, we train a simple logistic regression model. More specifically, we use the following model 
132 | 
133 | ```
134 | from sklearn.linear_model import LogisticRegression
135 | model = LogisticRegression(random_state=0)	
136 | ```  
137 | 
138 | Requirements:
139 | 
140 | (i) you may use any (open-source/commercial) software;
141 | 
142 | (ii) you may not use external datasets;
143 | 
144 | (iii) do not create multiple accounts for submission;
145 | 
146 | (iv) follow the honor code.
147 | 
148 | ## Contact and License
149 | _DAM_ is Apache 2.0 licensed.
150 | 
151 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 |                                  Apache License
  4 |                            Version 2.0, January 2004
  5 |                         http://www.apache.org/licenses/
  6 | 
  7 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  8 | 
  9 |    1. Definitions.
 10 | 
 11 |       "License" shall mean the terms and conditions for use, reproduction,
 12 |       and distribution as defined by Sections 1 through 9 of this document.
 13 | 
 14 |       "Licensor" shall mean the copyright owner or entity authorized by
 15 |       the copyright owner that is granting the License.
 16 | 
 17 |       "Legal Entity" shall mean the union of the acting entity and all
 18 |       other entities that control, are controlled by, or are under common
 19 |       control with that entity. For the purposes of this definition,
 20 |       "control" means (i) the power, direct or indirect, to cause the
 21 |       direction or management of such entity, whether by contract or
 22 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 23 |       outstanding shares, or (iii) beneficial ownership of such entity.
 24 | 
 25 |       "You" (or "Your") shall mean an individual or Legal Entity
 26 |       exercising permissions granted by this License.
 27 | 
 28 |       "Source" form shall mean the preferred form for making modifications,
 29 |       including but not limited to software source code, documentation
 30 |       source, and configuration files.
 31 | 
 32 |       "Object" form shall mean any form resulting from mechanical
 33 |       transformation or translation of a Source form, including but
 34 |       not limited to compiled object code, generated documentation,
 35 |       and conversions to other media types.
 36 | 
 37 |       "Work" shall mean the work of authorship, whether in Source or
 38 |       Object form, made available under the License, as indicated by a
 39 |       copyright notice that is included in or attached to the work
 40 |       (an example is provided in the Appendix below).
 41 | 
 42 |       "Derivative Works" shall mean any work, whether in Source or Object
 43 |       form, that is based on (or derived from) the Work and for which the
 44 |       editorial revisions, annotations, elaborations, or other modifications
 45 |       represent, as a whole, an original work of authorship. For the purposes
 46 |       of this License, Derivative Works shall not include works that remain
 47 |       separable from, or merely link (or bind by name) to the interfaces of,
 48 |       the Work and Derivative Works thereof.
 49 | 
 50 |       "Contribution" shall mean any work of authorship, including
 51 |       the original version of the Work and any modifications or additions
 52 |       to that Work or Derivative Works thereof, that is intentionally
 53 |       submitted to Licensor for inclusion in the Work by the copyright owner
 54 |       or by an individual or Legal Entity authorized to submit on behalf of
 55 |       the copyright owner. For the purposes of this definition, "submitted"
 56 |       means any form of electronic, verbal, or written communication sent
 57 |       to the Licensor or its representatives, including but not limited to
 58 |       communication on electronic mailing lists, source code control systems,
 59 |       and issue tracking systems that are managed by, or on behalf of, the
 60 |       Licensor for the purpose of discussing and improving the Work, but
 61 |       excluding communication that is conspicuously marked or otherwise
 62 |       designated in writing by the copyright owner as "Not a Contribution."
 63 | 
 64 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 65 |       on behalf of whom a Contribution has been received by Licensor and
 66 |       subsequently incorporated within the Work.
 67 | 
 68 |    2. Grant of Copyright License. Subject to the terms and conditions of
 69 |       this License, each Contributor hereby grants to You a perpetual,
 70 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 71 |       copyright license to reproduce, prepare Derivative Works of,
 72 |       publicly display, publicly perform, sublicense, and distribute the
 73 |       Work and such Derivative Works in Source or Object form.
 74 | 
 75 |    3. Grant of Patent License. Subject to the terms and conditions of
 76 |       this License, each Contributor hereby grants to You a perpetual,
 77 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 78 |       (except as stated in this section) patent license to make, have made,
 79 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 80 |       where such license applies only to those patent claims licensable
 81 |       by such Contributor that are necessarily infringed by their
 82 |       Contribution(s) alone or by combination of their Contribution(s)
 83 |       with the Work to which such Contribution(s) was submitted. If You
 84 |       institute patent litigation against any entity (including a
 85 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 86 |       or a Contribution incorporated within the Work constitutes direct
 87 |       or contributory patent infringement, then any patent licenses
 88 |       granted to You under this License for that Work shall terminate
 89 |       as of the date such litigation is filed.
 90 | 
 91 |    4. Redistribution. You may reproduce and distribute copies of the
 92 |       Work or Derivative Works thereof in any medium, with or without
 93 |       modifications, and in Source or Object form, provided that You
 94 |       meet the following conditions:
 95 | 
 96 |       (a) You must give any other recipients of the Work or
 97 |           Derivative Works a copy of this License; and
 98 | 
 99 |       (b) You must cause any modified files to carry prominent notices
100 |           stating that You changed the files; and
101 | 
102 |       (c) You must retain, in the Source form of any Derivative Works
103 |           that You distribute, all copyright, patent, trademark, and
104 |           attribution notices from the Source form of the Work,
105 |           excluding those notices that do not pertain to any part of
106 |           the Derivative Works; and
107 | 
108 |       (d) If the Work includes a "NOTICE" text file as part of its
109 |           distribution, then any Derivative Works that You distribute must
110 |           include a readable copy of the attribution notices contained
111 |           within such NOTICE file, excluding those notices that do not
112 |           pertain to any part of the Derivative Works, in at least one
113 |           of the following places: within a NOTICE text file distributed
114 |           as part of the Derivative Works; within the Source form or
115 |           documentation, if provided along with the Derivative Works; or,
116 |           within a display generated by the Derivative Works, if and
117 |           wherever such third-party notices normally appear. The contents
118 |           of the NOTICE file are for informational purposes only and
119 |           do not modify the License. You may add Your own attribution
120 |           notices within Derivative Works that You distribute, alongside
121 |           or as an addendum to the NOTICE text from the Work, provided
122 |           that such additional attribution notices cannot be construed
123 |           as modifying the License.
124 | 
125 |       You may add Your own copyright statement to Your modifications and
126 |       may provide additional or different license terms and conditions
127 |       for use, reproduction, or distribution of Your modifications, or
128 |       for any such Derivative Works as a whole, provided Your use,
129 |       reproduction, and distribution of the Work otherwise complies with
130 |       the conditions stated in this License.
131 | 
132 |    5. Submission of Contributions. Unless You explicitly state otherwise,
133 |       any Contribution intentionally submitted for inclusion in the Work
134 |       by You to the Licensor shall be under the terms and conditions of
135 |       this License, without any additional terms or conditions.
136 |       Notwithstanding the above, nothing herein shall supersede or modify
137 |       the terms of any separate license agreement you may have executed
138 |       with Licensor regarding such Contributions.
139 | 
140 |    6. Trademarks. This License does not grant permission to use the trade
141 |       names, trademarks, service marks, or product names of the Licensor,
142 |       except as required for reasonable and customary use in describing the
143 |       origin of the Work and reproducing the content of the NOTICE file.
144 | 
145 |    7. Disclaimer of Warranty. Unless required by applicable law or
146 |       agreed to in writing, Licensor provides the Work (and each
147 |       Contributor provides its Contributions) on an "AS IS" BASIS,
148 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 |       implied, including, without limitation, any warranties or conditions
150 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 |       PARTICULAR PURPOSE. You are solely responsible for determining the
152 |       appropriateness of using or redistributing the Work and assume any
153 |       risks associated with Your exercise of permissions under this License.
154 | 
155 |    8. Limitation of Liability. In no event and under no legal theory,
156 |       whether in tort (including negligence), contract, or otherwise,
157 |       unless required by applicable law (such as deliberate and grossly
158 |       negligent acts) or agreed to in writing, shall any Contributor be
159 |       liable to You for damages, including any direct, indirect, special,
160 |       incidental, or consequential damages of any character arising as a
161 |       result of this License or out of the use or inability to use the
162 |       Work (including but not limited to damages for loss of goodwill,
163 |       work stoppage, computer failure or malfunction, or any and all
164 |       other commercial damages or losses), even if such Contributor
165 |       has been advised of the possibility of such damages.
166 | 
167 |    9. Accepting Warranty or Additional Liability. While redistributing
168 |       the Work or Derivative Works thereof, You may choose to offer,
169 |       and charge a fee for, acceptance of support, warranty, indemnity,
170 |       or other liability obligations and/or rights consistent with this
171 |       License. However, in accepting such obligations, You may act only
172 |       on Your own behalf and on Your sole responsibility, not on behalf
173 |       of any other Contributor, and only if You agree to indemnify,
174 |       defend, and hold each Contributor harmless for any liability
175 |       incurred by, or claims asserted against, such Contributor by reason
176 |       of your accepting any such warranty or additional liability.
177 | 
178 |    END OF TERMS AND CONDITIONS
179 | 
180 |    APPENDIX: How to apply the Apache License to your work.
181 | 
182 |       To apply the Apache License to your work, attach the following
183 |       boilerplate notice, with the fields enclosed by brackets "[]"
184 |       replaced with your own identifying information. (Don't include
185 |       the brackets!)  The text should be enclosed in the appropriate
186 |       comment syntax for the file format. We also recommend that a
187 |       file or class name and description of purpose be included on the
188 |       same "printed page" as the copyright notice for easier
189 |       identification within third-party archives.
190 | 
191 |    Copyright 2022 Meta Platforms
192 | 
193 |    Licensed under the Apache License, Version 2.0 (the "License");
194 |    you may not use this file except in compliance with the License.
195 |    You may obtain a copy of the License at
196 | 
197 |        http://www.apache.org/licenses/LICENSE-2.0
198 | 
199 |    Unless required by applicable law or agreed to in writing, software
200 |    distributed under the License is distributed on an "AS IS" BASIS,
201 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 |    See the License for the specific language governing permissions and
203 |    limitations under the License.
204 | 


--------------------------------------------------------------------------------
/src/visualizetools.py:
--------------------------------------------------------------------------------
   1 | import matplotlib  # noqa
   2 | matplotlib.use('Agg')  # noqa
   3 | 
   4 | import matplotlib.pyplot as plt
   5 | plt.rcParams['axes.facecolor'] = 'white'
   6 | 
   7 | import numpy as np
   8 | import matplotlib.ticker as ticker
   9 | import json
  10 | import seaborn as sn
  11 | import pandas as pd
  12 | from matplotlib.colors import LogNorm
  13 | import seaborn as sns
  14 | from matplotlib.colors import LinearSegmentedColormap
  15 | import umap
  16 | #import matplotlib.pyplot as plt
  17 | 
  18 | 
  19 | class VisualizeTools(object):
  20 |     def __init__(self,figuresize = (10,8),figureformat='jpg',
  21 |                  colorset=['r','orange','k','yellow','g','b','k'],
  22 |                  markersize=30,
  23 |                  fontsize=30,
  24 |                  usecommand=True):
  25 |         self.figuresize=figuresize
  26 |         self.figureformat = figureformat
  27 |         self.fontsize = fontsize
  28 |         self.linewidth = 5
  29 |         self.markersize = markersize
  30 |         self.folder = "../figures/" # use "../figures/" if needed
  31 |         self.colorset=colorset
  32 |         self.markerset = ['o','X','^','v','s','o','*','d','p']
  33 |         self.marker = 'o' # from ['X','^','v','s','o','*','d','p'],
  34 |         self.linestyle = '-' # from ['-.','--','--','-.','-',':','--','-.'],
  35 |         self.linestyleset = ['-','-.','--','--','-.','-',':','--','-.']
  36 |         self.usecommand = usecommand
  37 |         
  38 |     def plotline(self,
  39 |                  xvalue,
  40 |                  yvalue,
  41 |                  xlabel='xlabel',
  42 |                  ylabel='ylabel',
  43 |                  legend=None,
  44 |                  filename='lineplot',
  45 |                  fig=None,
  46 |                  color=None,
  47 |                  ax=None):
  48 |         if(ax==None):        
  49 |             # setup figures
  50 |             fig = plt.figure(figsize=self.figuresize)
  51 |             fig, ax = plt.subplots(figsize=self.figuresize)
  52 |             plt.rcParams.update({'font.size': self.fontsize})
  53 |             plt.rcParams["font.weight"] = "bold"
  54 |             plt.rcParams["axes.labelweight"] = "bold"
  55 |             plt.rcParams["lines.linewidth"] = self.linewidth
  56 |             plt.rcParams["lines.markersize"] = self.markersize
  57 |             plt.rcParams["font.sans-serif"] = 'Arial'        
  58 | 
  59 |         # plot it        
  60 |         if(color==None):
  61 |             color = self.colorset[0]
  62 |         ax.plot(xvalue, 
  63 |                  yvalue,
  64 |                  marker=self.marker,
  65 |                  label=legend,
  66 |                  color=color,
  67 |                  linestyle = self.linestyle,
  68 |                  zorder=0,
  69 |                  )
  70 |         plt.xlabel(xlabel)
  71 |         plt.ylabel(ylabel) 
  72 |         
  73 |         plt.grid(True)
  74 |         ax.locator_params(axis='x', nbins=6)
  75 |         ax.locator_params(axis='y', nbins=6)
  76 | 
  77 |         formatter = ticker.FormatStrFormatter('%0.2e')
  78 |         
  79 |         formatterx = ticker.FormatStrFormatter('%0.2f')
  80 |         
  81 |         ax.yaxis.set_major_formatter(formatter)
  82 |         ax.xaxis.set_major_formatter(formatterx)
  83 |      
  84 |         filename =filename+'.'+self.figureformat
  85 |         
  86 |         if(self.figureformat=='jpg'):
  87 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
  88 |         else:
  89 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
  90 |             
  91 |         return fig, ax 
  92 |         #plt.fill_between(bud, np.asarray(acc_mean)-np.asarray(acc_std), np.asarray(acc_mean)+np.asarray(acc_std),alpha=0.3,facecolor='lightgray')
  93 | 
  94 | 
  95 |     def plotlines(self,
  96 |                  xvalue,
  97 |                  yvalues,
  98 |                  xlabel='xlabel',
  99 |                  ylabel='ylabel',
 100 |                  legend=None,
 101 |                  filename='lineplot',
 102 |                  fig=None,
 103 |                  ax=None,
 104 |                  showlegend=False,
 105 |                  log=False,
 106 |                  fontsize=60,
 107 |                  basey=10,
 108 |                  ylim=None):
 109 |         #if(-1):
 110 |         if(ax==None):        
 111 |             # setup figures
 112 |             fig = plt.figure(figsize=self.figuresize)
 113 |             fig, ax = plt.subplots(figsize=self.figuresize,frameon=True)
 114 |             plt.rcParams.update({'font.size': fontsize})
 115 |             plt.rcParams["font.weight"] = "bold"
 116 |             plt.rcParams["axes.labelweight"] = "bold"
 117 |             plt.rcParams["lines.linewidth"] = self.linewidth
 118 |             plt.rcParams["lines.markersize"] = self.markersize
 119 |             plt.rcParams["font.sans-serif"] = 'Arial'   
 120 |             ax.set_facecolor("white")
 121 |             #ax.set_edgecolor("black")
 122 |             ax.grid("True",color="grey")
 123 |             ax.get_yaxis().set_visible(True)
 124 |             ax.get_xaxis().set_visible(True)
 125 |         # plot it        
 126 |         for i in range(len(yvalues)):
 127 |             ax.plot(xvalue, 
 128 |                  yvalues[i],
 129 |                  marker=self.markerset[i],
 130 |                  label=legend[i],
 131 |                  color=self.colorset[i],
 132 |                  linestyle =  self.linestyleset[i],
 133 |                  zorder=0,
 134 |                  markersize=self.markersize,
 135 |                  markevery=1,
 136 |                  )
 137 |         plt.xlabel(xlabel,fontsize=fontsize)
 138 |         plt.ylabel(ylabel,fontsize=fontsize) 
 139 |         
 140 |         plt.grid(True)
 141 |         #ax.locator_params(axis='x', nbins=6)
 142 |         #ax.locator_params(axis='y', nbins=6)
 143 |         '''
 144 |         formatter = ticker.FormatStrFormatter('%d')
 145 |         
 146 |         formatterx = ticker.FormatStrFormatter('%d')
 147 |         
 148 |         ax.yaxis.set_major_formatter(formatter)
 149 |         ax.xaxis.set_major_formatter(formatterx)
 150 |         '''
 151 |         ax.tick_params(axis='both', which='major', labelsize=fontsize)
 152 |         
 153 |         if(ylim!=None):
 154 |             plt.ylim(ylim)
 155 | 
 156 |         if(log==True):
 157 |             ax.set_yscale('log',base=basey)
 158 |         if(showlegend==True):
 159 |             ax.legend(legend,facecolor="white",prop={'size': fontsize},
 160 |                       markerscale=1, numpoints= 2,loc="best")
 161 |         
 162 |         filename =filename+'.'+self.figureformat
 163 |         
 164 |         if(self.figureformat=='jpg'):
 165 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 166 |         else:
 167 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 168 |             
 169 |         return fig, ax 
 170 |         #plt.fill_between(bud, np.asarray(acc_mean)-np.asarray(acc_std), np.asarray(acc_mean)+np.asarray(acc_std),alpha=0.3,facecolor='lightgray')
 171 |     
 172 |     def Histogram(self,
 173 |                  xvalue,
 174 | 
 175 |                  xlabel='xlabel',
 176 |                  ylabel='ylabel',
 177 |                  legend=None,
 178 |                  filename='lineplot',
 179 |                  fig=None,
 180 |                  ax=None,
 181 |                  showlegend=False,
 182 |                  log=False,
 183 |                  fontsize=90,
 184 |                  ylim=None,
 185 |                  n_bins=20):
 186 |         #if(-1):
 187 |         if(ax==None):        
 188 |             # setup figures
 189 |             fig = plt.figure(figsize=self.figuresize)
 190 |             fig, ax = plt.subplots(figsize=self.figuresize,frameon=True)
 191 |             plt.rcParams.update({'font.size': fontsize})
 192 |             plt.rcParams["font.weight"] = "bold"
 193 |             plt.rcParams["axes.labelweight"] = "bold"
 194 |             plt.rcParams["lines.linewidth"] = self.linewidth
 195 |             plt.rcParams["lines.markersize"] = self.markersize
 196 |             plt.rcParams["font.sans-serif"] = 'Arial'   
 197 |             ax.set_facecolor("white")
 198 |             #ax.set_edgecolor("black")
 199 |             ax.grid("True",color="grey")
 200 |             ax.get_yaxis().set_visible(True)
 201 |             ax.get_xaxis().set_visible(True)
 202 |         # plot it
 203 |         plt.hist(xvalue,bins=n_bins)        
 204 |         '''
 205 |         for i in range(len(yvalues)):
 206 | 
 207 |             ax.plot(xvalue, 
 208 |                  yvalues[i],
 209 |                  marker=self.markerset[i],
 210 |                  label=legend[i],
 211 |                  color=self.colorset[i],
 212 |                  linestyle =  self.linestyleset[i],
 213 |                  zorder=0,
 214 |                  markersize=self.markersize,
 215 |                  markevery=10,
 216 |                  )
 217 |         '''
 218 |         plt.xlabel(xlabel,fontsize=fontsize)
 219 |         plt.ylabel(ylabel,fontsize=fontsize) 
 220 |         
 221 |         plt.grid(True)
 222 |         #ax.locator_params(axis='x', nbins=6)
 223 |         #ax.locator_params(axis='y', nbins=6)
 224 |         '''
 225 |         formatter = ticker.FormatStrFormatter('%d')
 226 |         
 227 |         formatterx = ticker.FormatStrFormatter('%d')
 228 |         
 229 |         ax.yaxis.set_major_formatter(formatter)
 230 |         ax.xaxis.set_major_formatter(formatterx)
 231 |         '''
 232 |         ax.tick_params(axis='both', which='major', labelsize=fontsize)
 233 |         
 234 |         if(ylim!=None):
 235 |             plt.ylim(ylim)
 236 | 
 237 |         if(log==True):
 238 |             ax.set_yscale('log')
 239 |         if(showlegend==True):
 240 |             ax.legend(legend,facecolor="white",prop={'size': fontsize},
 241 |                       markerscale=2, numpoints= 2,loc=0)
 242 |         
 243 |         filename =filename+'.'+self.figureformat
 244 |         
 245 |         if(self.figureformat=='jpg'):
 246 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 247 |         else:
 248 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 249 |             
 250 |         return fig, ax 
 251 |         #plt.fill_between(bud, np.asarray(acc_mean)-np.asarray(acc_std), np.asarray(acc_mean)+np.asarray(acc_std),alpha=0.3,facecolor='lightgray')
 252 | 
 253 | 
 254 |     def Histograms(self,
 255 |                  xvalues,
 256 |                  xlabel='xlabel',
 257 |                  ylabel='ylabel',
 258 |                  legend=None,
 259 |                  filename='lineplot',
 260 |                  fig=None,
 261 |                  ax=None,
 262 |                  showlegend=False,
 263 |                  log=False,
 264 |                  fontsize=90,
 265 |                  color=['red','orange'],
 266 |                  ylim=None,
 267 |                  n_bins=20):
 268 |         #if(-1):
 269 |         if(ax==None):        
 270 |             # setup figures
 271 |             fig = plt.figure(figsize=self.figuresize)
 272 |             fig, ax = plt.subplots(figsize=self.figuresize,frameon=True)
 273 |             plt.rcParams.update({'font.size': fontsize})
 274 |             plt.rcParams["font.weight"] = "bold"
 275 |             plt.rcParams["axes.labelweight"] = "bold"
 276 |             plt.rcParams["lines.linewidth"] = self.linewidth
 277 |             plt.rcParams["lines.markersize"] = self.markersize
 278 |             plt.rcParams["font.sans-serif"] = 'Arial'   
 279 |             ax.set_facecolor("white")
 280 |             #ax.set_edgecolor("black")
 281 |             ax.grid("True",color="grey")
 282 |             ax.get_yaxis().set_visible(True)
 283 |             ax.get_xaxis().set_visible(True)
 284 |         # plot it
 285 |         plt.hist(xvalues,bins=n_bins, density=True,color=color)        
 286 |         '''
 287 |         for i in range(len(yvalues)):
 288 | 
 289 |             ax.plot(xvalue, 
 290 |                  yvalues[i],
 291 |                  marker=self.markerset[i],
 292 |                  label=legend[i],
 293 |                  color=self.colorset[i],
 294 |                  linestyle =  self.linestyleset[i],
 295 |                  zorder=0,
 296 |                  markersize=self.markersize,
 297 |                  markevery=10,
 298 |                  )
 299 |         '''
 300 |         plt.xlabel(xlabel,fontsize=fontsize)
 301 |         plt.ylabel(ylabel,fontsize=fontsize) 
 302 |         
 303 |         plt.grid(True)
 304 |         #ax.locator_params(axis='x', nbins=6)
 305 |         #ax.locator_params(axis='y', nbins=6)
 306 |         '''
 307 |         formatter = ticker.FormatStrFormatter('%d')
 308 |         
 309 |         formatterx = ticker.FormatStrFormatter('%d')
 310 |         
 311 |         ax.yaxis.set_major_formatter(formatter)
 312 |         ax.xaxis.set_major_formatter(formatterx)
 313 |         '''
 314 |         ax.tick_params(axis='both', which='major', labelsize=fontsize)
 315 |         
 316 |         if(ylim!=None):
 317 |             plt.ylim(ylim)
 318 | 
 319 |         if(log==True):
 320 |             ax.set_yscale('log')
 321 |         if(showlegend==True):
 322 |             ax.legend(legend,facecolor="white",prop={'size': fontsize},
 323 |                       markerscale=2, numpoints= 2,loc=0)
 324 |         
 325 |         filename =filename+'.'+self.figureformat
 326 |         
 327 |         if(self.figureformat=='jpg'):
 328 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 329 |         else:
 330 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 331 |             
 332 |         return fig, ax 
 333 |         #plt.fill_between(bud, np.asarray(acc_mean)-np.asarray(acc_std), np.asarray(acc_mean)+np.asarray(acc_std),alpha=0.3,facecolor='lightgray')
 334 |     
 335 |         
 336 |     def plotscatter(self,
 337 |                     xvalue=0.3,
 338 |                     yvalue=0.5,
 339 |                     filename='lineplot',
 340 |                     markersize=10,
 341 |                     legend='Learned Thres',
 342 |                     color='blue',
 343 |                     showlegend=False,
 344 |                     fig=None,
 345 |                     ax=None):
 346 |         if(ax==None):
 347 |             # setup figures
 348 |             fig = plt.figure(figsize=self.figuresize)
 349 |             fig, ax = plt.subplots(figsize=self.figuresize)
 350 |             plt.rcParams.update({'font.size': self.fontsize})
 351 |             plt.rcParams["font.weight"] = "bold"
 352 |             plt.rcParams["axes.labelweight"] = "bold"
 353 |             plt.rcParams["lines.linewidth"] = self.linewidth
 354 |             plt.rcParams["lines.markersize"] = self.markersize
 355 |             plt.rcParams["font.sans-serif"] = 'Arial' 
 356 |         
 357 |         ax.plot(xvalue,yvalue,'*',markersize=markersize,color=color,
 358 |                 label=legend)
 359 |         if(showlegend):
 360 |             handles, labels = ax.get_legend_handles_labels()
 361 |             print("labels",labels)
 362 |             ax.legend(handles[::-1],labels[::-1], prop={'size': 35},markerscale=3, numpoints= 1,loc=0)
 363 | 
 364 | 
 365 |         filename =filename+'.'+self.figureformat
 366 |         if(self.figureformat=='jpg'):
 367 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 368 |         else:
 369 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 370 |                     
 371 |         return fig, ax
 372 | 
 373 | 
 374 |     def plotscatter(self,
 375 |                     xvalue=0.3,
 376 |                     yvalue=0.5,
 377 |                     filename='lineplot',
 378 |                     markersize=10,
 379 |                     legend='Learned Thres',
 380 |                     color='blue',
 381 |                     showlegend=False,
 382 |                     fig=None,
 383 |                     ax=None):
 384 |         if(ax==None):
 385 |             # setup figures
 386 |             fig = plt.figure(figsize=self.figuresize)
 387 |             fig, ax = plt.subplots(figsize=self.figuresize)
 388 |             plt.rcParams.update({'font.size': self.fontsize})
 389 |             plt.rcParams["font.weight"] = "bold"
 390 |             plt.rcParams["axes.labelweight"] = "bold"
 391 |             plt.rcParams["lines.linewidth"] = self.linewidth
 392 |             plt.rcParams["lines.markersize"] = self.markersize
 393 |             plt.rcParams["font.sans-serif"] = 'Arial' 
 394 |         
 395 |         ax.plot(xvalue,yvalue,'*',markersize=markersize,color=color,
 396 |                 label=legend)
 397 |         if(showlegend):
 398 |             handles, labels = ax.get_legend_handles_labels()
 399 |             print("labels",labels)
 400 |             ax.legend(handles[::-1],labels[::-1], prop={'size': 35},markerscale=3, numpoints= 1,loc=0)
 401 | 
 402 | 
 403 |         filename =filename+'.'+self.figureformat
 404 |         if(self.figureformat=='jpg'):
 405 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 406 |         else:
 407 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 408 |                     
 409 |         return fig, ax
 410 |     
 411 |     def plotscatters_annotation(self,
 412 |                     xvalue=[0.3],
 413 |                     yvalue=[0.5],
 414 |                     filename='lineplot',
 415 |                     markersize=10,
 416 |                     legend='Learned Thres',
 417 |                     color='blue',
 418 |                     showlegend=False,
 419 |                     fig=None,
 420 |                     ax=None,
 421 |                     annotation=None):
 422 |         if(ax==None):
 423 |             # setup figures
 424 |             fig = plt.figure(figsize=self.figuresize)
 425 |             fig, ax = plt.subplots(figsize=self.figuresize)
 426 |             plt.rcParams.update({'font.size': self.fontsize})
 427 |             plt.rcParams["font.weight"] = "bold"
 428 |             plt.rcParams["axes.labelweight"] = "bold"
 429 |             plt.rcParams["lines.linewidth"] = self.linewidth
 430 |             plt.rcParams["lines.markersize"] = self.markersize
 431 |             plt.rcParams["font.sans-serif"] = 'Arial' 
 432 |         
 433 |         ax.scatter(xvalue,yvalue,)
 434 |         #           '*',markersize=markersize,color=color,
 435 |         #        )
 436 |         for i in range(len(xvalue)):
 437 |             ax.annotate(annotation[i], xy=[xvalue[i],yvalue[i]])
 438 |         if(showlegend):
 439 |             handles, labels = ax.get_legend_handles_labels()
 440 |             print("labels",labels)
 441 |             ax.legend(handles[::-1],labels[::-1], prop={'size': 35},markerscale=3, numpoints= 1,loc=0)
 442 | 
 443 | 
 444 |         filename =filename+'.'+self.figureformat
 445 |         if(self.figureformat=='jpg'):
 446 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 447 |         else:
 448 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 449 |                     
 450 |         return fig, ax
 451 |     
 452 |     def plot_bar(self,barname,barvalue,
 453 |                  filename='barplot',
 454 |                  markersize=2,
 455 |                  yname='Frequency',
 456 |                  xname="",
 457 |                  color='blue',
 458 |                  ylim=None,
 459 |                  fig=None,
 460 |                  showlegend=False,
 461 |                  ax=None,
 462 |                  labelpad=None,
 463 |                  fontsize=30,
 464 |                  threshold=10,
 465 |                  add_thresline=False,):
 466 |         if(ax==None):
 467 |             # setup figures
 468 |             fig = plt.figure(figsize=self.figuresize)
 469 |             fig, ax = plt.subplots(figsize=self.figuresize)
 470 |             ax.set_facecolor("white")
 471 |             plt.rcParams.update({'font.size': 1})
 472 |             plt.rcParams["font.weight"] = "bold"
 473 |             plt.rcParams["axes.labelweight"] = "bold"
 474 |             plt.rcParams["lines.linewidth"] = self.linewidth
 475 |             plt.rcParams["lines.markersize"] = markersize
 476 |             plt.rcParams["font.sans-serif"] = 'Arial' 
 477 |             plt.rc('font', size=1)          # controls default text sizes
 478 |             print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
 479 |             plt.grid(True,color="grey")
 480 |         x = np.arange(len(barname))
 481 |         ax.bar(x,barvalue,color=color,
 482 |                 label=barname)
 483 |         ax.set_ylabel(yname,fontsize=fontsize)
 484 |         if(xname!=""):
 485 |             ax.set_xlabel(xname,fontsize=fontsize)
 486 | 
 487 |         #ax.set_title('Scores by group and gender')
 488 |         ax.set_xticks(x)
 489 |         ax.set_xticklabels(barname,rotation='horizontal',fontsize=fontsize)
 490 |         #ax.set_xticklabels(barname,rotation='vertical')
 491 |         plt.xlim(x[0]-0.5,x[-1]+0.5)
 492 |         
 493 |         if(add_thresline==True):
 494 |             ax.plot([min(x)-0.5, max(x)+0.5], [threshold, threshold], "k--")
 495 | 
 496 |         matplotlib.rc('xtick', labelsize=fontsize) 
 497 |         
 498 |         ax.tick_params(axis='both', which='major', labelsize=fontsize)
 499 | 
 500 |         if(not(labelpad==None)):
 501 |             ax.tick_params(axis='x', which='major', pad=labelpad)
 502 |       
 503 |         #matplotlib.rc('ytick', labelsize=fontsize) 
 504 |         #ax.text(0.5,0.5,"hello")
 505 | 
 506 |         #ax.legend()
 507 |         
 508 |         if(showlegend):
 509 |             handles, labels = ax.get_legend_handles_labels()
 510 |             print("labels",labels)
 511 |             ax.legend(handles[::-1],labels[::-1], prop={'size': 10},markerscale=3, numpoints= 1,loc=0)
 512 | 
 513 | 
 514 |         #ticks = [tick for tick in plt.gca().get_xticklabels()]
 515 |         #print("ticks 0 is",ticks[0].get_window_extent())
 516 |         '''
 517 |         plt.text(-0.07, -0.145, 'label:', horizontalalignment='center',fontsize=fontsize,
 518 |                 verticalalignment='center', transform=ax.transAxes)
 519 |         plt.text(-0.07, -0.25, 'qs:', horizontalalignment='center',fontsize=fontsize,
 520 |                 verticalalignment='center', transform=ax.transAxes)
 521 |         '''        
 522 |         filename =filename+'.'+self.figureformat
 523 |         if(not(ylim==None)):
 524 |             plt.ylim(ylim)
 525 |         if(self.figureformat=='jpg'):
 526 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 527 |         else:
 528 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 529 |                     
 530 |         return fig, ax
 531 |         
 532 |     
 533 |     def plot_bar2value(self,barname,barvalue, barvalue2,
 534 |                  filename='barplot',
 535 |                  markersize=2,
 536 |                  yname='Frequency',
 537 |                  color='blue',
 538 |                  fig=None,
 539 |                  showlegend=False,
 540 |                  legend=['precision','recall'],
 541 |                  yrange = None,
 542 |                  ax=None,
 543 |                  fontsize=25,
 544 |                  showvalues = False,
 545 |                  legend_loc="upper left",
 546 |                  hatch=None):
 547 |         if(ax==None):
 548 |             # setup figures
 549 |             fig = plt.figure(figsize=self.figuresize)
 550 |             fig, ax = plt.subplots(figsize=self.figuresize)
 551 |             plt.rcParams.update({'font.size': fontsize})
 552 |             plt.rcParams["font.weight"] = "bold"
 553 |             plt.rcParams["axes.labelweight"] = "bold"
 554 |             plt.rcParams["lines.linewidth"] = self.linewidth
 555 |             plt.rcParams["lines.markersize"] = markersize
 556 |             plt.rcParams["font.sans-serif"] = 'Arial' 
 557 |         width=0.3
 558 |         x = np.arange(len(barname))
 559 |         ax.bar(x-width/2,barvalue,width,color=color[0],
 560 |                 label=legend[0])
 561 |         ax.bar(x+width/2,barvalue2,width, color=color[1],
 562 |                hatch=hatch,
 563 |                 label=legend[1])
 564 |         
 565 | 
 566 | 
 567 |         ax.set_ylabel(yname,fontsize=fontsize)
 568 |         #ax.set_title('Scores by group and gender')
 569 |         ax.set_xticks(x)
 570 |         #ax.set_xticklabels(barname,rotation='vertical')
 571 |         #ax.set_xticklabels(barname,rotation=45)
 572 |         ax.set_xticklabels(barname,rotation='horizontal')
 573 |         plt.xlim(x[0]-0.5,x[-1]+0.5)
 574 |         if(not(yrange==None)):
 575 |             plt.ylim(yrange[0],yrange[1])
 576 |             
 577 |         matplotlib.rc('xtick', labelsize=fontsize) 
 578 |         matplotlib.rc('ytick', labelsize=fontsize) 
 579 | 
 580 |         #ax.legend()
 581 |         
 582 |         if(showvalues==True):
 583 |             for i, v in enumerate(barvalue):
 584 |                 ax.text(i - 0.33,v + 0.1, "{:.1f}".format(v), color=color[0], fontweight='bold',)
 585 |     
 586 |             for i, v in enumerate(barvalue2):
 587 |                 ax.text(i + .10,v + 0.2, "{:.1f}".format(v), color=color[1], fontweight='bold',)
 588 | 
 589 |         if(showlegend):
 590 |             handles, labels = ax.get_legend_handles_labels()
 591 |             print("labels",labels)
 592 |             ax.legend(handles[::-1],labels[::-1], prop={'size': fontsize},markerscale=3, numpoints= 1,
 593 |                       loc=legend_loc,ncol=1, )#bbox_to_anchor=(0, 1.05))
 594 | 
 595 | 
 596 |         filename =filename+'.'+self.figureformat
 597 |         if(self.figureformat=='jpg'):
 598 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 599 |         else:
 600 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 601 |                     
 602 |         return fig, ax
 603 |     
 604 |     def plotconfusionmaitrix(self,confmatrix,
 605 |                              xlabel=None,ylabel=None,
 606 |                              filename='confmatrix',
 607 |                              keywordsize = 16,
 608 |                              font_scale=2,
 609 |                              figuresize=(10,10),
 610 |                              cmap="coolwarm", # "Blues"
 611 |                              vmin=0,
 612 |                              vmax=10,
 613 |                              fonttype='Arial',
 614 |                              title1="",
 615 |                              fmt=".1f",
 616 |                              xlabel1 = "Predicted label",
 617 |                              ylabel1="True label",):
 618 |         if(self.usecommand==True):
 619 |             return self.plotconfusionmaitrix_common1(confmatrix=confmatrix,
 620 |                                                      xlabel=xlabel,
 621 |                                                      ylabel=ylabel,
 622 |                                                      filename=filename,
 623 |                                                      keywordsize = keywordsize,
 624 |                                                      font_scale=font_scale,
 625 |                                                      figuresize=figuresize,
 626 |                                                      cmap=cmap,
 627 |                                                      vmin=vmin,
 628 |                                                      vmax=vmax,
 629 |                                                      fonttype=fonttype,
 630 |                                                      title1=title1,
 631 |                                                      xlabel1=xlabel1,
 632 |                                                      ylabel1=ylabel1,
 633 |                                                      fmt=fmt)
 634 |         
 635 |         sn.set(font=fonttype)
 636 |         #boundaries = [0.0, 0.045, 0.05, 0.055, 0.06,0.065,0.07,0.08,0.1,0.15, 1.0]  # custom boundaries
 637 |         boundaries = [0.0, 0.06,0.2, 0.25,0.3, 0.4,0.5,0.6,0.7, 0.8, 1.0]  # custom boundaries
 638 | 
 639 |         # here I generated twice as many colors, 
 640 |         # so that I could prune the boundaries more clearly
 641 |         #hex_colors = sns.light_palette('blue', n_colors=len(boundaries) * 2 + 2, as_cmap=False).as_hex()
 642 |         #hex_colors = [hex_colors[i] for i in range(0, len(hex_colors), 2)]
 643 |         #print("hex",hex_colors)
 644 |         # My color
 645 |         hex_colors = ['#ffffff','#ebf1f7',
 646 |  '#d3e4f3',
 647 |  '#bfd8ed',
 648 |  '#a1cbe2',
 649 |  '#7db8da',
 650 |  '#5ca4d0',
 651 |  '#3f8fc5',
 652 |  '#2676b8',
 653 |  '#135fa7',
 654 |  '#08488e']
 655 |         '''
 656 |         ['#e5eff9',
 657 |  '#d3e4f3',
 658 |  '#bfd8ed',
 659 |  '#a1cbe2',
 660 |  '#7db8da',
 661 |  '#5ca4d0',
 662 |  '#3f8fc5',
 663 |  '#2676b8',
 664 |  '#135fa7',
 665 |  '#08488e']
 666 |         '''
 667 | 
 668 |         boundaries = [0.0, 0.03, 0.06,0.1,0.2,0.29,0.3,0.8,1.0]
 669 |         hex_colors = ['#F2F6FA','#ebf1f7','#FFB9C7','#FF1242', '#FF1242','#FF1242','#2676b8','#135fa7','#08488e']
 670 |         
 671 |         colors=list(zip(boundaries, hex_colors))
 672 | 
 673 |         custom_color_map = LinearSegmentedColormap.from_list(
 674 |             name='custom_navy',
 675 |             colors=colors,
 676 |             )
 677 | 
 678 |         tol=1e-4
 679 |         labels = confmatrix
 680 |         confmatrix=confmatrix*(confmatrix>0.35)
 681 |         print("confmatrix",confmatrix+tol)
 682 |         df_cm = pd.DataFrame(confmatrix+tol,xlabel,ylabel)
 683 |         plt.figure(figsize=figuresize)
 684 |         sn.set(font_scale=font_scale) # for label size
 685 |         g = sn.heatmap(df_cm, 
 686 |                        linewidths=0.3,
 687 |                        linecolor="grey",
 688 |                        cmap=custom_color_map,
 689 |                        #annot=True, 
 690 |                        annot  = labels,
 691 |                        annot_kws={"size": keywordsize},fmt=".1f",
 692 |                        #mask=df_cm < 0.02,
 693 |                        vmin=vmin+tol,
 694 |                        vmax=vmax,
 695 |                        cbar=False,
 696 |                        #cbar_kws={"ticks":[0.1,0.3,1,3,10]},
 697 |                        #norm=LogNorm(),
 698 |                        #legend=False,
 699 |                        ) # font size
 700 |         #g.cax.set_visible(False)
 701 |         #sn.heatmap(df, cbar=False) 
 702 | 
 703 |         g.set_yticklabels(labels=g.get_yticklabels(), va='center')
 704 |         filename =filename+'.'+self.figureformat
 705 |         plt.ylabel(ylabel1)
 706 |         plt.xlabel(xlabel1)  
 707 |         plt.title("Overall accuracy:"+"{:.1f}".format(np.trace(confmatrix)),
 708 |                   fontweight="bold",
 709 |                   pad=32)
 710 |         g.set_xticklabels(g.get_xticklabels(), rotation = 0)
 711 |         
 712 | 
 713 |         if(self.figureformat=='jpg'):
 714 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 715 |         else:
 716 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 717 |             
 718 |         return 0
 719 | 
 720 | 
 721 |     def plotconfusionmaitrix_common1(self,confmatrix,
 722 |                              xlabel=None,ylabel=None,
 723 |                              filename='confmatrix',
 724 |                              keywordsize = 16,
 725 |                              font_scale=2,
 726 |                              figuresize=(10,10),
 727 |                              cmap="vlag",
 728 |                              vmin=0,
 729 |                              vmax=10,
 730 |                              fonttype='Arial',
 731 |                              title1="",
 732 |                              fmt=".1f",
 733 |                              xlabel1 = "Predicted label",
 734 |                              ylabel1="True label",
 735 |                              ):
 736 |         print("Use common confusion matrix plot!")
 737 |         sn.set(font=fonttype)
 738 |         #boundaries = [0.0, 0.045, 0.05, 0.055, 0.06,0.065,0.07,0.08,0.1,0.15, 1.0]  # custom boundaries
 739 |         boundaries = [0.0, 0.06,0.2, 0.25,0.3, 0.4,0.5,0.6,0.7, 0.8, 1.0]  # custom boundaries
 740 | 
 741 |         # here I generated twice as many colors, 
 742 |         # so that I could prune the boundaries more clearly
 743 |         #hex_colors = sns.light_palette('blue', n_colors=len(boundaries) * 2 + 2, as_cmap=False).as_hex()
 744 |         #hex_colors = [hex_colors[i] for i in range(0, len(hex_colors), 2)]
 745 |         #print("hex",hex_colors)
 746 |         # My color
 747 |         hex_colors = ['#ffffff','#ebf1f7',
 748 |  '#d3e4f3',
 749 |  '#bfd8ed',
 750 |  '#a1cbe2',
 751 |  '#7db8da',
 752 |  '#5ca4d0',
 753 |  '#3f8fc5',
 754 |  '#2676b8',
 755 |  '#135fa7',
 756 |  '#08488e']
 757 |         '''
 758 |         ['#e5eff9',
 759 |  '#d3e4f3',
 760 |  '#bfd8ed',
 761 |  '#a1cbe2',
 762 |  '#7db8da',
 763 |  '#5ca4d0',
 764 |  '#3f8fc5',
 765 |  '#2676b8',
 766 |  '#135fa7',
 767 |  '#08488e']
 768 |         '''
 769 | 
 770 |         boundaries = [0.0, 0.03, 0.06,0.1,0.2,0.29,0.3,0.8,1.0]
 771 |         hex_colors = ['#F2F6FA','#ebf1f7','#FFB9C7','#FF1242', '#FF1242','#FF1242','#2676b8','#135fa7','#08488e']
 772 |         
 773 |         colors=list(zip(boundaries, hex_colors))
 774 | 
 775 |         custom_color_map = LinearSegmentedColormap.from_list(
 776 |             name='custom_navy',
 777 |             colors=colors,
 778 |             )
 779 | 
 780 |         tol=1e-4
 781 |         labels = confmatrix
 782 |         #confmatrix=confmatrix*(confmatrix>0.35)
 783 |         #print("confmatrix",confmatrix+tol)
 784 |         df_cm = pd.DataFrame(confmatrix+tol,xlabel,ylabel)
 785 |         plt.figure(figsize=figuresize)
 786 |         sn.set(font_scale=font_scale) # for label size
 787 |         g = sn.heatmap(-df_cm, 
 788 |                        linewidths=0.3,
 789 |                        linecolor="grey",
 790 |                        cmap=cmap,
 791 |                        #annot=True, 
 792 |                        annot  = labels,
 793 |                        annot_kws={"size": keywordsize},fmt=fmt,
 794 |                        #mask=df_cm < 0.02,
 795 |                        #vmin=vmin+tol,
 796 |                        #vmax=vmax,
 797 |                        cbar=False,
 798 |                        center=0,
 799 |                        #cbar_kws={"ticks":[0.1,0.3,1,3,10]},
 800 |                        #norm=LogNorm(),
 801 |                        #legend=False,
 802 |                        ) # font size
 803 |         #g.cax.set_visible(False)
 804 |         #sn.heatmap(df, cbar=False) 
 805 | 
 806 |         g.set_yticklabels(labels=g.get_yticklabels(), va='center')
 807 |         filename =filename+'.'+self.figureformat
 808 |         plt.ylabel(ylabel1)
 809 |         plt.xlabel(xlabel1)  
 810 |         print("trece",np.trace(confmatrix),confmatrix)
 811 |         plt.title(title1,
 812 |                   fontweight="bold", 
 813 |                   fontsize=keywordsize*1.1,
 814 |                   pad=40)
 815 |         g.set_xticklabels(g.get_xticklabels(), rotation = 0)
 816 |         
 817 | 
 818 |         if(self.figureformat=='jpg'):
 819 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=300)       
 820 |         else:
 821 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 822 |             
 823 |         return 0
 824 |     
 825 |     def plotconfusionmaitrix_common(self,confmatrix,
 826 |                              xlabel=None,ylabel=None,
 827 |                              filename='confmatrix',
 828 |                              keywordsize = 16,
 829 |                              font_scale=2,
 830 |                              figuresize=(10,10),
 831 |                              cmap='vlag',#sn.diverging_palette(240, 10, n=9),
 832 |                              vmin=-5,
 833 |                              vmax=10,
 834 |                              center=0,
 835 |                              fonttype='Arial'):
 836 |         
 837 |         cmap = LinearSegmentedColormap.from_list('RedWhiteGreen', ['red', 'white', 'green'])
 838 | 
 839 | 
 840 |         sn.set(font=fonttype)
 841 | 
 842 |         tol=1e-4
 843 |         labels = (confmatrix+0.05)*(np.abs(confmatrix)>0.1)
 844 |         labels = list()
 845 |         for i in range(confmatrix.shape[0]):
 846 |             temp = list()
 847 |             for j in range(confmatrix.shape[1]):
 848 |                 a = confmatrix[i,j]
 849 |                 if(a>0.1):
 850 |                     temp.append("+"+"{0:.1f}".format(a))
 851 |                 if(a<-0.1):
 852 |                     temp.append("{0:.1f}".format(a))
 853 |                 if(a<=0.1 and a>=-0.1):
 854 |                     temp.append(str(0.0))                    
 855 |             labels.append(temp)
 856 |         #labels = (confmatrix+0.05)*(np.abs(confmatrix)>0.1)
 857 | 
 858 |         print("labels",labels)
 859 | 
 860 |         confmatrix=confmatrix=confmatrix*(np.abs(confmatrix)>0.7)
 861 |         
 862 |         print("confmatrix",confmatrix+tol)
 863 |         df_cm = pd.DataFrame(confmatrix+tol,xlabel,ylabel)
 864 |         plt.figure(figsize=figuresize)
 865 |         sn.set(font_scale=font_scale) # for label size
 866 |         g = sn.heatmap(df_cm, 
 867 |                        linewidths=12.0,
 868 |                        linecolor="grey",
 869 |                        cmap=cmap,
 870 |                        center=center,
 871 |                        #annot=True, 
 872 |                        annot  = labels,
 873 |                        annot_kws={"size": keywordsize},fmt="s",#fmt="{0:+.1f}",
 874 |                        #mask=df_cm < 0.02,
 875 |                        vmin=vmin,
 876 |                        vmax=vmax,
 877 |                        cbar=False,
 878 |                        #cbar_kws={"ticks":[0.1,0.3,1,3,10]},
 879 |                        #norm=LogNorm(),
 880 |                        #legend=False,
 881 |                        ) # font size
 882 |         #g.cax.set_visible(False)
 883 |         #sn.heatmap(df, cbar=False) 
 884 | 
 885 |         g.set_yticklabels(labels=g.get_yticklabels(), va='center')
 886 |         filename =filename+'.'+self.figureformat
 887 |         plt.ylabel("ML API")
 888 |         plt.xlabel("Dataset",)  
 889 |         #plt.title("Overall accuracy:"+"{:.1f}".format(np.trace(confmatrix)),
 890 |         #          fontweight="bold",
 891 |         #          pad=32)
 892 |         g.set_xticklabels(g.get_xticklabels(), rotation = 0)
 893 |         
 894 | 
 895 |         if(self.figureformat=='jpg'):
 896 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight',dpi=40)       
 897 |         else:
 898 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight')       
 899 |             
 900 |         return 0
 901 |     
 902 |     def reward_vs_confidence(self,
 903 |                              BaseID = 100,
 904 |                              ModelID=[100,0,1,2],
 905 |                              confidencerange = (0.1,0.2,0.3,0.4,0.5,0.6,0.7,.99,1),
 906 |                              prob_range=None,
 907 |                              datapath='path/to/imagenet/result/val_performance'):
 908 |         """
 909 |         Run a small experiment on solving a Bernoulli bandit with K slot machines,
 910 |         each with a randomly initialized reward probability.
 911 |     
 912 |         Args:
 913 |             K (int): number of slot machiens.
 914 |             N (int): number of time steps to try.
 915 |         """
 916 |         datapath = self.datapath
 917 |         print('reward datapath',datapath)
 918 |         b0 = BernoulliBanditwithData(ModelID=ModelID,datapath=datapath)
 919 |         K = len(ModelID)
 920 |         print ("Data generated Bernoulli bandit has reward probabilities:\n", b0.probas)
 921 |         print ("The best machine has index: {} and proba: {}".format(
 922 |             max(range(K), key=lambda i: b0.probas[i]), max(b0.probas)))
 923 |         Params0 = context_params(ModelID=ModelID,datapath=datapath)
 924 |         #confidencerange = (0.02,0.03,0.04,0.05,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.99,0.9999,1)
 925 |         #confidencerange = (0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.9999,1)
 926 |         if(not(prob_range==None)):
 927 |             confidencerange = self.mlmodels.prob2qvalue(prob_interval=prob_range,conf_id=BaseID)
 928 |         BaseAccuracy, Others =self.mlmodels.accuracy_condition_score_List(ScoreRange=confidencerange,BaseID=BaseID,ModelID=ModelID)
 929 | 
 930 |         print(BaseAccuracy, Others)
 931 |         CDF = Params0.BaseModel.Compute_Prob_vs_Score(ScoreRange=confidencerange)
 932 |         print(CDF)
 933 |         plot_reward_vs_confidence(confidencerange, BaseAccuracy,Others, ModelID,"model reward compare_ModelID_{}.png".format(ModelID),CDF)
 934 | 
 935 |     def reward_vs_prob(self,
 936 |                        BaseID = 100,
 937 |                        ModelID=[100,0,1,2],
 938 |                        confidencerange = (0.1,0.2,0.3,0.4,0.5,0.6,0.7,.99,1),
 939 |                        prob_range=None,
 940 |                        datapath='path/to/imagenet/result/val_performance',
 941 |                        dataname='imagenet_val',
 942 |                        context=None):
 943 |         """
 944 |         compute and plot reward as a function of the probability of not using 
 945 |         the basemodel. 
 946 |     
 947 |         Args:
 948 |             See the name.
 949 |         """
 950 |         datapath = self.datapath
 951 |         print('reward datapath',datapath)
 952 |         if(not(prob_range==None)):
 953 |             confidencerange = self.mlmodels.prob2qvalue(prob_interval=prob_range,conf_id=BaseID,context = context)
 954 |         BaseAccuracy, Others =self.mlmodels.accuracy_condition_score_list(ScoreRange=confidencerange,BaseID=BaseID,ModelID=ModelID,context=context)
 955 |         print('Base Accuracy', BaseAccuracy, 'Other',Others)
 956 |         CDF = self.mlmodels.compute_prob_vs_score(ScoreRange=confidencerange,context = context)
 957 |         print('CDF',CDF)
 958 |         self._plot_reward_vs_prob(CDF, BaseAccuracy,Others, ModelID,self.folder+"Reward_vs_Prob_BaseID_{}_{}_context_{}.{}".format(BaseID,dataname,context,self.figureformat),CDF)
 959 | 
 960 |     def reward_vs_prob_pdf(self,
 961 |                        BaseID = 100,
 962 |                        ModelID=[100,0,1,2],
 963 |                        confidencerange = (0.1,0.2,0.3,0.4,0.5,0.6,0.7,.99,1),
 964 |                        prob_range=None,
 965 |                        datapath='path/to/imagenet/result/val_performance',
 966 |                        dataname='imagenet_val',
 967 |                        context=None):
 968 |         """
 969 |         compute and plot reward as a function of the probability of not using 
 970 |         the basemodel. 
 971 |     
 972 |         Args:
 973 |             See the name.
 974 |         """
 975 |         datapath = self.datapath
 976 |         print('reward datapath',datapath)
 977 |         if(not(prob_range==None)):
 978 |             confidencerange = self.mlmodels.prob2qvalue(prob_interval=prob_range,conf_id=BaseID,context = context)
 979 |         BaseAccuracy, Others =self.mlmodels.accuracy_condition_score_list(ScoreRange=confidencerange,BaseID=BaseID,ModelID=ModelID,context=context)
 980 |         print('Base Accuracy', BaseAccuracy, 'Other',Others)
 981 |         CDF = self.mlmodels.compute_prob_vs_score(ScoreRange=confidencerange,context = context)
 982 |         print('CDF',CDF)
 983 |         self._plot_reward_vs_prob(CDF, BaseAccuracy,Others, ModelID,self.folder+"Reward_vs_Prob_BaseID_{}_{}_context_{}.{}".format(BaseID,dataname,context,self.figureformat),CDF)
 984 | 
 985 |         if(not(prob_range==None)):
 986 |             base_pdf,other_pdf = self.mlmodels.accuracy_condition_score_list_cdf2pdf(prob_range,BaseAccuracy,Others,diff = False)
 987 |             print('base pdf',base_pdf)
 988 |             print('other pdf',other_pdf)
 989 |             self._plot_reward_vs_prob(CDF, base_pdf,other_pdf, ModelID,self.folder+"Reward_vs_Probpdf_diff_BaseID_{}_{}_context_{}.{}".format(BaseID,dataname,context,self.figureformat),CDF)
 990 |             self._plot_reward_vs_prob(confidencerange, base_pdf,other_pdf, ModelID,self.folder+"Reward_vs_conf_pdf_diff_BaseID_{}_{}_context_{}.{}".format(BaseID,dataname,context,self.figureformat),CDF)
 991 | 
 992 | 
 993 |     def qvalue_vs_prob(self,
 994 |                        confidence_range = None,
 995 |                        BaseID = 100,
 996 |                        prob_range = None,
 997 |                        dataname = 'imagenet_val',
 998 |                        context=None):
 999 |         if(not(prob_range==None)):
1000 |             confidence_range = self.mlmodels.prob2qvalue(prob_interval=prob_range,conf_id=BaseID,context=context)        
1001 |         filename = self.folder+"Conf_vs_prob_BaseID_{}_{}_context_{}.{}".format(BaseID,dataname,context,self.figureformat)
1002 |         prob = self.mlmodels.compute_prob_wrt_confidence(confidence_range=confidence_range,BaseID = BaseID,context=context)
1003 |         self._plot_q_value_vs_prob(confidence_range,prob,filename)
1004 |         return 0
1005 |     
1006 |     def _plot_reward_vs_prob(self, confidence_range, base_acc, model_acc, model_names, figname, CDF):
1007 |         """
1008 |         Plot the results by multi-armed bandit solvers.
1009 |     
1010 |         Args:
1011 |             solvers (list<Solver>): All of them should have been fitted.
1012 |             solver_names (list<str)
1013 |             figname (str)
1014 |         """
1015 |         fig = plt.figure(figsize=self.figuresize)
1016 |         plt.rcParams.update({'font.size': self.fontsize})  
1017 |         plt.rcParams["font.weight"] = "bold"
1018 |         plt.rcParams["axes.labelweight"] = "bold"   
1019 |         plt.rcParams["lines.linewidth"] = self.linewidth
1020 |         plt.rcParams["lines.markersize"] = self.markersize
1021 |         k=0
1022 |         for i in model_acc:
1023 |             plt.plot(confidence_range, i, label=model_names[k],marker='x')
1024 |             k=k+1
1025 |     
1026 |         plt.xlabel('Fraction of Low Confidence Data')
1027 |         plt.ylabel('Accuracy on Low Confidence Data')
1028 |         plt.legend(loc=8, ncol=5)
1029 |         plt.savefig(figname, format=self.figureformat, bbox_inches='tight')
1030 |     
1031 |     def _plot_q_value_vs_prob(self,confidence_range,prob,figname):
1032 |         fig = plt.figure(figsize=self.figuresize)
1033 |         plt.rcParams.update({'font.size': self.fontsize})
1034 |         plt.rcParams["font.weight"] = "bold"
1035 |         plt.rcParams["axes.labelweight"] = "bold"
1036 |         plt.rcParams["lines.linewidth"] = self.linewidth
1037 |         plt.rcParams["lines.markersize"] = self.markersize
1038 |         
1039 |         plt.plot(prob,confidence_range,marker='x')
1040 |         plt.xlabel('Fraction of Low Confidence Data')
1041 |         plt.ylabel('Confidence Threshold')
1042 |         #plt.legend(loc=9, ncol=5)
1043 |         plt.savefig(figname, format=self.figureformat, bbox_inches='tight')
1044 |         
1045 |     
1046 |     def plot_accuracy(self,
1047 |                       namestick=['bm', 's0','s1','s2'],
1048 |                       model_id=[100,0,1,2],
1049 |                       base_id = 100,
1050 |                       datapath='path/to/imagenet/result/val_performance',
1051 |                       dataname='imagenet_val'):
1052 |         datapath = self.datapath
1053 |         print('reward datapath',datapath)
1054 |         BaseAccuracy, Others =self.mlmodels.accuracy_condition_score_list(ScoreRange=[1],BaseID=base_id,ModelID=model_id)
1055 |         print('Base Accuracy', BaseAccuracy, 'Other',len(Others))
1056 |         fig = plt.figure(figsize=self.figuresize)
1057 |         plt.rcParams.update({'font.size': self.fontsize})
1058 |         plt.rcParams["font.weight"] = "bold"
1059 |         plt.rcParams["axes.labelweight"] = "bold"
1060 |         plt.rcParams["lines.linewidth"] = self.linewidth
1061 |         plt.rcParams["lines.markersize"] = self.markersize
1062 |         
1063 |         flattened = [val for sublist in Others for val in sublist]
1064 |         print('flat others',flattened)
1065 |         acc = flattened
1066 |         #plt.bar(range(len(acc)),acc,color=self.colorset,tick_label=namestick)
1067 |         bars = plt.bar(range(len(acc)),acc,color=self.colorset,hatch="/")
1068 |         #plt.bar(range(len(acc)),acc,color='r',edgecolor='k',hatch="/")
1069 |         #ax = plt.gca()
1070 |         #ax.bar(range(1, 5), range(1, 5), color='red', edgecolor='black', hatch="/")
1071 |         #patterns = ('-', '+', 'x', '\\', '*', 'o', 'O', '.')
1072 |         patterns = ('-', '\\', '/', 'o', 'O', '.','+', 'x','*')
1073 |         for bar, pattern in zip(bars, patterns):
1074 |             bar.set_hatch(pattern)
1075 |         #ax.set_hatch('/')
1076 |         plt.xlabel('ML Services')
1077 |         plt.ylabel('Accuracy')
1078 |         plt.ylim(min(acc)-0.01)
1079 |         #set_xticklabels(namestick)
1080 |         matplotlib.pyplot.xticks(range(len(acc)), namestick)
1081 | 
1082 |         #plt.legend(loc=9, ncol=5)
1083 |         figname = self.folder+"accuracy_dataset_{}.{}".format(dataname,self.figureformat)
1084 |         plt.savefig(figname, format=self.figureformat, bbox_inches='tight')       
1085 |         
1086 |     def plot_umaps(self,
1087 |                  fit_data=[[1,2,3],[4,5,6]],
1088 |                  data=[[1,2,3],[4,5,6]],
1089 |                  filename="umap",
1090 |                  markersize=2,
1091 |                  markershape=["8","s"],
1092 |                  yname='Frequency',
1093 |                  color=['blue','red'],
1094 |                  fig=None,
1095 |                  showlegend=False,
1096 |                  legend=['male','female'],
1097 |                  yrange = None,
1098 |                  ax=None,
1099 |                  fontsize=30,
1100 |                  figureformat="jpg",):
1101 |         # generate embeddings
1102 |         reducer = umap.UMAP(random_state=42)
1103 |         reducer.fit(fit_data[:,0:-1])
1104 |         for i in range(len(data)):
1105 |             datum1 = data[i]
1106 |             embedding = reducer.transform(datum1[:,0:-1])
1107 |             plt.scatter(embedding[:, 0], embedding[:, 1], c=datum1[:,-1], cmap='Spectral', s=markersize,marker=markershape[i],label=legend[i])
1108 |   
1109 | #        plt.legend(loc=8, ncol=5)
1110 |         
1111 |         lgnd = plt.legend(loc="lower left", scatterpoints=1, fontsize=10)
1112 |         for handle in lgnd.legendHandles:
1113 |             handle.set_sizes([2.0])
1114 |     
1115 |         self.figureformat = figureformat
1116 |         if(self.figureformat=='jpg'):
1117 |             plt.savefig(filename+".jpg", format=self.figureformat, bbox_inches='tight',dpi=300)       
1118 |         else:
1119 |             plt.savefig(filename, format=self.figureformat, bbox_inches='tight') 
1120 |         plt.close("all")
1121 |         return 
1122 |     
def plot_results(solvers, solver_names, figname):
    """
    Plot the results by multi-armed bandit solvers.

    Three panels are drawn: cumulative regret over time, estimated versus
    true reward probabilities (actions sorted by true probability), and the
    fraction of trials spent on each action.

    Args:
        solvers (list<Solver>): All of them should have been fitted.
        solver_names (list<str>): Legend labels, one per solver.
        figname (str): Output path passed to ``plt.savefig``.
    """
    assert len(solvers) == len(solver_names)
    assert all(isinstance(s, Solver) for s in solvers)
    assert all(len(s.regrets) > 0 for s in solvers)

    # All solvers are assumed to share the bandit of the first one.
    b = solvers[0].bandit

    fig = plt.figure(figsize=(14, 4))
    fig.subplots_adjust(bottom=0.3, wspace=0.3)

    ax1 = fig.add_subplot(131)
    ax2 = fig.add_subplot(132)
    ax3 = fig.add_subplot(133)

    # Sub.fig. 1: Regrets in time.
    for s, name in zip(solvers, solver_names):
        ax1.plot(range(len(s.regrets)), s.regrets, label=name)

    ax1.set_xlabel('Time step')
    ax1.set_ylabel('Cumulative regret')
    ax1.legend(loc=9, bbox_to_anchor=(1.82, -0.25), ncol=5)
    # The first positional argument of ``grid`` is the visibility flag, not
    # a colour; pass True explicitly (the old 'k' only worked by truthiness).
    ax1.grid(True, ls='--', alpha=0.3)

    # Sub.fig. 2: Probabilities estimated by solvers.
    sorted_indices = sorted(range(b.n), key=lambda x: b.probas[x])
    ax2.plot(range(b.n), [b.probas[x] for x in sorted_indices], 'k--', markersize=12)
    for s in solvers:
        ax2.plot(range(b.n), [s.estimated_probas[x] for x in sorted_indices], 'x', markeredgewidth=2)
    ax2.set_xlabel('Actions sorted by ' + r'$\theta$')
    ax2.set_ylabel('Estimated')
    ax2.grid(True, ls='--', alpha=0.3)

    # Sub.fig. 3: Action counts
    total_steps = float(len(solvers[0].regrets))
    for s in solvers:
        # 'steps' is a draw style; Matplotlib >= 3.3 rejects it as a line
        # style, so it has to go through ``drawstyle``.
        ax3.plot(range(b.n), np.array(s.counts) / total_steps, drawstyle='steps', lw=2)
    ax3.set_xlabel('Actions')
    ax3.set_ylabel('Frac. # trials')
    ax3.grid(True, ls='--', alpha=0.3)

    plt.savefig(figname)
1171 | 
def plot_reward_vs_confidence(confidence_range, base_acc, model_acc, model_names, figname, CDF):
    """
    Plot per-model reward and the score CDF against the threshold.

    Args:
        confidence_range: x values shared by both panels.
        base_acc: unused.
        model_acc: iterable of reward sequences, one per model.
        model_names: legend labels, indexed in step with ``model_acc``.
        figname (str): output path passed to ``plt.savefig``.
        CDF: y values of the right-hand panel.
    """
    figure = plt.figure(figsize=(14, 6))
    figure.subplots_adjust(bottom=0.3, wspace=0.3)

    reward_axis = figure.add_subplot(121)
    cdf_axis = figure.add_subplot(122)

    # Left panel: one reward curve per model.
    for position, curve in enumerate(model_acc):
        reward_axis.plot(confidence_range, curve, label=model_names[position], marker='x')

    reward_axis.set_xlabel('Probability threshold')
    reward_axis.set_ylabel('Reward Value')
    reward_axis.legend(loc=9, bbox_to_anchor=(1.82, -0.25), ncol=5)
    reward_axis.grid('k', ls='--', alpha=0.3)

    # Right panel: the same CDF curve is drawn once per model, each time
    # labelled with that model's name.
    for position, _ in enumerate(model_acc):
        cdf_axis.plot(confidence_range, CDF, label=model_names[position], marker='x')

    cdf_axis.set_xlabel('Probability threshold')
    cdf_axis.set_ylabel('CDF')
    cdf_axis.legend(loc=9, bbox_to_anchor=(1.82, -0.25), ncol=5)
    cdf_axis.grid('k', ls='--', alpha=0.3)

    plt.savefig(figname, dpi=1000)
1214 |     
def plot_reward_vs_confidence_old(confidence_range, base_acc, model_acc, model_names, figname, CDF):
    """
    Plot a 2x2 grid: reward, reward minus base, CDF, and (reward - base) * CDF.

    Args:
        confidence_range: x values shared by all panels.
        base_acc: base accuracy subtracted from each model's reward.
        model_acc: iterable of reward sequences, one per model.
        model_names: legend labels, indexed in step with ``model_acc``.
        figname (str): output path passed to ``plt.savefig``.
        CDF: y values of the CDF panel and weights of the last panel.
    """
    figure = plt.figure(figsize=(14, 4))
    figure.subplots_adjust(bottom=0.3, wspace=0.3)

    reward_axis = figure.add_subplot(221)
    gain_axis = figure.add_subplot(222)
    cdf_axis = figure.add_subplot(223)
    weighted_axis = figure.add_subplot(224)

    def decorate(axis, y_caption):
        # Shared cosmetics for every panel.
        axis.set_xlabel('Probability threshold')
        axis.set_ylabel(y_caption)
        axis.legend(loc=9, bbox_to_anchor=(1.82, -0.25), ncol=5)
        axis.grid('k', ls='--', alpha=0.3)

    # Panel 1: raw reward per model.
    for position, curve in enumerate(model_acc):
        reward_axis.plot(confidence_range, curve, label=model_names[position], marker='x')
    decorate(reward_axis, 'Reward Value')

    # Panel 2: reward relative to the base model.
    for position, curve in enumerate(model_acc):
        gain = np.array(curve)-np.asarray(base_acc)
        gain_axis.plot(confidence_range, gain, label=model_names[position], marker='x')
    decorate(gain_axis, 'Reward Value-Base')

    # Panel 3: the same CDF curve, drawn once per model name.
    for position, _ in enumerate(model_acc):
        cdf_axis.plot(confidence_range, CDF, label=model_names[position], marker='x')
    decorate(cdf_axis, 'CDF')

    # Panel 4: relative reward weighted by the CDF.
    for position, curve in enumerate(model_acc):
        weighted = (np.array(curve)-np.asarray(base_acc))*np.asarray(CDF)
        weighted_axis.plot(confidence_range, weighted, label=model_names[position], marker='x')
    decorate(weighted_axis, 'Reward*Prob')

    plt.savefig(figname, dpi=1000)
1279 |     
1280 |     
def reward_vs_confidence(N=1000, 
                         ModelID=[100,0,1,2,3,4],
                         ModelIndex = [0,1,2,3],
                         confidencerange = (0.1,0.2,0.3,0.4,0.5,0.6,0.7,.99,1),
                         datapath='path/to/imagenet/result/val_performance'):
    """
    Compute conditional accuracies and the score CDF from stored results,
    then plot them against the confidence thresholds.

    Args:
        N (int): unused.
        ModelID: model ids used to build the bandit and the context
            parameters; also used as legend labels and in the file name.
        ModelIndex: model indices passed to the conditional-accuracy call.
        confidencerange: score thresholds to evaluate.
        datapath: location of the stored results.
    """
    print('reward datapaht',datapath)
    bandit = BernoulliBanditwithData(ModelID=ModelID,datapath=datapath)
    arm_count = len(ModelID)
    print ("Data generated Bernoulli bandit has reward probabilities:\n", bandit.probas)

    best_arm = max(range(arm_count), key=lambda arm: bandit.probas[arm])
    best_proba = max(bandit.probas)
    print ("The best machine has index: {} and proba: {}".format(best_arm, best_proba))

    params = context_params(ModelID=ModelID,datapath=datapath)

    BaseAccuracy, Others = params.BaseModel.Compute_Conditional_Accuracy_AmongModel_List(
        ScoreRange=confidencerange,
        BaseID=0,
        ModelID=ModelIndex,
    )
    print(BaseAccuracy, Others)
    CDF = params.BaseModel.Compute_Prob_vs_Score(ScoreRange=confidencerange)
    print(CDF)

    output_name = "model reward compare_ModelID_{}.png".format(ModelID)
    plot_reward_vs_confidence(confidencerange, BaseAccuracy,Others, ModelID,output_name,CDF)
1322 | 
1323 | 
1324 | def test_plotline():
1325 |     prange= [0.    , 0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006, 0.0007,
1326 |        0.0008, 0.0009, 0.001 , 0.0011, 0.0012, 0.0013, 0.0014, 0.0015,
1327 |        0.0016, 0.0017, 0.0018, 0.0019, 0.002 , 0.0021, 0.0022, 0.0023,
1328 |        0.0024, 0.0025, 0.0026, 0.0027, 0.0028, 0.0029, 0.003 , 0.0031,
1329 |        0.0032, 0.0033, 0.0034, 0.0035, 0.0036, 0.0037, 0.0038, 0.0039,
1330 |        0.004 , 0.0041, 0.0042, 0.0043, 0.0044, 0.0045, 0.0046, 0.0047,
1331 |        0.0048, 0.0049, 0.005 , 0.0051, 0.0052, 0.0053, 0.0054, 0.0055,
1332 |        0.0056, 0.0057, 0.0058, 0.0059, 0.006 , 0.0061, 0.0062, 0.0063,
1333 |        0.0064, 0.0065, 0.0066, 0.0067, 0.0068, 0.0069, 0.007 , 0.0071,
1334 |        0.0072, 0.0073, 0.0074, 0.0075, 0.0076, 0.0077, 0.0078, 0.0079,
1335 |        0.008 , 0.0081, 0.0082, 0.0083, 0.0084, 0.0085, 0.0086, 0.0087,
1336 |        0.0088, 0.0089, 0.009 , 0.0091, 0.0092, 0.0093, 0.0094, 0.0095,
1337 |        0.0096, 0.0097, 0.0098, 0.0099, 0.01  , 0.0101, 0.0102, 0.0103,
1338 |        0.0104, 0.0105, 0.0106, 0.0107, 0.0108, 0.0109, 0.011 , 0.0111,
1339 |        0.0112, 0.0113, 0.0114, 0.0115, 0.0116, 0.0117, 0.0118, 0.0119,
1340 |        0.012 , 0.0121, 0.0122, 0.0123, 0.0124, 0.0125, 0.0126, 0.0127,
1341 |        0.0128, 0.0129, 0.013 , 0.0131, 0.0132, 0.0133, 0.0134, 0.0135,
1342 |        0.0136, 0.0137, 0.0138, 0.0139, 0.014 , 0.0141, 0.0142, 0.0143,
1343 |        0.0144, 0.0145, 0.0146, 0.0147, 0.0148, 0.0149, 0.015 , 0.0151,
1344 |        0.0152, 0.0153, 0.0154, 0.0155, 0.0156, 0.0157, 0.0158, 0.0159,
1345 |        0.016 , 0.0161, 0.0162, 0.0163, 0.0164, 0.0165, 0.0166, 0.0167,
1346 |        0.0168, 0.0169, 0.017 , 0.0171, 0.0172, 0.0173, 0.0174, 0.0175,
1347 |        0.0176, 0.0177, 0.0178, 0.0179, 0.018 , 0.0181, 0.0182, 0.0183,
1348 |        0.0184, 0.0185, 0.0186, 0.0187, 0.0188, 0.0189, 0.019 , 0.0191,
1349 |        0.0192, 0.0193, 0.0194, 0.0195, 0.0196, 0.0197, 0.0198, 0.0199,
1350 |        0.02  , 0.0201, 0.0202, 0.0203, 0.0204, 0.0205, 0.0206, 0.0207,
1351 |        0.0208, 0.0209, 0.021 , 0.0211, 0.0212, 0.0213, 0.0214, 0.0215,
1352 |        0.0216, 0.0217, 0.0218, 0.0219, 0.022 , 0.0221, 0.0222, 0.0223,
1353 |        0.0224, 0.0225, 0.0226, 0.0227, 0.0228, 0.0229, 0.023 , 0.0231,
1354 |        0.0232, 0.0233, 0.0234, 0.0235, 0.0236, 0.0237, 0.0238, 0.0239,
1355 |        0.024 , 0.0241, 0.0242, 0.0243, 0.0244, 0.0245, 0.0246, 0.0247,
1356 |        0.0248, 0.0249, 0.025 , 0.0251, 0.0252, 0.0253, 0.0254, 0.0255,
1357 |        0.0256, 0.0257, 0.0258, 0.0259, 0.026 , 0.0261, 0.0262, 0.0263,
1358 |        0.0264, 0.0265, 0.0266, 0.0267, 0.0268, 0.0269, 0.027 , 0.0271,
1359 |        0.0272, 0.0273, 0.0274, 0.0275, 0.0276, 0.0277, 0.0278, 0.0279,
1360 |        0.028 , 0.0281, 0.0282, 0.0283, 0.0284, 0.0285, 0.0286, 0.0287,
1361 |        0.0288, 0.0289, 0.029 , 0.0291, 0.0292, 0.0293, 0.0294, 0.0295,
1362 |        0.0296, 0.0297, 0.0298, 0.0299, 0.03  , 0.0301, 0.0302, 0.0303,
1363 |        0.0304, 0.0305, 0.0306, 0.0307, 0.0308, 0.0309, 0.031 , 0.0311,
1364 |        0.0312, 0.0313, 0.0314, 0.0315, 0.0316, 0.0317, 0.0318, 0.0319,
1365 |        0.032 , 0.0321, 0.0322, 0.0323, 0.0324, 0.0325, 0.0326, 0.0327,
1366 |        0.0328, 0.0329, 0.033 , 0.0331, 0.0332, 0.0333, 0.0334, 0.0335,
1367 |        0.0336, 0.0337, 0.0338, 0.0339, 0.034 , 0.0341, 0.0342, 0.0343,
1368 |        0.0344, 0.0345, 0.0346, 0.0347, 0.0348, 0.0349, 0.035 , 0.0351,
1369 |        0.0352, 0.0353, 0.0354, 0.0355, 0.0356, 0.0357, 0.0358, 0.0359,
1370 |        0.036 , 0.0361, 0.0362, 0.0363, 0.0364, 0.0365, 0.0366, 0.0367,
1371 |        0.0368, 0.0369, 0.037 , 0.0371, 0.0372, 0.0373, 0.0374, 0.0375,
1372 |        0.0376, 0.0377, 0.0378, 0.0379, 0.038 , 0.0381, 0.0382, 0.0383,
1373 |        0.0384, 0.0385, 0.0386, 0.0387, 0.0388, 0.0389, 0.039 , 0.0391,
1374 |        0.0392, 0.0393, 0.0394, 0.0395, 0.0396, 0.0397, 0.0398, 0.0399,
1375 |        0.04  , 0.0401, 0.0402, 0.0403, 0.0404, 0.0405, 0.0406, 0.0407,
1376 |        0.0408, 0.0409, 0.041 , 0.0411, 0.0412, 0.0413, 0.0414, 0.0415,
1377 |        0.0416, 0.0417, 0.0418, 0.0419, 0.042 , 0.0421, 0.0422, 0.0423,
1378 |        0.0424, 0.0425, 0.0426, 0.0427, 0.0428, 0.0429, 0.043 , 0.0431,
1379 |        0.0432, 0.0433, 0.0434, 0.0435, 0.0436, 0.0437, 0.0438, 0.0439,
1380 |        0.044 , 0.0441, 0.0442, 0.0443, 0.0444, 0.0445, 0.0446, 0.0447,
1381 |        0.0448, 0.0449, 0.045 , 0.0451, 0.0452, 0.0453, 0.0454, 0.0455,
1382 |        0.0456, 0.0457, 0.0458, 0.0459, 0.046 , 0.0461, 0.0462, 0.0463,
1383 |        0.0464, 0.0465, 0.0466, 0.0467, 0.0468, 0.0469, 0.047 , 0.0471,
1384 |        0.0472, 0.0473, 0.0474, 0.0475, 0.0476, 0.0477, 0.0478, 0.0479,
1385 |        0.048 , 0.0481, 0.0482, 0.0483, 0.0484, 0.0485, 0.0486, 0.0487,
1386 |        0.0488, 0.0489, 0.049 , 0.0491, 0.0492, 0.0493, 0.0494, 0.0495,
1387 |        0.0496, 0.0497, 0.0498, 0.0499]
1388 | 
1389 |     acc = [0.48301023, 0.48457155, 0.48538639, 0.48615516, 0.48668402,
1390 |        0.48743234, 0.48818995, 0.48874007, 0.48916215, 0.48976699,
1391 |        0.49029502, 0.49083267, 0.49127285, 0.49186667, 0.49235521,
1392 |        0.49291153, 0.49324094, 0.4937676 , 0.494199  , 0.49455204,
1393 |        0.49486084, 0.49522269, 0.49560935, 0.49594377, 0.49625499,
1394 |        0.49656768, 0.49680171, 0.497076  , 0.49740774, 0.49774282,
1395 |        0.49808112, 0.49844063, 0.49888367, 0.49907962, 0.49934593,
1396 |        0.4996519 , 0.50010442, 0.50044377, 0.50083441, 0.50119005,
1397 |        0.50157951, 0.50191593, 0.50229962, 0.50263862, 0.5029507 ,
1398 |        0.50321984, 0.50355179, 0.50382114, 0.50421764, 0.50475099,
1399 |        0.50509806, 0.50548435, 0.50571974, 0.50673374, 0.50709485,
1400 |        0.50754149, 0.50806022, 0.50838091, 0.50895068, 0.51405688,
1401 |        0.51405485, 0.51387681, 0.51375979, 0.51368061, 0.51363966,
1402 |        0.51358214, 0.51348813, 0.51320118, 0.5131013 , 0.51299855,
1403 |        0.51285864, 0.51261339, 0.51251116, 0.51239189, 0.51230296,
1404 |        0.51222542, 0.51213922, 0.51213295, 0.51199515, 0.51189603,
1405 |        0.51178252, 0.51170102, 0.51167787, 0.51146198, 0.51132532,
1406 |        0.51125551, 0.51083861, 0.51080367, 0.51065056, 0.51054177,
1407 |        0.51030458, 0.51009311, 0.50985171, 0.50952144, 0.50941722,
1408 |        0.50885601, 0.50872907, 0.5086227 , 0.50837746, 0.50827709,
1409 |        0.50811138, 0.50789465, 0.50776248, 0.50757616, 0.50723169,
1410 |        0.50710103, 0.50692262, 0.50636731, 0.50551236, 0.5052581 ,
1411 |        0.50467229, 0.50438479, 0.50419524, 0.50370731, 0.50344827,
1412 |        0.50315895, 0.50299427, 0.50237316, 0.50076062, 0.5000888 ,
1413 |        0.49904193, 0.49821103, 0.49782044, 0.49751325, 0.49721562,
1414 |        0.49615602, 0.49570051, 0.49546224, 0.49528738, 0.49499799,
1415 |        0.49480788, 0.49441211, 0.49400208, 0.4937852 , 0.49357293,
1416 |        0.4932995 , 0.49308115, 0.49274721, 0.49232387, 0.4904962 ,
1417 |        0.48979254, 0.48883763, 0.48723269, 0.48694961, 0.48664716,
1418 |        0.48383779, 0.4823387 , 0.48139062, 0.48097353, 0.48045641,
1419 |        0.47893606, 0.47857627, 0.4783139 , 0.47800683, 0.47767765,
1420 |        0.47749323, 0.47730572, 0.47711734, 0.47697098, 0.47674   ,
1421 |        0.47655673, 0.47617975, 0.47604766, 0.47593491, 0.4756809 ,
1422 |        0.47553382, 0.47541119, 0.47521898, 0.47502894, 0.47485214,
1423 |        0.47467929, 0.47456147, 0.47439464, 0.4742843 , 0.47419802,
1424 |        0.47407429, 0.47394389, 0.47376382, 0.47366419, 0.47347226,
1425 |        0.4733965 , 0.47327365, 0.47314522, 0.47295317, 0.47277204,
1426 |        0.47265397, 0.47254881, 0.47238721, 0.47231294, 0.47213071,
1427 |        0.47205963, 0.471965  , 0.47181219, 0.47166599, 0.4715344 ,
1428 |        0.47142736, 0.47133969, 0.47124544, 0.47120949, 0.47109536,
1429 |        0.47099978, 0.47084196, 0.47067891, 0.47054779, 0.47039573,
1430 |        0.47028735, 0.47016451, 0.47002245, 0.46977837, 0.46963242,
1431 |        0.46943925, 0.4692959 , 0.46914154, 0.46891011, 0.4687833 ,
1432 |        0.4685379 , 0.46843594, 0.46825524, 0.46778678, 0.46757136,
1433 |        0.46737609, 0.46692911, 0.46674504, 0.46645814, 0.46626084,
1434 |        0.46601046, 0.46587982, 0.46568659, 0.46549668, 0.46531255,
1435 |        0.46491423, 0.4644362 , 0.46398542, 0.4631161 , 0.46295977,
1436 |        0.46250332, 0.46236719, 0.46221666, 0.462093  , 0.46187842,
1437 |        0.46174634, 0.46159738, 0.46147783, 0.46137749, 0.46129638,
1438 |        0.4611781 , 0.46107324, 0.46094401, 0.46083739, 0.46074101,
1439 |        0.46072508, 0.46064278, 0.46052262, 0.46042853, 0.46034242,
1440 |        0.46028446, 0.46017712, 0.46011206, 0.46002659, 0.45995817,
1441 |        0.45986543, 0.45975698, 0.45968683, 0.45957428, 0.45942207,
1442 |        0.45930791, 0.45921235, 0.45910849, 0.45898494, 0.45888329,
1443 |        0.45879647, 0.45870982, 0.45870496, 0.45862491, 0.45850992,
1444 |        0.45846477, 0.4583252 , 0.45870034, 0.45860152, 0.4584608 ,
1445 |        0.45840916, 0.45837632, 0.45829484, 0.45822002, 0.45816921,
1446 |        0.45808426, 0.45801872, 0.4579592 , 0.45785556, 0.45777885,
1447 |        0.4577343 , 0.45766358, 0.45753936, 0.45752268, 0.45744507,
1448 |        0.45736837, 0.45728324, 0.45717934, 0.45703663, 0.45697995,
1449 |        0.45691548, 0.45679727, 0.45673414, 0.45666303, 0.45661996,
1450 |        0.4565089 , 0.45641751, 0.45633791, 0.45626128, 0.45619948,
1451 |        0.4561366 , 0.45613471, 0.45607387, 0.45597782, 0.45588608,
1452 |        0.45581065, 0.45568215, 0.4555245 , 0.45539021, 0.45530577,
1453 |        0.45521037, 0.4550916 , 0.45500052, 0.45498943, 0.45484803,
1454 |        0.45476247, 0.45469974, 0.45461052, 0.45449327, 0.45441162,
1455 |        0.4543233 , 0.45421517, 0.45414812, 0.45402163, 0.45396933,
1456 |        0.45382181, 0.45372327, 0.45364773, 0.4535485 , 0.45345609,
1457 |        0.45338647, 0.45332349, 0.45321917, 0.45318078, 0.45311913,
1458 |        0.45302852, 0.45289496, 0.45282775, 0.45291292, 0.45281203,
1459 |        0.45271895, 0.45259684, 0.45251492, 0.45226131, 0.45199698,
1460 |        0.45190208, 0.45177381, 0.45167107, 0.45156732, 0.45120557,
1461 |        0.4510243 , 0.45040894, 0.45016372, 0.41494005, 0.41482359,
1462 |        0.4147391 , 0.41467827, 0.41456255, 0.41442845, 0.41435356,
1463 |        0.41427217, 0.4141186 , 0.41393056, 0.41373277, 0.41356792,
1464 |        0.41346815, 0.41313181, 0.41306098, 0.41297357, 0.41284036,
1465 |        0.41271761, 0.41264731, 0.41260986, 0.41259229, 0.41252037,
1466 |        0.41246792, 0.41244859, 0.41239455, 0.41236259, 0.41230149,
1467 |        0.41226418, 0.41217959, 0.41212254, 0.41211362, 0.41207712,
1468 |        0.41202834, 0.4119794 , 0.41189217, 0.41186648, 0.41183323,
1469 |        0.41177104, 0.4117605 , 0.41172562, 0.41171102, 0.4116806 ,
1470 |        0.41165032, 0.41161321, 0.41153588, 0.4114937 , 0.41145179,
1471 |        0.41141475, 0.41141205, 0.4113842 , 0.41137095, 0.41133905,
1472 |        0.41131634, 0.41129309, 0.41124033, 0.41121707, 0.41119274,
1473 |        0.41117111, 0.41115895, 0.41114137, 0.4111238 , 0.4111119 ,
1474 |        0.41109377, 0.41106132, 0.41101536, 0.41100238, 0.41097399,
1475 |        0.41095669, 0.4109064 , 0.41086747, 0.4108653 , 0.41084692,
1476 |        0.41080381, 0.41078624, 0.4107565 , 0.41074001, 0.4107346 ,
1477 |        0.41071432, 0.41067972, 0.41063105, 0.41062294, 0.41059725,
1478 |        0.41055453, 0.41050722, 0.41047964, 0.41046612, 0.41040232,
1479 |        0.41038609, 0.41036176, 0.41036446, 0.41036176, 0.41034473,
1480 |        0.41029336, 0.41027285, 0.4102012 , 0.41018011, 0.41015145,
1481 |        0.41014199, 0.41010603, 0.4100817 , 0.41002357, 0.40999707,
1482 |        0.40999301, 0.40998193, 0.40995883, 0.40995234, 0.40991009,
1483 |        0.40989792, 0.4098425 , 0.40983087, 0.40981059, 0.40980789,
1484 |        0.40978626, 0.40978414, 0.40976115, 0.40971627, 0.40970445,
1485 |        0.40969383, 0.40966004, 0.40961732, 0.40958487, 0.4095454 ,
1486 |        0.40952512, 0.40952269, 0.40948619, 0.40948078, 0.40944969,
1487 |        0.40944428, 0.40941995, 0.40940778, 0.40941589, 0.40941589,
1488 |        0.40937777, 0.40934938, 0.40932234, 0.40931288, 0.4092899 ]
1489 |     cost = [5.99998378, 5.99995133, 5.99998378, 5.99998378, 5.99998378,
1490 |        5.99998378, 5.99996756, 5.99996756, 5.99998378, 5.99993511,
1491 |        6.        , 5.99995133, 5.99998378, 5.99995133, 5.99996756,
1492 |        5.99996756, 5.99993511, 5.99998378, 5.99996756, 5.99993511,
1493 |        5.99995133, 5.99996756, 5.99993511, 6.        , 5.99998378,
1494 |        5.99995133, 6.        , 6.        , 5.99996756, 5.99996756,
1495 |        6.        , 5.99995133, 5.99995133, 5.99998378, 5.99991889,
1496 |        5.99998378, 5.99995133, 5.99996756, 5.99995133, 5.99991889,
1497 |        5.99995133, 6.        , 5.99995133, 6.        , 5.99996756,
1498 |        5.99998378, 5.99998378, 5.99993511, 5.99996756, 6.        ,
1499 |        5.99993511, 5.99996756, 6.        , 5.99998378, 5.99996756,
1500 |        5.99993511, 5.99995133, 5.99996756, 5.99995133, 5.99482512,
1501 |        5.96959964, 5.93986438, 5.90917202, 5.875365  , 5.84858218,
1502 |        5.81982026, 5.76072286, 5.73048472, 5.70715723, 5.68048796,
1503 |        5.65459737, 5.62457011, 5.59642463, 5.57118292, 5.54910454,
1504 |        5.52188372, 5.49866978, 5.47579651, 5.45198235, 5.42672442,
1505 |        5.39849783, 5.37371034, 5.34908507, 5.32488158, 5.29626565,
1506 |        5.27175394, 5.22610473, 5.19873791, 5.17562131, 5.1535267 ,
1507 |        5.12797677, 5.10221595, 5.07600091, 5.04115567, 5.01468107,
1508 |        4.94257349, 4.91944066, 4.89812472, 4.87351567, 4.84459153,
1509 |        4.82238336, 4.79530855, 4.77207839, 4.73781714, 4.70809811,
1510 |        4.68477062, 4.65748491, 4.60662838, 4.53348258, 4.5040231 ,
1511 |        4.43806372, 4.41154046, 4.3859743 , 4.34853352, 4.32293492,
1512 |        4.29568166, 4.26372396, 4.20829278, 4.08779443, 4.03799234,
1513 |        3.97743495, 3.91929466, 3.89564272, 3.86397703, 3.83818376,
1514 |        3.76805529, 3.72065408, 3.69499059, 3.67588086, 3.65414314,
1515 |        3.63321653, 3.60901304, 3.58041334, 3.55827007, 3.53622413,
1516 |        3.51049575, 3.48917981, 3.46007722, 3.42815197, 3.31135228,
1517 |        3.26297774, 3.19297904, 3.06952826, 3.04595743, 3.01523263,
1518 |        2.82674713, 2.74002336, 2.67805464, 2.6509636 , 2.60555772,
1519 |        2.50275777, 2.48069561, 2.4648952 , 2.44812147, 2.43077996,
1520 |        2.41721822, 2.40394848, 2.39111673, 2.37920966, 2.36603725,
1521 |        2.35312439, 2.33891376, 2.32945623, 2.32024203, 2.30869184,
1522 |        2.29655765, 2.28491013, 2.27579326, 2.26568685, 2.25663487,
1523 |        2.24586334, 2.23767114, 2.22874895, 2.22080008, 2.21041788,
1524 |        2.20259879, 2.19516904, 2.18631173, 2.17881708, 2.1691811 ,
1525 |        2.16051846, 2.15156382, 2.14377717, 2.13540653, 2.12697099,
1526 |        2.12012524, 2.1119817 , 2.10146973, 2.09397508, 2.08388489,
1527 |        2.07486536, 2.06678671, 2.05770229, 2.05040231, 2.04141522,
1528 |        2.0323308 , 2.02389527, 2.01721173, 2.00828953, 2.00157355,
1529 |        1.99427357, 1.98590293, 1.97717539, 1.96831808, 1.96066122,
1530 |        1.95154435, 1.94359548, 1.93636039, 1.92682175, 1.915012  ,
1531 |        1.90612225, 1.89590228, 1.8862663 , 1.87789566, 1.86851924,
1532 |        1.85755305, 1.84786841, 1.83615599, 1.80666407, 1.79229122,
1533 |        1.78366102, 1.76315619, 1.75384466, 1.73528648, 1.7239634 ,
1534 |        1.71507365, 1.70748167, 1.69943547, 1.69190838, 1.6825644 ,
1535 |        1.6716631 , 1.65424048, 1.63678541, 1.61277659, 1.60628772,
1536 |        1.5939913 , 1.58695088, 1.57871001, 1.57163714, 1.56274739,
1537 |        1.55638829, 1.5488612 , 1.54246966, 1.53666212, 1.53244436,
1538 |        1.52702615, 1.52173772, 1.51602751, 1.51041464, 1.50561287,
1539 |        1.50136266, 1.49763156, 1.4931218 , 1.48887159, 1.48410226,
1540 |        1.48027383, 1.47514762, 1.47080008, 1.46742586, 1.46398676,
1541 |        1.459769  , 1.45636234, 1.45321524, 1.44925702, 1.4444877 ,
1542 |        1.44023749, 1.43653884, 1.43186685, 1.42761664, 1.42278243,
1543 |        1.41905133, 1.41447667, 1.41042113, 1.4068198 , 1.40192071,
1544 |        1.39640517, 1.39127896, 1.37959899, 1.37586789, 1.37070923,
1545 |        1.36668613, 1.36263059, 1.35948349, 1.35607683, 1.35208617,
1546 |        1.34708974, 1.34361819, 1.33988709, 1.33547466, 1.33115956,
1547 |        1.32713646, 1.32080981, 1.31717604, 1.31415872, 1.31110895,
1548 |        1.30760496, 1.30423074, 1.29998053, 1.29560055, 1.29199922,
1549 |        1.28856012, 1.2840828 , 1.28074103, 1.27694504, 1.27065083,
1550 |        1.26717929, 1.2636753 , 1.26036597, 1.25686198, 1.25364999,
1551 |        1.25004867, 1.24761534, 1.2440789 , 1.24031536, 1.23525404,
1552 |        1.23204205, 1.22814872, 1.22266563, 1.2176692 , 1.21319188,
1553 |        1.20839011, 1.2038479 , 1.20112257, 1.19677503, 1.19310882,
1554 |        1.18992927, 1.18730128, 1.18363507, 1.17850886, 1.17562131,
1555 |        1.17302576, 1.16926222, 1.16702355, 1.16189735, 1.15858802,
1556 |        1.15313737, 1.14856271, 1.14583739, 1.14340406, 1.13844008,
1557 |        1.13526053, 1.13045876, 1.12695477, 1.12267212, 1.11946013,
1558 |        1.11400947, 1.10949971, 1.10661216, 1.09973396, 1.09558108,
1559 |        1.08763221, 1.08305756, 1.07887223, 1.07186425, 1.06485627,
1560 |        1.06021673, 1.05340341, 1.0491532 , 1.04516255, 1.02744793,
1561 |        1.02125105, 1.00470443, 0.99208358, 0.23337227, 0.22814872,
1562 |        0.22406074, 0.22204919, 0.21922653, 0.2165012 , 0.21445721,
1563 |        0.21192655, 0.20916878, 0.20537279, 0.2015768 , 0.19693725,
1564 |        0.19512037, 0.18824216, 0.18661995, 0.18389462, 0.18120174,
1565 |        0.17740575, 0.17432354, 0.17273376, 0.17205243, 0.17017066,
1566 |        0.16861333, 0.167932  , 0.16637467, 0.16484978, 0.16410356,
1567 |        0.1630329 , 0.16167024, 0.16014535, 0.1587178 , 0.15787425,
1568 |        0.15654403, 0.1552787 , 0.15287781, 0.15209915, 0.15080138,
1569 |        0.14963338, 0.14866005, 0.14797872, 0.14778405, 0.14729738,
1570 |        0.14642139, 0.14535072, 0.14382584, 0.14324184, 0.14256051,
1571 |        0.1414574 , 0.1411654 , 0.1402894 , 0.13951074, 0.13834274,
1572 |        0.13727208, 0.13649341, 0.13542275, 0.13493608, 0.13376809,
1573 |        0.13250276, 0.13201609, 0.13143209, 0.13065343, 0.13006943,
1574 |        0.12932321, 0.12864188, 0.12718188, 0.12659788, 0.12552722,
1575 |        0.12429434, 0.12302901, 0.12150412, 0.12101746, 0.12004412,
1576 |        0.11926546, 0.11897346, 0.11800013, 0.11735124, 0.11715658,
1577 |        0.11647524, 0.11559925, 0.11452858, 0.11407436, 0.11349036,
1578 |        0.11261437, 0.11164104, 0.11099215, 0.11044059, 0.10943482,
1579 |        0.10875349, 0.10787749, 0.10748816, 0.10651483, 0.10602816,
1580 |        0.10505483, 0.10447083, 0.10333528, 0.10297839, 0.10229706,
1581 |        0.1018104 , 0.10125884, 0.10057751, 0.09953929, 0.09866329,
1582 |        0.09827396, 0.09788463, 0.0974953 , 0.09710596, 0.0960353 ,
1583 |        0.09528908, 0.09444553, 0.09366686, 0.09308286, 0.09279086,
1584 |        0.09220687, 0.09181753, 0.09152553, 0.0905522 , 0.08987087,
1585 |        0.08948154, 0.08883265, 0.08805399, 0.08746999, 0.08669132,
1586 |        0.08630199, 0.08581533, 0.084842  , 0.084258  , 0.083674  ,
1587 |        0.08321978, 0.08273311, 0.08224645, 0.08205178, 0.08205178,
1588 |        0.08127312, 0.08078645, 0.08039712, 0.08010512, 0.07955357]
1589 |     a = VisualizeTools()
1590 |     max_x = 200
1591 |     prange=prange[0:max_x]
1592 |     acc =acc[0:max_x]
1593 |     cost = cost[0:max_x]
1594 |     fig, ax = a.plotline(prange,acc,xlabel='Weight Value', ylabel='Accuracy',
1595 |                filename='coco_p_value_acc')
1596 |     fig, ax = a.plotscatter(xvalue=[0.0060416667],
1597 |                             yvalue=[0.5140010157426727],
1598 |                             fig=fig,ax=ax,
1599 |                             markersize=30,
1600 |                             legend='Learned Thres',                            
1601 |                             filename='coco_p_value_acc')
1602 |     fig, ax = a.plotline(prange,cost,xlabel='Weight Value', ylabel='Cost',
1603 |                filename='coco_p_value_cost')    
1604 |     fig, ax = a.plotscatter(xvalue=[0.0060416667],
1605 |                             yvalue=[5.9999899999999995],
1606 |                             fig=fig,ax=ax,
1607 |                             markersize=30,
1608 |                             legend='Learned Thres',
1609 |                             filename='coco_p_value_cost')    
1610 |     
1611 |     
def getlabeldist(datapath=r'..\APIperformance\mlserviceperformance_coco\Model0_TrueLabel.txt'):
    """Count, for every label, the number of images that contain it.

    The file at ``datapath`` is parsed as JSON. It must map each image name
    to a list of annotation dicts, each carrying a ``'transcription'`` key
    whose value is the label.

    Args:
        datapath: Path of the JSON label file.

    Returns:
        A ``(counts, num_images)`` tuple. ``counts`` maps each label to the
        number of images in which it appears at least once; ``num_images``
        is the number of image entries in the file.
    """
    # Context manager so the handle is released even if parsing fails.
    with open(datapath) as labelfile:
        labels = json.load(labelfile)

    mydict = dict()
    for annotations in labels.values():
        # De-duplicate labels within one image (keeping first-seen order) so
        # that a label repeated inside an image contributes exactly once,
        # including for the image that introduces the label.
        labels_in_image = dict.fromkeys(temp['transcription'] for temp in annotations)
        for label in labels_in_image:
            mydict[label] = mydict.get(label, 0) + 1
    return mydict, len(labels)
1628 | 
def test_label_dist():
    """Draw the two-series provider bar charts for FERPLUS and RAFDB.

    For each dataset a fresh ``VisualizeTools`` (22x8, jpg) plots two series
    of per-provider ratios, labelled '2020 March' and '2021 Feb', with the y
    range padded by 0.05 below the first series' minimum and above the
    second series' maximum. Afterwards a single-series bar chart of the last
    dataset's first series is drawn on the last plotter.
    """
    # (output filename, denominator, first-series numerators, second-series numerators)
    datasets = (
        ('FERPLUS', 6358, (5175, 4302), (5368, 4304)),
        ('RAFDB', 15339, (10996, 10069), (11000, 10073)),
    )

    for figname, total, first_counts, second_counts in datasets:
        plotter = VisualizeTools(figuresize=(22,8),figureformat='jpg')
        # Fresh lists per figure, mirroring the original per-figure setup.
        providers = ['Microsoft','Google']
        periods = ['2020 March', '2021 Feb']
        first_series = [first_counts[0]/total, first_counts[1]/total]
        second_series = [second_counts[0]/total, second_counts[1]/total]
        lower = min(first_series)-0.05
        upper = max(second_series)+0.05

        plotter.plot_bar2value(barname=providers,
                               barvalue=first_series,
                               barvalue2=second_series,
                               color=['r','b'],
                               filename=figname,
                               yname='',
                               legend=periods,
                               showlegend=True,
                               yrange=[lower,upper])

    # Final single-series chart reuses the last (RAFDB) plotter and data.
    plotter.plot_bar(barname=providers,barvalue=first_series)
1663 |     
def getlabelprecisionandrecall(targetlabel='person',
                               truelabelpath=r'..\APIperformance\mlserviceperformance_coco\Model2_TrueLabel.txt',
                               predlabelpath=r'..\APIperformance\mlserviceperformance_coco\Model6_PredictedLabel.txt',):
    """Compute image-level precision and recall for a single label.

    Both files are parsed as JSON mapping an image name to a list of
    annotation dicts with a ``'transcription'`` key. An image "has" the
    label when at least one of its annotations carries it.

    Args:
        targetlabel: Label to evaluate.
        truelabelpath: Path of the ground-truth JSON file.
        predlabelpath: Path of the predicted-label JSON file.

    Returns:
        A ``(precision, recall)`` tuple. Precision is the number of images
        having the label in both files divided by the number of images in
        the prediction file having it; recall divides by the number of
        images in the ground-truth file having it. A ratio whose
        denominator is zero is reported as 0.
    """
    with open(truelabelpath) as truefile:
        truelabel = json.load(truefile)
    with open(predlabelpath) as predfile:
        predlabel = json.load(predfile)

    def has_target(annotations):
        # True when any annotation of the image carries the target label.
        return any(temp['transcription'] == targetlabel for temp in annotations)

    # Per-image totals are counted here in one pass instead of re-reading
    # both files through a helper; each image counts at most once.
    totaltrue = sum(1 for annotations in truelabel.values() if has_target(annotations))
    totalpred = sum(1 for annotations in predlabel.values() if has_target(annotations))

    # An image without a prediction entry is treated as "label not predicted"
    # rather than aborting the whole evaluation with a KeyError.
    count = sum(1 for imgname, annotations in truelabel.items()
                if has_target(annotations)
                and has_target(predlabel.get(imgname, ())))

    print('total true, total pred, all correct',totaltrue,totalpred,count)

    # Guard both denominators: the label may be absent from either file.
    precision = count/totalpred if totalpred else 0
    recall = count/totaltrue if totaltrue else 0
    return precision, recall
1698 |     
def test_precisionrecall(predlabelpath=r'cocoresult\majvote_coco.txt',
                         labelid=100,
                         showlegend=False):
    """Compute, save and plot per-label precision and recall.

    The label set comes from ``getlabeldist()`` called with its default
    path; labels are processed in sorted order. For each label,
    ``getlabelprecisionandrecall`` is evaluated against ``predlabelpath``
    (using that function's default ground-truth path).

    Outputs written to the current directory:
        * ``precision<labelid>.txt``, ``recall<labelid>.txt`` and
          ``label<labelid>.txt`` via ``np.savetxt``;
        * bar charts 'precisionmajvote', 'recallmajvote' and
          'preandrecall<labelid>' via ``VisualizeTools``.

    Args:
        predlabelpath: Path of the predicted-label JSON file.
        labelid: Suffix used in the saved text files and combined figure name.
        showlegend: Forwarded to ``plot_bar2value`` for the combined chart.

    Returns:
        Always 0.
    """
    labeldist, labelen = getlabeldist()
    labellist = list()
    precisionlist = list()
    recalllist = list()
    for label in sorted(labeldist):
        print(label)
        pre, recall = getlabelprecisionandrecall(targetlabel=label,
                                                 predlabelpath=predlabelpath,)
        precisionlist.append(pre)
        recalllist.append(recall)
        labellist.append(label)
    print('pre and recall',precisionlist, recalllist)
    np.savetxt('precision'+str(labelid)+'.txt', precisionlist)
    # Save the recall values here; the precision list was previously written
    # to this file by mistake.
    np.savetxt('recall'+str(labelid)+'.txt', recalllist)
    np.savetxt('label'+str(labelid)+'.txt',labellist,fmt='%s')
    a = VisualizeTools(figuresize=(23,8),figureformat='eps')
    a.plot_bar(barname = labellist,barvalue = precisionlist,filename='precisionmajvote',yname='')
    a.plot_bar(barname = labellist,barvalue = recalllist,filename='recallmajvote',yname='')

    a.plot_bar2value(barname = labellist,barvalue = precisionlist,
                     barvalue2 = recalllist,
                     color=['r','b'],
                     filename='preandrecall'+str(labelid),yname='',
                     showlegend=showlegend)


    return 0
1729 | 
if __name__ == '__main__':
    # Script entry point: render the label-distribution figures, then close
    # every open matplotlib figure.
    #
    # Disabled precision/recall runs, kept for reference:
    #   test_precisionrecall(predlabelpath='cocoresult\\FrugalMCTcoco.txt',
    #                        labelid=99999)
    #   test_precisionrecall(predlabelpath='cocoresult\\majvote_coco.txt',
    #                        labelid=888)
    #   test_precisionrecall(predlabelpath='cocoresult\\100000_coco_thres.txt',
    #                        labelid=100000,showlegend=True)
    #   test_precisionrecall(predlabelpath='cocoresult\\0_coco_thres.txt',
    #                        labelid=0)
    #   test_precisionrecall(predlabelpath='cocoresult\\6_coco_thres.txt',
    #                        labelid=6)
    #   test_precisionrecall(predlabelpath='cocoresult\\2_coco_thres.txt',
    #                        labelid=2)
    #
    # Other disabled entry points:
    #   getlabelprecisionandrecall()
    #   test_plotline()

    test_label_dist()
    matplotlib.pyplot.close('all')


--------------------------------------------------------------------------------