├── .gitignore ├── NOTICE ├── CODE_OF_CONDUCT.md ├── requirements.txt ├── config ├── SP_Oracle.json ├── SP_Truthful_TS.json ├── FP_DR_TS.json ├── FP_DM_Oracle.json ├── FP_IPS_TS.json └── FP_DM_TS.json ├── CITATION.cff ├── src ├── AuctionAllocation.py ├── Impression.py ├── BidderAllocation.py ├── Auction.py ├── Agent.py ├── Models.py ├── main.py ├── Bidder.py ├── Getting Started with AuctionGym (2. Effects of Bid Shading).ipynb └── Getting Started with AuctionGym (1. Effects of Competition).ipynb ├── CONFIG.md ├── CONTRIBUTING.md ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | results/ 2 | __pycache__/ 3 | .ipynb_checkpoints/ 4 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.11.0 2 | fonttools==4.33.1 3 | joblib==1.2.0 4 | kiwisolver==1.4.2 5 | llvmlite==0.38.0 6 | matplotlib==3.5.1 7 | numba==0.55.1 8 | numpy==1.22.0 9 | packaging==21.3 10 | pandas==1.3.5 11 | Pillow==9.1.1 12 | pyparsing==3.0.8 13 | python-dateutil==2.8.2 14 | pytz==2022.1 15 | scikit-learn==1.0.2 16 | scipy==1.7.3 17 | seaborn==0.11.2 18 | six==1.16.0 19 | threadpoolctl==3.1.0 20 | torch==1.11.0 21 | tqdm==4.64.0 22 | typing_extensions==4.2.0 23 | -------------------------------------------------------------------------------- /config/SP_Oracle.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_seed": 0, 3 | "num_runs": 3, 4 | "num_iter" : 20, 5 | "rounds_per_iter": 10000, 6 | "num_participants_per_round": 2, 7 | "embedding_size": 5, 8 | "embedding_var": 1.0, 9 | "obs_embedding_size": 4, 10 | "allocation": "SecondPrice", 11 | "agents": [ { 12 | "name": "Truthful Oracle", 13 | "num_copies": 6, 14 | "num_items": 12, 15 | "allocator": { 16 | "type": "OracleAllocator", 17 | "kwargs": {} 18 | }, 19 | "bidder": { 20 | "type": "TruthfulBidder", 21 | "kwargs": {} 22 | } 23 | } 24 | ], 25 | "output_dir": "results/SP_Oracle/" 26 | } 27 | -------------------------------------------------------------------------------- /config/SP_Truthful_TS.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_seed": 0, 3 | "num_runs": 3, 4 | "num_iter" : 20, 5 | "rounds_per_iter": 10000, 6 | "num_participants_per_round": 2, 7 | "embedding_size": 5, 8 | "embedding_var": 1.0, 9 | "obs_embedding_size": 4, 10 | "allocation": "SecondPrice", 11 | "agents": [ { 12 | "name": "Truthful Learnt", 13 | "num_copies": 6, 14 | "num_items": 12, 15 | "allocator": { 16 | "type": "PyTorchLogisticRegressionAllocator", 17 | "kwargs": {"embedding_size": 4, "num_items": 12} 18 | }, 19 | "bidder": { 20 | 
"type": "TruthfulBidder", 21 | "kwargs": {} 22 | } 23 | } 24 | ], 25 | "output_dir": "results/SP_Truthful_TS/" 26 | } 27 | -------------------------------------------------------------------------------- /config/FP_DR_TS.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_seed": 0, 3 | "num_runs": 2, 4 | "num_iter" : 3, 5 | "rounds_per_iter": 10000, 6 | "num_participants_per_round": 2, 7 | "embedding_size": 5, 8 | "embedding_var": 1.0, 9 | "obs_embedding_size": 4, 10 | "allocation": "FirstPrice", 11 | "agents": [ { 12 | "name": "DR", 13 | "num_copies": 3, 14 | "num_items": 12, 15 | "allocator": { 16 | "type": "PyTorchLogisticRegressionAllocator", 17 | "kwargs": {"embedding_size": 4, "num_items": 12} 18 | }, 19 | "bidder": { 20 | "type": "DoublyRobustBidder", 21 | "kwargs": { 22 | "gamma_sigma": 0.02, 23 | "init_gamma": 1.0 24 | } 25 | } 26 | }], 27 | "output_dir": "results/FP_DR_TS/" 28 | } 29 | -------------------------------------------------------------------------------- /config/FP_DM_Oracle.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_seed": 0, 3 | "num_runs": 3, 4 | "num_iter" : 20, 5 | "rounds_per_iter": 10000, 6 | "num_participants_per_round": 2, 7 | "embedding_size": 5, 8 | "embedding_var": 1.0, 9 | "obs_embedding_size": 4, 10 | "allocation": "FirstPrice", 11 | "agents": [ { 12 | "name": "Oracle, DM (search)", 13 | "num_copies": 6, 14 | "num_items": 12, 15 | "allocator": { 16 | "type": "OracleAllocator", 17 | "kwargs": {} 18 | }, 19 | "bidder": { 20 | "type": "ValueLearningBidder", 21 | "kwargs": { 22 | "gamma_sigma": 0.02, 23 | "init_gamma": 1.0, 24 | "inference": "\"search\"" 25 | } 26 | } 27 | }], 28 | "output_dir": "results/FP_DM_Oracle/" 29 | } 30 | -------------------------------------------------------------------------------- /config/FP_IPS_TS.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_seed": 0, 3 | "num_runs": 2, 4 | "num_iter" : 3, 5 | "rounds_per_iter": 10000, 6 | "num_participants_per_round": 2, 7 | "embedding_size": 5, 8 | "embedding_var": 1.0, 9 | "obs_embedding_size": 4, 10 | "allocation": "FirstPrice", 11 | "agents": [ { 12 | "name": "IPS", 13 | "num_copies": 3, 14 | "num_items": 12, 15 | "allocator": { 16 | "type": "PyTorchLogisticRegressionAllocator", 17 | "kwargs": {"embedding_size": 4, "num_items": 12} 18 | }, 19 | "bidder": { 20 | "type": "PolicyLearningBidder", 21 | "kwargs": { 22 | "gamma_sigma": 0.02, 23 | "init_gamma": 1.0, 24 | "loss": "\"PPO\"" 25 | } 26 | } 27 | }], 28 | "output_dir": "results/FP_IPS_TS/" 29 | } 30 | -------------------------------------------------------------------------------- /config/FP_DM_TS.json: -------------------------------------------------------------------------------- 1 | { 2 | "random_seed": 0, 3 | "num_runs": 2, 4 | "num_iter" : 3, 5 | "rounds_per_iter": 10000, 6 | "num_participants_per_round": 2, 7 | "embedding_size": 5, 8 | "embedding_var": 1.0, 9 | "obs_embedding_size": 4, 10 | "allocation": "FirstPrice", 11 | "agents": [ { 12 | "name": "DM (policy)", 13 | "num_copies": 3, 14 | "num_items": 12, 15 | "allocator": { 16 | "type": "PyTorchLogisticRegressionAllocator", 17 | "kwargs": {"embedding_size": 4, "num_items": 12} 18 | }, 19 | "bidder": { 20 | "type": "ValueLearningBidder", 21 | "kwargs": { 22 | "gamma_sigma": 0.02, 23 | "init_gamma": 1.0, 24 | "inference": "\"policy\"" 25 | } 26 | } 27 | }], 28 | "output_dir": "results/FP_DM_TS/" 29 | } 30 | 
-------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use AuctionGym, please consider citing our paper" 3 | authors: 4 | - family-names: "Jeunen" 5 | given-names: "Olivier" 6 | orcid: "https://orcid.org/0000-0001-6256-5814" 7 | - family-names: "Murphy" 8 | given-names: "Sean" 9 | - family-names: "Allison" 10 | given-names: "Ben" 11 | title: "Learning to Bid with AuctionGym" 12 | url: "https://github.com/amzn/auction-gym" 13 | preferred-citation: 14 | type: conference-paper 15 | authors: 16 | - family-names: "Jeunen" 17 | given-names: "Olivier" 18 | orcid: "https://orcid.org/0000-0001-6256-5814" 19 | - family-names: "Murphy" 20 | given-names: "Sean" 21 | - family-names: "Allison" 22 | given-names: "Ben" 23 | collection-title: "Proceedings of the AdKDD Workshop at the 28th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining" 24 | title: "Learning to Bid with AuctionGym" 25 | year: 2022 26 | -------------------------------------------------------------------------------- /src/AuctionAllocation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class AllocationMechanism: 4 | ''' Base class for allocation mechanisms ''' 5 | def __init__(self): 6 | pass 7 | 8 | def allocate(self, bids, num_slots): 9 | pass 10 | 11 | 12 | class FirstPrice(AllocationMechanism): 13 | ''' (Generalised) First-Price Allocation ''' 14 | 15 | def __init__(self): 16 | super(FirstPrice, self).__init__() 17 | 18 | def allocate(self, bids, num_slots): 19 | winners = np.argsort(-bids)[:num_slots] 20 | sorted_bids = -np.sort(-bids) 21 | prices = sorted_bids[:num_slots] 22 | second_prices = sorted_bids[1:num_slots+1] 23 | return winners, prices, second_prices 24 | 25 | 26 | class SecondPrice(AllocationMechanism): 27 | ''' (Generalised) Second-Price Allocation ''' 28 | 29 | def __init__(self): 30 | super(SecondPrice, self).__init__() 31 | 32 | def allocate(self, bids, num_slots): 33 | winners = np.argsort(-bids)[:num_slots] 34 | prices = -np.sort(-bids)[1:num_slots+1] 35 | return winners, prices, prices 36 | -------------------------------------------------------------------------------- /src/Impression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dataclasses import dataclass 3 | 4 | @dataclass 5 | class ImpressionOpportunity: 6 | __slots__ = ['context', 'item', 'value', 'bid', 'best_expected_value', 'true_CTR', 'estimated_CTR', 'price', 'second_price', 'winning_bid', 'outcome', 'won'] 7 | 8 | context: np.ndarray 9 | item: np.uint32 10 | value: np.float32 11 | bid: np.float32 12 | best_expected_value: np.float32 13 | true_CTR: np.float32 14 | estimated_CTR: np.float32 15 | price: np.float32 16 | second_price: np.float32 17 | outcome: bool 18 | won: bool 19 | 20 | def set_true_CTR(self, best_expected_value, true_CTR): 21 | self.best_expected_value = best_expected_value # Best possible CTR (to compute regret from ad allocation) 22 | self.true_CTR = true_CTR # True CTR for the chosen ad 23 | 24 | def set_price_outcome(self, price, second_price, outcome, won=True): 25 | self.price = price 26 | self.second_price = second_price 27 | self.outcome = outcome 28 | self.won = won 29 | 30 | def set_price(self, price): 31 | self.price = price 32 | 
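To make the pricing conventions in `src/AuctionAllocation.py` above concrete, here is a small usage sketch (illustrative only, with made-up bids): with a single slot, `FirstPrice` charges the winner their own bid while `SecondPrice` charges the runner-up's bid, and both report how far the winner could have lowered their bid without losing.

```python
import numpy as np

from AuctionAllocation import FirstPrice, SecondPrice

bids = np.array([0.7, 0.4, 0.9])

winners, prices, second_prices = FirstPrice().allocate(bids, num_slots=1)
print(winners, prices, second_prices)   # [2] [0.9] [0.7] -- winner pays their own bid

winners, prices, second_prices = SecondPrice().allocate(bids, num_slots=1)
print(winners, prices, second_prices)   # [2] [0.7] [0.7] -- winner pays the runner-up's bid
```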
-------------------------------------------------------------------------------- /CONFIG.md: -------------------------------------------------------------------------------- 1 | ## AuctionGym 2 | 3 | ### Configuration Files 4 | 5 | AuctionGym uses JSON configuration files that describe the environment, the type of auction, and bidders' behaviour. 6 | 7 | #### General Format 8 | 9 | | Key | Description | 10 | | ------------- | ------------- | 11 | | `random_seed` | The random seed that is used as input to the random number generator | 12 | | `num_runs` | The number of runs to repeat and average results over | 13 | | `num_iter` | The number of iterations; bidders update their beliefs after every iteration, and metrics are reported per iteration | 14 | | `rounds_per_iter` | The number of rounds per iteration | 15 | | `num_participants_per_round` | The number of participants in every auction round | 16 | | `embedding_size` | The dimensionality of the underlying context and item embeddings | 17 | | `embedding_var` | The variance of the Gaussian distribution from which underlying embeddings are sampled | 18 | | `obs_embedding_size` | The dimensionality of the observable context embeddings to the bidders | 19 | | `allocation` | The type of allocation: currently `FirstPrice` and `SecondPrice` are supported | 20 | | `agents` | A list of agent configurations that describe bidders' behaviour | 21 | | `output_dir` | A path to a directory that will contain results. If it does not exist, AuctionGym will create this directory. | 22 | 23 | 24 | #### Agent Format 25 | 26 | | Key | Description | 27 | | ------------- | ------------- | 28 | | `name` | An identifier for the agent | 29 | | `num_copies` | The number of agents with this configuration (but unique item catalogues). A suffix will be appended to the name if `num_copies` > 1 | 30 | | `num_items` | The number of items in the ad catalogue | 31 | | `allocator` | The allocator decides which ad to allocate, given a context. It also outputs welfare estimates. | 32 | | `bidder` | The bidder decides how to bid, given a welfare estimate, allocated ad and context. | 33 | 34 | 35 | Allocators have a type, and optional keyword arguments specific to that type. Possible allocators are `OracleAllocator` and `PyTorchLogisticRegressionAllocator`; the latter takes `embedding_size` and `num_items`. 36 | 37 | Bidders can be one of `TruthfulBidder`, `ValueLearningBidder`, `PolicyLearningBidder` or `DoublyRobustBidder`. 38 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 
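The next file, `src/BidderAllocation.py`, defines the `Allocator` interface that agents use to decide which ad to show: subclasses implement `estimate_CTR(context)` and may override `update(...)` to learn from logged outcomes. As a minimal sketch of that contract (a hypothetical allocator for illustration; it is not part of the repository):

```python
import numpy as np

from BidderAllocation import Allocator


class ConstantCTRAllocator(Allocator):
    """ Hypothetical allocator that assigns every item the same CTR estimate,
        so Agent.select_item reduces to picking the highest-value item. """

    def __init__(self, rng, num_items):
        super().__init__(rng)
        self.num_items = num_items

    def estimate_CTR(self, context):
        # One (constant) CTR estimate per item in the catalogue
        return np.full(self.num_items, 0.5)
```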
60 | -------------------------------------------------------------------------------- /src/BidderAllocation.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import torch 4 | from sklearn.metrics import log_loss, roc_auc_score 5 | from sklearn.model_selection import train_test_split 6 | from tqdm import tqdm 7 | 8 | from Models import PyTorchLogisticRegression, sigmoid 9 | 10 | 11 | class Allocator: 12 | """ Base class for an allocator """ 13 | 14 | def __init__(self, rng): 15 | self.rng = rng 16 | 17 | def update(self, contexts, items, outcomes, iteration, plot, figsize, fontsize, name): 18 | pass 19 | 20 | 21 | class PyTorchLogisticRegressionAllocator(Allocator): 22 | """ An allocator that estimates P(click) with Logistic Regression implemented in PyTorch""" 23 | 24 | def __init__(self, rng, embedding_size, num_items, thompson_sampling=True): 25 | self.response_model = PyTorchLogisticRegression(n_dim=embedding_size, n_items=num_items) 26 | self.thompson_sampling = thompson_sampling 27 | super(PyTorchLogisticRegressionAllocator, self).__init__(rng) 28 | 29 | def update(self, contexts, items, outcomes, iteration, plot, figsize, fontsize, name): 30 | # Rename 31 | X, A, y = contexts, items, outcomes 32 | 33 | if len(y) < 2: 34 | return 35 | 36 | # Fit the model 37 | self.response_model.train() 38 | epochs = 8192 * 2 39 | lr = 2e-3 40 | optimizer = torch.optim.Adam(self.response_model.parameters(), lr=lr) 41 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5) 42 | 43 | X, A, y = torch.Tensor(X), torch.LongTensor(A), torch.Tensor(y) 44 | losses = [] 45 | for epoch in tqdm(range(int(epochs)), desc=f'{name}'): 46 | optimizer.zero_grad() # Setting our stored gradients equal to zero 47 | loss = self.response_model.loss(torch.squeeze(self.response_model.predict_item(X, A)), y) 48 | loss.backward() # Computes the gradient of the given tensor w.r.t. 
the weights/bias 49 | optimizer.step() # Updates weights and biases with the optimizer (Adam) 50 | losses.append(loss.item()) 51 | scheduler.step(loss) 52 | 53 | if epoch > 1024 and np.abs(losses[-100] - losses[-1]) < 1e-6: 54 | print(f'Stopping at Epoch {epoch}') 55 | break 56 | 57 | # Laplace Approximation for the precision q 58 | with torch.no_grad(): 59 | for item in range(self.response_model.m.shape[0]): 60 | item_mask = items == item 61 | X_item = torch.Tensor(contexts[item_mask]) 62 | self.response_model.laplace_approx(X_item, item) 63 | self.response_model.update_prior() 64 | 65 | self.response_model.eval() 66 | 67 | def estimate_CTR(self, context, sample=True): 68 | return self.response_model(torch.from_numpy(context.astype(np.float32)), sample=(self.thompson_sampling and sample)).detach().numpy() 69 | 70 | 71 | class OracleAllocator(Allocator): 72 | """ An allocator that acts based on the true P(click)""" 73 | 74 | def __init__(self, rng): 75 | self.item_embeddings = None 76 | super(OracleAllocator, self).__init__(rng) 77 | 78 | def update_item_embeddings(self, item_embeddings): 79 | self.item_embeddings = item_embeddings 80 | 81 | def estimate_CTR(self, context): 82 | return sigmoid(self.item_embeddings @ context) 83 | -------------------------------------------------------------------------------- /src/Auction.py: -------------------------------------------------------------------------------- 1 | from AuctionAllocation import AllocationMechanism 2 | from Bidder import Bidder 3 | 4 | import numpy as np 5 | 6 | from BidderAllocation import OracleAllocator 7 | from Models import sigmoid 8 | 9 | class Auction: 10 | ''' Base class for auctions ''' 11 | def __init__(self, rng, allocation, agents, agent2items, agents2item_values, max_slots, embedding_size, embedding_var, obs_embedding_size, num_participants_per_round): 12 | self.rng = rng 13 | self.allocation = allocation 14 | self.agents = agents 15 | self.max_slots = max_slots 16 | self.revenue = .0 17 | 18 | self.agent2items = agent2items 19 | self.agents2item_values = agents2item_values 20 | 21 | self.embedding_size = embedding_size 22 | self.embedding_var = embedding_var 23 | 24 | self.obs_embedding_size = obs_embedding_size 25 | 26 | self.num_participants_per_round = num_participants_per_round 27 | 28 | def simulate_opportunity(self): 29 | # Sample the number of slots uniformly between [1, max_slots] 30 | num_slots = self.rng.integers(1, self.max_slots + 1) 31 | 32 | # Sample a true context vector 33 | true_context = np.concatenate((self.rng.normal(0, self.embedding_var, size=self.embedding_size), [1.0])) 34 | 35 | # Mask true context into observable context 36 | obs_context = np.concatenate((true_context[:self.obs_embedding_size], [1.0])) 37 | 38 | # At this point, the auctioneer solicits bids from 39 | # the list of bidders that might want to compete. 
40 | bids = [] 41 | CTRs = [] 42 | participating_agents_idx = self.rng.choice(len(self.agents), self.num_participants_per_round, replace=False) 43 | participating_agents = [self.agents[idx] for idx in participating_agents_idx] 44 | for agent in participating_agents: 45 | # Get the bid and the allocated item 46 | if isinstance(agent.allocator, OracleAllocator): 47 | bid, item = agent.bid(true_context) 48 | else: 49 | bid, item = agent.bid(obs_context) 50 | bids.append(bid) 51 | # Compute the true CTRs for items in this agent's catalogue 52 | true_CTR = sigmoid(true_context @ self.agent2items[agent.name].T) 53 | agent.logs[-1].set_true_CTR(np.max(true_CTR * self.agents2item_values[agent.name]), true_CTR[item]) 54 | CTRs.append(true_CTR[item]) 55 | bids = np.array(bids) 56 | CTRs = np.array(CTRs) 57 | 58 | # Now we have bids, we need to somehow allocate slots 59 | # "second_prices" tell us how much lower the winner could have gone without changing the outcome 60 | winners, prices, second_prices = self.allocation.allocate(bids, num_slots) 61 | 62 | # Bidders only obtain value when they get their outcome 63 | # Either P(view), P(click | view, ad), P(conversion | click, view, ad) 64 | # For now, look at P(click | ad) * P(view) 65 | outcomes = self.rng.binomial(1, CTRs[winners]) 66 | 67 | # Let bidders know what they're being charged for 68 | for slot_id, (winner, price, second_price, outcome) in enumerate(zip(winners, prices, second_prices, outcomes)): 69 | for agent_id, agent in enumerate(participating_agents): 70 | if agent_id == winner: 71 | agent.charge(price, second_price, bool(outcome)) 72 | else: 73 | agent.set_price(price) 74 | self.revenue += price 75 | 76 | def clear_revenue(self): 77 | self.revenue = 0.0 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AuctionGym: Simulating Online Advertising Auctions 2 | 3 | This repository contains the source code for AuctionGym: a simulation environment that enables reproducible offline evaluation of bandit and reinforcement learning approaches to ad allocation and bidding in online advertising auctions. 4 | A [research paper](https://www.amazon.science/publications/learning-to-bid-with-auctiongym) accompanying this repository was accepted as a contribution to the [AdKDD '22 workshop](https://www.adkdd.org/), co-located with the [2022 ACM SIGKDD Conference](https://kdd.org/kdd2022/index.html), and received a Best Paper Award. 5 | 6 | Offline evaluation of "learning to bid" approaches is not straightforward, for multiple reasons: 7 | (1) observational data suffers from unobserved confounding and experimental data with broad interventions is costly to obtain, 8 | (2) offline experiments suffer from Goodhart's Law: " *when a measure becomes a target, it ceases to be a good measure* ", and 9 | (3) at the time of writing, and to the best of our knowledge, there are no publicly available datasets that researchers can use for this purpose. 10 | As a result, reliable and reproducible validation of novel "learning to bid" methods is hindered, and so is open scientific progress in this field. 11 | 12 | AuctionGym aims to mitigate this problem by providing a unified framework that practitioners and researchers can use to benchmark novel methods and gain insights into their inner workings. 13 | 14 | 15 | ## Getting Started 16 | 17 | We provide two introductory and exploratory notebooks. To open them, run `jupyter notebook` in the main directory and navigate to `src`. 18 | 19 | " *Getting Started with AuctionGym (1. Effects of Competition)* " simulates second-price auctions with varying levels of competition, visualising the effects on advertiser welfare and surplus, and revenue for the auctioneer. 20 | Analogously, " *Getting Started with AuctionGym (2. Effects of Bid Shading)* " simulates first-price auctions where bidders either bid truthfully or shade their bids in a value-based manner. 21 | 22 | 23 | ## Reproducing Research Results 24 | 25 | This section provides instructions to reproduce the results reported in our AdKDD paper. 26 | 27 | We provide a script that takes as input a configuration file detailing the environment and bidders (in JSON format), and outputs raw logged metrics over repeated auction rounds in .csv-files, along with visualisations. 28 | To reproduce the results for truthful bidders in a second-price auction reported in Fig. 1 in the paper, run: 29 | 30 | ``` 31 | python src/main.py config/SP_Oracle.json 32 | ``` 33 | 34 | A `results`-directory will be created, with a subdirectory per configuration file that was run. This subdirectory will contain .csv-files with raw metrics, and .pdf-files with general visualisations. 35 | Other configuration files will generate results for other environments and other bidder behaviour. 36 | See [configuration](CONFIG.md) for more detail on the structure of the configuration files. 37 | 38 | 39 | 40 | ## Citing 41 | 42 | 43 | Please cite the [accompanying research paper](https://www.amazon.science/publications/learning-to-bid-with-auctiongym) if you use AuctionGym in your work: 44 | 45 | ```BibTeX 46 | @inproceedings{Jeunen2022_AuctionGym, 47 | author = {Jeunen, Olivier and Murphy, Sean and Allison, Ben}, 48 | title = {Learning to Bid with AuctionGym}, 49 | booktitle = {Proc. of the AdKDD Workshop at the 28th ACM SIGKDD Conference on Knowledge Discovery \& Data Mining}, 50 | series = {AdKDD '22}, 51 | year = {2022} 52 | } 53 | ``` 54 | 55 | 56 | ## Security 57 | 58 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 59 | 60 | ## License 61 | 62 | This project is licensed under the Apache-2.0 License. 
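The raw metrics are plain .csv-files with one row per (Run, Agent, Iteration), so they can be analysed outside of AuctionGym. As a sketch (assuming the `SP_Oracle.json` configuration; the file name is derived from the configuration values, following the pattern in `src/main.py`):

```python
import pandas as pd

# File name pattern (from src/main.py):
# net_utility_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.csv
df = pd.read_csv('results/SP_Oracle/net_utility_10000_rounds_20_iters_3_runs_4_emb_of_5.csv')

# Average net utility per agent, over runs and iterations
print(df.groupby('Agent')['Net Utility'].mean())
```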
63 | 64 | -------------------------------------------------------------------------------- /src/Agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from BidderAllocation import PyTorchLogisticRegressionAllocator, OracleAllocator 4 | from Impression import ImpressionOpportunity 5 | from Models import sigmoid 6 | 7 | 8 | class Agent: 9 | ''' An agent representing an advertiser ''' 10 | 11 | def __init__(self, rng, name, num_items, item_values, allocator, bidder, memory=0): 12 | self.rng = rng 13 | self.name = name 14 | self.num_items = num_items 15 | 16 | # Value distribution 17 | self.item_values = item_values 18 | 19 | self.net_utility = .0 20 | self.gross_utility = .0 21 | 22 | self.logs = [] 23 | 24 | self.allocator = allocator 25 | self.bidder = bidder 26 | 27 | self.memory = memory 28 | 29 | def select_item(self, context): 30 | # Estimate CTR for all items 31 | estim_CTRs = self.allocator.estimate_CTR(context) 32 | # Compute value if clicked 33 | estim_values = estim_CTRs * self.item_values 34 | # Pick the best item (according to TS) 35 | best_item = np.argmax(estim_values) 36 | 37 | # If we do Thompson Sampling, don't propagate the noisy bid amount but bid using the MAP estimate 38 | if isinstance(self.allocator, PyTorchLogisticRegressionAllocator) and self.allocator.thompson_sampling: 39 | estim_CTRs_MAP = self.allocator.estimate_CTR(context, sample=False) 40 | return best_item, estim_CTRs_MAP[best_item] 41 | 42 | return best_item, estim_CTRs[best_item] 43 | 44 | def bid(self, context): 45 | # First, pick the item we want to advertise 46 | best_item, estimated_CTR = self.select_item(context) 47 | 48 | # Look up the value for this item 49 | value = self.item_values[best_item] 50 | 51 | # Get the bid 52 | bid = self.bidder.bid(value, context, estimated_CTR) 53 | 54 | # Log what we know so far 55 | self.logs.append(ImpressionOpportunity(context=context, 56 | item=best_item, 57 | estimated_CTR=estimated_CTR, 58 | value=value, 59 | bid=bid, 60 | # These will be filled out later 61 | best_expected_value=0.0, 62 | true_CTR=0.0, 63 | price=0.0, 64 | second_price=0.0, 65 | outcome=0, 66 | won=False)) 67 | 68 | return bid, best_item 69 | 70 | def charge(self, price, second_price, outcome): 71 | self.logs[-1].set_price_outcome(price, second_price, outcome, won=True) 72 | last_value = self.logs[-1].value * outcome 73 | self.net_utility += (last_value - price) 74 | self.gross_utility += last_value 75 | 76 | def set_price(self, price): 77 | self.logs[-1].set_price(price) 78 | 79 | def update(self, iteration, plot=False, figsize=(8,5), fontsize=14): 80 | # Gather relevant logs 81 | contexts = np.array(list(opp.context for opp in self.logs)) 82 | items = np.array(list(opp.item for opp in self.logs)) 83 | values = np.array(list(opp.value for opp in self.logs)) 84 | bids = np.array(list(opp.bid for opp in self.logs)) 85 | prices = np.array(list(opp.price for opp in self.logs)) 86 | outcomes = np.array(list(opp.outcome for opp in self.logs)) 87 | estimated_CTRs = np.array(list(opp.estimated_CTR for opp in self.logs)) 88 | 89 | # Update response model with data from winning bids 90 | won_mask = np.array(list(opp.won for opp in self.logs)) 91 | self.allocator.update(contexts[won_mask], items[won_mask], outcomes[won_mask], iteration, plot, figsize, fontsize, self.name) 92 | 93 | # Update bidding model with all data 94 | self.bidder.update(contexts, values, bids, prices, outcomes, estimated_CTRs, won_mask, iteration, plot, figsize, fontsize, 
self.name) 95 | 96 | def get_allocation_regret(self): 97 | ''' How much value am I missing out on due to suboptimal allocation? ''' 98 | return np.sum(list(opp.best_expected_value - opp.true_CTR * opp.value for opp in self.logs)) 99 | 100 | def get_estimation_regret(self): 101 | ''' How much am I overpaying due to over-estimation of the value? ''' 102 | return np.sum(list(opp.estimated_CTR * opp.value - opp.true_CTR * opp.value for opp in self.logs)) 103 | 104 | def get_overbid_regret(self): 105 | ''' How much am I overpaying because I could shade more? ''' 106 | return np.sum(list((opp.price - opp.second_price) * opp.won for opp in self.logs)) 107 | 108 | def get_underbid_regret(self): 109 | ''' How much have I lost because I could have shaded less? ''' 110 | # The difference between the winning price and our bid -- for opportunities we lost, and where we could have won without overpaying 111 | # Important to mention that this assumes a first-price auction! i.e. the price is the winning bid 112 | return np.sum(list((opp.price - opp.bid) * (not opp.won) * (opp.price < (opp.true_CTR * opp.value)) for opp in self.logs)) 113 | 114 | def get_CTR_RMSE(self): 115 | return np.sqrt(np.mean(list((opp.true_CTR - opp.estimated_CTR)**2 for opp in self.logs))) 116 | 117 | def get_CTR_bias(self): 118 | return np.mean(list((opp.estimated_CTR / opp.true_CTR) for opp in filter(lambda opp: opp.won, self.logs))) 119 | 120 | def clear_utility(self): 121 | self.net_utility = .0 122 | self.gross_utility = .0 123 | 124 | def clear_logs(self): 125 | if not self.memory: 126 | self.logs = [] 127 | else: 128 | self.logs = self.logs[-self.memory:] 129 | self.bidder.clear_logs(memory=self.memory) 130 | 131 | -------------------------------------------------------------------------------- /src/Models.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import torch 4 | from numba import jit 5 | from scipy.optimize import minimize 6 | from torch.nn import functional as F 7 | from tqdm import tqdm 8 | 9 | 10 | @jit(nopython=True) 11 | def sigmoid(x): 12 | return 1.0 / (1.0 + np.exp(-x)) 13 | 14 | # This is an implementation of Algorithm 3 (Regularised Bayesian Logistic Regression with a Laplace Approximation) 15 | # from "An Empirical Evaluation of Thompson Sampling" by Olivier Chapelle & Lihong Li 16 | # https://proceedings.neurips.cc/paper/2011/file/e53a0a2978c28872a4505bdb51db06dc-Paper.pdf 17 | 18 | class PyTorchLogisticRegression(torch.nn.Module): 19 | def __init__(self, n_dim, n_items): 20 | super(PyTorchLogisticRegression, self).__init__() 21 | self.m = torch.nn.Parameter(torch.Tensor(n_items, n_dim + 1)) 22 | torch.nn.init.normal_(self.m, mean=0.0, std=1.0) 23 | self.prev_iter_m = self.m.detach().clone() 24 | self.q = torch.ones((n_items, n_dim + 1)) 25 | self.logloss = torch.nn.BCELoss(reduction='sum') 26 | self.eval() 27 | 28 | def forward(self, x, sample=False): 29 | ''' Predict outcome for all items, allow for posterior sampling ''' 30 | if sample: 31 | return torch.sigmoid(F.linear(x, self.m + torch.normal(mean=0.0, std=1.0/torch.sqrt(self.q)))) 32 | else: 33 | return torch.sigmoid(F.linear(x, self.m)) 34 | 35 | def predict_item(self, x, a): 36 | ''' Predict outcome for an item a, only MAP ''' 37 | return torch.sigmoid((x * self.m[a]).sum(axis=1)) 38 | 39 | def loss(self, predictions, labels): 40 | prior_dist = self.q[:, :-1] * (self.prev_iter_m[:, :-1] - self.m[:, :-1])**2 41 | return 0.5 * prior_dist.sum() + 
self.logloss(predictions, labels) 42 | 43 | def laplace_approx(self, X, item): 44 | P = (1 + torch.exp(-X.matmul(self.m[item, :].T))) ** (-1) 45 | self.q[item, :] += (P*(1-P)).T.matmul(X ** 2).squeeze(0) 46 | 47 | def update_prior(self): 48 | self.prev_iter_m = self.m.detach().clone() 49 | 50 | 51 | class PyTorchWinRateEstimator(torch.nn.Module): 52 | def __init__(self): 53 | super(PyTorchWinRateEstimator, self).__init__() 54 | # Input P(click), the value, and the bid shading factor 55 | self.model = torch.nn.Sequential( 56 | torch.nn.Linear(3, 1, bias=True), 57 | torch.nn.Sigmoid() 58 | ) 59 | self.eval() 60 | 61 | def forward(self, x): 62 | return self.model(x) 63 | 64 | 65 | class BidShadingPolicy(torch.nn.Module): 66 | def __init__(self): 67 | super(BidShadingPolicy, self).__init__() 68 | # Input: P(click), value 69 | # Output: mu, sigma for Gaussian bid shading distribution 70 | # Learnt to maximise E[P(win|gamma)*(value - price)] when gamma ~ N(mu, sigma) 71 | self.shared_linear = torch.nn.Linear(2, 2, bias=True) 72 | 73 | self.mu_linear_hidden = torch.nn.Linear(2, 2) 74 | self.mu_linear_out = torch.nn.Linear(2, 1) 75 | 76 | self.sigma_linear_hidden = torch.nn.Linear(2, 2) 77 | self.sigma_linear_out = torch.nn.Linear(2, 1) 78 | self.eval() 79 | 80 | self.min_sigma = 1e-2 81 | 82 | def forward(self, x): 83 | x = self.shared_linear(x) 84 | mu = torch.nn.Softplus()(self.mu_linear_out(torch.nn.Softplus()(x))) 85 | sigma = torch.nn.Softplus()(self.sigma_linear_out(torch.nn.Softplus()(x))) + self.min_sigma 86 | dist = torch.distributions.normal.Normal(mu, sigma) 87 | sampled_value = dist.rsample() 88 | propensity = torch.exp(dist.log_prob(sampled_value)) 89 | sampled_value = torch.clip(sampled_value, min=0.0, max=1.0) 90 | return sampled_value, propensity 91 | 92 | 93 | class BidShadingContextualBandit(torch.nn.Module): 94 | def __init__(self, loss, winrate_model=None): 95 | super(BidShadingContextualBandit, self).__init__() 96 | 97 | self.shared_linear = torch.nn.Linear(2, 2, bias=True) 98 | 99 | self.mu_linear_out = torch.nn.Linear(2, 1) 100 | 101 | self.sigma_linear_out = torch.nn.Linear(2, 1) 102 | self.eval() 103 | 104 | self.min_sigma = 1e-2 105 | 106 | self.loss_name = loss 107 | 108 | self.model_initialised = False 109 | 110 | def initialise_policy(self, observed_contexts, observed_gammas): 111 | # The first time, train the policy to imitate the logging policy 112 | self.train() 113 | epochs = 8192 * 2 114 | lr = 1e-3 115 | optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=1e-4, amsgrad=True) 116 | 117 | criterion = torch.nn.MSELoss() 118 | losses = [] 119 | best_epoch, best_loss = -1, np.inf 120 | for epoch in tqdm(range(int(epochs)), desc=f'Initialising Policy'): 121 | optimizer.zero_grad() # Setting our stored gradients equal to zero 122 | predicted_mu_gammas = torch.nn.Softplus()(self.mu_linear_out(torch.nn.Softplus()(self.shared_linear(observed_contexts)))) 123 | predicted_sigma_gammas = torch.nn.Softplus()(self.sigma_linear_out(torch.nn.Softplus()(self.shared_linear(observed_contexts)))) 124 | loss = criterion(predicted_mu_gammas.squeeze(), observed_gammas) + criterion(predicted_sigma_gammas.squeeze(), torch.ones_like(observed_gammas) * .05) 125 | loss.backward() # Computes the gradient of the given tensor w.r.t. the weights/bias 126 | optimizer.step() # Updates weights and biases with the optimizer (Adam) 127 | losses.append(loss.item()) 128 | if (best_loss - losses[-1]) > 1e-6: 129 | best_epoch = epoch 130 | best_loss = losses[-1] 131 | elif epoch - best_epoch > 512: 132 | print(f'Stopping at Epoch {epoch}') 133 | break 134 | 135 | fig, ax = plt.subplots() 136 | plt.title(f'Initialising policy') 137 | plt.plot(losses, label=r'Loss') 138 | plt.ylabel('MSE with logging policy') 139 | plt.legend() 140 | fig.set_tight_layout(True) 141 | #plt.show() 142 | 143 | print('Predicted mu Gammas: ', predicted_mu_gammas.min(), predicted_mu_gammas.max(), predicted_mu_gammas.mean()) 144 | print('Predicted sigma Gammas: ', predicted_sigma_gammas.min(), predicted_sigma_gammas.max(), predicted_sigma_gammas.mean()) 145 | 146 | def forward(self, x): 147 | x = self.shared_linear(x) 148 | dist = torch.distributions.normal.Normal( 149 | torch.nn.Softplus()(self.mu_linear_out(torch.nn.Softplus()(x))), 150 | torch.nn.Softplus()(self.sigma_linear_out(torch.nn.Softplus()(x))) + self.min_sigma 151 | ) 152 | sampled_value = dist.rsample() 153 | propensity = torch.exp(dist.log_prob(sampled_value)) 154 | sampled_value = torch.clip(sampled_value, min=0.0, max=1.0) 155 | return sampled_value, propensity 156 | 157 | def normal_pdf(self, x, gamma): 158 | # Get distribution over bid shading factors 159 | x = self.shared_linear(x) 160 | mu = torch.nn.Softplus()(self.mu_linear_out(torch.nn.Softplus()(x))) 161 | sigma = torch.nn.Softplus()(self.sigma_linear_out(torch.nn.Softplus()(x))) + self.min_sigma 162 | mu = mu.squeeze() 163 | sigma = sigma.squeeze() 164 | # Compute the density for gamma under a Gaussian centered at mu -- prevent overflow 165 | return mu, sigma, torch.clip(torch.exp(-((mu - gamma) / sigma)**2/2) / (sigma * np.sqrt(2 * np.pi)), min=1e-30) 166 | 167 | def loss(self, observed_context, observed_gamma, logging_propensity, utility, utility_estimates=None, winrate_model=None, KL_weight=5e-2, importance_weight_clipping_eps=torch.inf): 168 | 169 | mean_gamma_target, sigma_gamma_target, target_propensities = self.normal_pdf(observed_context, observed_gamma) 170 | 171 | # If not initialised, do a single round of on-policy REINFORCE 172 | # The issue is that without proper initialisation, propensities vanish 173 | if (self.loss_name == 'REINFORCE'): # or (not self.model_initialised) 174 | return (-target_propensities * utility).mean() 175 | 176 | elif self.loss_name == 'REINFORCE_offpolicy': 177 | importance_weights = target_propensities / logging_propensity 178 | return (-importance_weights * utility).mean() 179 | 180 | elif self.loss_name == 'TRPO': 181 | # https://arxiv.org/abs/1502.05477 182 | importance_weights = target_propensities / logging_propensity 183 | expected_utility = torch.mean(importance_weights * utility) 184 | KLdiv = (sigma_gamma_target**2 + (mean_gamma_target - observed_gamma)**2) / (2 * sigma_gamma_target**2) - 0.5 185 | # Simpler proxy for KL divergence 186 | # KLdiv = (mean_gamma_target - observed_gamma)**2 187 | return - expected_utility + KLdiv.mean() * KL_weight 188 | 189 | elif self.loss_name == 'PPO': 190 | # https://arxiv.org/pdf/1707.06347.pdf 191 | # NOTE: clipping is actually proposed in an additive manner 192 | importance_weights = target_propensities / logging_propensity 193 | clipped_importance_weights = torch.clip(importance_weights, 194 | min=1.0/importance_weight_clipping_eps, 195 | max=importance_weight_clipping_eps) 196 | return - torch.min(importance_weights * utility, 
clipped_importance_weights * utility).mean() 197 | 198 | elif self.loss_name == 'Doubly Robust': 199 | importance_weights = target_propensities / logging_propensity 200 | 201 | DR_IPS = (utility - utility_estimates) * torch.clip(importance_weights, min=1.0/importance_weight_clipping_eps, max=importance_weight_clipping_eps) 202 | 203 | dist = torch.distributions.normal.Normal( 204 | mean_gamma_target, 205 | sigma_gamma_target 206 | ) 207 | 208 | sampled_gamma = torch.clip(dist.rsample(), min=0.0, max=1.0) 209 | features_for_p_win = torch.hstack((observed_context, sampled_gamma.reshape(-1,1))) 210 | 211 | W = winrate_model(features_for_p_win).squeeze() 212 | 213 | V = observed_context[:,0].squeeze() * observed_context[:,1].squeeze() 214 | P = observed_context[:,0].squeeze() * observed_context[:,1].squeeze() * sampled_gamma 215 | 216 | DR_DM = W * (V - P) 217 | 218 | return -(DR_IPS + DR_DM).mean() 219 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import os 6 | import pandas as pd 7 | import seaborn as sns 8 | from collections import defaultdict 9 | from copy import deepcopy 10 | from tqdm import tqdm 11 | 12 | from Agent import Agent 13 | from AuctionAllocation import * # FirstPrice, SecondPrice 14 | from Auction import Auction 15 | from Bidder import * # EmpiricalShadedBidder, TruthfulBidder 16 | from BidderAllocation import * # LogisticTSAllocator, OracleAllocator 17 | 18 | 19 | def parse_kwargs(kwargs): 20 | parsed = ','.join([f'{key}={value}' for key, value in kwargs.items()]) 21 | return ',' + parsed if parsed else '' 22 | 23 | 24 | def parse_config(path): 25 | with open(path) as f: 26 | config = json.load(f) 27 | 28 | # Set up Random Number Generator 29 | rng = np.random.default_rng(config['random_seed']) 30 | np.random.seed(config['random_seed']) 31 | 32 | # Number of runs 33 | num_runs = config['num_runs'] if 'num_runs' in config.keys() else 1 34 | 35 | # Max. number of slots in every auction round 36 | # Multi-slot is currently not fully supported. 
37 | max_slots = 1 38 | 39 | # Technical parameters for distribution of latent embeddings 40 | embedding_size = config['embedding_size'] 41 | embedding_var = config['embedding_var'] 42 | obs_embedding_size = config['obs_embedding_size'] 43 | 44 | # Expand agent-config if there are multiple copies 45 | agent_configs = [] 46 | num_agents = 0 47 | for agent_config in config['agents']: 48 | if 'num_copies' in agent_config.keys(): 49 | for i in range(1, agent_config['num_copies'] + 1): 50 | agent_config_copy = deepcopy(agent_config) 51 | agent_config_copy['name'] += f' {num_agents + 1}' 52 | agent_configs.append(agent_config_copy) 53 | num_agents += 1 54 | else: 55 | agent_configs.append(agent_config) 56 | num_agents += 1 57 | 58 | # First sample item catalog (so it is consistent over different configs with the same seed) 59 | # Agent : (item_embedding, item_value) 60 | agents2items = { 61 | agent_config['name']: rng.normal(0.0, embedding_var, size=(agent_config['num_items'], embedding_size)) 62 | for agent_config in agent_configs 63 | } 64 | 65 | agents2item_values = { 66 | agent_config['name']: rng.lognormal(0.1, 0.2, agent_config['num_items']) 67 | for agent_config in agent_configs 68 | } 69 | 70 | # Add intercepts to embeddings (Uniformly in [-4.5, -1.5], this gives nicer distributions for P(click)) 71 | for agent, items in agents2items.items(): 72 | agents2items[agent] = np.hstack((items, - 3.0 - 1.0 * rng.random((items.shape[0], 1)))) 73 | 74 | return rng, config, agent_configs, agents2items, agents2item_values, num_runs, max_slots, embedding_size, embedding_var, obs_embedding_size 75 | 76 | 77 | def instantiate_agents(rng, agent_configs, agents2item_values, agents2items): 78 | # Store agents to be re-instantiated in subsequent runs 79 | # Set up agents 80 | agents = [ 81 | Agent(rng=rng, 82 | name=agent_config['name'], 83 | num_items=agent_config['num_items'], 84 | item_values=agents2item_values[agent_config['name']], 85 | allocator=eval(f"{agent_config['allocator']['type']}(rng=rng{parse_kwargs(agent_config['allocator']['kwargs'])})"), 86 | bidder=eval(f"{agent_config['bidder']['type']}(rng=rng{parse_kwargs(agent_config['bidder']['kwargs'])})"), 87 | memory=(0 if 'memory' not in agent_config.keys() else agent_config['memory'])) 88 | for agent_config in agent_configs 89 | ] 90 | 91 | for agent in agents: 92 | if isinstance(agent.allocator, OracleAllocator): 93 | agent.allocator.update_item_embeddings(agents2items[agent.name]) 94 | 95 | return agents 96 | 97 | 98 | def instantiate_auction(rng, config, agents2items, agents2item_values, agents, max_slots, embedding_size, embedding_var, obs_embedding_size): 99 | return (Auction(rng, 100 | eval(f"{config['allocation']}()"), 101 | agents, 102 | agents2items, 103 | agents2item_values, 104 | max_slots, 105 | embedding_size, 106 | embedding_var, 107 | obs_embedding_size, 108 | config['num_participants_per_round']), 109 | config['num_iter'], config['rounds_per_iter'], config['output_dir']) 110 | 111 | 112 | def simulation_run(): 113 | for i in range(num_iter): 114 | print(f'==== ITERATION {i} ====') 115 | 116 | for _ in tqdm(range(rounds_per_iter)): 117 | auction.simulate_opportunity() 118 | 119 | names = [agent.name for agent in auction.agents] 120 | net_utilities = [agent.net_utility for agent in auction.agents] 121 | gross_utilities = [agent.gross_utility for agent in auction.agents] 122 | 123 | result = pd.DataFrame({'Name': names, 'Net': net_utilities, 'Gross': gross_utilities}) 124 | 125 | print(result) 126 | print(f'\tAuction revenue: \t 
{auction.revenue}') 127 | 128 | for agent_id, agent in enumerate(auction.agents): 129 | agent.update(iteration=i, plot=True, figsize=FIGSIZE, fontsize=FONTSIZE) 130 | 131 | agent2net_utility[agent.name].append(agent.net_utility) 132 | agent2gross_utility[agent.name].append(agent.gross_utility) 133 | 134 | agent2allocation_regret[agent.name].append(agent.get_allocation_regret()) 135 | agent2estimation_regret[agent.name].append(agent.get_estimation_regret()) 136 | agent2overbid_regret[agent.name].append(agent.get_overbid_regret()) 137 | agent2underbid_regret[agent.name].append(agent.get_underbid_regret()) 138 | 139 | agent2CTR_RMSE[agent.name].append(agent.get_CTR_RMSE()) 140 | agent2CTR_bias[agent.name].append(agent.get_CTR_bias()) 141 | 142 | if isinstance(agent.bidder, PolicyLearningBidder) or isinstance(agent.bidder, DoublyRobustBidder): 143 | agent2gamma[agent.name].append(torch.mean(torch.Tensor(agent.bidder.gammas)).detach().item()) 144 | elif not agent.bidder.truthful: 145 | agent2gamma[agent.name].append(np.mean(agent.bidder.gammas)) 146 | 147 | best_expected_value = np.mean([opp.best_expected_value for opp in agent.logs]) 148 | agent2best_expected_value[agent.name].append(best_expected_value) 149 | 150 | print('Average Best Value for Agent: ', best_expected_value) 151 | agent.clear_utility() 152 | agent.clear_logs() 153 | 154 | auction_revenue.append(auction.revenue) 155 | auction.clear_revenue() 156 | 157 | if __name__ == '__main__': 158 | # Parse commandline arguments 159 | parser = argparse.ArgumentParser() 160 | parser.add_argument('config', type=str, help='Path to experiment configuration file') 161 | args = parser.parse_args() 162 | 163 | # Parse configuration file 164 | rng, config, agent_configs, agents2items, agents2item_values, num_runs, max_slots, embedding_size, embedding_var, obs_embedding_size = parse_config(args.config) 165 | 166 | # Plotting config 167 | FIGSIZE = (8, 5) 168 | FONTSIZE = 14 169 | 170 | # Placeholders for summary statistics over all runs 171 | run2agent2net_utility = {} 172 | run2agent2gross_utility = {} 173 | run2agent2allocation_regret = {} 174 | run2agent2estimation_regret = {} 175 | run2agent2overbid_regret = {} 176 | run2agent2underbid_regret = {} 177 | run2agent2best_expected_value = {} 178 | 179 | run2agent2CTR_RMSE = {} 180 | run2agent2CTR_bias = {} 181 | run2agent2gamma = {} 182 | 183 | run2auction_revenue = {} 184 | 185 | # Repeated runs 186 | for run in range(num_runs): 187 | # Reinstantiate agents and auction per run 188 | agents = instantiate_agents(rng, agent_configs, agents2item_values, agents2items) 189 | auction, num_iter, rounds_per_iter, output_dir = instantiate_auction(rng, config, agents2items, agents2item_values, agents, max_slots, embedding_size, embedding_var, obs_embedding_size) 190 | 191 | # Placeholders for summary statistics per run 192 | agent2net_utility = defaultdict(list) 193 | agent2gross_utility = defaultdict(list) 194 | agent2allocation_regret = defaultdict(list) 195 | agent2estimation_regret = defaultdict(list) 196 | agent2overbid_regret = defaultdict(list) 197 | agent2underbid_regret = defaultdict(list) 198 | agent2best_expected_value = defaultdict(list) 199 | 200 | agent2CTR_RMSE = defaultdict(list) 201 | agent2CTR_bias = defaultdict(list) 202 | agent2gamma = defaultdict(list) 203 | 204 | auction_revenue = [] 205 | 206 | # Run simulation (with global parameters -- fine for the purposes of this script) 207 | simulation_run() 208 | 209 | # Store 210 | run2agent2net_utility[run] = agent2net_utility 211 | 
run2agent2gross_utility[run] = agent2gross_utility 212 | run2agent2allocation_regret[run] = agent2allocation_regret 213 | run2agent2estimation_regret[run] = agent2estimation_regret 214 | run2agent2overbid_regret[run] = agent2overbid_regret 215 | run2agent2underbid_regret[run] = agent2underbid_regret 216 | run2agent2best_expected_value[run] = agent2best_expected_value 217 | 218 | run2agent2CTR_RMSE[run] = agent2CTR_RMSE 219 | run2agent2CTR_bias[run] = agent2CTR_bias 220 | run2agent2gamma[run] = agent2gamma 221 | 222 | run2auction_revenue[run] = auction_revenue 223 | 224 | # Make sure we can write results 225 | if not os.path.exists(output_dir): 226 | os.makedirs(output_dir) 227 | 228 | def measure_per_agent2df(run2agent2measure, measure_name): 229 | df_rows = {'Run': [], 'Agent': [], 'Iteration': [], measure_name: []} 230 | for run, agent2measure in run2agent2measure.items(): 231 | for agent, measures in agent2measure.items(): 232 | for iteration, measure in enumerate(measures): 233 | df_rows['Run'].append(run) 234 | df_rows['Agent'].append(agent) 235 | df_rows['Iteration'].append(iteration) 236 | df_rows[measure_name].append(measure) 237 | return pd.DataFrame(df_rows) 238 | 239 | def plot_measure_per_agent(run2agent2measure, measure_name, cumulative=False, log_y=False, yrange=None, optimal=None): 240 | # Generate DataFrame for Seaborn 241 | if type(run2agent2measure) != pd.DataFrame: 242 | df = measure_per_agent2df(run2agent2measure, measure_name) 243 | else: 244 | df = run2agent2measure 245 | 246 | fig, axes = plt.subplots(figsize=FIGSIZE) 247 | plt.title(f'{measure_name} Over Time', fontsize=FONTSIZE + 2) 248 | min_measure, max_measure = 0.0, 0.0 249 | sns.lineplot(data=df, x="Iteration", y=measure_name, hue="Agent", ax=axes) 250 | plt.xticks(fontsize=FONTSIZE - 2) 251 | plt.ylabel(f'{measure_name}', fontsize=FONTSIZE) 252 | if optimal is not None: 253 | plt.axhline(optimal, ls='--', color='gray', label='Optimal') 254 | min_measure = min(min_measure, optimal) 255 | if log_y: 256 | plt.yscale('log') 257 | if yrange is None: 258 | factor = 1.1 if min_measure < 0 else 0.9 259 | # plt.ylim(min_measure * factor, max_measure * 1.1) 260 | else: 261 | plt.ylim(yrange[0], yrange[1]) 262 | plt.yticks(fontsize=FONTSIZE - 2) 263 | plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3) 264 | plt.legend(loc='upper left', bbox_to_anchor=(-.05, -.15), fontsize=FONTSIZE, ncol=3) 265 | plt.tight_layout() 266 | plt.savefig(f"{output_dir}/{measure_name.replace(' ', '_')}_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.pdf", bbox_inches='tight') 267 | # plt.show() 268 | return df 269 | 270 | net_utility_df = plot_measure_per_agent(run2agent2net_utility, 'Net Utility').sort_values(['Agent', 'Run', 'Iteration']) 271 | net_utility_df.to_csv(f'{output_dir}/net_utility_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.csv', index=False) 272 | 273 | net_utility_df['Net Utility (Cumulative)'] = net_utility_df.groupby(['Agent', 'Run'])['Net Utility'].cumsum() 274 | plot_measure_per_agent(net_utility_df, 'Net Utility (Cumulative)') 275 | 276 | gross_utility_df = plot_measure_per_agent(run2agent2gross_utility, 'Gross Utility').sort_values(['Agent', 'Run', 'Iteration']) 277 | gross_utility_df.to_csv(f'{output_dir}/gross_utility_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.csv', index=False) 278 | 279 | gross_utility_df['Gross Utility (Cumulative)'] 
= gross_utility_df.groupby(['Agent', 'Run'])['Gross Utility'].cumsum() 280 | plot_measure_per_agent(gross_utility_df, 'Gross Utility (Cumulative)') 281 | 282 | plot_measure_per_agent(run2agent2best_expected_value, 'Mean Expected Value for Top Ad') 283 | 284 | plot_measure_per_agent(run2agent2allocation_regret, 'Allocation Regret') 285 | plot_measure_per_agent(run2agent2estimation_regret, 'Estimation Regret') 286 | overbid_regret_df = plot_measure_per_agent(run2agent2overbid_regret, 'Overbid Regret') 287 | overbid_regret_df.to_csv(f'{output_dir}/overbid_regret_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.csv', index=False) 288 | underbid_regret_df = plot_measure_per_agent(run2agent2underbid_regret, 'Underbid Regret') 289 | underbid_regret_df.to_csv(f'{output_dir}/underbid_regret_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.csv', index=False) 290 | 291 | plot_measure_per_agent(run2agent2CTR_RMSE, 'CTR RMSE', log_y=True) 292 | plot_measure_per_agent(run2agent2CTR_bias, 'CTR Bias', optimal=1.0) #, yrange=(.5, 5.0)) 293 | 294 | shading_factor_df = plot_measure_per_agent(run2agent2gamma, 'Shading Factors') 295 | 296 | def measure2df(run2measure, measure_name): 297 | df_rows = {'Run': [], 'Iteration': [], measure_name: []} 298 | for run, measures in run2measure.items(): 299 | for iteration, measure in enumerate(measures): 300 | df_rows['Run'].append(run) 301 | df_rows['Iteration'].append(iteration) 302 | df_rows[measure_name].append(measure) 303 | return pd.DataFrame(df_rows) 304 | 305 | def plot_measure_overall(run2measure, measure_name): 306 | # Generate DataFrame for Seaborn 307 | if type(run2measure) != pd.DataFrame: 308 | df = measure2df(run2measure, measure_name) 309 | else: 310 | df = run2measure 311 | fig, axes = plt.subplots(figsize=FIGSIZE) 312 | plt.title(f'{measure_name} Over Time', fontsize=FONTSIZE + 2) 313 | sns.lineplot(data=df, x="Iteration", y=measure_name, ax=axes) 314 | min_measure = min(0.0, np.min(df[measure_name])) 315 | max_measure = max(0.0, np.max(df[measure_name])) 316 | plt.xlabel('Iteration', fontsize=FONTSIZE) 317 | plt.xticks(fontsize=FONTSIZE - 2) 318 | plt.ylabel(f'{measure_name}', fontsize=FONTSIZE) 319 | factor = 1.1 if min_measure < 0 else 0.9 320 | plt.ylim(min_measure * factor, max_measure * 1.1) 321 | plt.yticks(fontsize=FONTSIZE - 2) 322 | plt.grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3) 323 | plt.tight_layout() 324 | plt.savefig(f"{output_dir}/{measure_name.replace(' ', '_')}_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.pdf", bbox_inches='tight') 325 | # plt.show() 326 | return df 327 | 328 | auction_revenue_df = plot_measure_overall(run2auction_revenue, 'Auction Revenue') 329 | 330 | net_utility_df_overall = net_utility_df.groupby(['Run', 'Iteration'])['Net Utility'].sum().reset_index().rename(columns={'Net Utility': 'Social Surplus'}) 331 | plot_measure_overall(net_utility_df_overall, 'Social Surplus') 332 | 333 | gross_utility_df_overall = gross_utility_df.groupby(['Run', 'Iteration'])['Gross Utility'].sum().reset_index().rename(columns={'Gross Utility': 'Social Welfare'}) 334 | plot_measure_overall(gross_utility_df_overall, 'Social Welfare') 335 | 336 | auction_revenue_df['Measure Name'] = 'Auction Revenue' 337 | net_utility_df_overall['Measure Name'] = 'Social Surplus' 338 | gross_utility_df_overall['Measure Name'] = 'Social Welfare' 339 | 340 | columns = ['Run', 
'Iteration', 'Measure', 'Measure Name'] 341 | auction_revenue_df.columns = columns 342 | net_utility_df_overall.columns = columns 343 | gross_utility_df_overall.columns = columns 344 | 345 | pd.concat((auction_revenue_df, net_utility_df_overall, gross_utility_df_overall)).to_csv(f'{output_dir}/results_{rounds_per_iter}_rounds_{num_iter}_iters_{num_runs}_runs_{obs_embedding_size}_emb_of_{embedding_size}.csv', index=False) 346 | -------------------------------------------------------------------------------- /src/Bidder.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import scipy.stats 4 | import torch 5 | 6 | from sklearn.gaussian_process import GaussianProcessRegressor 7 | from sklearn.gaussian_process.kernels import RBF 8 | from sklearn.metrics import roc_auc_score 9 | from tqdm import tqdm 10 | 11 | from Impression import ImpressionOpportunity 12 | from Models import BidShadingContextualBandit, BidShadingPolicy, PyTorchWinRateEstimator 13 | 14 | 15 | class Bidder: 16 | """ Bidder base class""" 17 | def __init__(self, rng): 18 | self.rng = rng 19 | self.truthful = False # Default 20 | 21 | def update(self, contexts, values, bids, prices, outcomes, estimated_CTRs, won_mask, iteration, plot, figsize, fontsize, name): 22 | pass 23 | 24 | def clear_logs(self, memory): 25 | pass 26 | 27 | 28 | class TruthfulBidder(Bidder): 29 | """ A bidder that bids truthfully """ 30 | def __init__(self, rng): 31 | super(TruthfulBidder, self).__init__(rng) 32 | self.truthful = True 33 | 34 | def bid(self, value, context, estimated_CTR): 35 | return value * estimated_CTR 36 | 37 | 38 | class EmpiricalShadedBidder(Bidder): 39 | """ A bidder that learns a single bidding factor gamma from past data """ 40 | 41 | def __init__(self, rng, gamma_sigma, init_gamma=1.0): 42 | self.gamma_sigma = gamma_sigma 43 | self.prev_gamma = init_gamma 44 | self.gammas = [] 45 | super(EmpiricalShadedBidder, self).__init__(rng) 46 | 47 | def bid(self, value, context, estimated_CTR): 48 | # Compute the bid as expected value 49 | bid = value * estimated_CTR 50 | # Sample the shade factor gamma 51 | gamma = self.rng.normal(self.prev_gamma, self.gamma_sigma) 52 | if gamma < 0.0: 53 | gamma = 0.0 54 | if gamma > 1.0: 55 | gamma = 1.0 56 | bid *= gamma 57 | self.gammas.append(gamma) 58 | return bid 59 | 60 | def update(self, contexts, values, bids, prices, outcomes, estimated_CTRs, won_mask, iteration, plot, figsize, fontsize, name): 61 | # Compute net utility 62 | utilities = np.zeros_like(values) 63 | utilities[won_mask] = (values[won_mask] * outcomes[won_mask]) - prices[won_mask] 64 | 65 | # Extract shading factors to numpy 66 | gammas = np.array(self.gammas) 67 | 68 | if plot: 69 | _,_=plt.subplots(figsize=figsize) 70 | plt.title('Raw observations',fontsize=fontsize+2) 71 | plt.scatter(gammas,utilities, alpha=.25) 72 | plt.xlabel(r'Shading factor ($\gamma$)',fontsize=fontsize) 73 | plt.ylabel('Net Utility',fontsize=fontsize) 74 | plt.xticks(fontsize=fontsize-2) 75 | plt.yticks(fontsize=fontsize-2) 76 | # plt.show() 77 | 78 | # We want to be able to estimate utility for any other continuous value, but this is a hassle in continuous space. 
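# (One could instead fit a smoother over gamma, e.g. the GaussianProcessRegressor imported above -- a heavier, hypothetical alternative to what follows.)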
79 | # Instead -- we'll bucketise and look at the empirical utility distribution within every bucket 80 | min_gamma, max_gamma = np.min(gammas), np.max(gammas) 81 | grid_delta = .005 82 | num_buckets = int((max_gamma-min_gamma) // grid_delta) + 1 83 | buckets = np.linspace(min_gamma, max_gamma, num_buckets) 84 | x = [] 85 | estimated_y_mean = [] 86 | estimated_y_stderr = [] 87 | bucket_lo = buckets[0] 88 | for idx, bucket_hi in enumerate(buckets[1:]): 89 | # Mean of the bucket 90 | x.append((bucket_hi-bucket_lo)/2.0 + bucket_lo) 91 | # Only look at samples within this range 92 | mask = np.logical_and(gammas < bucket_hi, bucket_lo <= gammas) 93 | # If we can draw meaningful inferences 94 | num_samples = len(utilities[mask]) 95 | if num_samples > 1: 96 | # Extrapolate mean utility from these samples 97 | bucket_utility = utilities[mask].mean() 98 | estimated_y_mean.append(bucket_utility) 99 | # Compute standard error on utility estimate 100 | estimated_y_stderr.append(np.std(utilities[mask]) / np.sqrt(num_samples)) 101 | else: 102 | estimated_y_mean.append(np.nan) 103 | estimated_y_stderr.append(np.nan) 104 | # Move sliding window for bucket 105 | bucket_lo = bucket_hi 106 | # To NumPy format 107 | x = np.asarray(x) 108 | estimated_y_mean = np.asarray(estimated_y_mean) 109 | estimated_y_stderr = np.asarray(estimated_y_stderr) 110 | 111 | # This is relatively high because we underestimate total variance 112 | # (1) Variance from click ~ Bernoulli(p) 113 | # (2) Variance from uncertainty about winning the auction 114 | critical_value = 1.96 115 | U_lower_bound = estimated_y_mean - critical_value * estimated_y_stderr 116 | 117 | # Move the mean of the policy towards the empirically best value 118 | # Search the array in reverse so we take the highest value in case of ties 119 | best_idx = len(x) - np.nanargmax(U_lower_bound[::-1]) - 1 120 | best_gamma = x[best_idx] 121 | if best_gamma < 0: 122 | best_gamma = 0 123 | if best_gamma > 1.0: 124 | best_gamma = 1.0 125 | self.prev_gamma = best_gamma 126 | 127 | if plot: 128 | fig, axes = plt.subplots(figsize=figsize) 129 | plt.suptitle(name, fontsize=fontsize+2) 130 | plt.title(f'Iteration: {iteration}', fontsize=fontsize) 131 | plt.plot(x, estimated_y_mean, label='Estimate', ls='--', color='red') 132 | plt.fill_between(x, 133 | estimated_y_mean - critical_value * estimated_y_stderr, 134 | estimated_y_mean + critical_value * estimated_y_stderr, 135 | alpha=.25, 136 | color='red', 137 | label='C.I.') 138 | plt.axvline(best_gamma, ls='--', color='gray', label='Best') 139 | plt.axhline(0, ls='-.', color='gray') 140 | plt.xlabel(r'Multiplicative Bid Shading Factor ($\gamma$)', fontsize=fontsize) 141 | plt.ylabel('Estimated Net Utility', fontsize=fontsize) 142 | plt.ylim(-1.0, 2.0) 143 | plt.xticks(fontsize=fontsize-2) 144 | plt.yticks(fontsize=fontsize-2) 145 | plt.legend(fontsize=fontsize) 146 | plt.tight_layout() 147 | #plt.show() 148 | 149 | def clear_logs(self, memory): 150 | if not memory: 151 | self.gammas = [] 152 | else: 153 | self.gammas = self.gammas[-memory:] 154 | 155 | 156 | class ValueLearningBidder(Bidder): 157 | """ A bidder that estimates the optimal bid shading distribution via value learning """ 158 | 159 | def __init__(self, rng, gamma_sigma, init_gamma=1.0, inference='search'): 160 | self.gamma_sigma = gamma_sigma 161 | self.prev_gamma = init_gamma 162 | assert inference in ['search', 'policy'] 163 | self.inference = inference 164 | self.gammas = [] 165 | self.propensities = [] 166 | self.winrate_model = PyTorchWinRateEstimator() 167 
| self.bidding_policy = BidShadingPolicy() if inference == 'policy' else None 168 | self.model_initialised = False 169 | super(ValueLearningBidder, self).__init__(rng) 170 | 171 | def bid(self, value, context, estimated_CTR): 172 | # Compute the bid as expected value 173 | bid = value * estimated_CTR 174 | if not self.model_initialised: 175 | # Option 1: 176 | # Sample the bid shading factor 'gamma' from a Gaussian 177 | gamma = self.rng.normal(self.prev_gamma, self.gamma_sigma) 178 | normal_pdf = lambda g: np.exp(-((self.prev_gamma - g) / self.gamma_sigma)**2/2) / (self.gamma_sigma * np.sqrt(2 * np.pi)) 179 | propensity = normal_pdf(gamma) 180 | elif self.inference == 'search': 181 | # Option 2: 182 | # Predict P(win|gamma,value,P(click)) 183 | # Use it to predict utility, maximise utility 184 | n_values_search = 128 185 | gamma_grid = self.rng.uniform(0.1, 1.0, size=n_values_search) 186 | gamma_grid.sort() 187 | x = torch.Tensor(np.hstack((np.tile(estimated_CTR, (n_values_search, 1)), np.tile(value, (n_values_search, 1)), gamma_grid.reshape(-1,1)))) 188 | 189 | prob_win = self.winrate_model(x).detach().numpy().ravel() 190 | 191 | # U = W (V - P) 192 | expected_value = bid 193 | shaded_bids = expected_value * gamma_grid 194 | estimated_utility = prob_win * (expected_value - shaded_bids) 195 | gamma = gamma_grid[np.argmax(estimated_utility)] 196 | propensity = 1.0 197 | 198 | elif self.inference == 'policy': 199 | # Option 3: sample from the learnt policy instead of brute force searching 200 | x = torch.Tensor([estimated_CTR, value]) 201 | with torch.no_grad(): 202 | gamma, propensity = self.bidding_policy(x) 203 | gamma = gamma.detach().item() 204 | 205 | bid *= gamma 206 | self.gammas.append(gamma) 207 | self.propensities.append(propensity) 208 | return bid 209 | 210 | def update(self, contexts, values, bids, prices, outcomes, estimated_CTRs, won_mask, iteration, plot, figsize, fontsize, name): 211 | # FALLBACK: if you lost every auction you participated in, your model collapsed 212 | # Revert to not shading for 1 round, to collect data with informational value 213 | if not won_mask.astype(np.uint8).sum(): 214 | self.model_initialised = False 215 | print(f'!
Fallback for {name}') 216 | return 217 | 218 | # Compute net utility 219 | utilities = np.zeros_like(values) 220 | utilities[won_mask] = (values[won_mask] * outcomes[won_mask]) - prices[won_mask] 221 | utilities = torch.Tensor(utilities) 222 | 223 | # Augment data with samples: if you shade 100%, you will lose 224 | # If you won now, you would have also won if you bid higher 225 | X = np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1), np.array(self.gammas).reshape(-1, 1))) 226 | 227 | X_aug_neg = X.copy() 228 | X_aug_neg[:, -1] = 0.0 229 | 230 | X_aug_pos = X[won_mask].copy() 231 | X_aug_pos[:, -1] = np.maximum(X_aug_pos[:, -1], 1.0) 232 | 233 | X = torch.Tensor(np.vstack((X, X_aug_neg))) 234 | 235 | y = won_mask.astype(np.uint8).reshape(-1,1) 236 | y = torch.Tensor(np.concatenate((y, np.zeros_like(y)))) 237 | 238 | # Fit the model 239 | self.winrate_model.train() 240 | epochs = 8192 * 4 241 | lr = 3e-3 242 | optimizer = torch.optim.Adam(self.winrate_model.parameters(), lr=lr, weight_decay=1e-6, amsgrad=True) 243 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=100, min_lr=1e-7, factor=0.1, verbose=True) 244 | criterion = torch.nn.BCELoss() 245 | losses = [] 246 | best_epoch, best_loss = -1, np.inf 247 | for epoch in tqdm(range(int(epochs)), desc=f'{name}'): 248 | optimizer.zero_grad() 249 | pred_y = self.winrate_model(X) 250 | loss = criterion(pred_y, y) 251 | loss.backward() 252 | optimizer.step() 253 | losses.append(loss.item()) 254 | scheduler.step(loss) 255 | if (best_loss - losses[-1]) > 1e-6: 256 | best_epoch = epoch 257 | best_loss = losses[-1] 258 | elif epoch - best_epoch > 512: 259 | print(f'Stopping at Epoch {epoch}') 260 | break 261 | 262 | losses = np.array(losses) 263 | 264 | self.winrate_model.eval() 265 | fig, ax = plt.subplots() 266 | plt.title(f'{name}') 267 | plt.plot(losses, label=r'P(win|$gamma$,x)') 268 | plt.ylabel('Loss') 269 | plt.legend() 270 | fig.set_tight_layout(True) 271 | # plt.show() 272 | 273 | # Predict Utility -- \hat{u} 274 | orig_features = torch.Tensor(np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1), np.array(self.gammas).reshape(-1, 1)))) 275 | W = self.winrate_model(orig_features).squeeze().detach().numpy() 276 | print('AUC predicting P(win):\t\t\t\t', roc_auc_score(won_mask.astype(np.uint8), W)) 277 | 278 | if self.inference == 'policy': 279 | # Learn a policy to maximise E[U | bid] where bid ~ policy 280 | X = torch.Tensor(np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1)))) 281 | 282 | self.bidding_policy.train() 283 | epochs = 8192 * 2 284 | lr = 2e-3 285 | optimizer = torch.optim.Adam(self.bidding_policy.parameters(), lr=lr, weight_decay=1e-6, amsgrad=True) 286 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=100, min_lr=1e-7, factor=0.1, verbose=True) 287 | losses = [] 288 | best_epoch, best_loss = -1, np.inf 289 | for epoch in tqdm(range(int(epochs)), desc=f'{name}'): 290 | optimizer.zero_grad() 291 | # Sample bid shading values 292 | sampled_gamma, propensities = self.bidding_policy(X) 293 | 294 | # Add them to input for win probability model 295 | X_with_gamma = torch.hstack((X, sampled_gamma)) 296 | 297 | # Estimate utility for these sampled bid shading values 298 | prob_win = self.winrate_model(X_with_gamma).squeeze() 299 | values = X_with_gamma[:, 0].squeeze() * X_with_gamma[:, 1].squeeze() 300 | prices = values * sampled_gamma.squeeze() 301 | 302 | estimated_utility = -(prob_win * (values - prices)).mean() 303 | 
estimated_utility.backward() 304 | optimizer.step() 305 | 306 | losses.append(estimated_utility.item()) 307 | scheduler.step(estimated_utility) 308 | if (best_loss - losses[-1]) > 1e-6: 309 | best_epoch = epoch 310 | best_loss = losses[-1] 311 | elif epoch - best_epoch > 256: 312 | print(f'Stopping at Epoch {epoch}') 313 | break 314 | 315 | losses = np.array(losses) 316 | self.bidding_policy.eval() 317 | fig, ax = plt.subplots() 318 | plt.title(f'{name}') 319 | plt.plot(losses, label=r'$\pi(\gamma)$') 320 | plt.ylabel('- Estimated Expected Utility') 321 | plt.legend() 322 | fig.set_tight_layout(True) 323 | #plt.show() 324 | 325 | self.model_initialised = True 326 | 327 | def clear_logs(self, memory): 328 | if not memory: 329 | self.gammas = [] 330 | self.propensities = [] 331 | else: 332 | self.gammas = self.gammas[-memory:] 333 | self.propensities = self.propensities[-memory:] 334 | 335 | 336 | class PolicyLearningBidder(Bidder): 337 | """ A bidder that estimates the optimal bid shading distribution via policy learning """ 338 | 339 | def __init__(self, rng, gamma_sigma, loss, init_gamma=1.0): 340 | self.gamma_sigma = gamma_sigma 341 | self.prev_gamma = init_gamma 342 | self.gammas = [] 343 | self.propensities = [] 344 | self.model = BidShadingContextualBandit(loss) 345 | self.model_initialised = False 346 | super(PolicyLearningBidder, self).__init__(rng) 347 | 348 | def bid(self, value, context, estimated_CTR): 349 | # Compute the bid as expected value 350 | bid = value * estimated_CTR 351 | if not self.model_initialised: 352 | # Option 1: 353 | # Sample the bid shading factor 'gamma' from a Gaussian 354 | gamma = self.rng.normal(self.prev_gamma, self.gamma_sigma) 355 | normal_pdf = lambda g: np.exp(-((self.prev_gamma - g) / self.gamma_sigma)**2/2) / (self.gamma_sigma * np.sqrt(2 * np.pi)) 356 | propensity = normal_pdf(gamma) 357 | else: 358 | # Option 2: 359 | # Sample from the contextual bandit 360 | x = torch.Tensor([estimated_CTR, value]) 361 | gamma, propensity = self.model(x) 362 | gamma = torch.clip(gamma, 0.0, 1.0) 363 | 364 | bid *= gamma.detach().item() if self.model_initialised else gamma 365 | self.gammas.append(gamma) 366 | self.propensities.append(propensity) 367 | return bid 368 | 369 | def update(self, contexts, values, bids, prices, outcomes, estimated_CTRs, won_mask, iteration, plot, figsize, fontsize, name): 370 | # Compute net utility 371 | utilities = np.zeros_like(values) 372 | utilities[won_mask] = (values[won_mask] * outcomes[won_mask]) - prices[won_mask] 373 | utilities = torch.Tensor(utilities) 374 | 375 | # Extract shading factors to torch 376 | gammas = torch.Tensor(self.gammas) 377 | 378 | # Prepare features 379 | X = torch.Tensor(np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1)))) 380 | 381 | if not self.model_initialised: 382 | self.model.initialise_policy(X, gammas) 383 | 384 | # Ensure we don't have propensities that are rounded to zero 385 | propensities = torch.clip(torch.Tensor(self.propensities), min=1e-15) 386 | 387 | # Fit the model 388 | self.model.train() 389 | epochs = 8192 * 2 390 | lr = 2e-3 391 | optimizer = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-4, amsgrad=True) 392 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=100, min_lr=1e-8, factor=0.2, verbose=True) 393 | 394 | losses = [] 395 | best_epoch, best_loss = -1, np.inf 396 | for epoch in tqdm(range(int(epochs)), desc=f'{name}'): 397 | optimizer.zero_grad() # Setting our stored gradients equal to zero 398 | loss = 
self.model.loss(X, gammas, propensities, utilities, importance_weight_clipping_eps=50.0) 399 | loss.backward() # Computes the gradient of the given tensor w.r.t. the weights/bias 400 | optimizer.step() # Updates weights and biases with the optimizer (Adam) 401 | losses.append(loss.item()) 402 | scheduler.step(loss) 403 | 404 | if (best_loss - losses[-1]) > 1e-6: 405 | best_epoch = epoch 406 | best_loss = losses[-1] 407 | elif epoch - best_epoch > 512: 408 | print(f'Stopping at Epoch {epoch}') 409 | break 410 | 411 | losses = np.array(losses) 412 | if np.isnan(losses).any(): 413 | print('NAN DETECTED! in losses') 414 | print(list(losses)) 415 | print(np.isnan(X.detach().numpy()).any(), X) 416 | print(np.isnan(gammas.detach().numpy()).any(), gammas) 417 | print(np.isnan(propensities.detach().numpy()).any(), propensities) 418 | print(np.isnan(utilities.detach().numpy()).any(), utilities) 419 | exit(1) 420 | 421 | self.model.eval() 422 | expected_utility = -self.model.loss(X, gammas, propensities, utilities, KL_weight=0.0).detach().numpy() 423 | print('Expected utility:', expected_utility) 424 | 425 | pred_gammas, _ = self.model(X) 426 | pred_gammas = pred_gammas.detach().numpy() 427 | print(name, 'Number of samples: ', X.shape) 428 | print(name, 'Predicted Gammas: ', pred_gammas.min(), pred_gammas.max(), pred_gammas.mean()) 429 | 430 | self.model_initialised = True 431 | self.model.model_initialised = True 432 | 433 | def clear_logs(self, memory): 434 | if not memory: 435 | self.gammas = [] 436 | self.propensities = [] 437 | else: 438 | self.gammas = self.gammas[-memory:] 439 | self.propensities = self.propensities[-memory:] 440 | 441 | 442 | class DoublyRobustBidder(Bidder): 443 | """ A bidder that estimates the optimal bid shading distribution with a Doubly Robust Estimator """ 444 | 445 | def __init__(self, rng, gamma_sigma, init_gamma=1.0): 446 | self.gamma_sigma = gamma_sigma 447 | self.prev_gamma = init_gamma 448 | self.gammas = [] 449 | self.propensities = [] 450 | self.winrate_model = PyTorchWinRateEstimator() 451 | self.bidding_policy = BidShadingContextualBandit(loss='Doubly Robust', winrate_model=self.winrate_model) 452 | self.model_initialised = False 453 | super(DoublyRobustBidder, self).__init__(rng) 454 | 455 | def bid(self, value, context, estimated_CTR): 456 | # Compute the bid as expected value 457 | bid = value * estimated_CTR 458 | if not self.model_initialised: 459 | # Option 1: 460 | # Sample the bid shading factor 'gamma' from a Gaussian 461 | gamma = self.rng.normal(self.prev_gamma, self.gamma_sigma) 462 | normal_pdf = lambda g: np.exp(-((self.prev_gamma - g) / self.gamma_sigma)**2/2) / (self.gamma_sigma * np.sqrt(2 * np.pi)) 463 | propensity = normal_pdf(gamma) 464 | else: 465 | # Option 2: 466 | # Sample from the contextual bandit 467 | x = torch.Tensor([estimated_CTR, value]) 468 | with torch.no_grad(): 469 | gamma, propensity = self.bidding_policy(x) 470 | gamma = torch.clip(gamma, 0.0, 1.0) 471 | 472 | bid *= gamma.detach().item() if self.model_initialised else gamma 473 | self.gammas.append(gamma) 474 | self.propensities.append(propensity) 475 | return bid 476 | 477 | def update(self, contexts, values, bids, prices, outcomes, estimated_CTRs, won_mask, iteration, plot, figsize, fontsize, name): 478 | # Compute net utility 479 | utilities = np.zeros_like(values) 480 | utilities[won_mask] = (values[won_mask] * outcomes[won_mask]) - prices[won_mask] 481 | # utilities = torch.Tensor(utilities) 482 | 483 | ############################## 484 | # 1.
TRAIN UTILITY ESTIMATOR # 485 | ############################## 486 | gammas_numpy = np.array([g.detach().item() if self.model_initialised else g for g in self.gammas]) 487 | if self.model_initialised: 488 | # Predict Utility -- \hat{u} 489 | orig_features = torch.Tensor(np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1), gammas_numpy.reshape(-1, 1)))) 490 | W = self.winrate_model(orig_features).squeeze().detach().numpy() 491 | print('AUC predicting P(win):\t\t\t\t', roc_auc_score(won_mask.astype(np.uint8), W)) 492 | 493 | V = estimated_CTRs * values 494 | P = estimated_CTRs * values * gammas_numpy 495 | estimated_utilities = W * (V - P) 496 | 497 | errors = estimated_utilities - utilities 498 | print('Estimated Utility\t Mean Error:\t\t\t', errors.mean()) 499 | print('Estimated Utility\t Mean Absolute Error:\t', np.abs(errors).mean()) 500 | 501 | # Augment data with samples: if you shade 100%, you will lose 502 | # If you won now, you would have also won if you bid higher 503 | X = np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1), gammas_numpy.reshape(-1, 1))) 504 | 505 | X_aug_neg = X.copy() 506 | X_aug_neg[:, -1] = 0.0 507 | 508 | X_aug_pos = X[won_mask].copy() 509 | X_aug_pos[:, -1] = np.maximum(X_aug_pos[:, -1], 1.0) 510 | 511 | X = torch.Tensor(np.vstack((X, X_aug_neg))) 512 | 513 | y = won_mask.astype(np.uint8).reshape(-1,1) 514 | y = torch.Tensor(np.concatenate((y, np.zeros_like(y)))) 515 | 516 | # Fit the model 517 | self.winrate_model.train() 518 | epochs = 8192 * 4 519 | lr = 3e-3 520 | optimizer = torch.optim.Adam(self.winrate_model.parameters(), lr=lr, weight_decay=1e-6, amsgrad=True) 521 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=256, min_lr=1e-7, factor=0.2, verbose=True) 522 | criterion = torch.nn.BCELoss() 523 | losses = [] 524 | best_epoch, best_loss = -1, np.inf 525 | for epoch in tqdm(range(int(epochs)), desc=f'{name}'): 526 | optimizer.zero_grad() 527 | pred_y = self.winrate_model(X) 528 | loss = criterion(pred_y, y) 529 | loss.backward() 530 | optimizer.step() 531 | losses.append(loss.item()) 532 | scheduler.step(loss) 533 | if (best_loss - losses[-1]) > 1e-6: 534 | best_epoch = epoch 535 | best_loss = losses[-1] 536 | elif epoch - best_epoch > 1024: 537 | print(f'Stopping at Epoch {epoch}') 538 | break 539 | 540 | losses = np.array(losses) 541 | 542 | self.winrate_model.eval() 543 | 544 | # Predict Utility -- \hat{u} 545 | orig_features = torch.Tensor(np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1), gammas_numpy.reshape(-1, 1)))) 546 | W = self.winrate_model(orig_features).squeeze().detach().numpy() 547 | print('AUC predicting P(win):\t\t\t\t', roc_auc_score(won_mask.astype(np.uint8), W)) 548 | 549 | V = estimated_CTRs * values 550 | P = estimated_CTRs * values * gammas_numpy 551 | estimated_utilities = W * (V - P) 552 | 553 | errors = estimated_utilities - utilities 554 | print('Estimated Utility\t Mean Error:\t\t\t', errors.mean()) 555 | print('Estimated Utility\t Mean Absolute Error:\t', np.abs(errors).mean()) 556 | 557 | ############################## 558 | # 2. 
TRAIN DOUBLY ROBUST POLICY # 559 | ############################## 560 | utilities = torch.Tensor(utilities) 561 | estimated_utilities = torch.Tensor(estimated_utilities) 562 | gammas = torch.Tensor(self.gammas) 563 | 564 | # Prepare features 565 | X = torch.Tensor(np.hstack((estimated_CTRs.reshape(-1,1), values.reshape(-1,1)))) 566 | 567 | if not self.model_initialised: 568 | self.bidding_policy.initialise_policy(X, gammas) 569 | 570 | # Ensure we don't have propensities that are rounded to zero 571 | propensities = torch.clip(torch.Tensor(self.propensities), min=1e-15) 572 | 573 | # Fit the model 574 | self.bidding_policy.train() 575 | epochs = 8192 * 4 576 | lr = 7e-3 577 | optimizer = torch.optim.Adam(self.bidding_policy.parameters(), lr=lr, weight_decay=1e-4, amsgrad=True) 578 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=100, min_lr=1e-8, factor=0.2, threshold=5e-3, verbose=True) 579 | 580 | losses = [] 581 | best_epoch, best_loss = -1, np.inf 582 | for epoch in tqdm(range(int(epochs)), desc=f'{name}'): 583 | optimizer.zero_grad() # Setting our stored gradients equal to zero 584 | loss = self.bidding_policy.loss(X, gammas, propensities, utilities, utility_estimates=estimated_utilities, winrate_model=self.winrate_model, importance_weight_clipping_eps=50.0) 585 | loss.backward() # Computes the gradient of the given tensor w.r.t. the weights/bias 586 | optimizer.step() # Updates weights and biases with the optimizer (Adam) 587 | losses.append(loss.item()) 588 | scheduler.step(loss) 589 | 590 | if (best_loss - losses[-1]) > 1e-6: 591 | best_epoch = epoch 592 | best_loss = losses[-1] 593 | elif epoch - best_epoch > 512: 594 | print(f'Stopping at Epoch {epoch}') 595 | break 596 | 597 | losses = np.array(losses) 598 | if np.isnan(losses).any(): 599 | print('NAN DETECTED! in losses') 600 | print(list(losses)) 601 | print(np.isnan(X.detach().numpy()).any(), X) 602 | print(np.isnan(gammas.detach().numpy()).any(), gammas) 603 | print(np.isnan(propensities.detach().numpy()).any(), propensities) 604 | print(np.isnan(utilities.detach().numpy()).any(), utilities) 605 | exit(1) 606 | 607 | self.bidding_policy.eval() 608 | 609 | pred_gammas, _ = self.bidding_policy(X) 610 | pred_gammas = pred_gammas.detach().numpy() 611 | print(name, 'Number of samples: ', X.shape) 612 | print(name, 'Predicted Gammas: ', pred_gammas.min(), pred_gammas.max(), pred_gammas.mean()) 613 | 614 | self.model_initialised = True 615 | self.bidding_policy.model_initialised = True 616 | 617 | def clear_logs(self, memory): 618 | if not memory: 619 | self.gammas = [] 620 | self.propensities = [] 621 | else: 622 | self.gammas = self.gammas[-memory:] 623 | self.propensities = self.propensities[-memory:] 624 | -------------------------------------------------------------------------------- /src/Getting Started with AuctionGym (2. Effects of Bid Shading).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "090120de", 6 | "metadata": {}, 7 | "source": [ 8 | "# Getting Started with AuctionGym\n", 9 | "## Effects of Bid Shading in First-Price Auctions" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "f321a488", 15 | "metadata": {}, 16 | "source": [ 17 | "AuctionGym has several configuration options that detail the type of auction, how bidders behave, and how confounded the contextual information is that bidders observe. 
To avoid clutter, we define these in configuration files.\n", 18 | "For the purposes of this introductory notebook, we will ignore some aspects such as repeated runs.\n", 19 | "\n", 20 | "We parse an existing file, detailing a first-price auction with oracle bidders.\n", 21 | "We will look into the effects of bid shading on the auctioneer's revenue, social welfare and surplus.\n", 22 | "For this, we repeat simulated experiments with the same set of bidders, changing their bidding strategy." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "id": "ca8aeae4", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import matplotlib.pyplot as plt\n", 33 | "import numpy as np\n", 34 | "from collections import defaultdict\n", 35 | "from main import parse_config, instantiate_agents, instantiate_auction\n", 36 | "from tqdm.notebook import tqdm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "id": "af5fa209", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Parse configuration file\n", 47 | "rng, config, agent_configs, agents2items, agents2item_values,\\\n", 48 | "num_runs, max_slots, embedding_size, embedding_var,\\\n", 49 | "obs_embedding_size = parse_config('../config/FP_DM_Oracle.json')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "id": "e105767c", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def run_repeated_auctions():\n", 60 | " # Placeholders for output\n", 61 | " auction_revenue = []\n", 62 | " social_welfare = []\n", 63 | " social_surplus = []\n", 64 | " \n", 65 | " # Instantiate Agent and Auction objects\n", 66 | " agents = instantiate_agents(rng, agent_configs, agents2item_values, agents2items)\n", 67 | "\n", 68 | " # Instantiate Auction object\n", 69 | " auction, num_iter, rounds_per_iter, output_dir =\\\n", 70 | " instantiate_auction(rng,\n", 71 | " config,\n", 72 | " agents2items,\n", 73 | " agents2item_values,\n", 74 | " agents,\n", 75 | " max_slots,\n", 76 | " embedding_size,\n", 77 | " embedding_var,\n", 78 | " obs_embedding_size)\n", 79 | "\n", 80 | " # Run repeated auctions\n", 81 | " # This logic is encoded in the `simulation_run()` method in main.py\n", 82 | " for i in tqdm(range(num_iter)):\n", 83 | "\n", 84 | " # Simulate impression opportunities\n", 85 | " for _ in range(rounds_per_iter):\n", 86 | " auction.simulate_opportunity()\n", 87 | "\n", 88 | " # Log 'Gross utility' or welfare\n", 89 | " social_welfare.append(sum([agent.gross_utility for agent in auction.agents]))\n", 90 | "\n", 91 | " # Log 'Net utility' or surplus\n", 92 | " social_surplus.append(sum([agent.net_utility for agent in auction.agents]))\n", 93 | " \n", 94 | " # Update agents (does nothing in this example, as we have truthful oracles)\n", 95 | " # Clear running metrics\n", 96 | " for agent_id, agent in enumerate(auction.agents):\n", 97 | " agent.update(iteration=i)\n", 98 | " agent.clear_utility()\n", 99 | " agent.clear_logs()\n", 100 | "\n", 101 | " # Log revenue\n", 102 | " auction_revenue.append(auction.revenue)\n", 103 | " auction.clear_revenue()\n", 104 | " \n", 105 | " # Rescale metrics per auction round\n", 106 | " auction_revenue = np.array(auction_revenue) / rounds_per_iter\n", 107 | " social_welfare = np.array(social_welfare) / rounds_per_iter\n", 108 | " social_surplus = np.array(social_surplus) / rounds_per_iter\n", 109 | " \n", 110 | " return auction_revenue, social_welfare, social_surplus" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115
| "execution_count": null, 116 | "id": "624db1ca", 117 | "metadata": { 118 | "scrolled": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "shaded_revenue, shaded_welfare, shaded_surplus = run_repeated_auctions()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "id": "df4d59a7", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "application/vnd.jupyter.widget-view+json": { 134 | "model_id": "efb87d22675f4a4c915ddf12a85de156", 135 | "version_major": 2, 136 | "version_minor": 0 137 | }, 138 | "text/plain": [ 139 | " 0%| | 0/20 [00:00" 170 | ] 171 | }, 172 | "metadata": { 173 | "needs_background": "light" 174 | }, 175 | "output_type": "display_data" 176 | } 177 | ], 178 | "source": [ 179 | "fontsize=16\n", 180 | "fig, axes = plt.subplots(1, 3, sharey='row', figsize=(15,4))\n", 181 | "\n", 182 | "axes[0].plot(truthful_welfare, label=f'Truthful')\n", 183 | "axes[1].plot(truthful_surplus, label=f'Truthful')\n", 184 | "axes[2].plot(truthful_revenue, label=f'Truthful')\n", 185 | "\n", 186 | "axes[0].plot(shaded_welfare, label=f'Shaded')\n", 187 | "axes[1].plot(shaded_surplus, label=f'Shaded')\n", 188 | "axes[2].plot(shaded_revenue, label=f'Shaded')\n", 189 | "\n", 190 | "axes[1].set_title('Effects of Bid-Shading in First-Price Auctions with Oracle bidders', fontsize=fontsize+4)\n", 191 | "\n", 192 | "for i in range(3):\n", 193 | " axes[i].set_xlabel('Iterations', fontsize=fontsize)\n", 194 | " axes[i].set_xticks(list(range(0,len(truthful_revenue),2)))\n", 195 | " axes[i].grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)\n", 196 | " \n", 197 | "axes[0].set_ylabel('Social Welfare', fontsize=fontsize)\n", 198 | "axes[1].set_ylabel('Social Surplus', fontsize=fontsize)\n", 199 | "axes[2].set_ylabel('Auction Revenue', fontsize=fontsize)\n", 200 | "\n", 201 | "legend = axes[2].legend(loc='upper left',\n", 202 | " bbox_to_anchor=(1.0, 1.0),\n", 203 | " fontsize=fontsize)\n", 204 | "legend.set_title('Strategy', prop={'size': fontsize})\n", 205 | "fig.tight_layout()\n", 206 | "\n", 207 | "plt.show()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "id": "faeff496", 213 | "metadata": {}, 214 | "source": [ 215 | "The leftmost plot shows social welfare (the sum of generated welfare over all participants in the auction).\n", 216 | "Whether participants shade their bids (or not) does not have affect welfare: indeed, the value that is generated by the ads that are shown remains unchanged.\n", 217 | "\n", 218 | "The middle plot shows social surplus.\n", 219 | "Bidding truthfully in a first-price auction gives an expected surplus of 0.\n", 220 | "As bidders start to shade their bids, we can see that bidders succesfully generate surplus.\n", 221 | "\n", 222 | "The rightmost plot shows revenue for the auctioneer.\n", 223 | "When bidders bid truthfully, the monetary value for all generated welfare goes to the auctioneer.\n", 224 | "As bidders start to shade their bids, we can see that revenue diminishes as surplus increases." 
225 | ] 226 | } 227 | ], 228 | "metadata": { 229 | "kernelspec": { 230 | "display_name": "Python 3 (ipykernel)", 231 | "language": "python", 232 | "name": "python3" 233 | }, 234 | "language_info": { 235 | "codemirror_mode": { 236 | "name": "ipython", 237 | "version": 3 238 | }, 239 | "file_extension": ".py", 240 | "mimetype": "text/x-python", 241 | "name": "python", 242 | "nbconvert_exporter": "python", 243 | "pygments_lexer": "ipython3", 244 | "version": "3.9.7" 245 | } 246 | }, 247 | "nbformat": 4, 248 | "nbformat_minor": 5 249 | } 250 | -------------------------------------------------------------------------------- /src/Getting Started with AuctionGym (1. Effects of Competition).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "47a55e28", 6 | "metadata": {}, 7 | "source": [ 8 | "# Getting Started with AuctionGym\n", 9 | "## Effects of Competition in Second-Price Auctions" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "id": "c93512ce", 15 | "metadata": {}, 16 | "source": [ 17 | "AuctionGym has several configuration options that detail the type of auction, how bidders behave, and how confounded the contextual information is that bidders observe. To avoid clutter, we define these in configuration files.\n", 18 | "For the purposes of this introductory notebook, we will ignore some aspects such as repeated runs.\n", 19 | "\n", 20 | "We parse an existing file, detailing a second-price auction with truthful oracle bidders.\n", 21 | "We will look into the effects of increased competition on the auctioneer's revenue, social welfare and surplus.\n", 22 | "For this, we repeat simulated experiments with a single changed parameter over runs: 'num_participants_per_round'." 
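,
    "\n",
    "\n",
    "For intuition on why competition matters (a hypothetical, self-contained sketch, not how AuctionGym simulates auctions): revenue per second-price auction is the second-highest bid, whose expectation grows with the number of bidders:\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "rng = np.random.default_rng(0)\n",
    "for n in (2, 5, 10):                         # participants per round\n",
    "    bids = rng.random((100_000, n))          # i.i.d. Uniform(0, 1) stand-in for truthful bids\n",
    "    second_price = np.sort(bids, axis=1)[:, -2]\n",
    "    print(n, round(second_price.mean(), 3))  # expected revenue: ~0.333, ~0.667, ~0.818\n",
    "```"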
23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "id": "b84ac0a9", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import matplotlib.pyplot as plt\n", 33 | "import numpy as np\n", 34 | "from collections import defaultdict\n", 35 | "from main import parse_config, instantiate_agents, instantiate_auction\n", 36 | "from tqdm.notebook import tqdm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "id": "a9b5f14c", 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Parse configuration file\n", 47 | "rng, config, agent_configs, agents2items, agents2item_values,\\\n", 48 | "num_runs, max_slots, embedding_size, embedding_var,\\\n", 49 | "obs_embedding_size = parse_config('../config/SP_Oracle.json')" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "id": "293153c6", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def run_repeated_auctions(num_participants_per_round):\n", 60 | " # Placeholders for output\n", 61 | " auction_revenue = []\n", 62 | " social_welfare = []\n", 63 | " social_surplus = []\n", 64 | " \n", 65 | " # Set the environmental parameter\n", 66 | " config['num_participants_per_round'] = num_participants_per_round\n", 67 | " \n", 68 | " # Instantiate Agent and Auction objects\n", 69 | " agents = instantiate_agents(rng, agent_configs, agents2item_values, agents2items)\n", 70 | "\n", 71 | " # Instantiate Auction object\n", 72 | " auction, num_iter, rounds_per_iter, output_dir =\\\n", 73 | " instantiate_auction(rng,\n", 74 | " config,\n", 75 | " agents2items,\n", 76 | " agents2item_values,\n", 77 | " agents,\n", 78 | " max_slots,\n", 79 | " embedding_size,\n", 80 | " embedding_var,\n", 81 | " obs_embedding_size)\n", 82 | "\n", 83 | " # Run repeated auctions\n", 84 | " # This logic is encoded in the `simulation_run()` method in main.py\n", 85 | " for i in tqdm(range(num_iter)):\n", 86 | "\n", 87 | " # Simulate impression opportunities\n", 88 | " for _ in range(rounds_per_iter):\n", 89 | " auction.simulate_opportunity()\n", 90 | "\n", 91 | " # Log 'Gross utility' or welfare\n", 92 | " social_welfare.append(sum([agent.gross_utility for agent in auction.agents]))\n", 93 | "\n", 94 | " # Log 'Net utility' or surplus\n", 95 | " social_surplus.append(sum([agent.net_utility for agent in auction.agents]))\n", 96 | " \n", 97 | " # Update agents (does nothing in this example, as we have truthful oracles)\n", 98 | " # Clear running metrics\n", 99 | " for agent_id, agent in enumerate(auction.agents):\n", 100 | " agent.update(iteration=i)\n", 101 | " agent.clear_utility()\n", 102 | " agent.clear_logs()\n", 103 | "\n", 104 | " # Log revenue\n", 105 | " auction_revenue.append(auction.revenue)\n", 106 | " auction.clear_revenue()\n", 107 | " \n", 108 | " # Rescale metrics per auction round\n", 109 | " auction_revenue = np.array(auction_revenue) / rounds_per_iter\n", 110 | " social_welfare = np.array(social_welfare) / rounds_per_iter\n", 111 | " social_surplus = np.array(social_surplus) / rounds_per_iter\n", 112 | " \n", 113 | " return auction_revenue, social_welfare, social_surplus" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 4, 119 | "id": "73518a49", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "application/vnd.jupyter.widget-view+json": { 125 | "model_id": "c9642cc69d684641a2bef8dc5185a586", 126 | "version_major": 2, 127 | "version_minor": 0 128 | }, 129 | "text/plain": [ 130 | " 0%| | 0/20 [00:00" 211 | ] 212 
| }, 213 | "metadata": { 214 | "needs_background": "light" 215 | }, 216 | "output_type": "display_data" 217 | } 218 | ], 219 | "source": [ 220 | "fontsize=16\n", 221 | "fig, axes = plt.subplots(1, 3, sharey='row', figsize=(15,4))\n", 222 | "\n", 223 | "for num_participants_per_round, (revenue, welfare, surplus) in num_participants_2_metrics.items(): \n", 224 | " axes[0].plot(welfare, label=f'{num_participants_per_round}')\n", 225 | " axes[1].plot(surplus, label=f'{num_participants_per_round}')\n", 226 | " axes[2].plot(revenue, label=f'{num_participants_per_round}')\n", 227 | "\n", 228 | "\n", 229 | "axes[1].set_title('Effects of Competition in Second-Price Auctions with Oracle bidders', fontsize=fontsize+4)\n", 230 | "\n", 231 | "for i in range(3):\n", 232 | " axes[i].set_xlabel('Iterations', fontsize=fontsize)\n", 233 | " axes[i].set_xticks(list(range(0,len(revenue),2)))\n", 234 | " axes[i].grid(True, 'major', 'y', ls='--', lw=.5, c='k', alpha=.3)\n", 235 | "\n", 236 | "axes[0].set_ylim(0.0, None)\n", 237 | "\n", 238 | " \n", 239 | "axes[0].set_ylabel('Social Welfare', fontsize=fontsize)\n", 240 | "axes[1].set_ylabel('Social Surplus', fontsize=fontsize)\n", 241 | "axes[2].set_ylabel('Auction Revenue', fontsize=fontsize)\n", 242 | "\n", 243 | "handles, labels = axes[0].get_legend_handles_labels()\n", 244 | "legend = axes[2].legend(reversed(handles),\n", 245 | " reversed(labels),\n", 246 | " loc='upper left',\n", 247 | " bbox_to_anchor=(1.0, 1.0),\n", 248 | " fontsize=fontsize)\n", 249 | "legend.set_title('# Bidders', prop={'size': fontsize})\n", 250 | "fig.tight_layout()\n", 251 | "\n", 252 | "plt.show()" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "id": "4b555591", 258 | "metadata": {}, 259 | "source": [ 260 | "The leftmost plot shows social welfare (the sum of generated welfare over all participants in the auction).\n", 261 | "As we increase the number of participants in every auction round, the probability that the advertiser with the highest value ad is participating increases, and so does expected welfare.\n", 262 | "\n", 263 | "The middle plot shows social surplus.\n", 264 | "As the number of participants in the auction increases, there will be stronger competition, which drives up the second price. As a result, the surplus that bidders attain decreases.\n", 265 | "\n", 266 | "The rightmost plot shows revenue for the auctioneer.\n", 267 | "Because of the combined effect of (1) increasing welfare and (2) decreasing surplus, we see significant increases in auction revenue as the number of participants per auction round increases." 268 | ] 269 | } 270 | ], 271 | "metadata": { 272 | "kernelspec": { 273 | "display_name": "Python 3 (ipykernel)", 274 | "language": "python", 275 | "name": "python3" 276 | }, 277 | "language_info": { 278 | "codemirror_mode": { 279 | "name": "ipython", 280 | "version": 3 281 | }, 282 | "file_extension": ".py", 283 | "mimetype": "text/x-python", 284 | "name": "python", 285 | "nbconvert_exporter": "python", 286 | "pygments_lexer": "ipython3", 287 | "version": "3.9.7" 288 | } 289 | }, 290 | "nbformat": 4, 291 | "nbformat_minor": 5 292 | } 293 | --------------------------------------------------------------------------------