├── .flake8
├── .gitignore
├── .isort.cfg
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── README_FAIR.md
├── exp
    ├── rand_search.sql
    ├── rand_search2.sql
    └── rand_search3.sql
├── requirements.txt
├── requirements_pre_commit.txt
├── rlcompopt
    ├── cl
    │   ├── __init__.py
    │   ├── conf
    │   │   ├── generate.yaml
    │   │   ├── model
    │   │   │   ├── attn.yaml
    │   │   │   ├── attn_bc.yaml
    │   │   │   ├── attn_q_value.yaml
    │   │   │   ├── autophase.yaml
    │   │   │   ├── autophase_bc.yaml
    │   │   │   ├── autophase_q_value.yaml
    │   │   │   ├── gcn.yaml
    │   │   │   ├── gcn_real.yaml
    │   │   │   └── gnn_type2.yaml
    │   │   ├── rl_online
    │   │   │   ├── generate_autophase.yaml
    │   │   │   ├── generate_online.yaml
    │   │   │   ├── train_attn.yaml
    │   │   │   ├── train_autophase.yaml
    │   │   │   ├── train_gcn.yaml
    │   │   │   └── train_gnn_type2.yaml
    │   │   └── train.yaml
    │   ├── data_socket.py
    │   ├── database_socket.sql
    │   ├── dataset.py
    │   ├── dataset_statistics.py
    │   ├── faster_balanced_sampler.py
    │   ├── faster_balanced_sampler_stream.py
    │   ├── generate.py
    │   ├── generate_utils.py
    │   ├── generate_utils_online.py
    │   ├── merge.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── conv.py
    │   │   ├── edge_attn.py
    │   │   ├── gnn_pyg.py
    │   │   ├── graph_encoders.py
    │   │   ├── math_utils.py
    │   │   └── model_utils.py
    │   └── repr_queue.py
    ├── env_wrapper
    │   ├── __init__.py
    │   ├── database_model.sql
    │   ├── database_schema.sql
    │   ├── database_schema2.sql
    │   ├── database_schema4.sql
    │   ├── merge_db.py
    │   ├── parsing_utils.py
    │   ├── pyg_utils.py
    │   ├── wrapper_offline.py
    │   └── wrapper_online.py
    ├── eval_local.py
    ├── misc
    │   └── convert_smaller_coreset.py
    ├── model_testing.py
    ├── pipeline
    │   ├── action_seq_50.txt
    │   └── lib
    │   │   ├── coreset_sorted.txt
    │   │   └── types.py
    ├── train.py
    └── utils.py
├── scripts
    ├── generate_autophase_history_online.sh
    ├── generate_graph_reward_history_online.sh
    ├── rl_ppo_generator_submitit.sh
    ├── rl_ppo_trainer_submitit.sh
    ├── submit_online_train_ppo_action_histogram.py
    ├── submit_ppo_autophase_action_histogram.py
    ├── test.sh
    ├── train_autophase_bc.sh
    ├── train_autophase_history_online.sh
    ├── train_autophase_offline_q_value_rank.sh
    ├── train_autophase_offline_seq_cls.sh
    ├── train_graph_edgeattn_bc.sh
    ├── train_graph_edgeattn_nvp.sh
    ├── train_graph_gcn_nvp.sh
    ├── train_graph_gcn_real_nvp.sh
    ├── train_graph_gnn_type2_nvp.sh
    ├── train_graph_offline_edgeattn_q_value_rank.sh
    └── train_graph_reward_history_online.sh
└── setup.py


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = D100, D101, D102, D103, D104, E203, E402, E501, W503
3 | docstring-convention = numpy
4 | max-line-length = 88
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | opentuner.log
 2 | opentuner.db
 3 | __pycache__
 4 | platform.info
 5 | .benchmarks
 6 | .DS_Store
 7 | .env
 8 | *.pyc
 9 | .ipynb_checkpoints
10 | .pytest_cache
11 | outputs
12 | multirun
13 | testing
14 | scripts_old
15 | /.act
16 | /.clwb
17 | /.vscode
18 | /*.egg-info
19 | /build
20 | /coverage.xml
21 | /dist
22 | /node_modules
23 | /package-lock.json
24 | .idea
25 | data
26 | outputs_rl
27 | log_dir
28 | *.log
29 | *.db
30 | *.pkl
31 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | use_parentheses=True
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | ensure_newline_before_comments=True
6 | line_length=88
7 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | 
 3 | # remove unused python imports
 4 | -   repo: https://github.com/myint/autoflake.git
 5 |     rev: v1.4
 6 |     hooks:
 7 |     -   id: autoflake
 8 |         args: ["--in-place", "--remove-all-unused-imports", "--ignore-init-module-imports"]
 9 | 
10 | # sort imports
11 | -   repo: https://github.com/timothycrosley/isort
12 |     rev: 5.10.1
13 |     hooks:
14 |     -   id: isort
15 | 
16 | # code format according to black
17 | -   repo: https://github.com/ambv/black
18 |     rev: 22.3.0
19 |     hooks:
20 |     -   id: black
21 | 
22 | # # check for python styling with flake8
23 | # -   repo: https://gitlab.com/pycqa/flake8
24 | #     rev: 3.9.2
25 | #     hooks:
26 | #     -   id: flake8
27 | #         additional_dependencies: [
28 | #           'flake8-docstrings',
29 | #           'flake8-bugbear',
30 | #           'flake8-comprehensions',
31 | #           'flake8-simplify',
32 | #         ]
33 | 
34 | # cleanup notebooks
35 | -   repo: https://github.com/kynan/nbstripout
36 |     rev: 0.5.0
37 |     hooks:
38 |     -   id: nbstripout
39 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to make participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | This Code of Conduct also applies outside the project spaces when there is a
56 | reasonable belief that an individual's behavior may have a negative impact on
57 | the project or its community.
58 | 
59 | ## Enforcement
60 | 
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported by contacting the project team at <opensource-conduct@fb.com>. All
63 | complaints will be reviewed and investigated and will result in a response that
64 | is deemed necessary and appropriate to the circumstances. The project team is
65 | obligated to maintain confidentiality with regard to the reporter of an incident.
66 | Further details of specific enforcement policies may be posted separately.
67 | 
68 | Project maintainers who do not follow or enforce the Code of Conduct in good
69 | faith may face temporary or permanent repercussions as determined by other
70 | members of the project's leadership.
71 | 
72 | ## Attribution
73 | 
74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
76 | 
77 | [homepage]: https://www.contributor-covenant.org
78 | 
79 | For answers to common questions about this code of conduct, see
80 | https://www.contributor-covenant.org/faq
81 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to RLCompOpt
 2 | 
 3 | We want to make contributing to this project as easy and transparent as
 4 | possible.
 5 | 
 6 | ## Pull Requests
 7 | 
 8 | We actively welcome your pull requests.
 9 | 
10 | 1. Fork the repo and create your branch from `main`.
11 | 2. If you've added code that should be tested, add tests.
12 | 3. If you've changed APIs, update the documentation.
13 | 4. Ensure the test suite passes.
14 | 5. Make sure your code lints.
15 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
16 | 
17 | ## Contributor License Agreement ("CLA")
18 | In order to accept your pull request, we need you to submit a CLA. You only need
19 | to do this once to work on any of Facebook's open source projects.
20 | 
21 | Complete your CLA here: <https://code.facebook.com/cla>
22 | 
23 | ## Issues
24 | We use GitHub issues to track public bugs. Please ensure your description is
25 | clear and has sufficient instructions to be able to reproduce the issue.
26 | 
27 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
28 | disclosure of security bugs. In those cases, please go through the process
29 | outlined on that page and do not file a public issue.
30 | 
31 | ## License
32 | 
33 | By contributing to RLCompOpt, you agree that your contributions will be licensed
34 | under the LICENSE file in the root directory of this source tree.
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Facebook, Inc. and its affiliates.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | define HELP
 2 | CompilerGym experiments. Available targets:
 3 | 
 4 | 	make init
 5 | 		Install the build and runtime python dependencies. This should be run
 6 | 		once before any other targets.
 7 | 
 8 | 	make install
 9 | 		Install the package itself.
10 | endef
11 | export HELP
12 | 
13 | # Configurable paths to binaries.
14 | PYTHON ?= python3
15 | 
16 | .DEFAULT_GOAL := help
17 | 
18 | help:
19 | 	@echo "$$HELP"
20 | 
21 | init:
22 | 	$(PYTHON) -m pip install -r requirements.txt
23 | 	pre-commit install
24 | 
25 | install:
26 | 	$(PYTHON) setup.py install
27 | 	pre-commit install
28 | 
29 | .PHONY: init install
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | This repo contains experiments to learn to optimize program compilation using RL.
  3 | 
  4 | For people in FAIR (Meta AI), check [README_FAIR.md](README_FAIR.md) to get started.
  5 | 
  6 | ## System requirements
  7 | The codebase was tested on Ubuntu 18.04. To install some possible missing libraries on Ubuntu 18.04, we need to run `sudo apt-get install libtinfo-dev` and `sudo apt-get install m4`. 
  8 | 
  9 | 
 10 | ## Installing compilers
 11 | 
 12 | We use `~/.local/opt` as the installation directory of compilers.
 13 | 
 14 | ```sh
 15 | # Download and unpack a modern clang release.
 16 | mkdir -p ~/.local/opt && cd ~/.local/opt
 17 | wget https://github.com/llvm/llvm-project/releases/download/llvmorg-10.0.0/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz
 18 | tar xf clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz
 19 | ```
 20 | 
 21 | We then need to set some environment variables whenever we build or use
 22 | CompilerGym. The easiest way to do that is to add them to your `~/.bashrc`:
 23 | 
 24 | ```sh
 25 | cat <<EOF >>~/.bashrc
 26 | # === Building CompilerGym ===
 27 | 
 28 | # Set clang as the compiler of choice.
 29 | export CC=$HOME/.local/opt/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang
 30 | export CXX=$HOME/.local/opt/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang++
 31 | export PATH=$HOME/.local/opt/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin:$PATH
 32 | export BAZEL_BUILD_OPTS=--repo_env=CC=$HOME/.local/opt/clang+llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang
 33 | EOF
 34 | ```
 35 | 
 36 | So the environment variables are set every time you logs in, or you can run `source ~/.bashrc` in the current shell to set the environment variables. Run `echo $CC` to verify the environment variables are set. It should output a path of the clang compiler.
 37 | 
 38 | ## Environment setup
 39 | 
 40 | Follow these steps to set up a development environment on Ubuntu 18.04 (or any other Linux
 41 | / macOS machine, with some tweaks). 
 42 | 
 43 | 1. **Setup conda environment:**
 44 | 
 45 | ```sh
 46 | conda create -n rlcompopt python=3.8 cmake pandoc patchelf
 47 | conda activate rlcompopt
 48 | ```
 49 | 
 50 | 2. **Install bazel:** Bazel is used to compile the C++/python package. Here we
 51 |    will use bazelisk to manage our bazel installation and download it to
 52 |    `~/.local/bin`:
 53 | 
 54 | ```sh
 55 | mkdir -p ~/.local/bin
 56 | wget https://github.com/bazelbuild/bazelisk/releases/download/v1.7.5/bazelisk-linux-amd64 -O bazel
 57 | chmod +x bazel && mkdir -p ~/.local/bin && mv -v bazel ~/.local/bin
 58 | export PATH=~/.local/bin:$PATH
 59 | ```
 60 | 
 61 | 3. **Install PyTorch:** The codebase requires 2.0 > PyTorch >= 1.12.1. We can install it following [here](https://pytorch.org/get-started/previous-versions). We recommend using conda to install PyTorch to avoid possible dependencies conflict. You need to find the correct command according to the CUDA version your GPU driver supports (check `nvidia-smi`). For example, I found my GPU driver supported CUDA 11.6, so I run `conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia` to install pytorch 1.13.1. After the installation, verify PyTorch is usable on GPU by running `python -c "import torch; print(torch.matmul(torch.rand(2, 8).to(0), torch.rand(8, 4).to(0)).shape)"`. If it outputs `torch.Size([2, 4])` then we can go to next step, otherwise try to fix the issues by reinstall PyTorch.
 62 | 
 63 | 
 64 | 4. **Install `torch-geometric`, `pyzmq`, and logging tools:** 
 65 | We recommend using conda to install `torch-geometric` and `pyzmq` to avoid possible dependencies conflict. 
 66 | ```sh
 67 | conda install -c pyg pyg=2.1.0
 68 | conda install -c anaconda pyzmq=23.2.0
 69 | conda install -c dglteam dgl=1.1.0
 70 | cd ..
 71 | git clone https://github.com/yuandong-tian/tools2.git
 72 | cd tools2
 73 | python -m pip install .
 74 | ```
 75 | 
 76 | 5. **Clone CompilerGym and this repo:** We will check out both this repo and
 77 |    CompilerGym and install all development dependencies by running the following commands. Note that we clone the specific folk of CompilerGym that includes the type graph patch. We change to a desired directory to clone the repo: `cd /path/of/your/choice`.
 78 | 
 79 | 
 80 | ```sh
 81 | cd ..
 82 | git clone --depth 1 --branch rlcompopt https://github.com/youweiliang/CompilerGym.git
 83 | cd CompilerGym
 84 | make init
 85 | 
 86 | cd ..
 87 | git clone https://github.com/facebookresearch/RLCompOpt.git
 88 | cd RLCompOpt
 89 | make init
 90 | ```
 91 | 
 92 | 6. **Build and install CompilerGym from source.** 
 93 | 
 94 | ```sh
 95 | cd ../CompilerGym
 96 | make install
 97 | ```
 98 | If you encounter an error related to installing the library [gym](https://github.com/openai/gym), try to run `pip install setuptools==65.5.0` and then run `make install` again (see this [issue](https://github.com/openai/gym/issues/3176)).
 99 | 
100 | If you want to modify the CompilerGym codebase, you need to make your desired changes and then re-run `make install`.
101 | 
102 | 
103 | 7. **Install this repo:**
104 | 
105 | ```sh
106 | cd ../RLCompOpt
107 | make install
108 | ```
109 | **If you modify this repo, you will need to reinstall it to make any changes to take effect.**
110 | 
111 | 8. **Use RAM rather than NFS for faster environments:** CompilerGym
112 |    does quite a lot of disk operations which can be slow on the cluster NFS.
113 |    Force CompilerGym to instead keep everything in memory using:
114 | 
115 | ```sh
116 | export COMPILER_GYM_SITE_DATA=/dev/shm/compiler_gym_site_data
117 | ```
118 | 
119 | (Optional) You can even put the entire bazel build tree in memory if you want to speed up
120 | build times. If you want to do this:
121 | 
122 | ```sh
123 | mv ~/.cache ~/.old-cache
124 | mkdir "/dev/shm/${USER}_cache"
125 | ln -s "/dev/shm/${USER}_cache" ~/.cache
126 | ```
127 | You may need to change it back `mv ~/.old-cache ~/.cache` afterward.
128 | 
129 | 9. (Optional) **Automate the environment setup:** Create a script to set up
130 |    these environment variables so that you don't have to redo it next time you
131 |    spawn a shell:
132 | 
133 | ```sh
134 | cat <<EOF > ~/.rlcompopt_env
135 | conda activate rlcompopt
136 | export PATH=$HOME/.local/bin:$PATH
137 | export COMPILER_GYM_SITE_DATA=/dev/shm/compiler_gym_site_data
138 | 
139 | EOF
140 | ```
141 | 
142 | Now you can do `source ~/.rlcompopt_env` to restore the environment.
143 | 
144 | 
145 | ## Preparing data files
146 | The data files can be downloaded from this [Google Drive](https://drive.google.com/drive/folders/1lATNWBKmsubw8bGeFyDlBHXlYbcRrw7S?usp=sharing). You can install gdown to download it:
147 | ```
148 | conda install -c conda-forge gdown
149 | gdown --folder https://drive.google.com/drive/folders/1lATNWBKmsubw8bGeFyDlBHXlYbcRrw7S?usp=sharing
150 | ```
151 | The commands should save the files under a folder named data. 
152 | 
153 | Or you can download it from the website and place the data folder under the repo, which results in the following file structure.
154 | ```
155 | data
156 | ├── all_ssl_vocab.db
157 | ...
158 | ```
159 | 
160 | ## Training
161 | ### Training of Normalized Value Prediction (NVP), Behavior Cloning (BC), and Q value
162 | Run the scripts under the `scripts` folder to start training models of NVP, BC or Q value. The model checkpoints, training log, and configurations will be saved under `./outputs`. The configurations are saved in a file named `args.pkl` and can be used for testing later.
163 | 
164 | 
165 | ### Testing of Normalized Value Prediction (NVP), Behavior Cloning (BC), and Q value
166 | First we create a directory for gathering testing results: `mkdir cg_paper_exp`. 
167 | Set the number of CPUs/GPUs to use for testing by setting environment variables `NUM_CPU` and `NUM_GPU`. For example, if you want to use 10 CPUs and 1 GPU, you can run `export NUM_CPU=10; export NUM_GPU=1`. 
168 | Run `python rlcompopt/eval_local.py --args_path /path/to/output/args.pkl` to obtain model performance on the validation set and test set. 
169 | There is a [script](scripts/test.sh) for testing all models in the outputs folder. You can modify it and run it `bash scripts/test.sh`.
170 | 
171 | 
172 | ### Training and testing RL-PPO agents
173 | Run `bash scripts/generate_graph_reward_history_online.sh` to start a group of processes (generators) that do the exploration and send trajectories data to the model for training. 
174 | 
175 | And at the same time, in another shell, run `bash scripts/train_graph_reward_history_online.sh` to start the trainer of RL-PPO, which receives trajectories data from the generators.
176 | 
177 | Alternatively, you can run `python scripts/submit_online_train_ppo_action_histogram.py` and `python scripts/submit_ppo_autophase_action_histogram.py` to run all the RL-PPO experiments. You should check the files and provide necessary arguments to the two scripts.
178 | 
179 | ## Contributing
180 | See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out.
181 | 
182 | ## License
183 | RLCompOpt is MIT licensed, as found in the [LICENSE](LICENSE) file.
184 | 
185 | ## Citing RLCompOpt
186 | ```BibTeX
187 | @InProceedings{liang2023rlcompopt,
188 |   title={Learning Compiler Pass Orders using Coreset and Normalized Value Prediction},
189 |   author={Liang, Youwei and Stone, Kevin and Shameli, Ali and Cummins, Chris and Elhoushi, Mostafa and Guo, Jiadong and Steiner, Benoit and Yang, Xiaomeng and Xie, Pengtao and Leather, Hugh and Tian, Yuandong},
190 |   year={2023},
191 |   booktitle={Proceedings of the 40th International Conference on Machine Learning}
192 | }
193 | ```


--------------------------------------------------------------------------------
/README_FAIR.md:
--------------------------------------------------------------------------------
  1 | 
  2 | This instruction is for people in FAIR (Meta AI) to run experiments.
  3 | 
  4 | 
  5 | ## Installation on devfair
  6 | 
  7 | Run `echo $CC` to verify the environment variables related to compilers are set. It should output a path of the clang compiler.
  8 | 
  9 | Follow these steps to set up a development environment on devfair:
 10 | 
 11 | 1. **Setup conda environment:**
 12 | 
 13 | ```sh
 14 | conda create -n rlcompopt python=3.8 cmake pandoc patchelf
 15 | conda activate rlcompopt
 16 | ```
 17 | 
 18 | 2. **Install bazel:** Bazel is used to compile the C++/python package. Here we
 19 |    will use bazelisk to manage our bazel installation and download it to
 20 |    `~/.local/bin`:
 21 | 
 22 | ```sh
 23 | mkdir -p ~/.local/bin
 24 | wget https://github.com/bazelbuild/bazelisk/releases/download/v1.7.5/bazelisk-linux-amd64 -O bazel
 25 | chmod +x bazel && mkdir -p ~/.local/bin && mv -v bazel ~/.local/bin
 26 | export PATH=~/.local/bin:$PATH
 27 | ```
 28 | 
 29 | 3. **Install PyTorch:** The codebase requires 2.0 > PyTorch >= 1.12.1. We can install it following [here](https://pytorch.org/get-started/previous-versions). We recommend using conda to install PyTorch to avoid possible dependencies conflict. You need to find the correct command according to the CUDA version your GPU driver supports (check `nvidia-smi`). For example, I found my GPU driver supported CUDA 11.6, so I run `conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cuda=11.6 -c pytorch -c nvidia` to install pytorch 1.13.1. After the installation, verify PyTorch is usable on GPU by running `python -c "import torch; print(torch.matmul(torch.rand(2, 8).to(0), torch.rand(8, 4).to(0)).shape)"`. If it outputs `torch.Size([2, 4])` then we can go to next step, otherwise try to fix the issues by reinstall PyTorch.
 30 | 
 31 | 
 32 | 4. **Install `torch-geometric`, `pyzmq`, and logging tools:** 
 33 | We recommend using conda to install `torch-geometric` and `pyzmq` to avoid possible dependencies conflict. 
 34 | ```sh
 35 | conda install -c pyg pyg=2.1.0
 36 | conda install -c anaconda pyzmq=23.2.0
 37 | conda install -c dglteam dgl=1.1.0
 38 | cd ..
 39 | git clone https://github.com/yuandong-tian/tools2.git
 40 | cd tools2
 41 | python -m pip install .
 42 | ```
 43 | 
 44 | 5. **Clone CompilerGym and this repo:** We will check out both this repo and
 45 |    CompilerGym and install all development dependencies by running the following commands. Note that we clone the specific folk of CompilerGym that includes the type graph patch. We change to a desired directory to clone the repo: `cd /path/of/your/choice`.
 46 | 
 47 | 
 48 | ```sh
 49 | cd ..
 50 | git clone --depth 1 --branch rlcompopt https://github.com/youweiliang/CompilerGym.git
 51 | cd CompilerGym
 52 | make init
 53 | 
 54 | cd ..
 55 | git clone https://github.com/facebookresearch/RLCompOpt.git
 56 | cd RLCompOpt
 57 | make init
 58 | ```
 59 | 
 60 | 6. **Build and install CompilerGym from source.** 
 61 | 
 62 | Run `pip install setuptools==65.5.0` so that the library [gym](https://github.com/openai/gym) can be installed properly.
 63 | 
 64 | ```sh
 65 | cd ../CompilerGym
 66 | make install
 67 | ```
 68 | 
 69 | If you want to modify the CompilerGym codebase, you need to make your desired changes and then re-run `make install`.
 70 | 
 71 | 
 72 | 7. **Install this repo:**
 73 | 
 74 | ```sh
 75 | cd ../RLCompOpt
 76 | make install
 77 | ```
 78 | **If you modify this repo, you will need to reinstall it to make any changes to take effect.**
 79 | 
 80 | 8. **Use RAM rather than NFS for faster environments:** CompilerGym
 81 |    does quite a lot of disk operations which can be slow on the cluster NFS.
 82 |    Force CompilerGym to instead keep everything in memory using:
 83 | 
 84 | ```sh
 85 | export COMPILER_GYM_SITE_DATA=/dev/shm/compiler_gym_site_data
 86 | ```
 87 | 
 88 | 
 89 | 9. (Optional) **Automate the environment setup:** Create a script to set up
 90 |    these environment variables so that you don't have to redo it next time you
 91 |    spawn a shell:
 92 | 
 93 | ```sh
 94 | cat <<EOF > ~/.rlcompopt_env
 95 | conda activate rlcompopt
 96 | export PATH=$HOME/.local/bin:$PATH
 97 | export COMPILER_GYM_SITE_DATA=/dev/shm/compiler_gym_site_data
 98 | 
 99 | EOF
100 | ```
101 | 
102 | Now you can do `source ~/.rlcompopt_env` to restore the environment.
103 | 
104 | 
105 | ## Preparing data files
106 | The data files can be downloaded from this [Google Drive](https://drive.google.com/drive/folders/1lATNWBKmsubw8bGeFyDlBHXlYbcRrw7S?usp=sharing). You can install gdown to download it:
107 | ```
108 | conda install -c conda-forge gdown
109 | gdown --folder https://drive.google.com/drive/folders/1lATNWBKmsubw8bGeFyDlBHXlYbcRrw7S?usp=sharing
110 | ```
111 | The commands should save the files under a folder named data. 
112 | 
113 | Or you can download it from the website and place the data folder under the repo, which results in the following file structure.
114 | ```
115 | data
116 | ├── all_ssl_vocab.db
117 | ...
118 | ```
119 | 
120 | ## Training
121 | 
122 | **Note that you may need to load CUDA/CUDNN modules**.
123 | 
124 | ### Training and testing of Normalized Value Prediction (NVP), Behavior Cloning (BC), and Q value
125 | Same as the instructions in README. If you submit the training jobs to Slurm, the testing script will be automatically submitted to Slurm once the training is done. So you don't need to start testing manually.
126 | 
127 | 
128 | ### Training and testing RL-PPO agents
129 | Same as the instructions in README.
130 | 
131 | ## Contributing
132 | See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out.
133 | 
134 | ## License
135 | RLCompOpt is MIT licensed, as found in the [LICENSE](LICENSE) file.
136 | 
137 | ## Citing RLCompOpt
138 | ```BibTeX
139 | @InProceedings{liang2023rlcompopt,
140 |   title={Learning Compiler Pass Orders using Coreset and Normalized Value Prediction},
141 |   author={Liang, Youwei and Stone, Kevin and Shameli, Ali and Cummins, Chris and Elhoushi, Mostafa and Guo, Jiadong and Steiner, Benoit and Yang, Xiaomeng and Xie, Pengtao and Leather, Hugh and Tian, Yuandong},
142 |   year={2023},
143 |   booktitle={Proceedings of the 40th International Conference on Machine Learning}
144 | }
145 | ```


--------------------------------------------------------------------------------
/exp/rand_search.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS Metrics (
 8 |     benchmark_uri TEXT NOT NULL,         -- The URI of the benchmark.
 9 |     score REAL,  -- (Oz - min_ir) / Oz
10 |     diff REAL,  -- Oz - min_ir
11 |     oz REAL, -- Oz
12 |     o0 REAL, -- O0
13 |     best_ir_seq TEXT, -- the ir count seq corresponding to the min_ir; space separated
14 |     best_action_seq TEXT -- the action seq corresponding to the min_ir; space separated
15 | );
16 | 


--------------------------------------------------------------------------------
/exp/rand_search2.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS Val (
 8 |     benchmark_uri TEXT NOT NULL,         -- The URI of the benchmark.
 9 |     oz REAL, -- Oz
10 |     o0 REAL, -- O0
11 |     val_seq BLOB  -- the cumulative rewards of each seq; python list
12 | );
13 | 


--------------------------------------------------------------------------------
/exp/rand_search3.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS SearchScores (
 8 |     benchmark_uri TEXT NOT NULL,         -- The URI of the benchmark.
 9 |     score1 REAL,  -- (Oz - min_ir) / Oz
10 |     score2 REAL,  -- (Oz - min_ir) / Oz
11 |     score3 REAL,  -- (Oz - min_ir) / Oz
12 |     score4 REAL,  -- (Oz - min_ir) / Oz
13 |     score5 REAL  -- (Oz - min_ir) / Oz
14 | );
15 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | -r requirements_pre_commit.txt
 2 | hydra-core==1.3.2
 3 | matplotlib>=3.3.3
 4 | nevergrad==0.4.3.post3
 5 | omegaconf==2.3.0
 6 | opentuner==0.8.5
 7 | pandas>=1.2.0
 8 | psutil>=5.8.0
 9 | py-cpuinfo>=7.0.0
10 | submitit~=1.2.1
11 | typer[all]==0.3.2
12 | depq==1.5.5
13 | tensorboardX==2.5.1
14 | GPUtil==1.4.0


--------------------------------------------------------------------------------
/requirements_pre_commit.txt:
--------------------------------------------------------------------------------
1 | black==19.10b0
2 | isort==4.3.21
3 | pre-commit==2.13.0
4 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
3 | 
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | 
7 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/generate.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   run:
  4 |     dir: ./outputs/generate/auto_${now:%Y_%m_%d_%H_%M_%S}
  5 | 
  6 |   sweep:
  7 |     dir: ./outputs/generate/
  8 |     subdir: auto_${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.num}
  9 | 
 10 |   output_subdir: "generate_config"
 11 | 
 12 | dataset_name: null # cbench,mibench
 13 | benchmark_exclude: null # comma separated keywords for exclusion, e.g., ghostscript,sort
 14 | benchmark_repeat: 80
 15 | seed: 0
 16 | get_stat: null # only query benchmarks statistics
 17 | load_benchmarks_from_json: null
 18 | json_key: null  # json key of the data split, e.g., 'test-small'
 19 | num_benchmarks: null  # if set, will try to use first `num_benchmarks` benchmarks
 20 | reset_best_return_on_every_episode: False  # if set true and benchmark_repeat > 1 and using the offline file (generate_utils.py), best return will be overwritten
 21 | online_update_vocab: False  # if True, update the common vocab on the fly if unknown token is encountered
 22 | graph_version: 0  # 0 for old graph type, 1 for new graph type
 23 | 
 24 | # Output db control
 25 | # if null, then we won't save db
 26 | outdir: null
 27 | 
 28 | vocab_db_path: null
 29 | 
 30 | # Run time control
 31 | patience: 30  # for offline data generation
 32 | runtime_per_job: 600
 33 | max_step_per_job: 10000
 34 | nproc: null # if null, use cpu_count()
 35 | max_episodes: null
 36 | 
 37 | # Models
 38 | model_path: null
 39 | gpu: 0 # null = cpu, or use a list (e.g. [0,1]) to specify multiple GPUs
 40 | eps: 0 # episilon greedy
 41 | T: 0  # Temperature used for sampling. If T = 0, then we do argmax
 42 | best_n: 1  # best n in A*
 43 | use_Astar: False  # use this option (instead of setting best_n) to control whether to use A*
 44 | use_AQ: False  # use the AQ* as in https://arxiv.org/pdf/2102.04518.pdf
 45 | use_policy: False
 46 | 
 47 | # how long to wait before flushing buffer into database
 48 | commit_frequency_in_seconds: 300
 49 | max_state_buffer_length: 1000
 50 | 
 51 | # use submitit to send the tasks to other nodes instead of running locally
 52 | submitit:
 53 |   log_dir: null
 54 |   partition: learnlab
 55 |   timeout_min: 300
 56 |   jobs_per_task: 80
 57 |   cpus_per_task: 80
 58 |   mem_gb: 500
 59 |   gpus_per_node: 8
 60 |   constraint: volta32gb
 61 | 
 62 | traj_data: null  # read feather file and follow the trajectories therein, to replace offline random exploration
 63 | 
 64 | divided_by_this_ir: False  # for A* / AQ*, set this flag properly to get the correct estimation of future reward
 65 | 
 66 | # all parameters below are basically for online learning
 67 | generate_v4: False
 68 | device: cuda
 69 | traj_last_n: 5  # the number of last transitions to cut off
 70 | reward_discount: 0.9
 71 | model_db_path: null
 72 | return_lower_bound: -1
 73 | n_model_workers: 8
 74 | use_autophase: False
 75 | 
 76 | # aggreate the jobs for a single forward pass in the neural network
 77 | model_capacity: 512  # influence the waiting time for an item in the queue, either the #nodes (for graphs) or the batch size (for autophase)
 78 | load_full_rate: 0.8
 79 | job_full_rate: 0.8
 80 | wait_time: 0.1
 81 | 
 82 | load_model_frequency: 30
 83 | avg_last_n_scores: 100
 84 | 
 85 | min_ir: 100
 86 | max_ir: 10000
 87 | use_history: false
 88 | run_model_locally: false  # if true, the model is in the same process as the environment
 89 | GAE_lambda: 0.97  # the lambda for GAE-Lambda
 90 | use_ppo: false
 91 | 
 92 | norm_reward: true
 93 | 
 94 | eval_on_policy: false
 95 | model_rowid: null
 96 | 
 97 | # for online testing
 98 | online_test_json: null
 99 | online_test_max_step: 50
100 | test_frequency_in_seconds: 3600
101 | 
102 | # for creating socket that transfers data from generator to trainer
103 | send_data_via_socket: False
104 | 
105 | use_only_anghabench: false  # for debugging purpose, only train on the Anghabench
106 | traj_db: null  # the path of a db where action sequences of benchmarks are stored
107 | for_reinforce: true  # use_ppo can overwrite this
108 | pydantic_datasource: null
109 | pydantic_val_dataset_path: null
110 | pydantic_test_dataset_path: null
111 | simple_generation: true
112 | early_stop_patience: 2
113 | min_per_benchmark: 0.05
114 | highest_reward: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/attn.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/attn/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/attn/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k.db
 52 |   dev: data/all10k-val-medium-all10k.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 80000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 1  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 180
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 50
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 3e-4
113 |   weight_decay: 1e-6
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: True
142 |   use_action_embedding: False
143 |   use_autophase: False
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 8
148 |   gnn_type: "EdgeAttn"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 64
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.002
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false
204 |   zero_edge_emb: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/attn_bc.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/attn_bc/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/attn_bc/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k.db
 52 |   dev: data/all10k-val-medium-all10k.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 80000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 0  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 180
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 50
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 1e-4
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: true
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: True
142 |   use_action_embedding: False
143 |   use_autophase: False
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 8
148 |   gnn_type: "EdgeAttn"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 64
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.002
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: false
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false
204 |   zero_edge_emb: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/attn_q_value.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/attn_q/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/attn_q/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k.db
 52 |   dev: data/all10k-val-medium-all10k.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 80000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 0  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 180
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 50
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 1e-4
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: True
142 |   use_action_embedding: False
143 |   use_autophase: False
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 8
148 |   gnn_type: "EdgeAttn"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 64
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.002
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: mse
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false
204 |   zero_edge_emb: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/autophase.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/autophase/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/autophase/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k-autophase.db
 52 |   dev: data/all10k-val-medium-all10k-autophase.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 500  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 0  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 100
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 100
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 5e-4
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: False
142 |   use_action_embedding: False
143 |   use_autophase: true
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 1
148 |   gnn_type: "EdgeAttn"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 0
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.05
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/autophase_bc.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/autophase_bc/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/autophase_bc/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k-autophase.db
 52 |   dev: data/all10k-val-medium-all10k-autophase.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 500  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 0  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 100
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 100
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 5e-4
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: true
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: False
142 |   use_action_embedding: False
143 |   use_autophase: true
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 1
148 |   gnn_type: "EdgeAttn"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 0
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.05
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: false
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/autophase_q_value.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/autophase_q/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/autophase_q/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k-autophase.db
 52 |   dev: data/all10k-val-medium-all10k-autophase.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 500  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 0  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 100
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 100
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 1e-4
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: False
142 |   use_action_embedding: False
143 |   use_autophase: true
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 1
148 |   gnn_type: "EdgeAttn"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 0
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.05
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: mse
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/gcn.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/ggc/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/ggc/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k.db
 52 |   dev: data/all10k-val-medium-all10k.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 80000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 1  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 180
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 100
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 1e-3
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: True
142 |   use_action_embedding: False
143 |   use_autophase: False
144 |   autophase_dim: 56
145 |   n_steps: 2
146 |   n_etypes: 3
147 |   n_message_passes: 8
148 |   gnn_type: "GatedGraphConv"
149 |   aggr: 'mean'
150 |   use_edge_embedding: False
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 0
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: true  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.003
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/gcn_real.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/gcn_real/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/gcn_real/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k.db
 52 |   dev: data/all10k-val-medium-all10k.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 60000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 1  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 240
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 100
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 1e-3
113 |   weight_decay: 1e-5
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: True
142 |   use_action_embedding: False
143 |   use_autophase: False
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 8
148 |   gnn_type: "GCN"
149 |   aggr: 'mean'
150 |   use_edge_embedding: true
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 64
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: true  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.002
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/model/gnn_type2.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   job:
  4 |     config:
  5 |       override_dirname:
  6 |         exclude_keys:
  7 |           - dataset.pydantic_dataset_path
  8 |           - dataset.pydantic_dataset_path_dev
  9 |           - dataset.train
 10 |           - dataset.dev
 11 |   run:
 12 |     dir: ./outputs/gnn_type2/${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}/
 13 | 
 14 |   sweep:
 15 |     dir: ./outputs/gnn_type2/
 16 |     subdir: ${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.override_dirname}
 17 | 
 18 | ssl: False
 19 | ssl_config:
 20 |   rm_edges_perct: 0.2
 21 |   rm_nodes_perct: 0.2
 22 |   use_node_type_loss: False
 23 | 
 24 | # finetuning parameters
 25 | finetune:
 26 |   ckpt: null
 27 |   skip_ckpt: False
 28 |   stage1:
 29 |     epochs: 50
 30 |     lr: 0.0002
 31 |     wd: 1e-4
 32 |   stage2:
 33 |     epochs: 150
 34 |     lr: 0.0001
 35 |     wd: 1e-5
 36 | load_ckpt: null  # load ckpt and then just like training from scratch
 37 | 
 38 | # distributed training config
 39 | distributed: True
 40 | dist_eval: True
 41 | world_size: 1  # number of distributed processes
 42 | dist_url: env://  # url used to set up distributed training
 43 | device: cuda  # device to use for training / testing
 44 | rank: 0
 45 | dist_backend: nccl
 46 | seed: 0
 47 | 
 48 | dataset:
 49 |   num_workers: 4
 50 | 
 51 |   train: data/all10k-train-medium-all10k.db
 52 |   dev: data/all10k-val-medium-all10k.db
 53 |   vocab: data/all_ssl_vocab.db
 54 | 
 55 |   autophase_max_bin: 10
 56 | 
 57 |   load_next_state: True
 58 |   remove_large_graph: False
 59 |   max_nodes: 60000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 60 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 61 |   load_balance: False  # whether to use load balance for distributed training
 62 |   load_cumulative_reward2: False
 63 |   pre_load: True  # whether to pre-load the data into memory
 64 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 65 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 66 | 
 67 |   # for new contrastive SSL
 68 |   queue_size: 1000
 69 |   min_queue_size: 1
 70 |   # for data transfer via socket
 71 |   send_data_via_socket: False
 72 |   num_generators: 1
 73 | 
 74 |   # for learning the immediate reward
 75 |   q_learning: true
 76 |   circulate_data: true  # for offline training. When set to `true`, it reuses the online training logic
 77 |   cache_data: false
 78 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 79 |   num_records: 100000000000000  # for offline training
 80 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 81 |   timeout: 0
 82 |   random_mixup: 1  # mixup probability
 83 |   weight_data_resample: false
 84 |   real_q_learning: false  # for cummulative reward regression
 85 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 86 |   pydantic_dataset_path: data/trajdataset_all10k-train-medium-all10k.json
 87 |   pydantic_dataset_path_dev: data/trajdataset_all10k-val-medium-all10k.json
 88 |   pydantic_dataset_path_test: data/benchmarkdataset_all-test.json
 89 |   cp_db_to_mem: true
 90 |   split: "all10k"
 91 |   dense_cls_metric: oz
 92 |   auto_batchsize: false
 93 |   remove_type_graph: false
 94 | 
 95 | # use submitit to send the tasks to other nodes instead of running locally
 96 | submitit:
 97 |   log_dir: null
 98 |   partition: learnlab
 99 |   timeout_min: 240
100 |   jobs_per_task: null
101 |   cpus_per_task: 10
102 |   gpus_per_node: 8
103 |   constraint: volta32gb
104 |   mem_gb: 500
105 | 
106 | start_epoch: 0
107 | save_dir: "./"
108 | gpu: null 
109 | num_epoch: 100
110 | save_per_epoch: 10
111 | optim:
112 |   lr: 3e-4
113 |   weight_decay: 1e-6
114 |   lr_schedular: True
115 |   lr_schedular_steps: 0
116 | 
117 | train_batch_size: 256 
118 | eval_batch_size: 256
119 | 
120 | generate_v4: true
121 | model_db_path: null
122 | save_frequence: 200
123 | print_frequence: 200
124 | eval_frequence: 100
125 | warmup_steps: 500
126 | total_steps: 10000
127 | 
128 | load_model_db: null
129 | sampling: false
130 | 
131 | behavior_cloning: true
132 | seq_classification: False
133 | eval_model_rowid: False
134 | early_stop: true
135 | outdir: null
136 | 
137 | model:
138 |   _target_: "rlcompopt.cl.models.gnn_pyg.CLSLearner"
139 |   mode: "pyg"
140 |   node_hidden_size: 256
141 |   use_node_embedding: True
142 |   use_action_embedding: False
143 |   use_autophase: False
144 |   autophase_dim: 56
145 |   n_steps: 1
146 |   n_etypes: 3
147 |   n_message_passes: 8
148 |   gnn_type: "GAT"
149 |   aggr: 'mean'
150 |   use_edge_embedding: true
151 |   use_flow_embedding: False
152 |   heads: null  # number of heads in multi-head attention for GAT
153 |   edge_emb_dim: 64
154 |   max_edge_position: 64
155 |   graph_version: 1
156 |   feat_drop: 0.0
157 |   concat_intermediate: False
158 |   discount_factor: 0.9
159 |   update_frequence: 150
160 |   zero_terminal_reward: False
161 |   node_level_action: False
162 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
163 |   num_actions: 50
164 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
165 |   subgraph: "function"
166 |   use_fc: False
167 |   use_relu: true  # can be string like "nn.GELU" to specify other activations
168 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
169 |   on_policy_gradient: true
170 |   entropy_factor: 0.0003
171 |   use_history: false
172 |   use_reward_history: false
173 |   history_trans_heads: 4
174 |   # for PPO
175 |   use_value_function: false
176 |   use_ppo: false
177 |   clip_ratio: 0.2
178 |   target_kl: 0.01
179 |   num_local_updates: 1
180 |   use_reinforce: false
181 |   use_reward_only: false
182 |   use_reward_and_graph: false
183 | 
184 |   # for new contrastive SSL
185 |   use_cl: false
186 |   ema_momentum: 0.99
187 |   temperature: 0.002
188 |   action_dim: 32
189 | 
190 |   # for learning the immediate reward (online q_learning)
191 |   logit_temperature: 1
192 |   avg_instruct_nodes: true
193 |   num_heads: 4
194 |   adv_factor: 10.
195 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
196 |   label_smoothing: 0
197 |   dense_label: true
198 |   type_graph: false
199 |   random_mixup: false
200 |   loss_mixup_coef: 0
201 |   norm_for_cls: False
202 |   action_histogram_steps: 0
203 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/rl_online/generate_autophase.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   run:
  4 |     dir: outputs_rl/auto_${now:%Y_%m_%d_%H_%M_%S}
  5 | 
  6 |   sweep:
  7 |     dir: outputs_rl/
  8 |     subdir: auto_${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.num}
  9 | 
 10 |   output_subdir: "generate_config"
 11 | 
 12 | dataset_name: null # cbench,mibench
 13 | benchmark_exclude: null # comma separated keywords for exclusion, e.g., ghostscript,sort
 14 | benchmark_repeat: 80
 15 | seed: 0
 16 | get_stat: null # only query benchmarks statistics
 17 | load_benchmarks_from_json: null
 18 | json_key: null  # json key of the data split, e.g., 'test-small'
 19 | num_benchmarks: null  # if set, will try to use first `num_benchmarks` benchmarks
 20 | reset_best_return_on_every_episode: False  # if set true and benchmark_repeat > 1 and using the offline file (generate_utils.py), best return will be overwritten
 21 | online_update_vocab: False  # if True, update the common vocab on the fly if unknown token is encountered
 22 | graph_version: 1  # 0 for old graph type, 1 for new graph type
 23 | 
 24 | # Output db control
 25 | # if null, then we won't save db
 26 | outdir: null
 27 | 
 28 | vocab_db_path: "data/all_ssl_vocab.db"
 29 | 
 30 | # Run time control
 31 | patience: 30  # for offline data generation
 32 | runtime_per_job: null
 33 | max_step_per_job: 45
 34 | nproc: 80 # if null, use cpu_count()
 35 | max_episodes: null
 36 | 
 37 | # Models
 38 | model_path: null
 39 | gpu: cuda # null = cpu, or use a list (e.g. [0,1]) to specify multiple GPUs
 40 | eps: 0 # episilon greedy
 41 | T: 0  # Temperature used for sampling. If T = 0, then we do argmax
 42 | best_n: 1  # best n in A*
 43 | use_Astar: False  # use this option (instead of setting best_n) to control whether to use A*
 44 | use_AQ: False  # use the AQ* as in https://arxiv.org/pdf/2102.04518.pdf
 45 | use_policy: False
 46 | 
 47 | # how long to wait before flushing buffer into database
 48 | commit_frequency_in_seconds: 10
 49 | max_state_buffer_length: 200
 50 | 
 51 | # use submitit to send the tasks to other nodes instead of running locally
 52 | submitit:
 53 |   log_dir: null
 54 |   partition: learnlab
 55 |   timeout_min: 360
 56 |   jobs_per_task: 80
 57 |   cpus_per_task: 80
 58 |   mem_gb: 500
 59 |   gpus_per_node: 8
 60 |   constraint: volta32gb
 61 | 
 62 | traj_data: null  # read feather file and follow the trajectories therein, to replace offline random exploration
 63 | 
 64 | divided_by_this_ir: False  # for A* / AQ*, set this flag properly to get the correct estimation of future reward
 65 | 
 66 | # all parameters below are basically for online learning
 67 | generate_v4: true
 68 | device: cuda
 69 | traj_last_n: 5  # the number of last transitions to cut off
 70 | reward_discount: 0.9
 71 | model_db_path: ${outdir}/model.db  # seems not working
 72 | return_lower_bound: -1
 73 | n_model_workers: 8
 74 | use_autophase: true
 75 | 
 76 | # aggreate the jobs for a single forward pass in the neural network
 77 | model_capacity: 800000  # influence the waiting time for an item in the queue, either the #nodes (for graphs) or the batch size (for autophase)
 78 | load_full_rate: 0.6
 79 | job_full_rate: 0.5
 80 | wait_time: 0.1
 81 | 
 82 | load_model_frequency: 20
 83 | avg_last_n_scores: 100
 84 | 
 85 | min_ir: 100
 86 | max_ir: 10000
 87 | use_history: true
 88 | run_model_locally: true  # if true, the model is in the same process as the environment
 89 | GAE_lambda: 0.97  # the lambda for GAE-Lambda
 90 | use_ppo: true
 91 | 
 92 | norm_reward: false
 93 | 
 94 | eval_on_policy: false
 95 | model_rowid: null
 96 | 
 97 | # for online testing
 98 | online_test_json: null
 99 | online_test_max_step: 45
100 | test_frequency_in_seconds: 900
101 | 
102 | # for creating socket that transfers data from generator to trainer
103 | send_data_via_socket: true
104 | 
105 | use_only_anghabench: false  # for debugging purpose, only train on the Anghabench
106 | traj_db: null  # the path of a db where action sequences of benchmarks are stored
107 | for_reinforce: true  # use_ppo can overwrite this
108 | pydantic_datasource: data/trajdataset_all10k-train-medium-all10k.json
109 | pydantic_val_dataset_path: data/trajdataset_all10k-val-medium-all10k.json
110 | pydantic_test_dataset_path: data/benchmarkdataset_all-test.json
111 | simple_generation: false
112 | early_stop_patience: 2
113 | min_per_benchmark: 0.05
114 | highest_reward: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/rl_online/generate_online.yaml:
--------------------------------------------------------------------------------
  1 | 
  2 | hydra:
  3 |   run:
  4 |     dir: outputs_rl/auto_${now:%Y_%m_%d_%H_%M_%S}
  5 | 
  6 |   sweep:
  7 |     dir: outputs_rl/
  8 |     subdir: auto_${now:%Y_%m_%d_%H_%M_%S}_${hydra.job.num}
  9 | 
 10 |   output_subdir: "generate_config"
 11 | 
 12 | dataset_name: null # cbench,mibench
 13 | benchmark_exclude: null # comma separated keywords for exclusion, e.g., ghostscript,sort
 14 | benchmark_repeat: 80
 15 | seed: 0
 16 | get_stat: null # only query benchmarks statistics
 17 | load_benchmarks_from_json: null
 18 | json_key: null  # json key of the data split, e.g., 'test-small'
 19 | num_benchmarks: null  # if set, will try to use first `num_benchmarks` benchmarks
 20 | reset_best_return_on_every_episode: False  # if set true and benchmark_repeat > 1 and using the offline file (generate_utils.py), best return will be overwritten
 21 | online_update_vocab: False  # if True, update the common vocab on the fly if unknown token is encountered
 22 | graph_version: 1  # 0 for old graph type, 1 for new graph type
 23 | 
 24 | # Output db control
 25 | # if null, then we won't save db
 26 | outdir: null
 27 | 
 28 | vocab_db_path: "data/all_ssl_vocab.db"
 29 | 
 30 | # Run time control
 31 | patience: 30  # for offline data generation
 32 | runtime_per_job: null
 33 | max_step_per_job: 45
 34 | nproc: 80 # if null, use cpu_count()
 35 | max_episodes: null
 36 | 
 37 | # Models
 38 | model_path: null
 39 | gpu: cuda # null = cpu, or use a list (e.g. [0,1]) to specify multiple GPUs
 40 | eps: 0 # episilon greedy
 41 | T: 0  # Temperature used for sampling. If T = 0, then we do argmax
 42 | best_n: 1  # best n in A*
 43 | use_Astar: False  # use this option (instead of setting best_n) to control whether to use A*
 44 | use_AQ: False  # use the AQ* as in https://arxiv.org/pdf/2102.04518.pdf
 45 | use_policy: False
 46 | 
 47 | # how long to wait before flushing buffer into database
 48 | commit_frequency_in_seconds: 10
 49 | max_state_buffer_length: 200
 50 | 
 51 | # use submitit to send the tasks to other nodes instead of running locally
 52 | submitit:
 53 |   log_dir: null
 54 |   partition: learnlab
 55 |   timeout_min: 360
 56 |   jobs_per_task: 80
 57 |   cpus_per_task: 80
 58 |   mem_gb: 500
 59 |   gpus_per_node: 8
 60 |   constraint: volta32gb
 61 | 
 62 | traj_data: null  # read feather file and follow the trajectories therein, to replace offline random exploration
 63 | 
 64 | divided_by_this_ir: False  # for A* / AQ*, set this flag properly to get the correct estimation of future reward
 65 | 
 66 | # all parameters below are basically for online learning
 67 | generate_v4: true
 68 | device: cuda
 69 | traj_last_n: 5  # the number of last transitions to cut off
 70 | reward_discount: 0.9
 71 | model_db_path: ${outdir}/model.db  # seems not working
 72 | return_lower_bound: -1
 73 | n_model_workers: 8
 74 | use_autophase: False
 75 | 
 76 | # aggreate the jobs for a single forward pass in the neural network
 77 | model_capacity: 800000  # influence the waiting time for an item in the queue, either the #nodes (for graphs) or the batch size (for autophase)
 78 | load_full_rate: 0.6
 79 | job_full_rate: 0.5
 80 | wait_time: 0.1
 81 | 
 82 | load_model_frequency: 20
 83 | avg_last_n_scores: 100
 84 | 
 85 | min_ir: 100
 86 | max_ir: 10000
 87 | use_history: true
 88 | run_model_locally: true  # if true, the model is in the same process as the environment
 89 | GAE_lambda: 0.97  # the lambda for GAE-Lambda
 90 | use_ppo: true
 91 | 
 92 | norm_reward: false
 93 | 
 94 | eval_on_policy: false
 95 | model_rowid: null
 96 | 
 97 | # for online testing
 98 | online_test_json: null
 99 | online_test_max_step: 45
100 | test_frequency_in_seconds: 900
101 | 
102 | # for creating socket that transfers data from generator to trainer
103 | send_data_via_socket: true
104 | 
105 | use_only_anghabench: false  # for debugging purpose, only train on the Anghabench
106 | traj_db: null  # the path of a db where action sequences of benchmarks are stored
107 | for_reinforce: true  # use_ppo can overwrite this
108 | pydantic_datasource: data/trajdataset_all10k-train-medium-all10k.json
109 | pydantic_val_dataset_path: data/trajdataset_all10k-val-medium-all10k.json
110 | pydantic_test_dataset_path: data/benchmarkdataset_all-test.json
111 | simple_generation: false
112 | early_stop_patience: 2
113 | min_per_benchmark: 0.05
114 | highest_reward: false


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/rl_online/train_attn.yaml:
--------------------------------------------------------------------------------
  1 | hydra:
  2 |   output_subdir: train_config
  3 | 
  4 | ssl: False
  5 | ssl_config:
  6 |   rm_edges_perct: 0.2
  7 |   rm_nodes_perct: 0.2
  8 |   use_node_type_loss: False
  9 | 
 10 | # finetuning parameters
 11 | finetune:
 12 |   ckpt: null
 13 |   skip_ckpt: False
 14 |   stage1:
 15 |     epochs: 50
 16 |     lr: 0.0002
 17 |     wd: 1e-4
 18 |   stage2:
 19 |     epochs: 150
 20 |     lr: 0.0001
 21 |     wd: 1e-5
 22 | load_ckpt: null  # load ckpt and then just like training from scratch
 23 | 
 24 | # distributed training config
 25 | distributed: true
 26 | dist_eval: True
 27 | world_size: 8 # number of distributed processes
 28 | dist_url: env://  # url used to set up distributed training
 29 | device: cuda  # device to use for training / testing
 30 | rank: 0
 31 | dist_backend: nccl
 32 | seed: 0
 33 | 
 34 | dataset:
 35 |   num_workers: 1
 36 | 
 37 |   train: null
 38 |   dev: null  # dev set is not being used currently
 39 |   vocab: data/all_ssl_vocab.db
 40 | 
 41 |   autophase_max_bin: 10
 42 | 
 43 |   load_next_state: True
 44 |   remove_large_graph: False
 45 |   max_nodes: 60000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 46 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 47 |   load_balance: False  # whether to use load balance for distributed training
 48 |   load_cumulative_reward2: False
 49 |   pre_load: True  # whether to pre-load the data into memory
 50 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 51 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 52 | 
 53 |   # for new contrastive SSL
 54 |   queue_size: 1000
 55 |   min_queue_size: 50
 56 |   # for data transfer via socket
 57 |   send_data_via_socket: true
 58 |   num_generators: 80
 59 | 
 60 |   # for learning the immediate reward
 61 |   q_learning: false
 62 |   circulate_data: false  # for offline training. When set to `true`, it reuses the online training logic
 63 |   cache_data: false
 64 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 65 |   num_records: 10000  # for offline training
 66 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 67 |   timeout: 0
 68 |   random_mixup: 0  # mixup probability
 69 |   weight_data_resample: false
 70 |   real_q_learning: false  # for cummulative reward regression
 71 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 72 |   pydantic_dataset_path: null
 73 |   pydantic_dataset_path_dev: null
 74 |   pydantic_dataset_path_test: null
 75 |   cp_db_to_mem: false
 76 |   split: "all10k"  # have no effect; for recording the dataset
 77 |   dense_cls_metric: oz
 78 |   auto_batchsize: false
 79 |   remove_type_graph: false
 80 | 
 81 | # use submitit to send the tasks to other nodes instead of running locally
 82 | submitit:
 83 |   log_dir: null
 84 |   partition: learnlab
 85 |   timeout_min: 360
 86 |   jobs_per_task: null
 87 |   cpus_per_task: 10
 88 |   gpus_per_node: 8
 89 |   constraint: volta32gb
 90 |   mem_gb: 500
 91 | 
 92 | start_epoch: 0
 93 | save_dir: "./"
 94 | gpu: null 
 95 | num_epoch: 100
 96 | save_per_epoch: 10
 97 | optim:
 98 |   lr: 3e-5
 99 |   weight_decay: 1e-5
100 |   lr_schedular: true
101 |   lr_schedular_steps: 10000
102 | 
103 | train_batch_size: 256 
104 | eval_batch_size: 256
105 | 
106 | generate_v4: true
107 | model_db_path: null
108 | save_frequence: 10
109 | print_frequence: 50
110 | eval_frequence: 100
111 | warmup_steps: 100
112 | total_steps: null  # for the online training logic
113 | 
114 | load_model_db: null
115 | sampling: false
116 | 
117 | behavior_cloning: False
118 | seq_classification: False
119 | eval_model_rowid: False
120 | early_stop: false  # early stop using validation loss
121 | outdir: null  # log files
122 | 
123 | model:
124 |   _target_: "rlcompopt.cl.models.gnn_pyg.PPO"
125 |   mode: "pyg"
126 |   node_hidden_size: 128
127 |   use_node_embedding: True
128 |   use_action_embedding: False
129 |   use_autophase: False
130 |   autophase_dim: 56
131 |   n_steps: 1
132 |   n_etypes: 3
133 |   n_message_passes: 8
134 |   gnn_type: "EdgeAttn"
135 |   aggr: 'mean'
136 |   use_edge_embedding: False
137 |   use_flow_embedding: False
138 |   heads: null  # number of heads in multi-head attention for GAT
139 |   edge_emb_dim: 64
140 |   max_edge_position: 64
141 |   graph_version: 1
142 |   feat_drop: 0.0
143 |   concat_intermediate: False
144 |   discount_factor: 0.9
145 |   update_frequence: 150
146 |   zero_terminal_reward: False
147 |   node_level_action: False
148 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
149 |   num_actions: 124
150 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
151 |   subgraph: "function"
152 |   use_fc: False
153 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
154 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
155 |   on_policy_gradient: true
156 |   entropy_factor: 0.0003
157 |   use_history: true
158 |   use_reward_history: false
159 |   history_trans_heads: 4
160 |   # for PPO
161 |   use_value_function: true
162 |   use_ppo: true
163 |   clip_ratio: 0.2
164 |   target_kl: 0.01
165 |   num_local_updates: 1
166 |   use_reinforce: false
167 |   use_reward_only: false
168 |   use_reward_and_graph: false
169 | 
170 |   # for new contrastive SSL
171 |   use_cl: false
172 |   ema_momentum: 0.99
173 |   temperature: 0.07
174 |   action_dim: 32
175 | 
176 |   # for learning the immediate reward (online q_learning)
177 |   logit_temperature: 1
178 |   avg_instruct_nodes: true
179 |   num_heads: 4
180 |   adv_factor: 10.
181 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
182 |   label_smoothing: 0
183 |   dense_label: false
184 |   type_graph: false  # in shallow layers, only encode the subgraph of type nodes
185 |   random_mixup: false
186 |   loss_mixup_coef: 0
187 |   norm_for_cls: False
188 |   action_histogram_steps: 45  # greater than 0 indicates using the history action histogram
189 |   action_histogram_for_values: true


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/rl_online/train_autophase.yaml:
--------------------------------------------------------------------------------
  1 | hydra:
  2 |   output_subdir: train_config
  3 | 
  4 | ssl: False
  5 | ssl_config:
  6 |   rm_edges_perct: 0.2
  7 |   rm_nodes_perct: 0.2
  8 |   use_node_type_loss: False
  9 | 
 10 | # finetuning parameters
 11 | finetune:
 12 |   ckpt: null
 13 |   skip_ckpt: False
 14 |   stage1:
 15 |     epochs: 50
 16 |     lr: 0.0002
 17 |     wd: 1e-4
 18 |   stage2:
 19 |     epochs: 150
 20 |     lr: 0.0001
 21 |     wd: 1e-5
 22 | load_ckpt: null  # load ckpt and then just like training from scratch
 23 | 
 24 | # distributed training config
 25 | distributed: true
 26 | dist_eval: True
 27 | world_size: 8  # number of distributed processes
 28 | dist_url: env://  # url used to set up distributed training
 29 | device: cuda  # device to use for training / testing
 30 | rank: 0
 31 | dist_backend: nccl
 32 | seed: 0
 33 | 
 34 | dataset:
 35 |   num_workers: 1
 36 | 
 37 |   train: null
 38 |   dev: null  # dev set is not being used currently
 39 |   vocab: data/all_ssl_vocab.db
 40 | 
 41 |   autophase_max_bin: 10
 42 | 
 43 |   load_next_state: True
 44 |   remove_large_graph: False
 45 |   max_nodes: 1000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 46 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 47 |   load_balance: False  # whether to use load balance for distributed training
 48 |   load_cumulative_reward2: False
 49 |   pre_load: True  # whether to pre-load the data into memory
 50 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 51 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 52 | 
 53 |   # for new contrastive SSL
 54 |   queue_size: 1000
 55 |   min_queue_size: 50
 56 |   # for data transfer via socket
 57 |   send_data_via_socket: true
 58 |   num_generators: 80
 59 | 
 60 |   # for learning the immediate reward
 61 |   q_learning: false
 62 |   circulate_data: false  # for offline training. When set to `true`, it reuses the online training logic
 63 |   cache_data: false
 64 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 65 |   num_records: 10000  # for offline training
 66 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 67 |   timeout: 0
 68 |   random_mixup: 0  # mixup probability
 69 |   weight_data_resample: false
 70 |   real_q_learning: false  # for cummulative reward regression
 71 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 72 |   pydantic_dataset_path: null
 73 |   pydantic_dataset_path_dev: null
 74 |   pydantic_dataset_path_test: null
 75 |   cp_db_to_mem: false
 76 |   split: "all10k"  # have no effect; for recording the dataset
 77 |   dense_cls_metric: oz
 78 |   auto_batchsize: false
 79 |   remove_type_graph: false
 80 | 
 81 | # use submitit to send the tasks to other nodes instead of running locally
 82 | submitit:
 83 |   log_dir: null
 84 |   partition: learnlab
 85 |   timeout_min: 360
 86 |   jobs_per_task: null
 87 |   cpus_per_task: 10
 88 |   gpus_per_node: 8
 89 |   constraint: volta32gb
 90 |   mem_gb: 500
 91 | 
 92 | start_epoch: 0
 93 | save_dir: "./"
 94 | gpu: null 
 95 | num_epoch: 100
 96 | save_per_epoch: 10
 97 | optim:
 98 |   lr: 1e-5
 99 |   weight_decay: 1e-6
100 |   lr_schedular: true
101 |   lr_schedular_steps: 10000
102 | 
103 | train_batch_size: 1000 
104 | eval_batch_size: 1000
105 | 
106 | generate_v4: true
107 | model_db_path: null
108 | save_frequence: 10
109 | print_frequence: 50
110 | eval_frequence: 100
111 | warmup_steps: 100
112 | total_steps: null  # for the online training logic
113 | 
114 | load_model_db: null
115 | sampling: false
116 | 
117 | behavior_cloning: False
118 | seq_classification: False
119 | eval_model_rowid: False
120 | early_stop: false  # early stop using validation loss
121 | outdir: null  # log files
122 | 
123 | model:
124 |   _target_: "rlcompopt.cl.models.gnn_pyg.PPO"
125 |   mode: "pyg"
126 |   node_hidden_size: 128
127 |   use_node_embedding: False
128 |   use_action_embedding: False
129 |   use_autophase: True
130 |   autophase_dim: 56
131 |   n_steps: 2
132 |   n_etypes: 3
133 |   n_message_passes: 2
134 |   gnn_type: "GatedGraphConv"
135 |   aggr: 'add'
136 |   use_edge_embedding: False
137 |   use_flow_embedding: False
138 |   heads: null  # number of heads in multi-head attention for GAT
139 |   edge_emb_dim: 0
140 |   max_edge_position: 64
141 |   graph_version: 1
142 |   feat_drop: 0.0
143 |   concat_intermediate: False
144 |   discount_factor: 0.9
145 |   update_frequence: 150
146 |   zero_terminal_reward: False
147 |   node_level_action: False
148 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
149 |   num_actions: 124
150 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
151 |   subgraph: "function"
152 |   use_fc: False
153 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
154 |   use_reversed_edge: true  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
155 |   on_policy_gradient: true
156 |   entropy_factor: 0.0003
157 |   use_history: true
158 |   use_reward_history: false
159 |   history_trans_heads: 4
160 |   # for PPO
161 |   use_value_function: true
162 |   use_ppo: true
163 |   clip_ratio: 0.2
164 |   target_kl: 0.01
165 |   num_local_updates: 1
166 |   use_reinforce: false
167 |   use_reward_only: false
168 |   use_reward_and_graph: false
169 | 
170 |   # for new contrastive SSL
171 |   use_cl: false
172 |   ema_momentum: 0.99
173 |   temperature: 0.07
174 |   action_dim: 32
175 | 
176 |   # for learning the immediate reward (online q_learning)
177 |   logit_temperature: 1
178 |   avg_instruct_nodes: true
179 |   num_heads: 4
180 |   adv_factor: 10.
181 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
182 |   label_smoothing: 0
183 |   dense_label: false
184 |   type_graph: false  # in shallow layers, only encode the subgraph of type nodes
185 |   random_mixup: false
186 |   loss_mixup_coef: 0
187 |   norm_for_cls: False
188 |   action_histogram_steps: 45  # greater than 0 indicates using the history action histogram
189 |   action_histogram_for_values: true


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/rl_online/train_gcn.yaml:
--------------------------------------------------------------------------------
  1 | hydra:
  2 |   output_subdir: train_config
  3 | 
  4 | ssl: False
  5 | ssl_config:
  6 |   rm_edges_perct: 0.2
  7 |   rm_nodes_perct: 0.2
  8 |   use_node_type_loss: False
  9 | 
 10 | # finetuning parameters
 11 | finetune:
 12 |   ckpt: null
 13 |   skip_ckpt: False
 14 |   stage1:
 15 |     epochs: 50
 16 |     lr: 0.0002
 17 |     wd: 1e-4
 18 |   stage2:
 19 |     epochs: 150
 20 |     lr: 0.0001
 21 |     wd: 1e-5
 22 | load_ckpt: null  # load ckpt and then just like training from scratch
 23 | 
 24 | # distributed training config
 25 | distributed: true
 26 | dist_eval: True
 27 | world_size: 8  # number of distributed processes
 28 | dist_url: env://  # url used to set up distributed training
 29 | device: cuda  # device to use for training / testing
 30 | rank: 0
 31 | dist_backend: nccl
 32 | seed: 0
 33 | 
 34 | dataset:
 35 |   num_workers: 1
 36 | 
 37 |   train: null
 38 |   dev: null  # dev set is not being used currently
 39 |   vocab: data/all_ssl_vocab.db
 40 | 
 41 |   autophase_max_bin: 10
 42 | 
 43 |   load_next_state: True
 44 |   remove_large_graph: False
 45 |   max_nodes: 60000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 46 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 47 |   load_balance: False  # whether to use load balance for distributed training
 48 |   load_cumulative_reward2: False
 49 |   pre_load: True  # whether to pre-load the data into memory
 50 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 51 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 52 | 
 53 |   # for new contrastive SSL
 54 |   queue_size: 1000
 55 |   min_queue_size: 50
 56 |   # for data transfer via socket
 57 |   send_data_via_socket: true
 58 |   num_generators: 80
 59 | 
 60 |   # for learning the immediate reward
 61 |   q_learning: false
 62 |   circulate_data: false  # for offline training. When set to `true`, it reuses the online training logic
 63 |   cache_data: false
 64 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 65 |   num_records: 10000  # for offline training
 66 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 67 |   timeout: 0
 68 |   random_mixup: 0  # mixup probability
 69 |   weight_data_resample: false
 70 |   real_q_learning: false  # for cummulative reward regression
 71 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 72 |   pydantic_dataset_path: null
 73 |   pydantic_dataset_path_dev: null
 74 |   pydantic_dataset_path_test: null
 75 |   cp_db_to_mem: false
 76 |   split: "all10k"  # have no effect; for recording the dataset
 77 |   dense_cls_metric: oz
 78 |   auto_batchsize: false
 79 |   remove_type_graph: false
 80 | 
 81 | # use submitit to send the tasks to other nodes instead of running locally
 82 | submitit:
 83 |   log_dir: null
 84 |   partition: learnlab
 85 |   timeout_min: 360
 86 |   jobs_per_task: null
 87 |   cpus_per_task: 10
 88 |   gpus_per_node: 8
 89 |   constraint: volta32gb
 90 |   mem_gb: 500
 91 | 
 92 | start_epoch: 0
 93 | save_dir: "./"
 94 | gpu: null 
 95 | num_epoch: 100
 96 | save_per_epoch: 10
 97 | optim:
 98 |   lr: 3e-5
 99 |   weight_decay: 1e-5
100 |   lr_schedular: true
101 |   lr_schedular_steps: 10000
102 | 
103 | train_batch_size: 256 
104 | eval_batch_size: 256
105 | 
106 | generate_v4: true
107 | model_db_path: null
108 | save_frequence: 10
109 | print_frequence: 50
110 | eval_frequence: 100
111 | warmup_steps: 100
112 | total_steps: null  # for the online training logic
113 | 
114 | load_model_db: null
115 | sampling: false
116 | 
117 | behavior_cloning: False
118 | seq_classification: False
119 | eval_model_rowid: False
120 | early_stop: false  # early stop using validation loss
121 | outdir: null  # log files
122 | 
123 | model:
124 |   _target_: "rlcompopt.cl.models.gnn_pyg.PPO"
125 |   mode: "pyg"
126 |   node_hidden_size: 128
127 |   use_node_embedding: True
128 |   use_action_embedding: False
129 |   use_autophase: False
130 |   autophase_dim: 56
131 |   n_steps: 2
132 |   n_etypes: 3
133 |   n_message_passes: 8
134 |   gnn_type: "GatedGraphConv"
135 |   aggr: 'add'
136 |   use_edge_embedding: False
137 |   use_flow_embedding: False
138 |   heads: null  # number of heads in multi-head attention for GAT
139 |   edge_emb_dim: 0
140 |   max_edge_position: 64
141 |   graph_version: 1
142 |   feat_drop: 0.0
143 |   concat_intermediate: False
144 |   discount_factor: 0.9
145 |   update_frequence: 150
146 |   zero_terminal_reward: False
147 |   node_level_action: False
148 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
149 |   num_actions: 124
150 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
151 |   subgraph: "function"
152 |   use_fc: False
153 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
154 |   use_reversed_edge: true  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
155 |   on_policy_gradient: true
156 |   entropy_factor: 0.0003
157 |   use_history: true
158 |   use_reward_history: false
159 |   history_trans_heads: 4
160 |   # for PPO
161 |   use_value_function: true
162 |   use_ppo: true
163 |   clip_ratio: 0.2
164 |   target_kl: 0.01
165 |   num_local_updates: 1
166 |   use_reinforce: false
167 |   use_reward_only: false
168 |   use_reward_and_graph: false
169 | 
170 |   # for new contrastive SSL
171 |   use_cl: false
172 |   ema_momentum: 0.99
173 |   temperature: 0.07
174 |   action_dim: 32
175 | 
176 |   # for learning the immediate reward (online q_learning)
177 |   logit_temperature: 1
178 |   avg_instruct_nodes: true
179 |   num_heads: 4
180 |   adv_factor: 10.
181 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
182 |   label_smoothing: 0
183 |   dense_label: false
184 |   type_graph: false  # in shallow layers, only encode the subgraph of type nodes
185 |   random_mixup: false
186 |   loss_mixup_coef: 0
187 |   norm_for_cls: False
188 |   action_histogram_steps: 45  # greater than 0 indicates using the history action histogram
189 |   action_histogram_for_values: true


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/rl_online/train_gnn_type2.yaml:
--------------------------------------------------------------------------------
  1 | hydra:
  2 |   output_subdir: train_config
  3 | 
  4 | ssl: False
  5 | ssl_config:
  6 |   rm_edges_perct: 0.2
  7 |   rm_nodes_perct: 0.2
  8 |   use_node_type_loss: False
  9 | 
 10 | # finetuning parameters
 11 | finetune:
 12 |   ckpt: null
 13 |   skip_ckpt: False
 14 |   stage1:
 15 |     epochs: 50
 16 |     lr: 0.0002
 17 |     wd: 1e-4
 18 |   stage2:
 19 |     epochs: 150
 20 |     lr: 0.0001
 21 |     wd: 1e-5
 22 | load_ckpt: null  # load ckpt and then just like training from scratch
 23 | 
 24 | # distributed training config
 25 | distributed: true
 26 | dist_eval: True
 27 | world_size: 8  # number of distributed processes
 28 | dist_url: env://  # url used to set up distributed training
 29 | device: cuda  # device to use for training / testing
 30 | rank: 0
 31 | dist_backend: nccl
 32 | seed: 0
 33 | 
 34 | dataset:
 35 |   num_workers: 1
 36 | 
 37 |   train: null
 38 |   dev: null  # dev set is not being used currently
 39 |   vocab: data/all_ssl_vocab.db
 40 | 
 41 |   autophase_max_bin: 10
 42 | 
 43 |   load_next_state: True
 44 |   remove_large_graph: False
 45 |   max_nodes: 60000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 46 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 47 |   load_balance: False  # whether to use load balance for distributed training
 48 |   load_cumulative_reward2: False
 49 |   pre_load: True  # whether to pre-load the data into memory
 50 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 51 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 52 | 
 53 |   # for new contrastive SSL
 54 |   queue_size: 1000
 55 |   min_queue_size: 50
 56 |   # for data transfer via socket
 57 |   send_data_via_socket: true
 58 |   num_generators: 80
 59 | 
 60 |   # for learning the immediate reward
 61 |   q_learning: false
 62 |   circulate_data: false  # for offline training. When set to `true`, it reuses the online training logic
 63 |   cache_data: false
 64 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 65 |   num_records: 10000  # for offline training
 66 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 67 |   timeout: 0
 68 |   random_mixup: 0  # mixup probability
 69 |   weight_data_resample: false
 70 |   real_q_learning: false  # for cummulative reward regression
 71 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 72 |   pydantic_dataset_path: null
 73 |   pydantic_dataset_path_dev: null
 74 |   pydantic_dataset_path_test: null
 75 |   cp_db_to_mem: false
 76 |   split: "all10k"  # have no effect; for recording the dataset
 77 |   dense_cls_metric: oz
 78 |   auto_batchsize: false
 79 |   remove_type_graph: false
 80 | 
 81 | # use submitit to send the tasks to other nodes instead of running locally
 82 | submitit:
 83 |   log_dir: null
 84 |   partition: learnlab
 85 |   timeout_min: 360
 86 |   jobs_per_task: null
 87 |   cpus_per_task: 10
 88 |   gpus_per_node: 8
 89 |   constraint: volta32gb
 90 |   mem_gb: 500
 91 | 
 92 | start_epoch: 0
 93 | save_dir: "./"
 94 | gpu: null 
 95 | num_epoch: 100
 96 | save_per_epoch: 10
 97 | optim:
 98 |   lr: 3e-5
 99 |   weight_decay: 1e-5
100 |   lr_schedular: true
101 |   lr_schedular_steps: 10000
102 | 
103 | train_batch_size: 256 
104 | eval_batch_size: 256
105 | 
106 | generate_v4: true
107 | model_db_path: null
108 | save_frequence: 10
109 | print_frequence: 50
110 | eval_frequence: 100
111 | warmup_steps: 100
112 | total_steps: null  # for the online training logic
113 | 
114 | load_model_db: null
115 | sampling: false
116 | 
117 | behavior_cloning: False
118 | seq_classification: False
119 | eval_model_rowid: False
120 | early_stop: false  # early stop using validation loss
121 | outdir: null  # log files
122 | 
123 | model:
124 |   _target_: "rlcompopt.cl.models.gnn_pyg.PPO"
125 |   mode: "pyg"
126 |   node_hidden_size: 256
127 |   use_node_embedding: True
128 |   use_action_embedding: False
129 |   use_autophase: False
130 |   autophase_dim: 56
131 |   n_steps: 1
132 |   n_etypes: 3
133 |   n_message_passes: 8
134 |   gnn_type: "GAT"
135 |   aggr: 'mean'
136 |   use_edge_embedding: true
137 |   use_flow_embedding: False
138 |   heads: null  # number of heads in multi-head attention for GAT
139 |   edge_emb_dim: 64
140 |   max_edge_position: 64
141 |   graph_version: 1
142 |   feat_drop: 0.0
143 |   concat_intermediate: False
144 |   discount_factor: 0.9
145 |   update_frequence: 150
146 |   zero_terminal_reward: False
147 |   node_level_action: False
148 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
149 |   num_actions: 124
150 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
151 |   subgraph: "function"
152 |   use_fc: False
153 |   use_relu: true  # can be string like "nn.GELU" to specify other activations
154 |   use_reversed_edge: false  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
155 |   on_policy_gradient: true
156 |   entropy_factor: 0.0003
157 |   use_history: true
158 |   use_reward_history: false
159 |   history_trans_heads: 4
160 |   # for PPO
161 |   use_value_function: true
162 |   use_ppo: true
163 |   clip_ratio: 0.2
164 |   target_kl: 0.01
165 |   num_local_updates: 1
166 |   use_reinforce: false
167 |   use_reward_only: false
168 |   use_reward_and_graph: false
169 | 
170 |   # for new contrastive SSL
171 |   use_cl: false
172 |   ema_momentum: 0.99
173 |   temperature: 0.07
174 |   action_dim: 32
175 | 
176 |   # for learning the immediate reward (online q_learning)
177 |   logit_temperature: 1
178 |   avg_instruct_nodes: true
179 |   num_heads: 4
180 |   adv_factor: 10.
181 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
182 |   label_smoothing: 0
183 |   dense_label: false
184 |   type_graph: false  # in shallow layers, only encode the subgraph of type nodes
185 |   random_mixup: false
186 |   loss_mixup_coef: 0
187 |   norm_for_cls: False
188 |   action_histogram_steps: 45  # greater than 0 indicates using the history action histogram
189 |   action_histogram_for_values: true


--------------------------------------------------------------------------------
/rlcompopt/cl/conf/train.yaml:
--------------------------------------------------------------------------------
  1 | ssl: False
  2 | ssl_config:
  3 |   rm_edges_perct: 0.2
  4 |   rm_nodes_perct: 0.2
  5 |   use_node_type_loss: False
  6 | 
  7 | # finetuning parameters
  8 | finetune:
  9 |   ckpt: null
 10 |   skip_ckpt: False
 11 |   stage1:
 12 |     epochs: 50
 13 |     lr: 0.0002
 14 |     wd: 1e-4
 15 |   stage2:
 16 |     epochs: 150
 17 |     lr: 0.0001
 18 |     wd: 1e-5
 19 | load_ckpt: null  # load ckpt and then just like training from scratch
 20 | 
 21 | # distributed training config
 22 | distributed: False
 23 | dist_eval: True
 24 | world_size: 1  # number of distributed processes
 25 | dist_url: env://  # url used to set up distributed training
 26 | device: cuda  # device to use for training / testing
 27 | rank: 0
 28 | dist_backend: nccl
 29 | seed: 0
 30 | 
 31 | dataset:
 32 |   num_workers: 1
 33 | 
 34 |   train: null
 35 |   dev: null  # dev set is not being used currently
 36 |   vocab: null
 37 | 
 38 |   autophase_max_bin: 10
 39 | 
 40 |   load_next_state: True
 41 |   remove_large_graph: False
 42 |   max_nodes: 100000  # the max #nodes that fit in a GPU, tune this number to avoid CUDA OOM
 43 |   full_rate: 0.95  # if load_balance, [max_nodes * full_rate] will be the minimum number of nodes for a bin to be considered full
 44 |   load_balance: False  # whether to use load balance for distributed training
 45 |   load_cumulative_reward2: False
 46 |   pre_load: True  # whether to pre-load the data into memory
 47 |   use_aggregated: False  # whether to use an aggregated dataset that merges same state together
 48 |   divided_by_this_ir: False  # the denominator of the reward is set to the ir count of current state
 49 | 
 50 |   # for new contrastive SSL
 51 |   queue_size: 1000
 52 |   min_queue_size: 50
 53 |   # for data transfer via socket
 54 |   send_data_via_socket: False
 55 |   num_generators: 1
 56 | 
 57 |   # for learning the immediate reward
 58 |   q_learning: false
 59 |   circulate_data: false  # for offline training. When set to `true`, it reuses the online training logic
 60 |   cache_data: false
 61 |   eval_data_len: 0  # set a positive number to do evaluation in offline training
 62 |   num_records: 10000  # for offline training
 63 |   exclude_sets: null  # for offline training: training on only a subset of actions. Can be int/list
 64 |   timeout: 0
 65 |   random_mixup: 0  # mixup probability
 66 |   weight_data_resample: false
 67 |   real_q_learning: false  # for cummulative reward regression
 68 |   dense_seq_cls: false  # path to db containing all_benchmark to all_seq rewards
 69 |   pydantic_dataset_path: null
 70 |   pydantic_dataset_path_dev: null
 71 |   pydantic_dataset_path_test: null
 72 |   cp_db_to_mem: false
 73 |   split: "all10k"  # have no effect; for recording the dataset
 74 |   dense_cls_metric: oz
 75 |   auto_batchsize: false
 76 |   remove_type_graph: false
 77 | 
 78 | # use submitit to send the tasks to other nodes instead of running locally
 79 | submitit:
 80 |   log_dir: null
 81 |   partition: learnlab
 82 |   timeout_min: 60
 83 |   jobs_per_task: null
 84 |   cpus_per_task: 10
 85 |   gpus_per_node: 8
 86 |   constraint: volta32gb
 87 |   mem_gb: 500
 88 | 
 89 | start_epoch: 0
 90 | save_dir: "./"
 91 | gpu: null 
 92 | num_epoch: 100
 93 | save_per_epoch: 10
 94 | optim:
 95 |   lr: 0.001
 96 |   weight_decay: 0
 97 |   lr_schedular: True
 98 |   lr_schedular_steps: 0
 99 | 
100 | train_batch_size: 256 
101 | eval_batch_size: 256
102 | 
103 | generate_v4: False
104 | model_db_path: null
105 | save_frequence: 10
106 | print_frequence: 100
107 | eval_frequence: 100
108 | warmup_steps: 100
109 | total_steps: null  # for the online training logic
110 | 
111 | load_model_db: null
112 | sampling: false
113 | 
114 | behavior_cloning: False
115 | seq_classification: False
116 | eval_model_rowid: False
117 | early_stop: false  # early stop using validation loss
118 | outdir: null  # log files
119 | 
120 | model:
121 |   _target_: "rlcompopt.cl.models.gnn_pyg.GNNEncoder"
122 |   mode: "pyg"
123 |   node_hidden_size: 32
124 |   use_node_embedding: True
125 |   use_action_embedding: False
126 |   use_autophase: False
127 |   autophase_dim: 56
128 |   n_steps: 1
129 |   n_etypes: 3
130 |   n_message_passes: 2
131 |   gnn_type: "GatedGraphConv"
132 |   aggr: 'add'
133 |   use_edge_embedding: False
134 |   use_flow_embedding: False
135 |   heads: null  # number of heads in multi-head attention for GAT
136 |   edge_emb_dim: 0
137 |   max_edge_position: 64
138 |   graph_version: 0
139 |   feat_drop: 0.0
140 |   concat_intermediate: False
141 |   discount_factor: 0.9
142 |   update_frequence: 150
143 |   zero_terminal_reward: False
144 |   node_level_action: False
145 |   bootstrap_q_learning: False  # use TD learning with bootstrap (update online net with target net)
146 |   num_actions: 124
147 |   use_subgraph_feature: False  # estimate action-value based on subgraphs
148 |   subgraph: "function"
149 |   use_fc: False
150 |   use_relu: False  # can be string like "nn.GELU" to specify other activations
151 |   use_reversed_edge: False  # False, 1/True (add back edges for control flows), 2 (add back edges for data flows)
152 |   on_policy_gradient: false
153 |   entropy_factor: 0.0003
154 |   use_history: false
155 |   use_reward_history: false
156 |   history_trans_heads: 4
157 |   # for PPO
158 |   use_value_function: false
159 |   use_ppo: false
160 |   clip_ratio: 0.2
161 |   target_kl: 0.01
162 |   num_local_updates: 1
163 |   use_reinforce: false
164 |   use_reward_only: false
165 |   use_reward_and_graph: false
166 | 
167 |   # for new contrastive SSL
168 |   use_cl: false
169 |   ema_momentum: 0.99
170 |   temperature: 0.07
171 |   action_dim: 32
172 | 
173 |   # for learning the immediate reward (online q_learning)
174 |   logit_temperature: 1
175 |   avg_instruct_nodes: false
176 |   num_heads: 4
177 |   adv_factor: 10.
178 |   no_state_obs: false  # blind model, zero the input states (but keep reward history / states to value approximation if any)
179 |   label_smoothing: 0
180 |   dense_label: false
181 |   type_graph: false  # in shallow layers, only encode the subgraph of type nodes
182 |   random_mixup: false
183 |   loss_mixup_coef: 0
184 |   norm_for_cls: False
185 |   action_histogram_steps: 0  # it is for RL online
186 |   action_histogram_for_values: false


--------------------------------------------------------------------------------
/rlcompopt/cl/data_socket.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import os
  8 | import socket
  9 | import sqlite3
 10 | from itertools import islice
 11 | from time import sleep
 12 | from typing import Dict, List
 13 | 
 14 | import zmq
 15 | 
 16 | with open(os.path.join(os.path.dirname(__file__), "database_socket.sql")) as f:
 17 |     DB_CREATION_SCRIPT = f.read()
 18 | 
 19 | 
 20 | class Server:
 21 |     def __init__(self) -> None:
 22 |         self.conn: zmq.Socket = None
 23 |         context = zmq.Context()
 24 |         socket = context.socket(zmq.PUB)
 25 |         port = socket.bind_to_random_port("tcp://*")
 26 |         self.ip = get_ip()
 27 |         self.addr = (self.ip, port)
 28 |         self.conn = socket
 29 |         self.setup_db()
 30 | 
 31 |     def setup_db(self):
 32 |         db_connection = sqlite3.connect(self.socket_db, timeout=1200)
 33 |         cursor = db_connection.cursor()
 34 |         cursor.executescript(DB_CREATION_SCRIPT)
 35 |         db_connection.commit()
 36 |         cursor.execute("INSERT INTO Socket VALUES (?, ?)", self.addr)
 37 |         db_connection.commit()
 38 | 
 39 | 
 40 | class DataServer(Server):
 41 |     def __init__(
 42 |         self,
 43 |         socket_db: str,
 44 |     ):
 45 |         self.socket_db = socket_db
 46 |         super().__init__()
 47 |         self.num_sent = 0
 48 |         self.num_recv = 0
 49 | 
 50 |     def send_pyobj(self, *args, **kwargs):
 51 |         self.conn.send_pyobj(*args, **kwargs)
 52 |         self.num_sent += 1
 53 |         # return self.check_receipt()
 54 | 
 55 |     def _check_receipt(self):
 56 |         try:
 57 |             self.num_recv = int(self.conn.recv_string(flags=zmq.NOBLOCK))
 58 |         except Exception:
 59 |             pass
 60 |         return self.num_sent - self.num_recv
 61 | 
 62 | 
 63 | class DataClient:
 64 |     def __init__(
 65 |         self,
 66 |         socket_db: str,
 67 |         client_rank: int,
 68 |         num_servers: int,
 69 |         num_clients: int,
 70 |     ) -> None:
 71 |         assert num_servers >= num_clients
 72 |         self.socket_db = socket_db
 73 |         self.addr = []
 74 |         self.num_conn = num_servers // num_clients
 75 |         assert self.num_conn * num_clients == num_servers
 76 |         self.client_rank = client_rank
 77 |         self.num_clients = num_clients
 78 |         context = zmq.Context()
 79 |         self.conn = context.socket(zmq.SUB)
 80 |         # self.conn.setsockopt(zmq.CONFLATE, 1)  # only keep the latest message; must before the connect
 81 |         self.setup_db()
 82 |         self.conn.setsockopt(zmq.SUBSCRIBE, b"")
 83 | 
 84 |     def setup_db(self):
 85 |         db_connection = sqlite3.connect(self.socket_db, timeout=1200)
 86 |         cursor = db_connection.cursor()
 87 |         cursor.executescript(DB_CREATION_SCRIPT)
 88 |         db_connection.commit()
 89 |         for i in range(self.num_conn):
 90 |             rowid = self.client_rank + self.num_clients * i + 1
 91 |             while True:
 92 |                 rec = cursor.execute(
 93 |                     "SELECT ip, port FROM Socket WHERE rowid = ?", (rowid,)
 94 |                 )
 95 |                 result = rec.fetchone()
 96 |                 if result is not None:
 97 |                     ip, port = result
 98 |                     self.conn.connect(f"tcp://{ip}:{port}")
 99 |                     break
100 |                 print("Socket database not yet ready. Waiting...")
101 |                 sleep(5)
102 |             print(f"Connects to tcp://{ip}:{port} at socket #{rowid}")
103 | 
104 |     def serve_data(self):
105 |         # serve data on demand
106 |         # more data are kept in the socket buffer
107 |         try:
108 |             result = self.conn.recv_pyobj(zmq.NOBLOCK)
109 |         except zmq.ZMQError:
110 |             return
111 |         return result
112 | 
113 | 
114 | class DataBuffer:
115 |     """A buffer that stores the received data."""
116 | 
117 |     def __init__(self, key="Transitions", size_idx=6, max_len=20000) -> None:
118 |         self.key = key
119 |         self.size_idx = size_idx
120 |         self.max_len = max_len
121 |         self.buffer = {}
122 |         self.key_id = []
123 | 
124 |     def store(self, data: Dict[str, Dict]):
125 |         curr_keys = self.buffer.keys()
126 |         for k, v in data.items():
127 |             if k == self.key:
128 |                 self.key_id.extend(list(v.keys()))
129 |             if k not in curr_keys:
130 |                 self.buffer[k] = v
131 |             else:
132 |                 self.buffer[k].update(v)
133 | 
134 |         # delete oldest buffer
135 |         if len(self.buffer["States"]) > self.max_len:
136 |             first = list(islice(self.buffer["States"], len(self.buffer["States"]) - self.max_len))
137 |             for ss in first:
138 |                 self.buffer["States"].pop(ss)
139 |         if len(self.buffer["Transitions"]) > self.max_len:
140 |             first = list(islice(self.buffer["Transitions"], len(self.buffer["Transitions"]) - self.max_len))
141 |             for ss in first:
142 |                 self.buffer["Transitions"].pop(ss)
143 |                 if ss in self.key_id:
144 |                     self.key_id.remove(ss)
145 |         if len(self.buffer["Trajectories"]) > self.max_len // 10:
146 |             first = list(islice(self.buffer["Trajectories"], len(self.buffer["Trajectories"]) - self.max_len // 10))
147 |             for ss in first:
148 |                 self.buffer["Trajectories"].pop(ss)
149 | 
150 |     def pop(self):
151 |         if len(self.key_id) == 0:
152 |             return None
153 |         key = self.key_id.pop()  # pop the last element to make the model train on the latest data
154 |         item = self.buffer[self.key].get(key, None)
155 |         if item is None:
156 |             return
157 |         data_size = item[self.size_idx]
158 |         return key, data_size
159 | 
160 |     def get_data(self, key: str, data_ids: List[str]):
161 |         data = self.buffer[key]
162 |         return [data.get(id_, None) for id_ in data_ids]
163 | 
164 | 
165 | def get_ip():
166 |     if "SLURM_JOB_ID" not in os.environ:
167 |         # assume the trainer and the data generator are in the same machine
168 |         return "127.0.0.1"
169 |     s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
170 |     s.settimeout(0)
171 |     try:
172 |         # doesn't even have to be reachable
173 |         s.connect(("10.254.254.254", 1))
174 |         ip = s.getsockname()[0]
175 |     except Exception:
176 |         ip = "127.0.0.1"
177 |     finally:
178 |         s.close()
179 |     return ip
180 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/database_socket.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS Socket (
 8 |   ip TEXT NOT NULL,         -- the ip address of the socket
 9 |   port INTEGER NOT NULL     -- the port number
10 | );
11 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/dataset_statistics.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import gym
 8 | import random
 9 | import numpy as np
10 | import torch
11 | 
12 | def set_all_seeds(seed):
13 |     random.seed(seed)
14 |     np.random.seed(seed)
15 |     torch.manual_seed(seed)
16 |     torch.cuda.manual_seed(seed)
17 |     torch.backends.cudnn.deterministic = True
18 | 
19 | 
20 | def run_agent(job_id: int, benchmark: str, seed: int, args, random_walk=False, check_flag_action_had_no_effect=False):
21 |     """Get some statistics of a single benchmark."""
22 | 
23 |     # Send random seed
24 |     set_all_seeds(11)  # random seed is not working, trajectory is different across runs
25 | 
26 |     with gym.make("llvm-autophase-ic-v0", benchmark=benchmark) as env:
27 |         env.reset()
28 | 
29 |         features = ["Programl", "IrInstructionCountOz", "IrInstructionCount"]
30 |         obs_space = [ env.observation.spaces[feature_name] for feature_name in features ]
31 |         observations, rewards, done, info = env.step(action=[], observation_spaces=obs_space)
32 |         programl = observations[0]  # networkX graph
33 |         graph_info = {"benchmark": benchmark, "#nodes": programl.number_of_nodes(), "#edges": programl.number_of_edges()}
34 |         # print(benchmark, programl.info())
35 |         if random_walk:
36 |             # debug how the graph statistics change during compiler optimization
37 |             i = 0
38 |             print(graph_info, observations[1:])
39 |             n_nodes = [programl.number_of_nodes()]
40 |             while not done:
41 |                 observations, rewards, done, info = env.step(action=env.action_space.sample(), observation_spaces=obs_space)
42 |                 if not info['action_had_no_effect']:
43 |                     programl = observations[0]
44 |                     graph_info = [programl.number_of_nodes(), programl.number_of_edges(), observations[1:]]
45 |                     print(graph_info)
46 |                     n_nodes.append(programl.number_of_nodes())
47 |                     if n_nodes[-1] > n_nodes[-2]:
48 |                         print(env.commandline())
49 |                     i += 1
50 |                     if i > 100:
51 |                         break
52 |         if check_flag_action_had_no_effect:
53 |             # debug the info['action_had_no_effect'] flag
54 |             i = 0
55 |             print(graph_info, observations[1:])
56 |             n_nodes = [(programl.number_of_nodes(), programl.number_of_edges())]
57 |             while not done:
58 |                 observations, rewards, done, info = env.step(action=env.action_space.sample(), observation_spaces=obs_space)
59 |                 programl = observations[0]
60 |                 n_nodes.append((programl.number_of_nodes(), programl.number_of_edges()))
61 |                 print(n_nodes[-1], info['action_had_no_effect'])
62 |                 if info['action_had_no_effect']:
63 |                     if n_nodes[-1] != n_nodes[-2]:
64 |                         print(f"***********************\n{n_nodes[-1]} != {n_nodes[-2]}")
65 | 
66 |     return graph_info
67 | 
68 | 
69 | def get_stat_packed_args(job):
70 |     return run_agent(*job)


--------------------------------------------------------------------------------
/rlcompopt/cl/faster_balanced_sampler.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import logging
  8 | import math
  9 | from typing import Callable, List, Optional, Tuple
 10 | 
 11 | import torch
 12 | from torch.utils.data import Dataset, DistributedSampler
 13 | 
 14 | log = logging.getLogger(__file__)
 15 | 
 16 | 
 17 | class BinPacking:
 18 |     def __init__(
 19 |         self,
 20 |         items: List[Tuple[int, float]],
 21 |         bin_size: float,
 22 |         full_rate: float = 0.9,
 23 |         rank: int = 0,
 24 |         num_replicas: int = 1,
 25 |     ) -> None:
 26 |         """
 27 |         BinPacking puts the items of various weight in bins without exceeding the bin size.
 28 |         It yields the full bin immediately when it is ready.
 29 |         The full bins are distributed to `num_replicas` processes and each process has its own set of bins.
 30 |         The computation is basically the same for each process to avoid process communication, but each process
 31 |         only outputs the bins that belong to itself.
 32 |         """
 33 |         self.rank = rank
 34 |         self.num_replicas = num_replicas
 35 |         self.bin_size = bin_size
 36 |         self.full_size = self.bin_size * full_rate
 37 |         self.items = items
 38 |         self.bins = []
 39 |         self.loaded_bins = []
 40 |         self.loaded_weight = []  # this is for recording total weight in self.bins
 41 |         self.next_bin_idx = rank  # only outputs the bin for this process
 42 | 
 43 |     def __iter__(self):
 44 |         for idx, weight in self.items:
 45 |             found = False
 46 |             for i, bin_ in enumerate(self.bins):
 47 |                 tmp = self.loaded_weight[i] + weight
 48 |                 if tmp <= self.bin_size:
 49 |                     # fits into this bin, add it to bin
 50 |                     bin_.append(idx)
 51 |                     self.loaded_weight[i] = tmp
 52 |                     found = True
 53 |                     if tmp >= self.full_size:
 54 |                         # this bin reachs its full size, so move it to the list of loaded bins and yield it
 55 |                         self.loaded_bins.append(bin_)
 56 |                         if self.next_bin_idx < len(self.loaded_bins):
 57 |                             yield self.loaded_bins[self.next_bin_idx]
 58 |                             self.next_bin_idx += self.num_replicas
 59 |                         del self.bins[i]
 60 |                         del self.loaded_weight[i]
 61 |                     break
 62 |             if not found:
 63 |                 # does not fit to any bins, put it to a new bin
 64 |                 self.bins.append([idx])
 65 |                 self.loaded_weight.append(weight)
 66 |         num_bins = len(self.loaded_bins) // self.num_replicas
 67 |         num_last_loaded_bins = len(self.loaded_bins) - num_bins * self.num_replicas
 68 |         # check if the number of batches is divisible by the number of processes
 69 |         if len(self.loaded_bins) == 0 or (
 70 |             num_last_loaded_bins > 0 and self.rank >= num_last_loaded_bins
 71 |         ):
 72 |             # this process has not yet output its own last batch
 73 |             padding_size = self.num_replicas - num_last_loaded_bins - len(self.bins)
 74 |             if padding_size > 0:
 75 |                 # pad bins to self.bins
 76 |                 pad_batches = (
 77 |                     self.loaded_bins + self.bins
 78 |                 )  # prioritize padding with full bins
 79 |                 if padding_size <= len(pad_batches):
 80 |                     self.bins += pad_batches[:padding_size]
 81 |                 else:
 82 |                     self.bins += (
 83 |                         pad_batches * math.ceil(padding_size / len(pad_batches))
 84 |                     )[:padding_size]
 85 |             self.loaded_bins += self.bins[: self.num_replicas - num_last_loaded_bins]
 86 |             yield self.loaded_bins[self.next_bin_idx]
 87 | 
 88 | 
 89 | class BalancedBatchSampler(DistributedSampler):
 90 |     def __init__(
 91 |         self,
 92 |         dataset: Dataset,
 93 |         max_size: int,
 94 |         full_rate: float,
 95 |         size_func: Callable,
 96 |         num_replicas: Optional[int] = None,
 97 |         rank: Optional[int] = None,
 98 |         shuffle: bool = True,
 99 |         seed: int = 0,
100 |         drop_last: bool = False,
101 |     ) -> None:
102 |         super().__init__(dataset, num_replicas, rank, shuffle, seed, drop_last)
103 |         self.max_size = max_size
104 |         self.full_rate = full_rate
105 |         self.datasize = [
106 |             size_func(d) for d in self.dataset if size_func(d) <= self.max_size
107 |         ]
108 |         assert len(self.datasize) == len(
109 |             self.dataset
110 |         ), "Some items have size exceeding the max_size"
111 | 
112 |     def __iter__(self):
113 |         if self.shuffle:
114 |             # deterministically shuffle based on epoch and seed
115 |             g = torch.Generator()
116 |             g.manual_seed(self.seed + self.epoch)
117 |             indices = torch.randperm(len(self.dataset), generator=g).tolist()  # type: ignore[arg-type]
118 |         else:
119 |             indices = list(range(len(self.dataset)))  # type: ignore[arg-type]
120 | 
121 |         datasize = [(idx, self.datasize[idx]) for idx in indices]
122 |         bin_pack = BinPacking(
123 |             datasize, self.max_size, self.full_rate, self.rank, self.num_replicas
124 |         )
125 | 
126 |         return bin_pack.__iter__()
127 | 
128 |     def __len__(self):
129 |         # The length of BalancedBatchSampler is undefined as it could change
130 |         if not hasattr(self, "_len_batches"):
131 |             self._len_batches = 0
132 |             idx = []
133 |             for b in self.__iter__():
134 |                 self._len_batches += 1
135 |                 idx.extend(b)
136 |             idx = set(idx)
137 |             num_covered = len(idx & set(range(len(self.dataset))))
138 |             log.info(
139 |                 f"this iteration in a single process covers {num_covered}/{len(self.dataset)} ({num_covered/len(self.dataset):.2%}) datapoints"
140 |             )
141 |         return self._len_batches
142 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/merge.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | from glob import glob
 8 | import hydra
 9 | import os
10 | 
11 | from rlcompopt.env_wrapper.wrapper_offline import merge_dbs
12 | 
13 | import logging
14 | log = logging.getLogger(__file__)
15 | 
16 | 
17 | @hydra.main(config_path="./conf", config_name="merge.yaml")
18 | def main(args):
19 |     """Merge input databases into a single output database."""
20 |     inputs_list = []
21 |     output = args.output
22 |     tables = args.tables
23 |     for name in args.inputs.split(","):
24 |         if not name.startswith("/") and not name.startswith("."):
25 |             name = os.path.join(args.root, name)
26 |         if not name.endswith(".db"):
27 |             name = os.path.join(name + args.suffix, "summary.db")
28 |         inputs_list.extend(list(glob(name)))
29 | 
30 |     if not output.startswith("/") and not output.startswith("."):
31 |         output = os.path.join(args.root, output)
32 | 
33 |     logging.info(f"Input lists: {inputs_list}")
34 |     logging.info(f"Output: {output}")
35 | 
36 |     tables = tables.split(",") if tables is not None else [] 
37 |     merge_dbs(inputs_list, output, tables=tables)
38 | 
39 | 
40 | if __name__ == "__main__":
41 |     main()


--------------------------------------------------------------------------------
/rlcompopt/cl/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
3 | 
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | 
7 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/models/conv.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | # Borrowed code from https://github.com/snap-stanford/ogb/blob/master/examples/graphproppred/code2/conv.py
 8 | 
 9 | import torch
10 | from torch_geometric.nn import MessagePassing
11 | import torch.nn.functional as F
12 | from torch_geometric.utils import degree
13 | 
14 | 
15 | ### GIN convolution along the graph structure
16 | class GINConv(MessagePassing):
17 |     def __init__(self, in_channels=None, edge_dim=None, aggr=None, **kwargs):
18 |         '''
19 |         emb_dim (int): node embedding dimensionality
20 |         '''
21 |         emb_dim = in_channels
22 | 
23 |         super(GINConv, self).__init__(aggr = aggr)
24 | 
25 |         self.mlp = torch.nn.Sequential(torch.nn.Linear(emb_dim, 2*emb_dim), torch.nn.BatchNorm1d(2*emb_dim), torch.nn.ReLU(), torch.nn.Linear(2*emb_dim, emb_dim))
26 |         self.eps = torch.nn.Parameter(torch.Tensor([0]))
27 | 
28 |         self.edge_dim = edge_dim
29 |         if self.edge_dim is not None:
30 |             self.edge_encoder = torch.nn.Linear(edge_dim, emb_dim)
31 | 
32 |     def forward(self, x, edge_index, edge_attr):
33 |         edge_embedding = None
34 |         if self.edge_dim is not None:
35 |             edge_embedding = self.edge_encoder(edge_attr)
36 |         out = self.mlp((1 + self.eps) *x + self.propagate(edge_index, x=x, edge_attr=edge_embedding))
37 | 
38 |         return out
39 | 
40 |     def message(self, x_j, edge_attr):
41 |         if edge_attr is not None:
42 |             x_j = x_j + edge_attr
43 |         return F.relu(x_j)
44 | 
45 |     def update(self, aggr_out):
46 |         return aggr_out
47 | 
48 | 
49 | ### GCN convolution along the graph structure
50 | class GCNConv(MessagePassing):
51 |     def __init__(self, in_channels=None, edge_dim=None, aggr=None, **kwargs):
52 |         super(GCNConv, self).__init__(aggr=aggr)
53 |         emb_dim = in_channels
54 | 
55 |         self.linear = torch.nn.Linear(emb_dim, emb_dim)
56 |         self.root_emb = torch.nn.Embedding(1, emb_dim)
57 | 
58 |         self.edge_dim = edge_dim
59 |         if self.edge_dim is not None:
60 |             self.edge_encoder = torch.nn.Linear(edge_dim, emb_dim)
61 | 
62 |     def forward(self, x, edge_index, edge_attr):
63 |         x = self.linear(x)
64 |         edge_embedding = None
65 |         if self.edge_dim is not None:
66 |             edge_embedding = self.edge_encoder(edge_attr)
67 | 
68 |         row, col = edge_index
69 | 
70 |         #edge_weight = torch.ones((edge_index.size(1), ), device=edge_index.device)
71 |         deg = degree(row, x.size(0), dtype = x.dtype) + 1
72 |         deg_inv_sqrt = deg.pow(-0.5)
73 |         deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
74 | 
75 |         norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
76 | 
77 |         return self.propagate(edge_index, x=x, edge_attr = edge_embedding, norm=norm) + F.relu(x + self.root_emb.weight) * 1./deg.view(-1,1)
78 | 
79 |     def message(self, x_j, edge_attr, norm):
80 |         if edge_attr is not None:
81 |             x_j = x_j + edge_attr
82 |         return norm.view(-1, 1) * F.relu(x_j)
83 | 
84 |     def update(self, aggr_out):
85 |         return aggr_out
86 | 
87 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/models/edge_attn.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch_scatter
 10 | from torch import Tensor, LongTensor
 11 | 
 12 | 
 13 | def indexing(x: Tensor, idx: LongTensor):
 14 |     assert idx.ndim == 1
 15 |     return torch.index_select(x, 0, idx)
 16 | 
 17 | 
 18 | class EdgeEncoding(nn.Module):
 19 |     r"""
 20 |     Edge encoding that encodes edge types, edge positions,
 21 |     and block differences (whether in the same basic block, 
 22 |     positional differences in the basic block).
 23 |     """
 24 | 
 25 |     def __init__(
 26 |         self,
 27 |         edge_dim: int,
 28 |         num_edge_types: int = 4,
 29 |         max_edge_positions: int = 32,
 30 |         max_blk_diff: int = 32,
 31 |     ):
 32 |         super().__init__()
 33 |         self.edge_dim = edge_dim
 34 |         self.max_edge_positions = max_edge_positions - 1
 35 |         self.max_blk_diff = max_blk_diff
 36 |         self.edge_type_enc = nn.Embedding(num_edge_types, edge_dim)
 37 |         self.edge_pos_enc = nn.Embedding(max_edge_positions, edge_dim)
 38 |         self.blk_diff = nn.Embedding(3, edge_dim)  # before/same/after block
 39 |         # self.blk_enc = nn.Embedding(max_blk_diff * 2 + 1, edge_dim)
 40 | 
 41 |     def get_relative_pos(self, idx0, idx1, max_diff):
 42 |         diff = idx0 - idx1
 43 |         diff.clamp_(-max_diff, max_diff).add_(max_diff)
 44 |         return diff.detach()
 45 | 
 46 |     def get_sign(self, idx0, idx1):
 47 |         diff = idx0 - idx1
 48 |         sign = diff.sign().long() + 1
 49 |         return sign.detach()
 50 | 
 51 |     def forward(self, edge_types, edge_pos, block_idx, block_pos=None):
 52 |         assert edge_types.ndim == edge_pos.ndim == 1
 53 |         assert block_idx.ndim == 2 and block_idx.shape[0] == 2
 54 |         if block_pos is not None:
 55 |             assert block_pos.ndim == 2 and block_pos.shape[0] == 2
 56 | 
 57 |         type_emb = self.edge_type_enc(edge_types)
 58 | 
 59 |         edge_pos = edge_pos.clone()
 60 |         edge_pos.clamp_(0, self.max_edge_positions)
 61 |         pos_emb = self.edge_pos_enc(edge_pos)
 62 | 
 63 |         block_diff = self.get_sign(block_idx[0], block_idx[1])
 64 |         block_d = self.blk_diff(block_diff)
 65 |         # same_blk = block_diff == 1
 66 | 
 67 |         # blk_pos_diff = self.get_relative_pos(block_pos[0], block_pos[1], self.max_blk_diff)
 68 |         # blk_pos_diff = blk_pos_diff[same_blk]  # only encode if in the same block
 69 |         # blk_pos_emb = self.blk_enc(blk_pos_diff)
 70 | 
 71 |         edge_emb = type_emb + pos_emb + block_d
 72 |         # edge_emb = edge_emb.clone()
 73 |         # edge_emb[same_blk] += blk_pos_emb
 74 |         return edge_emb
 75 | 
 76 | 
 77 | class EdgeAttn(nn.Module):
 78 |     r"""
 79 |     A graph neural network with node-edge-node basic computation blocks.
 80 |     """
 81 | 
 82 |     def __init__(
 83 |         self,
 84 |         out_channels: int,
 85 |         edge_dim: int,
 86 |         bias: bool = True,
 87 |         num_heads: int = 1,
 88 |         zero_edge_emb: bool = False,
 89 |         **kwargs,
 90 |     ):
 91 |         super().__init__()
 92 | 
 93 |         self.out_channels = out_channels
 94 |         self.edge_dim = edge_dim
 95 |         concat_dim = 2 * out_channels + edge_dim
 96 |         out_dim = concat_dim + num_heads * 2
 97 |         self.num_heads = num_heads
 98 |         self.head_dim = out_channels // num_heads
 99 |         self.zero_edge_emb = zero_edge_emb
100 |         assert self.head_dim * num_heads == out_channels
101 | 
102 |         self.attn = Mlp(concat_dim, concat_dim, out_dim, bias=bias)
103 |         # self.attn = nn.Linear(concat_dim, out_dim, bias=bias)
104 |         self.node_mlp = Mlp(out_channels, out_channels)
105 |         self.edge_mlp = Mlp(edge_dim, edge_dim)
106 |         self.node_norm0 = nn.LayerNorm(out_channels)
107 |         self.edge_norm0 = nn.LayerNorm(edge_dim)
108 |         self.node_norm1 = nn.LayerNorm(out_channels)
109 |         self.edge_norm1 = nn.LayerNorm(edge_dim)
110 | 
111 |         self.reset_parameters()
112 | 
113 |     def reset_parameters(self):
114 |         self.apply(self._init_weights)
115 | 
116 |     def _init_weights(self, m):
117 |         if isinstance(m, nn.Linear):
118 |             nn.init.trunc_normal_(m.weight, std=0.02)
119 |             if isinstance(m, nn.Linear) and m.bias is not None:
120 |                 nn.init.constant_(m.bias, 0)
121 |         elif isinstance(m, nn.LayerNorm):
122 |             nn.init.constant_(m.bias, 0)
123 |             nn.init.constant_(m.weight, 1.0)
124 | 
125 |     def forward(
126 |         self, x: Tensor, edge_index: LongTensor, edge_attr: Tensor = None, **kwargs
127 |     ) -> Tensor:
128 |         r"""
129 |         Before sending a graph to this module, the type nodes/edges should be processed separately,
130 |         and the call edges should be removed.
131 |         """
132 |         shortcut = x
133 |         x = self.node_norm0(x)
134 |         if self.zero_edge_emb:
135 |             edge_attr = edge_attr * 0
136 |         shortcut_e = edge_attr
137 |         edge_attr = self.edge_norm0(edge_attr)
138 |         # TODO: add self loop?
139 |         src_idx = edge_index[0]
140 |         tgt_idx = edge_index[1]
141 |         src = indexing(x, src_idx)
142 |         tgt = indexing(x, tgt_idx)
143 | 
144 |         node_pair_edge = torch.cat([src, tgt, edge_attr], dim=1)
145 |         node_pair_edge = self.attn(node_pair_edge)
146 |         raw_attn0, raw_attn1, node_s, node_t, edges = torch.split(
147 |             node_pair_edge, 
148 |             [self.num_heads, self.num_heads, self.out_channels, self.out_channels, self.edge_dim],
149 |             dim=1
150 |         )
151 |         raw_attn = torch.cat([raw_attn0, raw_attn1], dim=0)  # [2 * num_edges, num_heads]
152 |         e_idx = torch.cat([src_idx, tgt_idx], dim=0)
153 |         nodes = torch.cat([node_s, node_t], dim=0).view(-1, self.num_heads, self.head_dim)  # [2 * num_edges, num_heads, head_dim]
154 | 
155 |         attn = torch_scatter.scatter_softmax(raw_attn, e_idx, dim=0)  # [2 * num_edges, num_heads]
156 |         nodes = nodes * attn.unsqueeze(-1)
157 |         nodes = nodes.view(-1, self.out_channels)  # [2 * num_edges, out_channels]
158 | 
159 |         new_nodes = torch_scatter.scatter_add(nodes, e_idx, dim=0, dim_size=x.shape[0])  # [num_nodes, out_channels]
160 |         # TODO: residual connection?
161 | 
162 |         new_nodes = self.node_mlp(self.node_norm1(new_nodes))
163 |         new_edges = self.edge_mlp(self.edge_norm1(edges))
164 | 
165 |         new_nodes = new_nodes + shortcut
166 |         new_edges = new_edges + shortcut_e
167 |         if self.zero_edge_emb:
168 |             new_edges = new_edges * 0
169 | 
170 |         return new_nodes, new_edges
171 | 
172 | 
173 | class Mlp(nn.Module):
174 |     def __init__(
175 |         self,
176 |         in_features,
177 |         hidden_features=None,
178 |         out_features=None,
179 |         act_layer=nn.GELU,
180 |         bias=True,
181 |     ):
182 |         super().__init__()
183 |         hidden_features = hidden_features or in_features
184 |         self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
185 |         self.act = act_layer()
186 |         self.fc2 = None
187 |         if out_features is not None:
188 |             self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
189 | 
190 |     def forward(self, x):
191 |         x = self.fc1(x)
192 |         x = self.act(x)
193 |         if self.fc2 is not None:
194 |             x = self.fc2(x)
195 |         return x
196 | 


--------------------------------------------------------------------------------
/rlcompopt/cl/models/math_utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | 
10 | # TODO: rewrite this? Just write one for numpy one for pytorch...
11 | def rescale(x, eps=1e-3):
12 |     sign = get_sign(x)
13 |     x_abs = get_abs(x)
14 |     if isinstance(x, np.ndarray):
15 |         return sign * (np.sqrt(x_abs + 1) - 1) + eps * x
16 |     else:
17 |         return sign * ((x_abs + 1).sqrt() - 1) + eps * x
18 | 
19 | 
20 | def inv_rescale(x, eps=1e-3):
21 |     sign = get_sign(x)
22 |     x_abs = get_abs(x)
23 |     if eps == 0:
24 |         return sign * (x * x + 2.0 * x_abs)
25 |     else:
26 |         return sign * (
27 |         (
28 |             ((1.0 + 4.0 * eps * (x_abs + 1.0 + eps)).sqrt() - 1.0) / (2.0 * eps)
29 |         ).pow(2)
30 |         - 1.0
31 |     )
32 | 
33 | def get_sign(x):
34 |     if isinstance(x, np.ndarray):
35 |         return np.sign(x)
36 |     elif isinstance(x, torch.Tensor):
37 |         return x.sign()
38 |     else:
39 |         raise NotImplementedError(f"Data type: {type(x)} is not implemented")
40 | 
41 | def get_abs(x):
42 |     if isinstance(x, np.ndarray):
43 |         return np.abs(x)
44 |     elif isinstance(x, torch.Tensor):
45 |         return x.abs()
46 |     else:
47 |         raise NotImplementedError(f"Data type: {type(x)} is not implemented")


--------------------------------------------------------------------------------
/rlcompopt/cl/models/model_utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import pickle
 8 | import sqlite3
 9 | 
10 | 
11 | def load_model(model, load_model_db, model_rowid=None):
12 |     con = sqlite3.connect(load_model_db, timeout=60)
13 |     cursor = con.cursor()
14 |     try:
15 |         if model_rowid is not None:
16 |             rec = list(
17 |                 cursor.execute(
18 |                     f"SELECT rowid, * FROM Models where rowid = {model_rowid}"
19 |                 )
20 |             )
21 |         else:
22 |             rec = list(
23 |                 cursor.execute(
24 |                     "SELECT rowid, * FROM Models ORDER BY rowid DESC LIMIT 1"
25 |                 )
26 |             )
27 |     except sqlite3.OperationalError:
28 |         print("Failed to load model from database.")
29 |         return
30 |     finally:
31 |         con.close()
32 | 
33 |     rowid, config, kwargs, state_dict, state_dict_ema = rec[0]
34 | 
35 |     state_dict = pickle.loads(state_dict)
36 |     state_dict_ema = pickle.loads(state_dict_ema)
37 |     if state_dict_ema is not None:
38 |         state_dict = state_dict_ema
39 | 
40 |     msg = model.load_state_dict(state_dict)
41 | 
42 |     print(f"Initialized model with the checkpoint from database {load_model_db} row {rowid}: {msg}")


--------------------------------------------------------------------------------
/rlcompopt/cl/repr_queue.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import torch
 8 | import torch.nn as nn
 9 | import numpy as np
10 | from typing import Dict, List, Tuple
11 | 
12 | 
13 | class ReprQueue:
14 |     """
15 |     A queue holding the representations from the EMA encoder, as in MoCo:
16 |     https://arxiv.org/abs/1911.05722
17 |     """
18 |     def __init__(self, dim=64, K=1000, min_K=50):
19 | 
20 |         self.K = K  # number of trajectories to store in buffer
21 |         self.min_K = min_K  # minimum number of trajectorie to enable training
22 | 
23 |         self.buffer = torch.zeros(0, dim)
24 |         self.traj_batch_idx = [0]  # storing the starting and ending idx of each chunk of trajectory representations, len == num_traj + 1
25 |         self.traj_ids = []  # a list that maps traj_id to an idx, where traj_batch_idx[idx] = the starting index of the traj buffer in self.buffer
26 | 
27 |     @torch.no_grad()
28 |     def collate_fn(self, trajs: List[Tuple[str, int, int, torch.Tensor]]):
29 |         """
30 |         Args:
31 |             trajs: a list of [traj_id, current_repr_idx, next_repr_idx, repr]
32 |         """
33 | 
34 |         # remove some traj buffer if the new buffer is going to be too long
35 |         this_traj_ids = set([tid for tid, *_ in trajs] + self.traj_ids)
36 |         num_rm = max(0, len(this_traj_ids) - self.K)
37 |         if num_rm > 0:
38 |             new_start = self.traj_batch_idx[num_rm]
39 |             traj_batch_idx = np.array(self.traj_batch_idx[num_rm:], dtype=np.int32)
40 |             self.traj_batch_idx = (traj_batch_idx - new_start).tolist()  # traj_batch_idx starts from 0
41 |             self.buffer = self.buffer[new_start:]
42 |             self.traj_ids = self.traj_ids[num_rm:]
43 | 
44 |         curr = []  # holding the idx of the current repr in the buffer
45 |         next_ = []  # holding the idx of the next repr in the buffer
46 | 
47 |         new_buffer = [self.buffer]
48 |         new_idx = self.traj_batch_idx
49 | 
50 |         for traj_id, current_repr_idx, next_repr_idx, repr_ in trajs:
51 |             if traj_id not in self.traj_ids:
52 |                 bz = repr_.shape[0]
53 |                 starting_idx = new_idx[-1]
54 |                 new_idx.append(starting_idx + bz)
55 |                 new_buffer.append(repr_)
56 |                 self.traj_ids.append(traj_id)
57 |             else:
58 |                 idx = self.traj_ids.index(traj_id)
59 |                 starting_idx = new_idx[idx]
60 |             # starting_idx of the current buffer
61 |             curr.append(current_repr_idx + starting_idx)
62 |             next_.append(next_repr_idx + starting_idx)
63 | 
64 |         self.buffer = torch.cat(new_buffer, dim=0)
65 |         if len(self.traj_ids) >= self.min_K:
66 |             current_state_idx = torch.tensor(curr, dtype=torch.long)
67 |             next_state_idx = torch.tensor(next_, dtype=torch.long)
68 |             return self.buffer.clone(), current_state_idx, next_state_idx
69 |         return None
70 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
3 | 
4 | # This source code is licensed under the MIT license found in the
5 | # LICENSE file in the root directory of this source tree.
6 | 
7 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/database_model.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS Models (
 8 |   config BLOB,                  -- config for initializing the model with hydra
 9 |   kwargs BLOB,                  -- kwargs for initializing the model
10 |   state_dict BLOB NOT NULL,     -- the state dict
11 |   ema_state_dict BLOB           -- the state dict of the ema model
12 | );
13 | 
14 | CREATE TABLE IF NOT EXISTS Performance (
15 |   model_id INTEGER NOT NULL,    -- the rowid of the model being evaluated
16 |   split_tag TEXT NOT NULL,      -- dataset split
17 |   total_metric REAL NOT NULL,   -- total percent improvement over Oz
18 |   mean_metric REAL NOT NULL,    -- average percent improvement over Oz
19 |   table_rows BLOB NOT NULL,     -- the table rows that can be put into tabulate.tabulate
20 |   table_str TEXT NOT NULL       -- the str of tabulate.tabulate(rows)
21 | );
22 | 
23 | CREATE TABLE IF NOT EXISTS ValBest (
24 |   model_id INTEGER NOT NULL    -- the rowid of the model with smallest val loss
25 | );
26 | 
27 | CREATE TABLE IF NOT EXISTS Signal (
28 |   done INTEGER
29 | );


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/database_schema.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS States (
 8 |   benchmark_uri TEXT NOT NULL,         -- The URI of the benchmark.
 9 |   state_id TEXT NOT NULL UNIQUE,              -- 40-char sha1.
10 |   dgl_graph BLOB NOT NULL,             -- dgl graph, in pickle format, need to use zlib.decompress()
11 |   done INTEGER NOT NULL,               -- 0 = False, 1 = True.
12 |   actions TEXT NOT NULL,               -- Decode: [int(x) for x in field.split()]
13 |   PRIMARY KEY (benchmark_uri, actions),
14 |   FOREIGN KEY (state_id) REFERENCES Transitions(state_id) ON UPDATE CASCADE
15 | );
16 | 
17 | CREATE TABLE IF NOT EXISTS Transitions (
18 |     state_id TEXT NOT NULL,            -- 40-char sha1.
19 |     action_value TEXT NOT NULL,
20 |     next_state_id TEXT NOT NULL,            -- 40-char sha1.
21 |     done INTEGER NOT NULL,               -- 0 = False, 1 = True.
22 |     ir_instruction_count INTEGER NOT NULL,
23 |     ir_instruction_count_oz_reward REAL NULLABLE,
24 |     cumulative_reward REAL NULLABLE,
25 |     autophase TEXT NOT NULL,                    -- Decode: np.array([int(x) for x in field.split()], dtype=np.int64)
26 |     instcount TEXT NOT NULL,                    -- Decode: np.array([int(x) for x in field.split()], dtype=np.int64)
27 |     PRIMARY KEY (state_id, next_state_id)
28 | );
29 | 
30 | CREATE TABLE IF NOT EXISTS Vocabs (
31 |     token TEXT NOT NULL UNIQUE, 
32 |     PRIMARY KEY (token)
33 | );
34 | 
35 | -- The vocabulary used for encoding graphs.
36 | CREATE TABLE IF NOT EXISTS VocabsForEncoding (
37 |     token TEXT NOT NULL UNIQUE, 
38 |     PRIMARY KEY (token)
39 | );
40 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/database_schema2.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS States (
 8 |   benchmark_uri TEXT NOT NULL,         -- The URI of the benchmark.
 9 |   state_id TEXT NOT NULL UNIQUE,              -- 40-char sha1.
10 |   dgl_graph BLOB NOT NULL,             -- dgl graph, in pickle format, need to use zlib.decompress()
11 |   done INTEGER NOT NULL,               -- 0 = False, 1 = True.
12 |   actions TEXT NOT NULL,               -- Decode: [int(x) for x in field.split()]
13 |   IrInstructionCount INTEGER NOT NULL, 
14 |   PRIMARY KEY (benchmark_uri, actions),
15 |   FOREIGN KEY (state_id) REFERENCES Transitions(state_id) ON UPDATE CASCADE
16 | );
17 | 
18 | CREATE TABLE IF NOT EXISTS Transitions (
19 |     state_id TEXT NOT NULL,            -- 40-char sha1.
20 |     traj_id TEXT NOT NULL,             -- 45-char: starting state's sha1 + 5 random char
21 |     traj_step INTEGER,                 -- length of the traj, == total transition pairs (this number could be larger if the end of traj is cut off)
22 |     action_value TEXT NOT NULL,
23 |     next_state_id TEXT NOT NULL,            -- 40-char sha1.
24 |     done INTEGER NOT NULL,               -- 0 = False, 1 = True.
25 |     ir_instruction_count INTEGER NOT NULL,  -- raw ir_instruction_count of the next state
26 |     ir_instruction_count_reward INTEGER NOT NULL,   -- next state's ir_instruction_count reduction from this state
27 |     ir_instruction_count_oz_reward REAL NULLABLE,
28 |     cumulative_reward REAL NULLABLE,
29 |     cumulative_reward2 REAL NULLABLE,               -- cumulative ir_instruction_count_reward
30 |     action_had_no_effect INTEGER NOT NULL,   -- 0 = False, 1 = True. Note: there maybe a bug in compiler_gym: action_had_no_effect can be 0 (meaning the action has effect) even though state_id and next_state_id are the same
31 |     autophase TEXT NOT NULL,                    -- Decode: np.array([int(x) for x in field.split()], dtype=np.int64)
32 |     instcount TEXT NOT NULL,                    -- Decode: np.array([int(x) for x in field.split()], dtype=np.int64)
33 |     PRIMARY KEY (traj_id, traj_step)
34 | );
35 | 
36 | CREATE TABLE IF NOT EXISTS Trajectories (
37 |     traj_id TEXT NOT NULL UNIQUE, 
38 |     traj_length INTEGER,
39 |     PRIMARY KEY (traj_id)
40 | );
41 | 
42 | CREATE TABLE IF NOT EXISTS Vocabs (
43 |     token TEXT NOT NULL UNIQUE, 
44 |     PRIMARY KEY (token)
45 | );
46 | 
47 | -- The vocabulary used for encoding graphs.
48 | CREATE TABLE IF NOT EXISTS VocabsForEncoding (
49 |     token TEXT NOT NULL UNIQUE, 
50 |     PRIMARY KEY (token)
51 | );
52 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/database_schema4.sql:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | CREATE TABLE IF NOT EXISTS States (
 8 |     benchmark_uri TEXT NOT NULL,         -- The URI of the benchmark.
 9 |     state_id TEXT NOT NULL UNIQUE,       -- 40-char sha1
10 |     graph BLOB,                          -- pyg graph, in pickle format, need to use zlib.decompress()
11 |     autophase TEXT,                      -- Decode: np.array([int(x) for x in field.split()], dtype=np.int64)
12 |     IrInstructionCount INTEGER NOT NULL, -- raw ir_instruction_count of the this state
13 |     num_nodes INTEGER NOT NULL
14 | );
15 | 
16 | CREATE TABLE IF NOT EXISTS Transitions (
17 |     benchmark_uri TEXT NOT NULL,
18 |     state_id TEXT NOT NULL,
19 |     action_value INTEGER NOT NULL,     -- the action performed
20 |     reward REAL NOT NULL,              -- the reward received immediately after the action
21 |     cumulative_reward REAL NOT NULL,   -- discounted cumulative reward
22 |     next_state_id TEXT NOT NULL,       -- the observed state_id after applying the action
23 |     num_nodes INTEGER NOT NULL,
24 |     traj_id TEXT NOT NULL,
25 |     traj_step INTEGER NOT NULL,
26 |     advantage REAL,                    -- the advantage associated with the state-action
27 |     logp REAL,                         -- log probability of selecting the action_value
28 |     time_stamp REAL                    -- the time stamp of the creation of this transition
29 | );
30 | 
31 | CREATE TABLE IF NOT EXISTS Trajectories (
32 |     traj_id TEXT NOT NULL UNIQUE,      -- 45-char: starting state's sha1 + 5 random char
33 |     benchmark_uri TEXT NOT NULL,
34 |     state_ids TEXT NOT NULL,           -- the sequence of all observed state id [n+1]
35 |     actions TEXT NOT NULL,             -- the sequence of actions. Decode: [int(x) for x in field.split()] [n]
36 |     rewards BLOB NOT NULL,             -- the rewards (IrInstructionCount / ir_current) received immediately after each action [n]
37 |     graph_repr BLOB                    -- the representation of each observed state [n+1]
38 | );
39 | 
40 | CREATE TABLE IF NOT EXISTS TrainerProgress (
41 |     read_rows INTEGER NOT NULL,        -- the number of rows the trainer has read
42 |     num_rows_per_iter REAL NOT NULL    -- an estimate of number of rows read per trainer iteration
43 | );
44 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/merge_db.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import sqlite3
 8 | import os
 9 | import sys
10 | 
11 | 
12 | def merge_databases(db1, db2):
13 |     con3 = sqlite3.connect(db1)
14 | 
15 |     con3.execute("ATTACH '" + db2 +  "' as dba")
16 | 
17 |     con3.execute("BEGIN")
18 |     for row in con3.execute("SELECT * FROM dba.sqlite_master WHERE type='table'"):
19 |         combine = "INSERT OR IGNORE INTO "+ row[1] + " SELECT * FROM dba." + row[1]
20 |         print(combine)
21 |         con3.execute(combine)
22 |     con3.commit()
23 |     con3.execute("detach database dba")
24 | 
25 | 
26 | def read_files(directory):
27 |     fname = []
28 |     for root,d_names,f_names in os.walk(directory):
29 |         for f in f_names:
30 |             c_name = os.path.join(root, f)
31 |             filename, file_extension = os.path.splitext(c_name)
32 |             if (file_extension == '.db'):
33 |                 fname.append(c_name)
34 | 
35 |     return fname
36 | 
37 | def batch_merge(directory, db_to_merge):
38 |     db_files = read_files(directory)
39 |     for db_file in db_files:
40 |         merge_databases(db_to_merge, db_file)
41 | 
42 | 
43 | def main():
44 |     dir_ = sys.argv[1]
45 |     db_to = sys.argv[2]
46 |     batch_merge(dir_, db_to)
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     main()
51 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/parsing_utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import sqlite3
  8 | 
  9 | import dgl
 10 | 
 11 | DB_CREATION_SCRIPT = """
 12 | CREATE TABLE IF NOT EXISTS Vocabs (
 13 |     token TEXT NOT NULL UNIQUE, 
 14 |     PRIMARY KEY (token)
 15 | );
 16 | """
 17 | 
 18 | 
 19 | def update_networkx_feature(graph_fn, idx, feature_name, feature):
 20 |     graph_fn[idx][feature_name] = feature
 21 | 
 22 | 
 23 | def convert_networkx_to_dgl(
 24 |     graph, node_attrs=["text_idx", "type"], edge_attrs=["flow", "position"]
 25 | ):
 26 |     return dgl.from_networkx(graph, node_attrs=node_attrs, edge_attrs=edge_attrs)
 27 | 
 28 | 
 29 | # utility for feature extraction
 30 | class FeatureExtractor:
 31 |     def __init__(self, vocab_db_path, online_update=False, graph_version=0):
 32 |         self.node_feature_list = ["text", "type", "function", "block"]
 33 |         self.node_feature_list_dgl = ["text_idx", "type", "function", "block"]
 34 |         self.edge_feature_list = ["flow", "position"]
 35 |         self.online_update = online_update
 36 |         self.graph_version = graph_version
 37 | 
 38 |         if vocab_db_path is None:
 39 |             self.vocab_mapping = {}
 40 |         else:
 41 |             self.connection = sqlite3.connect(vocab_db_path, timeout=3200)
 42 |             self.cursor = self.connection.cursor()
 43 |             self.cursor.executescript(DB_CREATION_SCRIPT)
 44 |             self.connection.commit()
 45 |             # Load the dataset
 46 |             # FIXME: does "select token from Vocabs;" give the same order each time it is called?
 47 |             self.vocabs = list(self.cursor.execute("select token from Vocabs;"))
 48 |             v2i = {v[0]: i for i, v in enumerate(self.vocabs)}
 49 |             self.vocab_mapping = {"text": v2i}
 50 |             if not online_update:
 51 |                 self.connection.close()
 52 | 
 53 |     def save_vocab_to_db(self, cursor, table_name):
 54 |         tuple_vers = [v for v in self.vocabs]
 55 |         cursor.executemany(f"INSERT OR IGNORE INTO {table_name} VALUES (?)", tuple_vers)
 56 | 
 57 |     def process_nx_graph(self, graph):
 58 |         """
 59 |         Handles all of the requirements of taking a networkx graph and converting it into a
 60 |         dgl graph
 61 |         Inputs:
 62 |             - graph: the networkx graph
 63 |             - vocab: the vocabulary, a mapping from word to index.
 64 |             - node_feature_list: a list of textual features from the networkx node that we want to make sure
 65 |                 are featurizable into a vector.
 66 |             - edge_feature_list: a list of textual features from the networkx edges that we want to make sure
 67 |                 are featurizable into a vector.
 68 |         """
 69 |         self.update_graph_with_vocab(
 70 |             graph.nodes, self.node_feature_list, self.vocab_mapping, "nodes"
 71 |         )
 72 |         # No need to update edge feature: it will not change anything
 73 |         # self.update_graph_with_vocab(graph.edges, self.edge_feature_list, self.vocab_mapping, "edges")
 74 | 
 75 |         dgl_graph = convert_networkx_to_dgl(
 76 |             graph,
 77 |             node_attrs=self.node_feature_list_dgl,
 78 |             edge_attrs=self.edge_feature_list,
 79 |         )
 80 |         return dgl_graph
 81 | 
 82 |     def update_vocabs(self, token):
 83 |         # add a new token into vocab database
 84 |         self.cursor.execute("INSERT OR IGNORE INTO Vocabs VALUES (?)", (token,))
 85 |         self.connection.commit()
 86 |         self.vocabs = list(self.cursor.execute("select token from Vocabs;"))
 87 |         v2i = {v[0]: i for i, v in enumerate(self.vocabs)}
 88 |         self.vocab_mapping = {"text": v2i}
 89 |         assert token in v2i
 90 | 
 91 |     def update_graph_with_vocab(self, graph_fn, features, vocab, graph_fn_type="edges"):
 92 |         for feature_name in features:
 93 |             _counter = 0
 94 |             _total = 0
 95 |             curr_vocab = None
 96 |             if feature_name in vocab:
 97 |                 curr_vocab = vocab[feature_name]
 98 |             len_curr_vocab = len(curr_vocab) if curr_vocab is not None else 0
 99 |             for graph_item in graph_fn(data=feature_name):
100 |                 feature = graph_item[
101 |                     -1
102 |                 ]  # for networkX graph, the node/edge feature is always the last item
103 |                 if graph_fn_type == "nodes":
104 |                     idx = graph_item[0]
105 |                 else:
106 |                     # for this MultiDiGraph, this is at most one edge for a pair of nodes, so the third idx is 0;
107 |                     # the first two idx are the node idx for this edge
108 |                     idx = graph_item[:-1] + (0,)
109 | 
110 |                 _total += 1
111 |                 if feature_name in vocab:
112 |                     # this is for nodes feature "text", convert this feature to idx for embedding later
113 |                     # aggregate all functions to a single type
114 |                     if (
115 |                         self.graph_version == 1
116 |                         and feature.endswith(")")
117 |                         and feature.find(" (") >= 0
118 |                     ):
119 |                         feature = "__function__"
120 |                     token_idx = curr_vocab.get(feature, len_curr_vocab)
121 |                     if (
122 |                         feature_name == "text"
123 |                         and self.online_update
124 |                         and token_idx == len_curr_vocab
125 |                     ):
126 |                         # add this word to vocab database
127 |                         self.update_vocabs(feature)
128 |                         # update curr_vocab
129 |                         curr_vocab = self.vocab_mapping["text"]
130 |                         token_idx = curr_vocab.get(feature, len_curr_vocab)
131 |                     update_networkx_feature(
132 |                         graph_fn, idx, f"{feature_name}_idx", token_idx
133 |                     )
134 |                     if token_idx < len_curr_vocab:
135 |                         _counter += 1
136 |                 elif isinstance(feature, str):
137 |                     # this is for nodes feature "text", vocab is empty (it has not been created yet), so save a dummy value
138 |                     assert len(vocab) == 0 and feature_name == "text"
139 |                     update_networkx_feature(graph_fn, idx, f"{feature_name}_idx", -1)
140 |                 else:
141 |                     assert isinstance(
142 |                         feature, int
143 |                     ), f"{(feature_name, feature)} is not an int"
144 |             # if feature_name == "text":
145 |             #     print(f"Found {_counter} among {_total} queries, query success rate: {_counter / _total}")
146 | 


--------------------------------------------------------------------------------
/rlcompopt/env_wrapper/pyg_utils.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import torch
  8 | from torch_geometric.data import Data
  9 | from torch_geometric.utils import subgraph
 10 | 
 11 | 
 12 | def dgl2pyg(dgl_graph):
 13 |     u, v = dgl_graph.edges()
 14 |     edge_index = torch.cat([u.unsqueeze(0), v.unsqueeze(0)], dim=0)
 15 |     text_idx = dgl_graph.ndata.pop('text_idx')  # dgl_graph.ndata is like a dict
 16 |     # other_node_feat = dict(**dgl_graph.ndata)
 17 |     # edge_feat = dict(**dgl_graph.edata)
 18 |     other_node_feat = {k: v.view(v.shape[0], -1) for k, v in dgl_graph.ndata.items()}
 19 |     edge_feat = {k: v.view(v.shape[0], -1) for k, v in dgl_graph.edata.items()}
 20 | 
 21 |     # node feature `x` should have the shape [num_nodes, ...], so unsqueeze
 22 |     data = Data(x=text_idx.unsqueeze(1), edge_index=edge_index, **other_node_feat, **edge_feat)
 23 |     # get_blk_idx(data)
 24 |     return data
 25 | 
 26 | 
 27 | def remove_type_nodes(pyg_graph):
 28 |     mask = pyg_graph['type'].flatten() != 3  # 3 denotes the type nodes
 29 |     return remove_nodes(pyg_graph, mask)
 30 | 
 31 | 
 32 | def remove_nodes(pyg_graph, subset, edge_attr=('flow', 'position'), node_attr=('x', 'type', 'function', 'block')):
 33 |     assert isinstance(pyg_graph, Data)
 34 |     edge_index = pyg_graph.edge_index
 35 |     edge_index, _, edge_mask = subgraph(subset, edge_index, relabel_nodes=True, num_nodes=pyg_graph.num_nodes, return_edge_mask=True)
 36 |     pyg_graph.edge_index = edge_index
 37 | 
 38 |     for attr in node_attr:
 39 |         pyg_graph[attr] = pyg_graph[attr][subset]
 40 |     for attr in edge_attr:
 41 |         pyg_graph[attr] = pyg_graph[attr][edge_mask]
 42 |     return pyg_graph
 43 | 
 44 | 
 45 | def remove_edges(pyg_graph, edge_ids_to_remove=None, edge_ids_to_keep=None,  edge_attr=()):
 46 |     # edge_index has shape [2, num_edges], edges features have shape [num_edges, dim_feat]
 47 |     if edge_ids_to_keep is not None:
 48 |         pyg_graph.edge_index = pyg_graph.edge_index[:, edge_ids_to_keep]
 49 |         for attr in edge_attr:
 50 |             pyg_graph[attr] = pyg_graph[attr][edge_ids_to_keep]
 51 |         if pyg_graph.edge_attr is not None:
 52 |             pyg_graph.edge_attr = pyg_graph.edge_attr[edge_ids_to_keep]
 53 |     else:
 54 |         raise NotImplementedError
 55 |     return pyg_graph
 56 | 
 57 | 
 58 | def get_blk_idx(graph):
 59 |     """
 60 |     This function adds the position of each intruction node 
 61 |     within a basic block to the graph node attribute.
 62 |     After that, we have function idx, block idx, position in block.
 63 |     This can be used in attention layer to put some info to the attn.
 64 |     Args:
 65 |         graph (PyG graph): the graph to convert
 66 |     Outputs:
 67 |         in-place modification to the graph by adding an extra block_pos attribute, and
 68 |         the instruction nodes idx ordered by blocks (excluding blocks with single instruciton)
 69 |     """
 70 |     if graph.get("ordered_instr_idx", None) is not None:
 71 |         return
 72 |     flow_mask = graph['flow'].flatten() == 0
 73 |     instr_edges = graph.edge_index.T[flow_mask]
 74 |     num_block = graph['block'].max().item() + 1
 75 |     blk = graph['block'].flatten().tolist()
 76 |     block_edges = [[] for _ in range(num_block)]
 77 |     for ie in instr_edges:
 78 |         b0 = blk[ie[0]]
 79 |         b1 = blk[ie[1]]
 80 |         block_edges[b0].append(ie.tolist())
 81 |         if b0 != b1:
 82 |             block_edges[b1].append(ie.tolist())
 83 |     nodes = []
 84 |     pos = []
 85 |     func = graph['function'].flatten().tolist()
 86 |     for i, be in enumerate(block_edges):
 87 |         if len(be) == 0:
 88 |             continue
 89 |         thic = func[be[0][0]]
 90 |         for e in be:
 91 |             assert func[e[0]] == thic and func[e[1]] == thic
 92 |         ordered_nodes = order_block(be, blk, i)
 93 |         nodes.append(torch.tensor(ordered_nodes, dtype=torch.long))
 94 |         pos.append(torch.arange(len(ordered_nodes)))
 95 | 
 96 |     # instr_mask = graph['type'].flatten() == 0
 97 |     # blk_pos = torch.zeros(instr_mask.shape[0], dtype=torch.long)
 98 |     if nodes:
 99 |         nodes_ = torch.cat(nodes)  # excluding the instructions in blocks with single instruciton
100 |         pos_ = torch.cat(pos)
101 |         # blk_pos[nodes_] = pos_
102 |         graph['ordered_instr_idx0'] = nodes_
103 |         graph['ordered_instr_idx'] = nodes_  # hack to make batching work
104 |         graph['blk_pos'] = pos_
105 |     else:
106 |         graph['ordered_instr_idx'] = torch.tensor([], dtype=torch.long)
107 |         graph['ordered_instr_idx0'] = graph['ordered_instr_idx']
108 |         graph['blk_pos'] = torch.tensor([], dtype=torch.long)
109 |     # graph['blk_pos'] = blk_pos
110 | 
111 | 
112 | def order_block(blk_edges, idx2block, this_blk):
113 |     """
114 |     Given a list of edges (2-tuple), 
115 |     find the order of the nodes in the control flow.
116 |     Args:
117 |         blk_edges: a list of edges (2-tuple)
118 |         idx2block (list): given the node idx, get its block idx
119 |         this_blk (int): the block idx for this run
120 |     """
121 |     assert isinstance(blk_edges, list)
122 |     assert isinstance(idx2block, list)
123 |     assert isinstance(this_blk, int)
124 |     starts = set(e[0] for e in blk_edges)
125 |     ends = set(e[1] for e in blk_edges)
126 |     end_not_in_start = ends - starts  # could have more than 2: multiple ends in branching; could be empty
127 |     start_not_in_end = starts - ends  # could have more than 2: multiple branches into this block; could be empty
128 | 
129 |     starters = []
130 |     start_of_function = False
131 |     for s in start_not_in_end:
132 |         if idx2block[s] == this_blk:
133 |             # this is the start of a function
134 |             starter = s
135 |             start_of_function = True
136 |             break
137 |         starters.extend([e[1] for e in blk_edges if e[0] == s])
138 |     if len(starters) > 0:
139 |         assert not start_of_function
140 |         starters = set(starters)
141 |         assert len(starters) == 1, f"{starters=}"
142 |         starter = list(starters)[0]
143 |     
144 |     # at the end of the block, there is a branch going to the predecessor of the starter of the block
145 |     if not start_not_in_end:
146 |         for e in blk_edges:
147 |             if idx2block[e[0]] != this_blk and idx2block[e[1]] == this_blk:
148 |                 starter = e[1]
149 |                 break
150 |     for e in blk_edges:
151 |         if e[1] == starter and idx2block[e[0]] != this_blk:
152 |             # make sure the end not going to the starter
153 |             end_not_in_start.add(e[0])
154 | 
155 |     ordered_nodes = [starter]
156 |     while True:
157 |         end_node = ordered_nodes[-1]
158 |         if end_node in end_not_in_start:
159 |             # "ret" could be in the same block,
160 |             # in this case not to remove the last added node
161 |             if idx2block[end_node] != this_blk:
162 |                 ordered_nodes = ordered_nodes[:-1]
163 |             break
164 |         end_flag = False
165 |         for e in blk_edges:
166 |             if e[0] == end_node:
167 |                 if e[1] in ordered_nodes:
168 |                     # the end goes to the starter
169 |                     assert e[1] == starter
170 |                     assert idx2block[e[0]] == this_blk
171 |                     end_flag = True
172 |                     break
173 |                 ordered_nodes.append(e[1])
174 |                 break
175 |         if end_flag:
176 |             break
177 |     return ordered_nodes
178 | 


--------------------------------------------------------------------------------
/rlcompopt/eval_local.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import argparse
 8 | import pickle
 9 | from rlcompopt.train import testing
10 | 
11 | 
12 | def main():
13 |     """
14 |     Perform local testing with the saved arg.pkl file
15 |     """
16 |     parser = argparse.ArgumentParser()
17 |     parser.add_argument("--args_path", type=str, help="The path to the args.pkl file")
18 |     args = parser.parse_args()
19 |     with open(args.args_path, "rb") as f:
20 |         args_ = pickle.load(f)
21 | 
22 |     testing(args_, locally=True)
23 | 
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/rlcompopt/misc/convert_smaller_coreset.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import argparse
 8 | from typing import List, Tuple
 9 | from rlcompopt.pipeline.lib.types import TrajectoryDataset, ActionSequences, ActionSequence
10 | 
11 | 
12 | def convert(old_traj_path: str, new_traj_path: str, n: int, sorted_coreset: List[Tuple[int]]):
13 |     assert isinstance(sorted_coreset, list)
14 |     old_traj = TrajectoryDataset.load(old_traj_path)
15 | 
16 |     using_seqs = sorted_coreset[:n]
17 |     ac = [ActionSequence(actions=aa) for aa in using_seqs]
18 |     old_acs = old_traj.action_sequences
19 |     acs = ActionSequences(
20 |         name=old_acs.name + f"_using_best_{n}",
21 |         actionseqs=ac,
22 |         train_dataset_name=old_acs.train_dataset_name,
23 |     )
24 | 
25 |     idx = [old_acs.actionseqs.index(aa) for aa in ac]
26 | 
27 |     def extract_ir(irs):
28 |         return [irs[ii] for ii in idx]
29 | 
30 |     old_samples = old_traj.samples
31 |     new_samples = []
32 |     for sample in old_samples:
33 |         sample.all_ir_searches = extract_ir(sample.all_ir_searches)
34 |         assert len(sample.all_ir_searches) == n
35 |         new_samples.append(sample)
36 |     old_traj.samples = new_samples
37 |     old_traj.action_sequences = acs
38 | 
39 |     old_traj.save(new_traj_path)
40 | 
41 | 
42 | def main():
43 |     parser = argparse.ArgumentParser(description="Convert a TrajectoryDataset to use a smaller coreset.")
44 |     parser.add_argument("--old_traj_path", type=str, help="The path to the old TrajectoryDataset file")
45 |     parser.add_argument("--new_traj_path", type=str, help="The path to the new TrajectoryDataset file")
46 |     parser.add_argument("--n", type=int, help="The number of action sequences to use in the new TrajectoryDataset")
47 |     args = parser.parse_args()
48 |     with open("rlcompopt/pipeline/lib/coreset_sorted.txt", "rt") as f:
49 |         lines = f.read().splitlines()
50 |     action_seqs = [eval(line) for line in lines]
51 |     convert(args.old_traj_path, args.new_traj_path, args.n, action_seqs)
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     main()
56 | 


--------------------------------------------------------------------------------
/rlcompopt/pipeline/action_seq_50.txt:
--------------------------------------------------------------------------------
 1 | (102, 10, 111, 30, 36, 121, 54, 55, 46, 50, 65, 75, 57, 9, 10, 104, 97, 53)
 2 | (10, 64, 31, 10, 52, 111, 116, 36, 40, 48, 54, 30, 53, 114, 29, 120, 10)
 3 | (91, 115, 46, 2)
 4 | (111, 57, 55, 120, 54, 36, 53, 122, 105, 95, 76, 47, 39, 97, 10)
 5 | (27, 39, 64, 55, 53, 38, 122, 31, 111, 64, 10, 39, 21, 105, 36)
 6 | (102, 53, 97, 10, 57, 71, 41, 111, 39, 71, 45, 118, 23, 53)
 7 | (29, 72, 55, 103, 36, 122, 59, 30, 65, 53, 10)
 8 | (53, 91, 67, 86, 52, 61, 41, 29, 54, 10)
 9 | (50, 21, 120, 97, 39, 67, 10, 29, 47, 53, 79, 36, 97, 10)
10 | (38, 47, 50, 24, 57, 30, 41, 72, 53, 56, 122, 97, 70, 15, 10, 26, 29, 53)
11 | (72, 71, 31, 36, 97, 103, 78, 47, 97, 53, 41, 120, 10, 52, 97)
12 | (38, 10, 71, 39, 54, 102, 57, 103, 53, 46, 54, 116, 29, 10, 114, 41, 66)
13 | (59, 9, 10, 121, 114, 110, 120, 97, 10, 1, 21, 47, 53, 10, 96, 97)
14 | (99, 41, 31, 122, 36, 120, 29, 21, 111, 117, 48, 30, 10, 53)
15 | (29, 103, 102, 30, 36, 61, 29, 41, 71, 10, 61, 41, 52)
16 | (31, 63, 29, 39, 93, 41, 74, 103, 120, 10, 55, 114, 55, 68, 57, 53, 95, 78, 97, 10)
17 | (47, 53, 36, 117, 9, 55, 74, 111, 116, 120, 9, 77, 29, 97, 10)
18 | (99, 111, 97, 40, 31, 47, 10, 36, 53)
19 | (65, 9, 55, 27, 105, 57, 103, 38, 120, 8, 29, 53, 116, 55, 39, 10, 63, 97)
20 | (53, 36, 103, 47, 55, 9, 29, 10)
21 | (56, 38, 27, 29, 50, 80, 83, 97, 55, 111, 96, 10)
22 | (45, 48, 23, 91, 41, 54, 2)
23 | (71, 29, 111, 102, 53, 120, 38, 47, 21, 10, 120, 39, 23, 71, 40, 52)
24 | (39, 38, 103, 117, 116, 97, 122, 10, 41, 59)
25 | (39, 31, 53, 36, 47, 30, 33, 9, 10)
26 | (53, 115, 86, 122, 67, 54, 30, 61, 46, 36, 10, 53)
27 | (105, 9, 27, 55, 46, 53, 103, 76, 46, 71, 39, 41, 39, 10, 109, 30)
28 | (27, 36, 103, 24, 53, 97, 53, 38, 69, 97, 57, 10, 29)
29 | (53, 97, 97, 21, 65, 105, 54, 120, 10, 122, 30, 28, 39, 53)
30 | (123, 54, 75, 59, 10, 53, 97, 86, 80, 115, 41, 50, 10)
31 | (103, 66, 117, 47, 54, 30, 120, 36, 65, 53, 29, 96, 61, 10)
32 | (21, 121, 97, 38, 31, 52, 70, 53, 71, 97, 56, 111, 40, 39, 65, 10, 53)
33 | (48, 112, 46, 110, 97, 53, 10)
34 | (29, 55, 39, 61, 27, 41, 36, 25, 103, 10)
35 | (104, 39, 41, 97, 53, 10, 26, 78, 55)
36 | (39, 99, 66, 111, 23, 25, 45, 10, 53, 75, 102, 74, 40, 105, 52, 71, 30)
37 | (46, 23, 120, 91)
38 | (59, 30, 120, 79, 38, 53, 115, 10)
39 | (111, 39, 10, 69, 90, 9, 29, 69, 10, 53)
40 | (57, 9, 26, 102, 39, 8, 111, 55, 10, 104, 1)
41 | (36, 38, 24, 64, 39, 53, 55, 9, 10, 118, 30)
42 | (47, 53, 111, 57, 120, 10, 38, 21, 39)
43 | (27, 104, 55, 57, 26, 103, 10, 29, 31, 36, 120, 102, 53)
44 | (103, 57, 39, 53, 79, 47, 54, 97, 50, 116, 56, 53, 36, 10)
45 | (102, 103, 31, 117, 59, 8, 36, 39, 75, 53, 76, 97, 70, 41, 122, 55)
46 | (41, 47, 104, 46)
47 | (53, 111, 120, 64, 36, 15, 122, 96, 121, 39, 10)
48 | (30, 48, 29, 120, 103, 96, 47, 29, 78, 21, 122, 41, 36, 10)
49 | (97, 65, 10, 111, 25, 74, 97, 53, 102, 120, 73, 55, 10, 53, 26)
50 | (10, 53, 122, 31, 36, 111, 10, 97)


--------------------------------------------------------------------------------
/rlcompopt/pipeline/lib/coreset_sorted.txt:
--------------------------------------------------------------------------------
 1 | (48, 112, 46, 110, 97, 53, 10)
 2 | (10, 53, 122, 31, 36, 111, 10, 97)
 3 | (39, 31, 53, 36, 47, 30, 33, 9, 10)
 4 | (41, 47, 104, 46)
 5 | (99, 111, 97, 40, 31, 47, 10, 36, 53)
 6 | (29, 72, 55, 103, 36, 122, 59, 30, 65, 53, 10)
 7 | (53, 36, 103, 47, 55, 9, 29, 10)
 8 | (111, 39, 10, 69, 90, 9, 29, 69, 10, 53)
 9 | (104, 39, 41, 97, 53, 10, 26, 78, 55)
10 | (27, 36, 103, 24, 53, 97, 53, 38, 69, 97, 57, 10, 29)
11 | (36, 38, 24, 64, 39, 53, 55, 9, 10, 118, 30)
12 | (47, 53, 111, 57, 120, 10, 38, 21, 39)
13 | (39, 38, 103, 117, 116, 97, 122, 10, 41, 59)
14 | (72, 71, 31, 36, 97, 103, 78, 47, 97, 53, 41, 120, 10, 52, 97)
15 | (31, 63, 29, 39, 93, 41, 74, 103, 120, 10, 55, 114, 55, 68, 57, 53, 95, 78, 97, 10)
16 | (97, 65, 10, 111, 25, 74, 97, 53, 102, 120, 73, 55, 10, 53, 26)
17 | (29, 55, 39, 61, 27, 41, 36, 25, 103, 10)
18 | (27, 39, 64, 55, 53, 38, 122, 31, 111, 64, 10, 39, 21, 105, 36)
19 | (53, 97, 97, 21, 65, 105, 54, 120, 10, 122, 30, 28, 39, 53)
20 | (50, 21, 120, 97, 39, 67, 10, 29, 47, 53, 79, 36, 97, 10)
21 | (65, 9, 55, 27, 105, 57, 103, 38, 120, 8, 29, 53, 116, 55, 39, 10, 63, 97)
22 | (57, 9, 26, 102, 39, 8, 111, 55, 10, 104, 1)
23 | (111, 57, 55, 120, 54, 36, 53, 122, 105, 95, 76, 47, 39, 97, 10)
24 | (29, 103, 102, 30, 36, 61, 29, 41, 71, 10, 61, 41, 52)
25 | (102, 10, 111, 30, 36, 121, 54, 55, 46, 50, 65, 75, 57, 9, 10, 104, 97, 53)
26 | (56, 38, 27, 29, 50, 80, 83, 97, 55, 111, 96, 10)
27 | (10, 64, 31, 10, 52, 111, 116, 36, 40, 48, 54, 30, 53, 114, 29, 120, 10)
28 | (91, 115, 46, 2)
29 | (47, 53, 36, 117, 9, 55, 74, 111, 116, 120, 9, 77, 29, 97, 10)
30 | (27, 104, 55, 57, 26, 103, 10, 29, 31, 36, 120, 102, 53)
31 | (102, 103, 31, 117, 59, 8, 36, 39, 75, 53, 76, 97, 70, 41, 122, 55)
32 | (102, 53, 97, 10, 57, 71, 41, 111, 39, 71, 45, 118, 23, 53)
33 | (30, 48, 29, 120, 103, 96, 47, 29, 78, 21, 122, 41, 36, 10)
34 | (21, 121, 97, 38, 31, 52, 70, 53, 71, 97, 56, 111, 40, 39, 65, 10, 53)
35 | (103, 57, 39, 53, 79, 47, 54, 97, 50, 116, 56, 53, 36, 10)
36 | (71, 29, 111, 102, 53, 120, 38, 47, 21, 10, 120, 39, 23, 71, 40, 52)
37 | (38, 10, 71, 39, 54, 102, 57, 103, 53, 46, 54, 116, 29, 10, 114, 41, 66)
38 | (59, 30, 120, 79, 38, 53, 115, 10)
39 | (99, 41, 31, 122, 36, 120, 29, 21, 111, 117, 48, 30, 10, 53)
40 | (105, 9, 27, 55, 46, 53, 103, 76, 46, 71, 39, 41, 39, 10, 109, 30)
41 | (59, 9, 10, 121, 114, 110, 120, 97, 10, 1, 21, 47, 53, 10, 96, 97)
42 | (39, 99, 66, 111, 23, 25, 45, 10, 53, 75, 102, 74, 40, 105, 52, 71, 30)
43 | (38, 47, 50, 24, 57, 30, 41, 72, 53, 56, 122, 97, 70, 15, 10, 26, 29, 53)
44 | (53, 111, 120, 64, 36, 15, 122, 96, 121, 39, 10)
45 | (46, 23, 120, 91)
46 | (103, 66, 117, 47, 54, 30, 120, 36, 65, 53, 29, 96, 61, 10)
47 | (53, 115, 86, 122, 67, 54, 30, 61, 46, 36, 10, 53)
48 | (45, 48, 23, 91, 41, 54, 2)
49 | (53, 91, 67, 86, 52, 61, 41, 29, 54, 10)
50 | (123, 54, 75, 59, 10, 53, 97, 86, 80, 115, 41, 50, 10)


--------------------------------------------------------------------------------
/scripts/generate_autophase_history_online.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | #!/bin/bash -x
 8 | 
 9 | outdir=outputs_rl/autophase
10 | 
11 | # run locally
12 | python -m rlcompopt.cl.generate --config-path conf/rl_online --config-name generate_autophase \
13 |     hydra.run.dir=$outdir \
14 |     outdir=$outdir \
15 |     n_model_workers=1 \
16 |     nproc=20


--------------------------------------------------------------------------------
/scripts/generate_graph_reward_history_online.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | #!/bin/bash -x
 8 | 
 9 | outdir=outputs_rl/cg_online
10 | 
11 | # run locally
12 | python -m rlcompopt.cl.generate --config-path conf/rl_online --config-name generate_online \
13 |     hydra.run.dir=$outdir \
14 |     outdir=$outdir \
15 |     nproc=5


--------------------------------------------------------------------------------
/scripts/rl_ppo_generator_submitit.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | #!/bin/bash -x
 8 | 
 9 | outdir=outputs_rl/cg_online2
10 | 
11 | # submit experiments to Slurm
12 | python -m rlcompopt.cl.generate --config-path conf/rl_online --config-name generate_online \
13 |     hydra.run.dir=$outdir \
14 |     outdir=$outdir \
15 |     submitit.log_dir=./log_dir


--------------------------------------------------------------------------------
/scripts/rl_ppo_trainer_submitit.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | outdir=outputs_rl/cg_online2
 8 | 
 9 | # submit experiments to Slurm
10 | python -m rlcompopt.train --config-path cl/conf/rl_online --config-name train_attn \
11 |     hydra.run.dir=$outdir \
12 |     dataset.train=$outdir/summary.db \
13 |     submitit.log_dir=./log_dir


--------------------------------------------------------------------------------
/scripts/submit_online_train_ppo_action_histogram.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import os
  8 | import datetime
  9 | import argparse
 10 | import subprocess
 11 | from typing import Dict
 12 | 
 13 | FS_LIMIT = 200  # file system path length limit
 14 | 
 15 | GEN = "python -m rlcompopt.cl.generate --config-path conf/rl_online --config-name generate_online "
 16 | type2_train = "python -m rlcompopt.train --config-path cl/conf/rl_online --config-name train_gnn_type2 "
 17 | TRAIN = {
 18 |     "gcn": "python -m rlcompopt.train --config-path cl/conf/rl_online --config-name train_gcn ",
 19 |     "gat": type2_train,
 20 |     "gcn_real": type2_train,
 21 |     "gin": type2_train,
 22 |     "attn": "python -m rlcompopt.train --config-path cl/conf/rl_online --config-name train_attn "
 23 | }
 24 | 
 25 | 
 26 | def make_str(s: str):
 27 |     assert not s.startswith("'")
 28 |     assert not s.endswith("'")
 29 |     return "'" + s + "'"
 30 | 
 31 | 
 32 | def fill_param(cmd: str, params: Dict = None, exclude={}):
 33 |     assert isinstance(cmd, str)
 34 |     exp_name = ""
 35 |     if params is not None:
 36 |         assert isinstance(params, dict)
 37 |         names = {k: v for k, v in params.items() if k not in exclude.keys()}
 38 |         exp_name = "_".join(f"{k.split('.')[-1]}_{v}" for k, v in names.items())
 39 |         params = " ".join(f"{k}={v}" for k, v in params.items())
 40 |         if not cmd.endswith(" "):
 41 |             cmd = cmd + " "
 42 |         cmd = cmd + params
 43 |     return cmd, exp_name
 44 | 
 45 | 
 46 | def submit(exp_name: str, dry_run=False):
 47 |     for gen_config, train_config in exp[exp_name]:
 48 |         gen_config = gen_config.copy()
 49 |         train_config = train_config.copy()
 50 |         gen_config.update(gen_config_common)
 51 |         train_config.update(train_config_common)
 52 |         gen_cmd, gen_name = fill_param(GEN, gen_config)
 53 |         train_cmd, train_name = fill_param(TRAIN[exp_name], train_config)
 54 | 
 55 |         now = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
 56 |         outdir = "_".join((now, train_name, gen_name))
 57 |         outdir = log_dir + outdir
 58 |         if len(outdir) > FS_LIMIT:
 59 |             print("Truncated outdir")
 60 |             outdir = outdir[:FS_LIMIT]  # file system limit: 256
 61 | 
 62 |         gen_basic = {
 63 |             "hydra.run.dir": make_str(outdir),
 64 |             "outdir": make_str(outdir),
 65 |         }
 66 | 
 67 |         train_basic = {
 68 |             "hydra.run.dir": make_str(outdir),
 69 |             "dataset.train": make_str(os.path.join(outdir, "summary.db")),
 70 |             # "model_db_path": make_str(os.path.join(outdir, "model.db")),
 71 |         }
 72 | 
 73 |         gen_cmd, _ = fill_param(gen_cmd, gen_basic)
 74 |         train_cmd, _ = fill_param(train_cmd, train_basic)
 75 | 
 76 |         if dry_run or local_run:
 77 |             print(f"{outdir=}")
 78 |             print(f"{gen_cmd=}")
 79 |             print(f"{train_cmd=}")
 80 |         if not local_run:
 81 |             print("====================")
 82 |             os.system(gen_cmd)
 83 |             print("\n")
 84 |             os.system(train_cmd)
 85 |         else:
 86 |             print("Running locally")
 87 |             subprocess.Popen(gen_cmd.split())
 88 |             subprocess.Popen(train_cmd.split())
 89 | 
 90 | 
 91 | # GCN
 92 | gcn_exp = []
 93 | gen_config = {}
 94 | train_config = {
 95 |     "model.gnn_type": "GatedGraphConv",
 96 | }
 97 | gcn_exp.append((gen_config, train_config.copy()))
 98 | 
 99 | # GAT
100 | gat_exp = []
101 | gen_config = {}
102 | train_config = {
103 |     "model.gnn_type": "GAT",
104 |     "model.entropy_factor": 0.0006,
105 |     "optim.weight_decay": 0,
106 | }
107 | gat_exp.append((gen_config, train_config.copy()))
108 | 
109 | # GIN
110 | gin_exp = []
111 | gen_config = {}
112 | train_config = {
113 |     "model.gnn_type": "GIN",
114 |     "model.entropy_factor": 0.003,
115 |     "optim.lr": 1e-5,
116 |     "optim.weight_decay": 0,
117 | }
118 | gin_exp.append((gen_config, train_config.copy()))
119 | 
120 | # GCN real
121 | gcn_real_exp = []
122 | gen_config = {}
123 | train_config = {
124 |     "model.gnn_type": "GCN",
125 | }
126 | gcn_real_exp.append((gen_config, train_config.copy()))
127 | 
128 | 
129 | # EdgeAttn
130 | attn_exp = []
131 | gen_config = {}
132 | train_config = {
133 |     "model.gnn_type": "EdgeAttn",
134 | }
135 | 
136 | attn_exp.append((gen_config.copy(), train_config.copy()))
137 | 
138 | exp = {
139 |     "gcn": gcn_exp,
140 |     "gat": gat_exp,
141 |     "gin": gin_exp,
142 |     "gcn_real": gcn_real_exp,
143 |     "attn": attn_exp
144 | }
145 | 
146 | 
147 | def submit_all(dry_run=True):
148 | 
149 |     submit("gcn", dry_run=dry_run)
150 |     submit("gcn_real", dry_run=dry_run)
151 |     submit("gat", dry_run=dry_run)
152 |     submit("gin", dry_run=dry_run)
153 |     submit("attn", dry_run=dry_run)
154 | 
155 | 
156 | def parse_args():
157 |     parser = argparse.ArgumentParser(
158 |         description='Run RL-PPO experiments locally or on Slurm.')
159 |     parser.add_argument(
160 |         "--submitit_log_dir", default="null", type=str, 
161 |         help="If null, run experiments locally, otherwise, submit them to Slurm via submitit.")
162 |     parser.add_argument(
163 |         "--nproc_per_node", default=80, type=int, 
164 |         help="This is for setting how many processes to use when experiments are run locally."
165 |              "For experiments on Slurm, the number is determined by 'submitit' entries in config file.")
166 |     parser.add_argument(
167 |         "--num_seeds", default=3, type=int,
168 |         help="number of seeds (runs) to repeat experiments")
169 |     parser.add_argument("--dry_run", action="store_true", help="whether it is a dry run.")
170 |     parser.add_argument(
171 |         "--log_dir", default="outputs_rl/", type=str,
172 |         help="log dir to save checkpoints and testing results")
173 |     parser.add_argument(
174 |         "--slurm_partition", default="", type=str,
175 |         help="slurm partition to use")
176 |     args = parser.parse_args()
177 |     return args
178 | 
179 | 
180 | if __name__ == "__main__":
181 |     args = parse_args()
182 |     nproc_per_node = args.nproc_per_node
183 |     submitit_log_dir = args.submitit_log_dir
184 |     partition = args.slurm_partition
185 |     log_dir = args.log_dir
186 | 
187 |     local_run = submitit_log_dir == "null"
188 | 
189 |     gen_config_common0 = {
190 |         "nproc": nproc_per_node,
191 |         "submitit.log_dir": submitit_log_dir,
192 |         "submitit.partition": partition,
193 |     }
194 |     train_config_common0 = {
195 |         "dataset.num_generators": nproc_per_node,
196 |         "submitit.log_dir": submitit_log_dir,
197 |         "submitit.partition": partition,
198 |     }
199 |     for seed in range(args.num_seeds):
200 |         gen_config_common = {"seed": seed, **gen_config_common0}
201 |         train_config_common = {"seed": seed, **train_config_common0}
202 |         submit_all(args.dry_run)
203 | 


--------------------------------------------------------------------------------
/scripts/submit_ppo_autophase_action_histogram.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  3 | 
  4 | # This source code is licensed under the MIT license found in the
  5 | # LICENSE file in the root directory of this source tree.
  6 | 
  7 | import os
  8 | import datetime
  9 | import argparse
 10 | import subprocess
 11 | from typing import Dict
 12 | 
 13 | FS_LIMIT = 200  # file system path length limit
 14 | 
 15 | GEN = "python -m rlcompopt.cl.generate --config-path conf/rl_online --config-name generate_autophase "
 16 | train = "python -m rlcompopt.train --config-path cl/conf/rl_online --config-name train_autophase "
 17 | TRAIN = {
 18 |     "autophase": train,
 19 | }
 20 | 
 21 | 
 22 | def make_str(s: str):
 23 |     assert not s.startswith("'")
 24 |     assert not s.endswith("'")
 25 |     return "'" + s + "'"
 26 | 
 27 | 
 28 | def fill_param(cmd: str, params: Dict = None, exclude={}):
 29 |     assert isinstance(cmd, str)
 30 |     exp_name = ""
 31 |     if params is not None:
 32 |         assert isinstance(params, dict)
 33 |         names = {k: v for k, v in params.items() if k not in exclude.keys()}
 34 |         exp_name = "_".join(f"{k.split('.')[-1]}_{v}" for k, v in names.items())
 35 |         params = " ".join(f"{k}={v}" for k, v in params.items())
 36 |         if not cmd.endswith(" "):
 37 |             cmd = cmd + " "
 38 |         cmd = cmd + params
 39 |     return cmd, exp_name
 40 | 
 41 | 
 42 | def submit(exp_name: str, dry_run=False):
 43 |     for gen_config, train_config in exp[exp_name]:
 44 |         gen_config = gen_config.copy()
 45 |         train_config = train_config.copy()
 46 |         gen_config.update(gen_config_common)
 47 |         train_config.update(train_config_common)
 48 |         gen_cmd, gen_name = fill_param(GEN, gen_config)
 49 |         train_cmd, train_name = fill_param(TRAIN[exp_name], train_config)
 50 | 
 51 |         now = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
 52 |         outdir = "_".join((now, train_name, gen_name))
 53 |         outdir = log_dir + outdir
 54 |         if len(outdir) > FS_LIMIT:
 55 |             print("Truncated outdir")
 56 |             outdir = outdir[:FS_LIMIT]  # file system limit: 256
 57 | 
 58 |         gen_basic = {
 59 |             "hydra.run.dir": make_str(outdir),
 60 |             "outdir": make_str(outdir),
 61 |         }
 62 | 
 63 |         train_basic = {
 64 |             "hydra.run.dir": make_str(outdir),
 65 |             "dataset.train": make_str(os.path.join(outdir, "summary.db")),
 66 |             # "model_db_path": make_str(os.path.join(outdir, "model.db")),
 67 |         }
 68 | 
 69 |         gen_cmd, _ = fill_param(gen_cmd, gen_basic)
 70 |         train_cmd, _ = fill_param(train_cmd, train_basic)
 71 | 
 72 |         if dry_run or local_run:
 73 |             print(f"{outdir=}")
 74 |             print(f"{gen_cmd=}")
 75 |             print(f"{train_cmd=}")
 76 |         if not local_run:
 77 |             print("====================")
 78 |             os.system(gen_cmd)
 79 |             print("\n")
 80 |             os.system(train_cmd)
 81 |         else:
 82 |             print("Running locally")
 83 |             subprocess.Popen(gen_cmd.split())
 84 |             subprocess.Popen(train_cmd.split())
 85 | 
 86 | 
 87 | exp = []
 88 | gen_config = {}
 89 | train_config = {}
 90 | 
 91 | train_config = {"optim.lr": "5e-5"}
 92 | exp.append((gen_config.copy(), train_config.copy()))
 93 | 
 94 | train_config = {"optim.lr": "7e-5", "model.entropy_factor": "0.0005"}
 95 | exp.append((gen_config.copy(), train_config.copy()))
 96 | 
 97 | exp = {
 98 |     "autophase": exp
 99 | }
100 | 
101 | def submit_all(dry_run=True):
102 | 
103 |     submit("autophase", dry_run=dry_run)
104 | 
105 | 
106 | def parse_args():
107 |     parser = argparse.ArgumentParser(
108 |         description='Run RL-PPO experiments locally or on Slurm.')
109 |     parser.add_argument(
110 |         "--submitit_log_dir", default="null", type=str, 
111 |         help="If null, run experiments locally, otherwise, submit them to Slurm via submitit.")
112 |     parser.add_argument(
113 |         "--nproc_per_node", default=80, type=int, 
114 |         help="This is for setting how many processes to use when experiments are run locally."
115 |              "For experiments on Slurm, the number is determined by 'submitit' entries in config file.")
116 |     parser.add_argument(
117 |         "--num_seeds", default=3, type=int,
118 |         help="number of seeds (runs) to repeat experiments")
119 |     parser.add_argument("--dry_run", action="store_true", help="whether it is a dry run.")
120 |     parser.add_argument(
121 |         "--log_dir", default="outputs_autophase_ppo/", type=str,
122 |         help="log dir to save checkpoints and testing results")
123 |     parser.add_argument(
124 |         "--slurm_partition", default="", type=str,
125 |         help="slurm partition to use")
126 |     args = parser.parse_args()
127 |     return args
128 | 
129 | 
130 | if __name__ == "__main__":
131 |     args = parse_args()
132 |     nproc_per_node = args.nproc_per_node
133 |     submitit_log_dir = args.submitit_log_dir
134 |     partition = args.slurm_partition
135 |     log_dir = args.log_dir
136 |     local_run = submitit_log_dir == "null"
137 | 
138 |     gen_config_common0 = {
139 |         "nproc": nproc_per_node,
140 |         "submitit.log_dir": submitit_log_dir,
141 |         "submitit.partition": partition,
142 |     }
143 |     train_config_common0 = {
144 |         "dataset.num_generators": nproc_per_node,
145 |         "submitit.log_dir": submitit_log_dir,
146 |         "submitit.partition": partition,
147 |     }
148 |     for seed in range(args.num_seeds):
149 |         gen_config_common = {"seed": seed, **gen_config_common0}
150 |         train_config_common = {"seed": seed, **train_config_common0}
151 |         submit_all(args.dry_run)
152 | 


--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | export NUM_CPU=14
 8 | export NUM_GPU=1
 9 | 
10 | FILES="outputs/*"
11 | for f in $FILES
12 | do
13 |   echo "Processing $f "
14 |   CUDA_VISIBLE_DEVICES=0 python rlcompopt/eval_local.py --args_path "$f/args.pkl"
15 | done


--------------------------------------------------------------------------------
/scripts/train_autophase_bc.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name autophase_bc \
13 |     seed=$i
14 | done


--------------------------------------------------------------------------------
/scripts/train_autophase_history_online.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | outdir=outputs_rl/autophase
 8 | 
 9 | # run locally
10 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54597 \
11 |     rlcompopt/train.py --config-path cl/conf/rl_online --config-name train_autophase \
12 |     hydra.run.dir=$outdir \
13 |     dataset.train=$outdir/summary.db \
14 |     dataset.num_generators=20


--------------------------------------------------------------------------------
/scripts/train_autophase_offline_q_value_rank.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name autophase_q_value \
13 |     seed=$i
14 | done


--------------------------------------------------------------------------------
/scripts/train_autophase_offline_seq_cls.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name autophase \
13 |     seed=$i
14 | done


--------------------------------------------------------------------------------
/scripts/train_graph_edgeattn_bc.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name attn_bc \
13 |     model.gnn_type=EdgeAttn \
14 |     seed=$i
15 | done


--------------------------------------------------------------------------------
/scripts/train_graph_edgeattn_nvp.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name attn \
13 |     model.gnn_type=EdgeAttn \
14 |     seed=$i
15 | done


--------------------------------------------------------------------------------
/scripts/train_graph_gcn_nvp.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name gcn \
13 |     model.gnn_type=GatedGraphConv \
14 |     seed=$i
15 | done


--------------------------------------------------------------------------------
/scripts/train_graph_gcn_real_nvp.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name gcn_real \
13 |     model.gnn_type=GCN \
14 |     seed=$i
15 | done
16 | 


--------------------------------------------------------------------------------
/scripts/train_graph_gnn_type2_nvp.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name gnn_type2 \
13 |     model.gnn_type=$1 \
14 |     seed=$i
15 | done
16 | 


--------------------------------------------------------------------------------
/scripts/train_graph_offline_edgeattn_q_value_rank.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | make install
 8 | 
 9 | for i in 0 1 2
10 | do
11 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
12 |     rlcompopt/train.py --config-name attn_q_value \
13 |     model.gnn_type=EdgeAttn \
14 |     seed=$i
15 | done


--------------------------------------------------------------------------------
/scripts/train_graph_reward_history_online.sh:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | outdir=outputs_rl/cg_online
 8 | 
 9 | # run locally
10 | python -m torch.distributed.launch --nproc_per_node=1 --use_env --master_port=54567 \
11 |     rlcompopt/train.py --config-path cl/conf/rl_online --config-name train_gcn \
12 |     hydra.run.dir=$outdir \
13 |     dataset.train=$outdir/summary.db \
14 |     dataset.num_generators=5


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | 
 7 | import distutils.util
 8 | 
 9 | import setuptools
10 | 
11 | setuptools.setup(
12 |     name="rlcompopt",
13 |     version="0.0.1",
14 |     description="Compiler pass ordering with machine learning",
15 |     author="Facebook AI Research",
16 |     url="https://github.com/facebookresearch/RLCompOpt",
17 |     license="MIT",
18 |     packages=[
19 |         "rlcompopt",
20 |         "rlcompopt.env_wrapper",
21 |         "rlcompopt.cl",
22 |         "rlcompopt.cl.models",
23 |         "rlcompopt.pipeline",
24 |         "rlcompopt.pipeline.lib",
25 |     ],
26 |     package_data={
27 |         "rlcompopt": [
28 |             "env_wrapper/database_schema4.sql",
29 |             "env_wrapper/database_schema2.sql",
30 |             "env_wrapper/database_schema.sql",
31 |             "env_wrapper/database_model.sql",
32 |             "cl/conf/*",
33 |             "cl/conf/model/*",
34 |             "cl/database_socket.sql",
35 |         ]
36 |     },
37 |     python_requires=">=3.8",
38 |     platforms=[distutils.util.get_platform()],
39 |     zip_safe=False,
40 | )
41 | 


--------------------------------------------------------------------------------