├── tests └── .keep ├── .vscode └── settings.json ├── doc └── img │ ├── drills-logo.png │ ├── drills-header.png │ └── drills-architecture.png ├── scripts ├── README.md ├── sin_drills.tcl ├── multiplier_drills.tcl ├── square_drills.tcl ├── max_drills.tcl ├── sqrt_drills.tcl ├── log2_drills.tcl ├── adder_drills.tcl ├── hyp_drills.tcl ├── bar_drills.tcl └── div_drills.tcl ├── baseline ├── README.md ├── simulated-annealing │ ├── README.md │ ├── data.yml │ └── simulated-annealing.py └── greedy │ ├── README.md │ ├── data.yml │ └── greedy.py ├── .github └── ISSUE_TEMPLATE │ ├── contribution.md │ └── bug_report.md ├── requirements.txt ├── drills ├── fixed_optimization.py ├── __init__.py ├── features.py ├── scl_session.py ├── fpga_session.py └── model.py ├── CITATION.cff ├── params.yml ├── LICENSE ├── .gitignore ├── drills.py └── README.md /tests/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": ".venv/bin/python" 3 | } -------------------------------------------------------------------------------- /doc/img/drills-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scale-lab/DRiLLS/HEAD/doc/img/drills-logo.png -------------------------------------------------------------------------------- /doc/img/drills-header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scale-lab/DRiLLS/HEAD/doc/img/drills-header.png -------------------------------------------------------------------------------- /doc/img/drills-architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scale-lab/DRiLLS/HEAD/doc/img/drills-architecture.png -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # DRiLLS Scripts 2 | 3 | This folder contains scripts generated during design space exploration by the DRiLLS agent. -------------------------------------------------------------------------------- /baseline/README.md: -------------------------------------------------------------------------------- 1 | This directory contains miscellaneous methods for combinatorial optimization that we used en route to discovering the RL methodology. 
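See the `greedy` and `simulated-annealing` subdirectories for the two baselines; each one ships its own README, a `data.yml` configuration, and a driver script.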
2 | -------------------------------------------------------------------------------- /scripts/sin_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read sin.v 5 | 6 | strash 7 | rewrite 8 | 9 | write_verilog sin_synth_drills.v 10 | 11 | map -D 3800 12 | stime -------------------------------------------------------------------------------- /scripts/multiplier_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read multiplier.v 5 | 6 | strash 7 | balance 8 | refactor 9 | rewrite -z 10 | refactor 11 | 12 | write_verilog mul_synth_drills.v 13 | 14 | map -D 4000 15 | stime 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/contribution.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Contribution First Step 3 | about: Suggest a contribution for DRiLLS 4 | labels: 5 | --- 6 | 7 | **What?** 8 | A clear and concise description of what you want to contribute. 9 | 10 | **Why?** 11 | A clear and concise explanation of why this contribution is needed, plus any other context about it. -------------------------------------------------------------------------------- /scripts/square_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read square.v 5 | 6 | strash 7 | rewrite -z 8 | refactor -z 9 | balance 10 | balance 11 | resub 12 | resub 13 | resub 14 | refactor 15 | rewrite -z 16 | resub -z 17 | resub 18 | balance 19 | refactor -z 20 | resub 21 | refactor -z 22 | 23 | write_verilog square_synth_drills.v 24 | 25 | map -D 2200 26 | stime 27 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.8.1 2 | astor==0.8.0 3 | gast==0.2.2 4 | google-pasta==0.1.7 5 | grpcio==1.24.3 6 | h5py==2.10.0 7 | joblib==0.14.0 8 | Keras-Applications==1.0.8 9 | Keras-Preprocessing==1.1.0 10 | Markdown==3.1.1 11 | numpy==1.17.2 12 | opt-einsum==3.1.0 13 | protobuf==3.10.0 14 | pyfiglet==0.8.post1 15 | PyYAML==5.1.2 16 | six==1.12.0 17 | tensorflow==1.12.0 18 | termcolor==1.1.0 19 | Werkzeug==0.16.0 20 | wrapt==1.11.2 21 | -------------------------------------------------------------------------------- /scripts/max_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read max.v 5 | 6 | strash 7 | refactor -z 8 | resub 9 | rewrite 10 | resub -z 11 | refactor 12 | balance 13 | resub 14 | rewrite -z 15 | resub -z 16 | resub 17 | refactor -z 18 | rewrite -z 19 | resub -z 20 | refactor 21 | refactor 22 | balance 23 | 24 | write_verilog max_synth_drills.v 25 | 26 | map -D 4000 27 | stime 28 | -------------------------------------------------------------------------------- /scripts/sqrt_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read sqrt.v 5 | 6 | strash 7 | refactor -z 8 | resub -z 9 | refactor -z 10 | refactor -z 11 | refactor -z 12 | refactor 13 | balance 14 | rewrite -z 15 | rewrite -z 16 | resub 17 | refactor -z 18 | refactor -z 19 | refactor -z 20 | balance 21 | resub -z 22 | resub -z 23 | 
resub -z 24 | refactor -z 25 | rewrite -z 26 | 27 | write_verilog sqrt_synth_drills.v 28 | 29 | map -D 170000 30 | stime 31 | -------------------------------------------------------------------------------- /scripts/log2_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read log2.v 5 | 6 | strash 7 | resub -z 8 | refactor -z 9 | refactor -z 10 | refactor 11 | resub 12 | refactor 13 | refactor -z 14 | refactor -z 15 | refactor 16 | resub -z 17 | resub -z 18 | refactor -z 19 | refactor -z 20 | resub -z 21 | refactor 22 | refactor -z 23 | resub -z 24 | refactor 25 | resub 26 | refactor 27 | resub -z 28 | resub -z 29 | balance 30 | 31 | write_verilog log2_synth_drills.v 32 | 33 | map -D 7500 34 | stime 35 | -------------------------------------------------------------------------------- /scripts/adder_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read adder.v 5 | 6 | strash 7 | refactor 8 | rewrite -z 9 | rewrite -z 10 | rewrite -z 11 | resub -z 12 | refactor 13 | refactor 14 | rewrite -z 15 | refactor 16 | rewrite 17 | rewrite 18 | resub 19 | rewrite 20 | rewrite 21 | resub 22 | balance 23 | resub 24 | refactor -z 25 | balance 26 | rewrite 27 | rewrite -z 28 | balance 29 | resub -z 30 | rewrite -z 31 | refactor 32 | rewrite -z 33 | refactor -z 34 | 35 | write_verilog adder_synth_drills.v 36 | 37 | map -D 2000 38 | stime -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help DRiLLS improve 4 | labels: 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Clone the latest DRiLLS 13 | 2. Modify `data.yml` file to ... 14 | 3. Describe how you run the agent 15 | 4. Provide an output of the failure 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Additional context** 21 | Add any other context about the problem here. -------------------------------------------------------------------------------- /drills/fixed_optimization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | def optimize_with_fixed_script(params, fixed_script_file): 10 | """ 11 | Optimizes the design with the fixed script and writes the result to a new design file 12 | """ 13 | optimized_design_file = None 14 | # TODO: run an scl session with the fixed script. 
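    # A minimal illustrative sketch of the TODO above (not the project's
    # implementation; it assumes `params` carries the same fields as
    # params.yml -- e.g. 'abc_binary', 'design_file', 'playground_dir' --
    # and that `fixed_script_file` is an open file of ABC commands,
    # one per line, as passed in from drills.py):
    #
    #   import os
    #   from subprocess import check_output
    #   script = '; '.join(l.strip() for l in fixed_script_file if l.strip())
    #   optimized_design_file = os.path.join(params['playground_dir'], 'fixed-optimized.v')
    #   abc_command = 'read ' + params['design_file'] + '; strash; ' + script + \
    #                 '; write ' + optimized_design_file + ';'
    #   check_output([params['abc_binary'], '-c', abc_command])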
 15 | 16 | params.design_file = optimized_design_file 17 | return params -------------------------------------------------------------------------------- /baseline/simulated-annealing/README.md: -------------------------------------------------------------------------------- 1 | # Combinatorial Optimization with Simulated Annealing 2 | The algorithm takes an initial input design and tries to optimize it using a simulated annealing approach, where the temperature decreases over the iterations and the target metric (area/delay) is adjusted accordingly. 3 | 4 | ## How to run 5 | - Install dependencies: `pip3 install pyyaml joblib` 6 | - Edit the `data.yml` file to specify your design file, library file, and output directory, and to modify other parameters 7 | - Run using: `python3 simulated-annealing.py data.yml` 8 | - Logs and results are written to the `output_dir` specified in the `data.yml` file. 9 | -------------------------------------------------------------------------------- /scripts/hyp_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read hyp.v 5 | 6 | strash 7 | rewrite 8 | resub 9 | resub 10 | resub 11 | resub 12 | resub 13 | resub 14 | balance 15 | balance 16 | rewrite -z 17 | resub 18 | resub 19 | resub 20 | refactor -z 21 | resub 22 | resub 23 | resub 24 | resub 25 | resub 26 | refactor -z 27 | resub 28 | resub -z 29 | resub 30 | resub 31 | resub -z 32 | resub 33 | resub 34 | resub 35 | resub 36 | balance 37 | balance 38 | resub 39 | resub -z 40 | resub 41 | rewrite -z 42 | 43 | write_verilog hyp_synth_drills.v 44 | 45 | map -D 1000000 46 | stime 47 | -------------------------------------------------------------------------------- /baseline/greedy/README.md: -------------------------------------------------------------------------------- 1 | # Greedy Combinatorial Optimization 2 | The algorithm takes an initial input design and spawns parallel threads to perform each of the given transformations on the design. Then, it keeps the design with the minimum area for the next iteration, whether or not it meets the delay constraint. It continues until no further improvement in the design area is made. 3 | 4 | ## How to run 5 | - Install dependencies: `pip3 install pyyaml joblib` 6 | - Edit the `data.yml` file to specify your design file, library file, and output directory, and to modify other parameters 7 | - Run using: `python3 greedy.py data.yml` 8 | - Logs and results are written to the `output_dir` specified in the `data.yml` file. 9 | 10 | -------------------------------------------------------------------------------- /drills/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, SCALE Lab, Brown University 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | """ 8 | A package to manage the DRiLLS implementation, utilizing ABC and Tensorflow 9 | ... 
10 | 11 | Classes: 12 | -------- 13 | SCLSession: to manage the logic synthesis environment when using a standard cell library 14 | FPGASession: to manage the logic synthesis environment when using FPGAs 15 | A2C: contains the deep neural network model (Advantage Actor Critic) 16 | 17 | Helpers: 18 | -------- 19 | yosys_stats: extract design metrics using yosys 20 | abc_stats: extract design metrics using ABC 21 | extract_features: extract design features used as input to the model 22 | """ -------------------------------------------------------------------------------- /scripts/bar_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read bar.v 5 | 6 | strash 7 | rewrite -z 8 | balance 9 | rewrite -z 10 | refactor 11 | refactor -z 12 | refactor 13 | resub 14 | refactor -z 15 | refactor 16 | rewrite 17 | refactor -z 18 | rewrite -z 19 | resub 20 | refactor -z 21 | rewrite -z 22 | refactor 23 | balance 24 | resub -z 25 | rewrite 26 | rewrite -z 27 | rewrite 28 | rewrite 29 | rewrite -z 30 | resub 31 | resub 32 | balance 33 | rewrite 34 | resub -z 35 | balance 36 | rewrite -z 37 | rewrite -z 38 | resub -z 39 | rewrite 40 | rewrite 41 | refactor 42 | refactor 43 | refactor 44 | balance 45 | resub -z 46 | rewrite -z 47 | resub -z 48 | resub -z 49 | balance 50 | refactor -z 51 | refactor -z 52 | rewrite -z 53 | 54 | write_verilog bar_synth_drills.v 55 | 56 | map -D 800 57 | stime 58 | -------------------------------------------------------------------------------- /scripts/div_drills.tcl: -------------------------------------------------------------------------------- 1 | # Script generated by DRiLLS agent 2 | 3 | read asap7.lib 4 | read div.v 5 | 6 | strash 7 | rewrite 8 | refactor -z 9 | refactor -z 10 | refactor -z 11 | resub 12 | rewrite -z 13 | resub -z 14 | refactor -z 15 | rewrite 16 | resub -z 17 | rewrite -z 18 | rewrite 19 | rewrite -z 20 | resub 21 | refactor 22 | resub 23 | refactor -z 24 | refactor -z 25 | refactor -z 26 | refactor 27 | refactor 28 | rewrite 29 | resub -z 30 | refactor -z 31 | resub 32 | refactor -z 33 | resub -z 34 | resub -z 35 | refactor -z 36 | resub 37 | refactor -z 38 | resub -z 39 | refactor 40 | resub -z 41 | resub -z 42 | resub 43 | refactor -z 44 | rewrite 45 | refactor -z 46 | rewrite 47 | rewrite 48 | rewrite -z 49 | resub 50 | refactor 51 | rewrite -z 52 | refactor -z 53 | refactor 54 | refactor 55 | resub 56 | 57 | write_verilog div_synth_drills.v 58 | 59 | map -D 75000 60 | stime 61 | -------------------------------------------------------------------------------- /baseline/simulated-annealing/data.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, SCALE Lab, Brown University 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 
 6 | 7 | # ====================================================================== 8 | # This file holds parameters for running a simulated annealing algorithm 9 | # that optimizes a logic synthesis flow using ABC 10 | 11 | # path of the design file in one of the accepted formats by ABC 12 | design_file: my-design.blif 13 | 14 | # the directory to hold the output of the iterations 15 | output_dir: result 16 | 17 | mapping: 18 | clock_period: 150 # in pico seconds 19 | library_file: my-library.lib 20 | 21 | iterations: 100 22 | 23 | # add more optimizations to the toolbox 24 | optimizations: 25 | - rewrite 26 | - rewrite -z 27 | - refactor 28 | - refactor -z 29 | - resub 30 | - resub -z 31 | - balance 32 | 33 | # Parameters for the simulated annealing algorithm 34 | simulated_annealing: 35 | initial_temp: 3 36 | cooling_rate: 0.9 37 | -------------------------------------------------------------------------------- /baseline/greedy/data.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, SCALE Lab, Brown University 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # ======================================================================== 8 | # This file holds parameters for running a greedy algorithm that optimizes 9 | # a logic synthesis flow using ABC 10 | 11 | # path of the design file in one of the accepted formats by ABC 12 | design_file: my-design.blif 13 | 14 | # the directory to hold the output of the iterations 15 | output_dir: result 16 | 17 | mapping: 18 | clock_period: 150 # in pico seconds 19 | library_file: tech.lib 20 | 21 | # the number of iterations for the greedy optimization 22 | iterations: 100 23 | 24 | # add more optimizations to the toolbox 25 | optimizations: 26 | - rewrite 27 | - rewrite -z 28 | - refactor 29 | - refactor -z 30 | - resub 31 | - resub -z 32 | - balance 33 | 34 | # when the greedy algorithm gets stuck, it applies one of the post-mapping commands below 35 | post_mapping_commands: 36 | - dnsize -D 150 37 | - upsize -D 150 38 | - buffer 39 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If this code helped your research, please cite it as below." 
 3 | authors: 4 | - family-names: "Hosny" 5 | given-names: "Abdelrahman" 6 | orcid: "https://orcid.org/0000-0003-4020-7973" 7 | - family-names: "Hashemi" 8 | given-names: "Soheil" 9 | - family-names: "Shalan" 10 | given-names: "Mohamed" 11 | - family-names: "Reda" 12 | given-names: "Sherief" 13 | title: "DRiLLS: Deep Reinforcement Learning for Logic Synthesis" 14 | version: 1.0.0 15 | doi: 10.1109/ASP-DAC47756.2020.9045559 16 | date-released: 2019-11-11 17 | url: "https://github.com/scale-lab/DRiLLS" 18 | preferred-citation: 19 | type: article 20 | authors: 21 | - family-names: "Hosny" 22 | given-names: "Abdelrahman" 23 | orcid: "https://orcid.org/0000-0003-4020-7973" 24 | - family-names: "Hashemi" 25 | given-names: "Soheil" 26 | - family-names: "Shalan" 27 | given-names: "Mohamed" 28 | - family-names: "Reda" 29 | given-names: "Sherief" 30 | doi: "10.1109/ASP-DAC47756.2020.9045559" 31 | journal: "2020 25th Asia and South Pacific Design Automation Conference (ASP-DAC)" 32 | month: 9 33 | start: 581 # First page number 34 | end: 586 # Last page number 35 | title: "DRiLLS: Deep Reinforcement Learning for Logic Synthesis" 36 | year: 2020 -------------------------------------------------------------------------------- /params.yml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019, SCALE Lab, Brown University 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the BSD-style license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # ====================================================================== 8 | # This file holds parameters for running a DRiLLS agent for training and 9 | # inference. It sets up the RL environment along with the logic synthesis 10 | # environment to train the RL agent. 11 | 12 | # change this to the abc binary path if the command is not recognized system-wide 13 | abc_binary: yosys-abc 14 | yosys_binary: yosys 15 | 16 | # path of the design file in one of the accepted formats by ABC 17 | design_file: design.v 18 | 19 | # standard cell library mapping 20 | mapping: 21 | clock_period: 150 # in pico seconds 22 | library_file: tech.lib 23 | 24 | # FPGA mapping - mutually exclusive with the above 25 | fpga_mapping: 26 | levels: 100 27 | lut_inputs: 6 28 | 29 | # add more optimizations to the toolbox 30 | optimizations: 31 | - rewrite 32 | - rewrite -z 33 | - refactor 34 | - refactor -z 35 | - resub 36 | - resub -z 37 | - balance 38 | 39 | # the directory to hold the playground an agent uses to practice 40 | playground_dir: playground 41 | 42 | # agent training parameters 43 | episodes: 100 44 | iterations: 50 45 | model_dir: /tmp/brain/model.ckpt # must be absolute path -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, SCALE Lab, Brown University 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # IPython 79 | profile_default/ 80 | ipython_config.py 81 | 82 | # pyenv 83 | .python-version 84 | 85 | # celery beat schedule file 86 | celerybeat-schedule 87 | 88 | # SageMath parsed files 89 | *.sage.py 90 | 91 | # Environments 92 | .env 93 | .venv 94 | env/ 95 | venv/ 96 | ENV/ 97 | env.bak/ 98 | venv.bak/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | .dmypy.json 113 | dmypy.json 114 | 115 | # Pyre type checker 116 | .pyre/ -------------------------------------------------------------------------------- /drills.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 
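# Example invocations (derived from the argparse setup below; the params file
# argument is optional and defaults to params.yml):
#   python3 drills.py train scl params.yml
#   python3 drills.py optimize fpga params.yml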
 8 | 9 | import yaml 10 | import os 11 | import argparse 12 | import datetime 13 | import numpy as np 14 | import time 15 | from drills.model import A2C 16 | from drills.fixed_optimization import optimize_with_fixed_script 17 | from pyfiglet import Figlet 18 | 19 | def log(message): 20 | print('[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message) 21 | 22 | class CapitalisedHelpFormatter(argparse.ArgumentDefaultsHelpFormatter): 23 | def add_usage(self, usage, actions, groups, prefix=None): 24 | if prefix is None: 25 | prefix = 'Usage: ' 26 | return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix) 27 | 28 | if __name__ == '__main__': 29 | parser = argparse.ArgumentParser(add_help=True, formatter_class=CapitalisedHelpFormatter, \ 30 | description='Performs logic synthesis optimization using RL') 31 | parser._positionals.title = 'Positional arguments' 32 | parser._optionals.title = 'Optional arguments' 33 | parser.add_argument('-v', '--version', action='version', \ 34 | version = 'DRiLLS v0.1', help="Shows the program's version number and exits") 35 | parser.add_argument("-l", "--load_model", action='store_true', \ 36 | help="Loads a saved Tensorflow model") 37 | parser.add_argument("-s", "--fixed_script", type=open, \ 38 | help="Executes a fixed optimization script before DRiLLS") 39 | parser.add_argument("mode", type=str, choices=['train', 'optimize'], \ 40 | help="Use the design to train the model or only optimize it") 41 | parser.add_argument("mapping", type=str, choices=['scl', 'fpga'], \ 42 | help="Map to standard cell library or FPGA") 43 | parser.add_argument("params", type=open, nargs='?', default='params.yml', \ 44 | help="Path to the params.yml file") 45 | args = parser.parse_args() 46 | 47 | options = yaml.load(args.params, Loader=yaml.FullLoader) 48 | 49 | f = Figlet(font='slant') 50 | print(f.renderText('DRiLLS')) 51 | 52 | if args.fixed_script: 53 | options = optimize_with_fixed_script(options, args.fixed_script) 54 | 55 | if args.mapping == 'scl': 56 | fpga_mapping = False 57 | else: 58 | fpga_mapping = True 59 | 60 | if args.mode == 'train': 61 | log('Starting to train the agent ..') 62 | 63 | all_rewards = [] 64 | learner = A2C(options, load_model=args.load_model, fpga_mapping=fpga_mapping) 65 | training_start_time = time.time() 66 | for i in range(options['episodes']): 67 | log('Episode: ' + str(i+1)) 68 | start = time.time() 69 | total_reward = learner.train_episode() 70 | end = time.time() 71 | all_rewards.append(total_reward) 72 | log('Episode: ' + str(i+1) + ' - done with total reward = ' + str(total_reward)) 73 | log('Episode ' + str(i+1) + ' Run Time ~ ' + str((end - start) / 60) + ' minutes.') 74 | print('') 75 | training_end_time = time.time() 76 | log('Total Training Run Time ~ ' + str((training_end_time - training_start_time) / 60) + ' minutes.') 77 | 78 | mean_reward = np.mean(all_rewards[-100:]) 79 | elif args.mode == 'optimize': 80 | log('Starting agent to optimize') 81 | learner = A2C(options, load_model=True) 82 | for _ in range(options['iterations']): 83 | # TODO: iteratively run the optimizer 84 | pass -------------------------------------------------------------------------------- /drills/features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 
 5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | import re 10 | import numpy as np 11 | import datetime 12 | from multiprocessing import Process, Manager 13 | from subprocess import check_output 14 | from collections import defaultdict 15 | 16 | def log(message): 17 | print('[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message) 18 | 19 | def yosys_stats(design_file, yosys_binary, stats): 20 | yosys_command = "read_verilog " + design_file + "; stat" 21 | try: 22 | proc = check_output([yosys_binary, '-QT', '-p', yosys_command]) 23 | lines = proc.decode("utf-8").split('\n') 24 | for line in lines: 25 | if 'Number of wires' in line: 26 | stats['number_of_wires'] = int(line.strip().split()[-1]) 27 | if 'Number of public wires' in line: 28 | stats['number_of_public_wires'] = int(line.strip().split()[-1]) 29 | if 'Number of cells' in line: 30 | stats['number_of_cells'] = float(line.strip().split()[-1]) 31 | if '$and' in line: 32 | stats['ands'] = float(line.strip().split()[-1]) 33 | if '$or' in line: 34 | stats['ors'] = float(line.strip().split()[-1]) 35 | if '$not' in line: 36 | stats['nots'] = float(line.strip().split()[-1]) 37 | 38 | # catch some design special cases 39 | if 'ands' not in stats: 40 | stats['ands'] = 0.0 41 | if 'ors' not in stats: 42 | stats['ors'] = 0.0 43 | if 'nots' not in stats: 44 | stats['nots'] = 0.0 45 | except Exception as e: 46 | print(e) 47 | return None 48 | return stats 49 | 50 | def abc_stats(design_file, abc_binary, stats): 51 | abc_command = "read_verilog " + design_file + "; print_stats" 52 | try: 53 | proc = check_output([abc_binary, '-c', abc_command]) 54 | lines = proc.decode("utf-8").split('\n') 55 | for line in lines: 56 | if 'i/o' in line: 57 | ob = re.search(r'i/o *= *[0-9]+ */ *[0-9]+', line) 58 | stats['input_pins'] = int(ob.group().split('=')[1].strip().split('/')[0].strip()) 59 | stats['output_pins'] = int(ob.group().split('=')[1].strip().split('/')[1].strip()) 60 | 61 | ob = re.search(r'edge *= *[0-9]+', line) 62 | stats['edges'] = int(ob.group().split('=')[1].strip()) 63 | 64 | ob = re.search(r'lev *= *[0-9]+', line) 65 | stats['levels'] = int(ob.group().split('=')[1].strip()) 66 | 67 | ob = re.search(r'lat *= *[0-9]+', line) 68 | stats['latches'] = int(ob.group().split('=')[1].strip()) 69 | except Exception as e: 70 | print(e) 71 | return None 72 | 73 | return stats 74 | 75 | def extract_features(design_file, yosys_binary='yosys', abc_binary='abc'): 76 | ''' 77 | Returns features of a given circuit as a numpy array. 
Features are listed below 78 | ''' 79 | 80 | manager = Manager() 81 | stats = manager.dict() 82 | p1 = Process(target=yosys_stats, args=(design_file, yosys_binary, stats)) 83 | p2 = Process(target=abc_stats, args=(design_file, abc_binary, stats)) 84 | p1.start() 85 | p2.start() 86 | p1.join() 87 | p2.join() 88 | 89 | # normalized features 90 | features = defaultdict(float) 91 | 92 | # (1) - number of input/output pins 93 | features['input_pins'] = stats['input_pins'] 94 | features['output_pins'] = stats['output_pins'] 95 | 96 | # (2) - number of nodes and edges 97 | features['number_of_nodes'] = stats['number_of_cells'] 98 | features['number_of_edges'] = stats['edges'] 99 | 100 | # (3) - number of levels 101 | features['number_of_levels'] = stats['levels'] 102 | 103 | # (4) - number of latches 104 | features['number_of_latches'] = stats['latches'] 105 | 106 | # (5) - gate types percentages 107 | features['percentage_of_ands'] = stats['ands'] / stats['number_of_cells'] 108 | features['percentage_of_ors'] = stats['ors'] / stats['number_of_cells'] 109 | features['percentage_of_nots'] = stats['nots'] / stats['number_of_cells'] 110 | 111 | return np.array([features['input_pins'], features['output_pins'], \ 112 | features['number_of_nodes'], features['number_of_edges'], \ 113 | features['number_of_levels'], features['number_of_latches'], \ 114 | features['percentage_of_ands'], features['percentage_of_ors'], features['percentage_of_nots']]) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DRiLLS 2 | Deep Reinforcement Learning for Logic Synthesis Optimization 3 | 4 | ## Abstract 5 | Logic synthesis requires extensive tuning of the synthesis optimization flow where the quality of results (QoR) depends on the sequence of optimizations used. Efficient design space exploration is challenging due to the exponential number of possible optimization permutations. Therefore, automating the optimization process is necessary. In this work, we propose a novel reinforcement learning-based methodology that navigates the optimization space without human intervention. We demonstrate the training of an Advantage Actor Critic (A2C) agent that seeks to minimize area subject to a timing constraint. Using the proposed framework, designs can be optimized autonomously with no humans in the loop. 6 | 7 | ## Paper 8 | DRiLLS was presented at ASP-DAC 2020 and the manuscript is available on [IEEE Xplore](https://ieeexplore.ieee.org/abstract/document/9045559). A pre-print version is available on [arXiv](https://arxiv.org/abs/1911.04021). 9 | 10 | ## Setup 11 | DRiLLS requires `Python 3.6`, `pip3` and `virtualenv` installed on the system. 12 | 13 | 1. `virtualenv .venv --python=python3` 2. `source .venv/bin/activate` 3. `pip install -r requirements.txt` 14 | 15 | :warning: WARNING :warning: 16 | 17 | Since TensorFlow 2.x is not compatible with TensorFlow 1.x, this implementation is tested only on Python 3.6. 18 | If you have a newer version of Python, `pip` won't be able to find `tensorflow==1.x`. 19 | 20 | 21 | ## Run the agent 22 | 23 | 1. Edit the `params.yml` file. Comments in the file illustrate the individual fields. 24 | 2. 
Run `python drills.py train scl` 27 | 28 | For help, `python drills.py --help` 29 | 30 | ## How It Works 31 | 32 | 33 | There are two major components in the DRiLLS framework: 34 | 35 | * **Logic Synthesis** environment: a setup of the design space exploration problem as a reinforcement learning task. The logic synthesis environment is implemented as a session in [drills/scl_session.py](drills/scl_session.py) and [drills/fpga_session.py](drills/fpga_session.py). 36 | * **Reinforcement Learning** environment: it employs an *Advantage Actor Critic agent (A2C)* to navigate the environment searching for the best optimization at a given state. It is implemented in [drills/model.py](drills/model.py) and uses [drills/features.py](drills/features.py) to extract AIG features. 37 | 38 | The DRiLLS agent exploring the design space of the [Max](https://github.com/lsils/benchmarks/blob/master/arithmetic/max.v) design. 39 | 40 | ![](https://media.giphy.com/media/XbbW4WjeLuqneVbGEU/giphy.gif) 41 | 42 | For more details on the inner workings of the framework, see Section 4 in [the paper](https://github.com/scale-lab/DRiLLS/blob/drills-preprint/doc/preprint/DRiLLS_preprint_AH.pdf). 43 | 44 | ## Reporting Bugs 45 | Please use [ISSUE_TEMPLATE/bug_report.md](.github/ISSUE_TEMPLATE/bug_report.md) to create an issue and describe your bug. 46 | 47 | ## Contributing 48 | Below is a list of suggested contributions you can make. Before you work on any, it is advised that you create an issue using [ISSUE_TEMPLATE/contribution.md](.github/ISSUE_TEMPLATE/contribution.md) to tell us what you plan to work on. This ensures that your work can be merged into the `master` branch in a timely manner. 49 | 50 | ### Modernize Tensorflow Implementation 51 | 52 | Google has recently released [Dopamine](https://github.com/google/dopamine) which sets up a framework for researching reinforcement learning algorithms. A new version of DRiLLS would adopt Dopamine to make it easier to implement the model and session classes. If you are new to Dopamine and want to try it on a real use case, it would be a great fit for DRiLLS and will add great value to our repository. 53 | 54 | ### Better Integration 55 | The current implementation interacts with the logic synthesis environment using files. This affects the run time of the agent training as it extracts features and statistics through files. A better integration keeps a session of `yosys` and `abc` open, where the design is loaded once in the beginning and the feature extraction (and results extraction) is performed through this open session. 56 | 57 | ### Study An Enhanced Model 58 | The goal is to enhance the model architecture used in [drills/model.py](drills/model.py). An enhancement should give better results (smaller area **AND** meeting timing constraints): 59 | * Deeper network architecture. 60 | * Changing the gamma rate. 61 | * Changing the learning rate. 62 | * Improving normalization. 63 | 64 | ## Citation 65 | ``` 66 | @INPROCEEDINGS{9045559, 67 | author={A. {Hosny} and S. {Hashemi} and M. {Shalan} and S. {Reda}}, 68 | booktitle={2020 25th Asia and South Pacific Design Automation Conference (ASP-DAC)}, 69 | title={DRiLLS: Deep Reinforcement Learning for Logic Synthesis}, 70 | year={2020}, 71 | volume={}, 72 | number={}, 73 | pages={581-586},} 74 | ``` 75 | 76 | ## License 77 | BSD 3-Clause License. 
See the [LICENSE](LICENSE) file. 78 | -------------------------------------------------------------------------------- /baseline/greedy/greedy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | import yaml 10 | import os 11 | import subprocess 12 | import sys 13 | import timeit 14 | import re 15 | from joblib import Parallel, delayed 16 | 17 | data_file = sys.argv[1] 18 | 19 | with open(data_file, 'r') as f: 20 | options = yaml.load(f, Loader=yaml.FullLoader) 21 | 22 | start = timeit.default_timer() 23 | 24 | optimizations = options['optimizations'] 25 | iterations = options['iterations'] 26 | current_design_file = options['design_file'] 27 | library_file = options['mapping']['library_file'] 28 | clock_period = options['mapping']['clock_period'] 29 | post_mapping_optimizations = options['post_mapping_commands'] 30 | 31 | # Create directory if not exists 32 | if not os.path.exists(options['output_dir']): 33 | os.makedirs(options['output_dir']) 34 | 35 | def extract_results(stats): 36 | """ 37 | extracts area and delay from the printed stats on stdout 38 | """ 39 | line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip() 40 | 41 | ob = re.search(r'Delay *= *[0-9]+\.?[0-9]*', line) 42 | delay = float(ob.group().split('=')[1].strip()) 43 | ob = re.search(r'Area *= *[0-9]+\.?[0-9]*', line) 44 | area = float(ob.group().split('=')[1].strip()) 45 | return delay, area 46 | 47 | def run_optimization(output_dir, optimization, design_file, library): 48 | """ 49 | returns new_design_file, delay, area 50 | """ 51 | output_dir = output_dir.replace(' ', '_') 52 | if not os.path.exists(output_dir): 53 | os.makedirs(output_dir) 54 | output_design_file = output_dir + '/design.blif' 55 | 56 | abc_command = 'read ' + library + '; ' 57 | abc_command += 'read ' + design_file + '; ' 58 | abc_command += 'strash; ' 59 | abc_command += optimization + '; ' 60 | abc_command += 'write ' + output_design_file + '; ' 61 | abc_command += 'map -D ' + str(clock_period) + '; ' 62 | abc_command += 'topo; stime; ' 63 | 64 | proc = subprocess.check_output(['yosys-abc','-c', abc_command]) 65 | d, a = extract_results(proc) 66 | return output_design_file, d, a 67 | 68 | def save_optimization_step(iteration, optimization, delay, area): 69 | """ 70 | saves the winning optimization to a csv file 71 | """ 72 | with open(os.path.join(options['output_dir'], 'results.csv'), 'a') as f: 73 | data_point = str(iteration) + ', ' + str(optimization) + ', ' 74 | data_point += str(delay) + ', ' + str(area) + '\n' 75 | f.write(data_point) 76 | 77 | def log(message=''): 78 | print(message) 79 | with open(os.path.join(options['output_dir'], 'greedy.log'), 'a') as f: 80 | f.write(message + '\n') 81 | 82 | def run_post_mapping(output_dir, optimization, design_file, library): 83 | """ 84 | returns new_design_file, delay, area 85 | """ 86 | output_dir = output_dir.replace(' ', '_') 87 | if not os.path.exists(output_dir): 88 | os.makedirs(output_dir) 89 | output_design_file = output_dir + '/design.blif' 90 | 91 | abc_command = 'read ' + library + '; ' 92 | abc_command += 'read ' + design_file + '; ' 93 | abc_command += 'strash; ' 94 | abc_command += 'map -D ' + str(clock_period) + '; ' 95 | abc_command += optimization + ';' 96 | abc_command += 'write ' + output_design_file + '; ' 97 | 
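    # stats are printed as the last command on purpose: extract_results()
    # parses the delay/area figures out of the final line ABC writes to stdout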
abc_command += 'print_stats; ' 98 | proc = subprocess.check_output(['yosys-abc','-c', abc_command]) 99 | d, a = extract_results(proc) 100 | return output_design_file, d, a 101 | 102 | def run_thread(iteration_dir, design_file, opt): 103 | opt_dir = os.path.join(iteration_dir, opt) 104 | opt_file, delay, area = run_optimization(opt_dir, opt, 105 | design_file, 106 | library_file) 107 | log('Optimization: ' + opt + ' -> delay: ' + str(delay) + ', area: ' + str(area)) 108 | return (opt, opt_file, delay, area) 109 | 110 | def run_thread_post_mapping(iteration_dir, design_file, opt): 111 | opt_dir = os.path.join(iteration_dir, opt) 112 | opt_file, delay, area = run_post_mapping(opt_dir, opt, 113 | design_file, 114 | library_file) 115 | log('Optimization: ' + opt + ' -> delay: ' + str(delay) + ', area: ' + str(area)) 116 | return (opt, opt_file, delay, area) 117 | 118 | # main optimizing iteration 119 | previous_area = None 120 | for i in range(iterations): 121 | # log 122 | log('Iteration: ' + str(i+1)) 123 | log('-------------') 124 | 125 | # create a directory for this iteration 126 | iteration_dir = os.path.join(options['output_dir'], str(i)) 127 | if not os.path.exists(iteration_dir): 128 | os.makedirs(iteration_dir) 129 | 130 | # in parallel, run ABC on each of the optimizations we have 131 | results = Parallel(n_jobs=len(optimizations))(delayed(run_thread)(iteration_dir, current_design_file, opt) for opt in optimizations) 132 | 133 | # get the minimum result of all threads 134 | best_thread = min(results, key = lambda t: t[3]) # getting minimum for delay (index=2) or area (index=3) 135 | 136 | # hold the best result in variables 137 | best_optimization = best_thread[0] 138 | best_optimization_file = best_thread[1] 139 | best_delay = best_thread[2] 140 | best_area = best_thread[3] 141 | 142 | 143 | if best_area == previous_area: 144 | # break for now 145 | log('Looks like the best area is exactly the same as last iteration!') 146 | log('Continue anyway ..') 147 | log('Choosing Optimization: ' + best_optimization + ' -> delay: ' + str(best_delay) + ', area: ' + str(best_area)) 148 | save_optimization_step(i, best_optimization, best_delay, best_area) 149 | 150 | log() 151 | 152 | # update design file for the next iteration 153 | current_design_file = best_optimization_file 154 | log('================') 155 | log() 156 | continue 157 | 158 | log() 159 | log('Looks like the best area is exactly the same as last iteration!') 160 | log('Performing post mapping optimizations ..') 161 | # run post mapping optimization 162 | results = Parallel(n_jobs=len(post_mapping_optimizations))(delayed(run_thread_post_mapping)(iteration_dir, current_design_file, opt) for opt in post_mapping_optimizations) 163 | 164 | # get the minimum result of all threads 165 | best_thread = min(results, key = lambda t: t[3]) # getting minimum for delay (index=2) or area (index=3) 166 | 167 | # hold the best result in variables 168 | best_optimization = best_thread[0] 169 | best_optimization_file = best_thread[1] 170 | best_delay = best_thread[2] 171 | best_area = best_thread[3] 172 | previous_area = None 173 | else: 174 | previous_area = best_area 175 | 176 | # save results 177 | log() 178 | log('Choosing Optimization: ' + best_optimization + ' -> delay: ' + str(best_delay) + ', area: ' + str(best_area)) 179 | save_optimization_step(i, best_optimization, best_delay, best_area) 180 | 181 | # update design file for the next iteration 182 | current_design_file = best_optimization_file 183 | log('================') 184 | log() 
185 | 186 | stop = timeit.default_timer() 187 | 188 | log('Total Optimization Time: ' + str(stop - start)) -------------------------------------------------------------------------------- /drills/scl_session.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | import os 10 | import re 11 | import datetime 12 | import numpy as np 13 | from subprocess import check_output 14 | from .features import extract_features 15 | 16 | def log(message): 17 | print('[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message) 18 | 19 | class SCLSession: 20 | """ 21 | A class to represent a logic synthesis optimization session using ABC 22 | """ 23 | def __init__(self, params): 24 | self.params = params 25 | 26 | self.action_space_length = len(self.params['optimizations']) 27 | self.observation_space_size = 9 # number of features 28 | 29 | self.iteration = 0 30 | self.episode = 0 31 | self.sequence = ['strash'] 32 | self.delay, self.area = float('inf'), float('inf') 33 | 34 | self.best_known_area = (float('inf'), float('inf'), -1, -1) 35 | self.best_known_delay = (float('inf'), float('inf'), -1, -1) 36 | self.best_known_area_meets_constraint = (float('inf'), float('inf'), -1, -1) 37 | 38 | # logging 39 | self.log = None 40 | 41 | def __del__(self): 42 | if self.log: 43 | self.log.close() 44 | 45 | def reset(self): 46 | """ 47 | resets the environment and returns the state 48 | """ 49 | self.iteration = 0 50 | self.episode += 1 51 | self.delay, self.area = float('inf'), float('inf') 52 | self.sequence = ['strash'] 53 | self.episode_dir = os.path.join(self.params['playground_dir'], str(self.episode)) 54 | if not os.path.exists(self.episode_dir): 55 | os.makedirs(self.episode_dir) 56 | 57 | # logging 58 | log_file = os.path.join(self.episode_dir, 'log.csv') 59 | if self.log: 60 | self.log.close() 61 | self.log = open(log_file, 'w') 62 | self.log.write('iteration, optimization, area, delay, best_area_meets_constraint, best_area, best_delay\n') 63 | 64 | state, _ = self._run() 65 | 66 | # logging 67 | self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(self.area), str(self.delay)]) + '\n') 68 | self.log.flush() 69 | 70 | return state 71 | 72 | def step(self, optimization): 73 | """ 74 | accepts optimization index and returns (new state, reward, done, info) 75 | """ 76 | self.sequence.append(self.params['optimizations'][optimization]) 77 | new_state, reward = self._run() 78 | 79 | # logging 80 | if self.area < self.best_known_area[0]: 81 | self.best_known_area = (self.area, self.delay, self.episode, self.iteration) 82 | if self.delay < self.best_known_delay[1]: 83 | self.best_known_delay = (self.area, self.delay, self.episode, self.iteration) 84 | if self.delay <= self.params['mapping']['clock_period'] and self.area < self.best_known_area_meets_constraint[0]: 85 | self.best_known_area_meets_constraint = (self.area, self.delay, self.episode, self.iteration) 86 | self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(self.area), str(self.delay)]) + ', ' + 87 | '; '.join(list(map(str, self.best_known_area_meets_constraint))) + ', ' + 88 | '; '.join(list(map(str, self.best_known_area))) + ', ' + 89 | '; '.join(list(map(str, self.best_known_delay))) + '\n') 90 | self.log.flush() 
91 | 92 | return new_state, reward, self.iteration == self.params['iterations'], None 93 | 94 | def _run(self): 95 | """ 96 | run ABC on the given design file with the sequence of commands 97 | """ 98 | self.iteration += 1 99 | output_design_file = os.path.join(self.episode_dir, str(self.iteration) + '.v') 100 | output_design_file_mapped = os.path.join(self.episode_dir, str(self.iteration) + '-mapped.v') 101 | 102 | abc_command = 'read ' + self.params['mapping']['library_file'] + '; ' 103 | abc_command += 'read ' + self.params['design_file'] + '; ' 104 | abc_command += ';'.join(self.sequence) + '; ' 105 | abc_command += 'write ' + output_design_file + '; ' 106 | abc_command += 'map -D ' + str(self.params['mapping']['clock_period']) + '; ' 107 | abc_command += 'write ' + output_design_file_mapped + '; ' 108 | abc_command += 'topo; stime;' 109 | 110 | try: 111 | proc = check_output([self.params['abc_binary'], '-c', abc_command]) 112 | # get reward 113 | delay, area = self._get_metrics(proc) 114 | reward = self._get_reward(delay, area) 115 | self.delay, self.area = delay, area 116 | # get new state of the circuit 117 | state = self._get_state(output_design_file) 118 | return state, reward 119 | except Exception as e: 120 | print(e) 121 | return None, None 122 | 123 | def _get_metrics(self, stats): 124 | """ 125 | parse delay and area from the stats command of ABC 126 | """ 127 | line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip() 128 | 129 | ob = re.search(r'Delay *= *[0-9]+.?[0-9]*', line) 130 | delay = float(ob.group().split('=')[1].strip()) 131 | 132 | ob = re.search(r'Area *= *[0-9]+.?[0-9]*', line) 133 | area = float(ob.group().split('=')[1].strip()) 134 | 135 | return delay, area 136 | 137 | def _get_reward(self, delay, area): 138 | constraint_met = True 139 | optimization_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement) 140 | constraint_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement) 141 | 142 | # check optimizing parameter 143 | if area < self.area: 144 | optimization_improvement = 1 145 | elif area == self.area: 146 | optimization_improvement = 0 147 | else: 148 | optimization_improvement = -1 149 | 150 | # check constraint parameter 151 | if delay > self.params["mapping"]["clock_period"]: 152 | constraint_met = False 153 | if delay < self.delay: 154 | constraint_improvement = 1 155 | elif delay == self.delay: 156 | constraint_improvement = 0 157 | else: 158 | constraint_improvement = -1 159 | 160 | # now calculate the reward 161 | return self._reward_table(constraint_met, constraint_improvement, optimization_improvement) 162 | 163 | def _reward_table(self, constraint_met, contraint_improvement, optimization_improvement): 164 | return { 165 | True: { 166 | 0: { 167 | 1: 3, 168 | 0: 0, 169 | -1: -1 170 | } 171 | }, 172 | False: { 173 | 1: { 174 | 1: 3, 175 | 0: 2, 176 | -1: 1 177 | }, 178 | 0: { 179 | 1: 2, 180 | 0: 0, 181 | -1: -2 182 | }, 183 | -1: { 184 | 1: -1, 185 | 0: -2, 186 | -1: -3 187 | } 188 | } 189 | }[constraint_met][contraint_improvement][optimization_improvement] 190 | 191 | def _get_state(self, design_file): 192 | return extract_features(design_file, self.params['yosys_binary'], self.params['abc_binary']) 193 | -------------------------------------------------------------------------------- /drills/fpga_session.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 
5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 8 | 9 | import os 10 | import re 11 | import datetime 12 | import numpy as np 13 | from subprocess import check_output 14 | from .features import extract_features 15 | 16 | def log(message): 17 | print('[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message) 18 | 19 | class FPGASession: 20 | """ 21 | A class to represent a logic synthesis optimization session using ABC 22 | """ 23 | def __init__(self, params): 24 | self.params = params 25 | 26 | self.action_space_length = len(self.params['optimizations']) 27 | self.observation_space_size = 9 # number of features 28 | 29 | self.iteration = 0 30 | self.episode = 0 31 | self.sequence = ['strash'] 32 | self.lut_6, self.levels = float('inf'), float('inf') 33 | 34 | self.best_known_lut_6 = (float('inf'), float('inf'), -1, -1) 35 | self.best_known_levels = (float('inf'), float('inf'), -1, -1) 36 | self.best_known_lut_6_meets_constraint = (float('inf'), float('inf'), -1, -1) 37 | 38 | # logging 39 | self.log = None 40 | 41 | def __del__(self): 42 | if self.log: 43 | self.log.close() 44 | 45 | def reset(self): 46 | """ 47 | resets the environment and returns the state 48 | """ 49 | self.iteration = 0 50 | self.episode += 1 51 | self.lut_6, self.levels = float('inf'), float('inf') 52 | self.sequence = ['strash'] 53 | self.episode_dir = os.path.join(self.params['playground_dir'], str(self.episode)) 54 | if not os.path.exists(self.episode_dir): 55 | os.makedirs(self.episode_dir) 56 | 57 | # logging 58 | log_file = os.path.join(self.episode_dir, 'log.csv') 59 | if self.log: 60 | self.log.close() 61 | self.log = open(log_file, 'w') 62 | self.log.write('iteration, optimization, LUT-6, Levels, best LUT-6 meets constraint, best LUT-6, best levels\n') 63 | 64 | state, _ = self._run() 65 | 66 | # logging 67 | self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(int(self.lut_6)), str(int(self.levels))]) + '\n') 68 | self.log.flush() 69 | 70 | return state 71 | 72 | def step(self, optimization): 73 | """ 74 | accepts optimization index and returns (new state, reward, done, info) 75 | """ 76 | self.sequence.append(self.params['optimizations'][optimization]) 77 | new_state, reward = self._run() 78 | 79 | # logging 80 | if self.lut_6 < self.best_known_lut_6[0]: 81 | self.best_known_lut_6 = (int(self.lut_6), int(self.levels), self.episode, self.iteration) 82 | if self.levels < self.best_known_levels[1]: 83 | self.best_known_levels = (int(self.lut_6), int(self.levels), self.episode, self.iteration) 84 | if self.levels <= self.params['fpga_mapping']['levels'] and self.lut_6 < self.best_known_lut_6_meets_constraint[0]: 85 | self.best_known_lut_6_meets_constraint = (int(self.lut_6), int(self.levels), self.episode, self.iteration) 86 | self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(int(self.lut_6)), str(int(self.levels))]) + ', ' + 87 | '; '.join(list(map(str, self.best_known_lut_6_meets_constraint))) + ', ' + 88 | '; '.join(list(map(str, self.best_known_lut_6))) + ', ' + 89 | '; '.join(list(map(str, self.best_known_levels))) + '\n') 90 | self.log.flush() 91 | 92 | return new_state, reward, self.iteration == self.params['iterations'], None 93 | 94 | def _run(self): 95 | """ 96 | run ABC on the given design file with the sequence of commands 97 | """ 98 | self.iteration += 1 99 | output_design_file = os.path.join(self.episode_dir, str(self.iteration) + '.v') 100 | 
output_design_file_mapped = os.path.join(self.episode_dir, str(self.iteration) + '-mapped.v') 101 | 102 | abc_command = 'read ' + self.params['design_file'] + '; ' 103 | abc_command += ';'.join(self.sequence) + '; ' 104 | abc_command += 'write ' + output_design_file + '; ' 105 | abc_command += 'if -K ' + str(self.params['fpga_mapping']['lut_inputs']) + '; ' 106 | abc_command += 'write ' + output_design_file_mapped + '; ' 107 | abc_command += 'print_stats;' 108 | 109 | try: 110 | proc = check_output([self.params['abc_binary'], '-c', abc_command]) 111 | # get reward 112 | lut_6, levels = self._get_metrics(proc) 113 | reward = self._get_reward(lut_6, levels) 114 | self.lut_6, self.levels = lut_6, levels 115 | # get new state of the circuit 116 | state = self._get_state(output_design_file) 117 | return state, reward 118 | except Exception as e: 119 | print(e) 120 | return None, None 121 | 122 | def _get_metrics(self, stats): 123 | """ 124 | parse LUT count and levels from the stats command of ABC 125 | """ 126 | line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip() 127 | 128 | ob = re.search(r'lev *= *[0-9]+', line) 129 | levels = int(ob.group().split('=')[1].strip()) 130 | 131 | ob = re.search(r'nd *= *[0-9]+', line) 132 | lut_6 = int(ob.group().split('=')[1].strip()) 133 | 134 | return lut_6, levels 135 | 136 | def _get_reward(self, lut_6, levels): 137 | constraint_met = True 138 | optimization_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement) 139 | constraint_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement) 140 | 141 | # check optimizing parameter 142 | if lut_6 < self.lut_6: 143 | optimization_improvement = 1 144 | elif lut_6 == self.lut_6: 145 | optimization_improvement = 0 146 | else: 147 | optimization_improvement = -1 148 | 149 | # check constraint parameter 150 | if levels > self.params["fpga_mapping"]["levels"]: 151 | constraint_met = False 152 | if levels < self.levels: 153 | constraint_improvement = 1 154 | elif levels == self.levels: 155 | constraint_improvement = 0 156 | else: 157 | constraint_improvement = -1 158 | 159 | # now calculate the reward 160 | return self._reward_table(constraint_met, constraint_improvement, optimization_improvement) 161 | 162 | def _reward_table(self, constraint_met, contraint_improvement, optimization_improvement): 163 | return { 164 | True: { 165 | 0: { 166 | 1: 3, 167 | 0: 0, 168 | -1: -1 169 | } 170 | }, 171 | False: { 172 | 1: { 173 | 1: 3, 174 | 0: 2, 175 | -1: 1 176 | }, 177 | 0: { 178 | 1: 2, 179 | 0: 0, 180 | -1: -2 181 | }, 182 | -1: { 183 | 1: -1, 184 | 0: -2, 185 | -1: -3 186 | } 187 | } 188 | }[constraint_met][contraint_improvement][optimization_improvement] 189 | 190 | def _get_state(self, design_file): 191 | return extract_features(design_file, self.params['yosys_binary'], self.params['abc_binary']) 192 | -------------------------------------------------------------------------------- /baseline/simulated-annealing/simulated-annealing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2019, SCALE Lab, Brown University 4 | # All rights reserved. 5 | 6 | # This source code is licensed under the BSD-style license found in the 7 | # LICENSE file in the root directory of this source tree. 
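# Usage (as described in the accompanying README):
#   python3 simulated-annealing.py data.yml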
11 | import yaml
12 | import os
13 | import subprocess
14 | import sys
15 | import timeit
16 | import re
17 | import random
18 | import math
19 | from joblib import Parallel, delayed
20 | 
21 | data_file = sys.argv[1]
22 | 
23 | with open(data_file, 'r') as f:
24 |     options = yaml.safe_load(f)
25 | 
26 | start = timeit.default_timer()
27 | 
28 | optimizations = options['optimizations']
29 | iterations = options['iterations']
30 | current_design_file = options['design_file']
31 | library_file = options['mapping']['library_file']
32 | clock_period = options['mapping']['clock_period']
33 | # post_mapping_optimizations = options['post_mapping_commands']
34 | 
35 | temperature = options['simulated_annealing']['initial_temp']
36 | cooling_rate = options['simulated_annealing']['cooling_rate']
37 | 
38 | # create the output directory if it does not exist
39 | if not os.path.exists(options['output_dir']):
40 |     os.makedirs(options['output_dir'])
41 | 
42 | def extract_results(stats):
43 |     """
44 |     extracts area and delay from the printed stats on stdout
45 |     """
46 |     line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip()
47 |     ob = re.search(r'delay *= *[0-9]+\.?[0-9]*', line)
48 |     delay = float(ob.group().split('=')[1].strip())
49 |     ob = re.search(r'area *= *[0-9]+\.?[0-9]*', line)
50 |     area = float(ob.group().split('=')[1].strip())
51 |     return delay, area
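52 | # For reference, the final print_stats line parsed above looks roughly like
53 | #   top : i/o = 38/32 ... area = 1234.56 delay = 901.23
54 | # (an assumed shape for illustration; the exact fields depend on the design and library)
55 | 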
56 | def run_optimization(output_dir, optimization, design_file, library):
57 |     """
58 |     returns new_design_file, delay, area
59 |     """
60 |     output_dir = output_dir.replace(' ', '_')
61 |     if not os.path.exists(output_dir):
62 |         os.makedirs(output_dir)
63 |     output_design_file = output_dir + '/design.blif'
64 | 
65 |     abc_command = 'read ' + library + '; '
66 |     abc_command += 'read ' + design_file + '; '
67 |     abc_command += 'strash; '
68 |     abc_command += optimization + '; '
69 |     abc_command += 'write ' + output_design_file + '; '
70 |     abc_command += 'map -D ' + str(clock_period) + '; '
71 |     abc_command += 'print_stats; '
72 | 
73 |     proc = subprocess.check_output(['yosys-abc', '-c', abc_command])
74 |     d, a = extract_results(proc)
75 |     return output_design_file, d, a
76 | 
77 | def save_optimization_step(iteration, optimization, delay, area):
78 |     """
79 |     saves the accepted optimization to a csv file
80 |     """
81 |     with open(os.path.join(options['output_dir'], 'results.csv'), 'a') as f:
82 |         data_point = str(iteration) + ', ' + str(optimization) + ', '
83 |         data_point += str(delay) + ', ' + str(area) + '\n'
84 |         f.write(data_point)
85 | 
86 | def log(message=''):
87 |     print(message)
88 |     with open(os.path.join(options['output_dir'], 'simulated-annealing.log'), 'a') as f:
89 |         f.write(message + '\n')
90 | 
91 | def run_post_mapping(output_dir, optimization, design_file, library):
92 |     """
93 |     returns new_design_file, delay, area
94 |     """
95 |     output_dir = output_dir.replace(' ', '_')
96 |     if not os.path.exists(output_dir):
97 |         os.makedirs(output_dir)
98 |     output_design_file = output_dir + '/design.blif'
99 | 
100 |     abc_command = 'read ' + library + '; '
101 |     abc_command += 'read ' + design_file + '; '
102 |     abc_command += 'strash; '
103 |     abc_command += 'map -D ' + str(clock_period) + '; '
104 |     abc_command += optimization + '; '
105 |     abc_command += 'write ' + output_design_file + '; '
106 |     abc_command += 'print_stats; '
107 |     proc = subprocess.check_output(['yosys-abc', '-c', abc_command])
108 |     d, a = extract_results(proc)
109 |     return output_design_file, d, a
110 | 
111 | def run_thread(iteration_dir, design_file, opt):
112 |     opt_dir = os.path.join(iteration_dir, opt)
113 |     opt_file, delay, area = run_optimization(opt_dir, opt,
114 |                                              design_file,
115 |                                              library_file)
116 |     log('Optimization: ' + opt + ' -> delay: ' + str(delay) + ', area: ' + str(area))
117 |     return (opt, opt_file, delay, area)
118 | 
119 | def run_thread_post_mapping(iteration_dir, design_file, opt):
120 |     opt_dir = os.path.join(iteration_dir, opt)
121 |     opt_file, delay, area = run_post_mapping(opt_dir, opt,
122 |                                              design_file,
123 |                                              library_file)
124 |     log('Optimization: ' + opt + ' -> delay: ' + str(delay) + ', area: ' + str(area))
125 |     return (opt, opt_file, delay, area)
126 | 
127 | i = 0
128 | # run the optimization once to set the initial energy (delay) of the system
129 | log('Initializing annealing ..')
130 | log('Current temperature: ' + str(temperature))
131 | log('----------------')
132 | iteration_dir = os.path.join(options['output_dir'], str(i))
133 | if not os.path.exists(iteration_dir):
134 |     os.makedirs(iteration_dir)
135 | # start from a command that performs no optimization
136 | random_optimization = 'strash'
137 | result = run_thread(iteration_dir, current_design_file, random_optimization)
138 | opt_file = result[1]
139 | delay = result[2]
140 | area = result[3]
141 | # accept it to set the energy of the system in the beginning
142 | save_optimization_step(i, random_optimization, delay, area)
143 | current_design_file = opt_file
144 | previous_delay = delay
145 | i += 1
146 | 
147 | log('System initialized with delay: ' + str(delay))
148 | log('Starting annealing ..')
149 | log()
150 | 
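151 | # The acceptance rule in the loop below is the Metropolis criterion: a move that
152 | # worsens the delay by delta is accepted with probability exp(-delta / temperature).
153 | # Illustrative numbers (assumed, not from a run): delta = 100 at temperature 400
154 | # gives exp(-0.25) ~ 0.78 (hot: usually accepted), while at temperature 40 it
155 | # gives exp(-2.5) ~ 0.08 (cool: usually rejected).
156 | 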
157 | # main optimizing iteration
158 | while True:
159 |     number_of_accepted_optimizations = 0
160 | 
161 |     for _ in range(100):
162 |         # if we accept 10 optimizations, we cool down the system;
163 |         # otherwise, we only continue up to 100 trials at this temperature
164 | 
165 |         # log
166 |         log('Iteration: ' + str(i))
167 |         log('Temperature: ' + str(temperature))
168 |         log('----------------')
169 | 
170 |         # create a directory for this iteration
171 |         iteration_dir = os.path.join(options['output_dir'], str(i))
172 |         if not os.path.exists(iteration_dir):
173 |             os.makedirs(iteration_dir)
174 | 
175 |         # pick an optimization at random
176 |         random_optimization = random.choice(optimizations)
177 |         result = run_thread(iteration_dir, current_design_file, random_optimization)
178 |         opt_file = result[1]
179 |         delay = result[2]
180 |         area = result[3]
181 | 
182 |         # if better than the previous delay, accept it; otherwise, accept it
183 |         # with a probability that shrinks as the system cools
184 |         if delay < previous_delay:
185 |             log('The optimization reduced the delay!')
186 |             log('Accepting it ..')
187 |             save_optimization_step(i, random_optimization, delay, area)
188 |             current_design_file = opt_file
189 |             previous_delay = delay
190 |             number_of_accepted_optimizations += 1
191 |         else:
192 |             delta_delay = delay - previous_delay
193 |             probability_of_acceptance = math.exp((- delta_delay) / temperature)
194 |             log('The optimization didn\'t reduce the delay; while the system is still hot, it may be accepted anyway.')
195 |             log('The probability of acceptance is: ' + str(probability_of_acceptance))
196 |             log('Uniformly generating a number to see if we accept it ..')
197 |             if random.uniform(0, 1.0) < probability_of_acceptance:
198 |                 log('Accepting it ..')
199 |                 save_optimization_step(i, random_optimization, delay, area)
200 |                 current_design_file = opt_file
201 |                 previous_delay = delay
202 |                 number_of_accepted_optimizations += 1
203 |             else:
204 |                 log('Rejected ..')
205 |         i += 1
206 |         log()
207 | 
208 |         if number_of_accepted_optimizations == 10:
209 |             break
210 | 
211 |     if temperature <= 0.1:
212 |         log('System has sufficiently cooled down ..')
213 |         log('Shutting down simulation ..')
214 |         log()
215 |         break
216 | 
217 |     new_temperature = temperature * cooling_rate
218 |     log('Cooling down system from ' + str(temperature) + ' to ' + str(new_temperature) + ' ..')
219 |     temperature = new_temperature
220 |     log('================')
221 |     log()
222 | 
223 | stop = timeit.default_timer()
224 | 
225 | log('Total Optimization Time: ' + str(stop - start))
226 | 
--------------------------------------------------------------------------------
/drills/model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | 
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 | 
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 | 
9 | import tensorflow as tf
10 | import numpy as np
11 | import datetime
12 | import time
13 | from .scl_session import SCLSession as SCLGame
14 | from .fpga_session import FPGASession as FPGAGame
15 | 
16 | def log(message):
17 |     print('[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message)
18 | 
19 | class Normalizer():
20 |     def __init__(self, num_inputs):
21 |         self.num_inputs = num_inputs
22 |         self.n = tf.zeros(num_inputs)
23 |         self.mean = tf.zeros(num_inputs)
24 |         self.mean_diff = tf.zeros(num_inputs)
25 |         self.var = tf.zeros(num_inputs)
26 | 
27 |     def observe(self, x):
28 |         self.n += 1.
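29 |         # Welford's online update: after n observations,
30 |         #   mean_n = mean_{n-1} + (x - mean_{n-1}) / n
31 |         #   M2_n   = M2_{n-1} + (x - mean_{n-1}) * (x - mean_n)
32 |         # so var ~ M2_n / n, computed without storing past observations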
33 |         last_mean = tf.identity(self.mean)
34 |         self.mean += (x - self.mean) / self.n
35 |         self.mean_diff += (x - last_mean) * (x - self.mean)
36 |         self.var = tf.clip_by_value(self.mean_diff / self.n, clip_value_min=1e-2, clip_value_max=1000000000)
37 | 
38 |     def normalize(self, inputs):
39 |         obs_std = tf.sqrt(self.var)
40 |         return (inputs - self.mean) / obs_std
41 | 
42 |     def reset(self):
43 |         self.n = tf.zeros(self.num_inputs)
44 |         self.mean = tf.zeros(self.num_inputs)
45 |         self.mean_diff = tf.zeros(self.num_inputs)
46 |         self.var = tf.zeros(self.num_inputs)
47 | 
48 | class A2C:
49 |     def __init__(self, options, load_model=False, fpga_mapping=False):
50 |         if fpga_mapping:
51 |             self.game = FPGAGame(options)
52 |         else:
53 |             self.game = SCLGame(options)
54 | 
55 |         self.num_actions = self.game.action_space_length
56 |         self.state_size = self.game.observation_space_size
57 |         self.normalizer = Normalizer(self.state_size)
58 | 
59 |         # hyper-parameters, set before the graph is built so optimizer() can use them
60 |         self.gamma = 0.99
61 |         self.learning_rate = 0.01
62 | 
63 |         self.state_input = tf.placeholder(tf.float32, [None, self.state_size])
64 | 
65 |         # additional placeholders used for training the agent
66 |         self.actions = tf.placeholder(tf.float32, [None, self.num_actions])
67 |         self.discounted_episode_rewards_ = tf.placeholder(tf.float32, [None, ])
68 | 
69 |         self.state_value = self.critic()
70 |         self.actor_probs = self.actor()
71 |         self.loss_val = self.loss()
72 |         self.train_op = self.optimizer()
73 |         self.session = tf.Session()
74 | 
75 |         # model saving/restoring
76 |         self.model_dir = options['model_dir']
77 |         self.saver = tf.train.Saver()
78 | 
79 |         if load_model:
80 |             self.saver.restore(self.session, self.model_dir)
81 |             log("Model restored.")
82 |         else:
83 |             self.session.run(tf.global_variables_initializer())
84 | 
85 |     def optimizer(self):
86 |         """
87 |         :return: Optimizer for the loss function
88 |         """
89 |         return tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss_val)
90 | 
91 |     def critic(self):
92 |         """
93 |         Calculates the estimated value for every state in self.state_input. The critic should not depend on
94 |         any other tensors besides self.state_input.
95 |         :return: A tensor of shape [num_states, 1] representing the estimated value of each state in the trajectory.
96 |         """
97 |         c_fc1 = tf.contrib.layers.fully_connected(inputs=self.state_input,
98 |                                                   num_outputs=10,
99 |                                                   activation_fn=tf.nn.relu,
100 |                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
101 | 
102 |         c_fc2 = tf.contrib.layers.fully_connected(inputs=c_fc1,
103 |                                                   num_outputs=1,
104 |                                                   activation_fn=None,
105 |                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
106 | 
107 |         return c_fc2
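108 | 
109 |     # Network shapes, for orientation (state size 9 comes from the sessions' observation_space_size):
110 |     #   critic: state (9) -> 10 ReLU -> 1 linear state-value estimate
111 |     #   actor:  state (9) -> 20 ReLU -> 20 ReLU -> num_actions softmax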
111 | """ 112 | a_fc1 = tf.contrib.layers.fully_connected(inputs=self.state_input, 113 | num_outputs=20, 114 | activation_fn=tf.nn.relu, 115 | weights_initializer=tf.contrib.layers.xavier_initializer()) 116 | 117 | a_fc2 = tf.contrib.layers.fully_connected(inputs=a_fc1, 118 | num_outputs=20, 119 | activation_fn=tf.nn.relu, 120 | weights_initializer=tf.contrib.layers.xavier_initializer()) 121 | 122 | a_fc3 = tf.contrib.layers.fully_connected(inputs=a_fc2, 123 | num_outputs=self.num_actions, 124 | activation_fn=None, 125 | weights_initializer=tf.contrib.layers.xavier_initializer()) 126 | 127 | return tf.nn.softmax(a_fc3) 128 | 129 | def loss(self): 130 | """ 131 | :return: A scalar tensor representing the combined actor and critic loss. 132 | """ 133 | # critic loss 134 | advantage = self.discounted_episode_rewards_ - self.state_value 135 | critic_loss = tf.reduce_sum(tf.square(advantage)) 136 | 137 | # actor loss 138 | neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.log(self.actor_probs), 139 | labels=self.actions) 140 | actor_loss = tf.reduce_sum(neg_log_prob * advantage) 141 | 142 | neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.actor_probs, 143 | labels=self.actions) 144 | policy_gradient_loss = tf.reduce_mean(neg_log_prob * self.discounted_episode_rewards_) 145 | # return policy_gradient_loss 146 | 147 | return critic_loss + actor_loss 148 | 149 | def save_model(self): 150 | save_path = self.saver.save(self.session, self.model_dir) 151 | log("Model saved in path: %s" % str(save_path)) 152 | 153 | def train_episode(self): 154 | """ 155 | train_episode will be called several times by the drills.py to train the agent. In this method, 156 | we run the agent for a single episode, then use that data to train the agent. 
157 | """ 158 | state = self.game.reset() 159 | self.normalizer.reset() 160 | self.normalizer.observe(state) 161 | state = self.normalizer.normalize(state).eval(session=self.session) 162 | done = False 163 | 164 | episode_states = [] 165 | episode_actions = [] 166 | episode_rewards = [] 167 | 168 | while not done: 169 | log(' iteration: ' + str(self.game.iteration)) 170 | action_probability_distribution = self.session.run(self.actor_probs, \ 171 | feed_dict={self.state_input: state.reshape([1, self.state_size])}) 172 | action = np.random.choice(range(action_probability_distribution.shape[1]), \ 173 | p=action_probability_distribution.ravel()) 174 | new_state, reward, done, _ = self.game.step(action) 175 | 176 | # append this step 177 | episode_states.append(state) 178 | action_ = np.zeros(self.num_actions) 179 | action_[action] = 1 180 | episode_actions.append(action_) 181 | episode_rewards.append(reward) 182 | 183 | state = new_state 184 | self.normalizer.observe(state) 185 | state = self.normalizer.normalize(state).eval(session=self.session) 186 | 187 | # Now that we have run the episode, we use this data to train the agent 188 | start = time.time() 189 | discounted_episode_rewards = self.discount_and_normalize_rewards(episode_rewards) 190 | 191 | _ = self.session.run(self.train_op, feed_dict={self.state_input: np.array(episode_states), \ 192 | self.actions: np.array(episode_actions), \ 193 | self.discounted_episode_rewards_: discounted_episode_rewards}) 194 | end = time.time() 195 | log('Episode Agent Training Time ~ ' + str((start - end) / 60) + ' minutes.') 196 | 197 | self.save_model() 198 | 199 | return np.sum(episode_rewards) 200 | 201 | def discount_and_normalize_rewards(self, episode_rewards): 202 | """ 203 | used internally to calculate the discounted episode rewards 204 | """ 205 | discounted_episode_rewards = np.zeros_like(episode_rewards) 206 | cumulative = 0.0 207 | for i in reversed(range(len(episode_rewards))): 208 | cumulative = cumulative * self.gamma + episode_rewards[i] 209 | discounted_episode_rewards[i] = cumulative 210 | 211 | mean = np.mean(discounted_episode_rewards) 212 | std = np.std(discounted_episode_rewards) 213 | 214 | discounted_episode_rewards = (discounted_episode_rewards - mean) / std 215 | 216 | return discounted_episode_rewards 217 | 218 | --------------------------------------------------------------------------------