├── tests
└── .keep
├── .vscode
└── settings.json
├── doc
└── img
│ ├── drills-logo.png
│ ├── drills-header.png
│ └── drills-architecture.png
├── scripts
├── README.md
├── sin_drills.tcl
├── multiplier_drills.tcl
├── square_drills.tcl
├── max_drills.tcl
├── sqrt_drills.tcl
├── log2_drills.tcl
├── adder_drills.tcl
├── hyp_drills.tcl
├── bar_drills.tcl
└── div_drills.tcl
├── baseline
├── README.md
├── simulated-annealing
│ ├── README.md
│ ├── data.yml
│ └── simulated-annealing.py
└── greedy
│ ├── README.md
│ ├── data.yml
│ └── greedy.py
├── .github
└── ISSUE_TEMPLATE
│ ├── contribution.md
│ └── bug_report.md
├── requirements.txt
├── drills
├── fixed_optimization.py
├── __init__.py
├── features.py
├── scl_session.py
├── fpga_session.py
└── model.py
├── CITATION.cff
├── params.yml
├── LICENSE
├── .gitignore
├── drills.py
└── README.md
/tests/.keep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": ".venv/bin/python"
3 | }
--------------------------------------------------------------------------------
/doc/img/drills-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scale-lab/DRiLLS/HEAD/doc/img/drills-logo.png
--------------------------------------------------------------------------------
/doc/img/drills-header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scale-lab/DRiLLS/HEAD/doc/img/drills-header.png
--------------------------------------------------------------------------------
/doc/img/drills-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scale-lab/DRiLLS/HEAD/doc/img/drills-architecture.png
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 | # DRiLLS Scripts
2 |
3 | This folder contains scripts generated by the design space exploration using DRiLLS agent.
--------------------------------------------------------------------------------
/baseline/README.md:
--------------------------------------------------------------------------------
1 | This directory contains miscellaneous methods for combinatorial optimization that we used en route to discovering the RL methodology.
2 |
--------------------------------------------------------------------------------
/scripts/sin_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read sin.v
5 |
6 | strash
7 | rewrite
8 |
9 | write_verilog sin_synth_drills.v
10 |
11 | map -D 3800
12 | stime
--------------------------------------------------------------------------------
/scripts/multiplier_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read multiplier.v
5 |
6 | strash
7 | balance
8 | refactor
9 | rewrite -z
10 | refactor
11 |
12 | write_verilog mul_synth_drills.v
13 |
14 | map -D 4000
15 | stime
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/contribution.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Contribution First Step
3 | about: Suggest a contribution for DRiLLS
4 | labels:
5 | ---
6 |
7 | **What?**
8 | A clear and concise description of what you want to contribute.
9 |
10 | **Why?**
11 | Add any other context about the contribution here.
--------------------------------------------------------------------------------
/scripts/square_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read square.v
5 |
6 | strash
7 | rewrite -z
8 | refactor -z
9 | balance
10 | balance
11 | resub
12 | resub
13 | resub
14 | refactor
15 | rewrite -z
16 | resub -z
17 | resub
18 | balance
19 | refactor -z
20 | resub
21 | refactor -z
22 |
23 | write_verilog square_synth_drills.v
24 |
25 | map -D 2200
26 | stime
27 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.8.1
2 | astor==0.8.0
3 | gast==0.2.2
4 | google-pasta==0.1.7
5 | grpcio==1.24.3
6 | h5py==2.10.0
7 | joblib==0.14.0
8 | Keras-Applications==1.0.8
9 | Keras-Preprocessing==1.1.0
10 | Markdown==3.1.1
11 | numpy==1.17.2
12 | opt-einsum==3.1.0
13 | protobuf==3.10.0
14 | pyfiglet==0.8.post1
15 | PyYAML==5.1.2
16 | six==1.12.0
17 | tensorflow==1.12.0
18 | termcolor==1.1.0
19 | Werkzeug==0.16.0
20 | wrapt==1.11.2
21 |
--------------------------------------------------------------------------------
/scripts/max_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read max.v
5 |
6 | strash
7 | refactor -z
8 | resub
9 | rewrite
10 | resub -z
11 | refactor
12 | balance
13 | resub
14 | rewrite -z
15 | resub -z
16 | resub
17 | refactor -z
18 | rewrite -z
19 | resub -z
20 | refactor
21 | refactor
22 | balance
23 |
24 | write_verilog max_synth_drills.v
25 |
26 | map -D 4000
27 | stime
28 |
--------------------------------------------------------------------------------
/scripts/sqrt_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read sqrt.v
5 |
6 | strash
7 | refactor -z
8 | resub -z
9 | refactor -z
10 | refactor -z
11 | refactor -z
12 | refactor
13 | balance
14 | rewrite -z
15 | rewrite -z
16 | resub
17 | refactor -z
18 | refactor -z
19 | refactor -z
20 | balance
21 | resub -z
22 | resub -z
23 | resub -z
24 | refactor -z
25 | rewrite -z
26 |
27 | write_verilog sqrt_synth_drills.v
28 |
29 | map -D 170000
30 | stime
31 |
--------------------------------------------------------------------------------
/scripts/log2_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read log2.v
5 |
6 | strash
7 | resub -z
8 | refactor -z
9 | refactor -z
10 | refactor
11 | resub
12 | refactor
13 | refactor -z
14 | refactor -z
15 | refactor
16 | resub -z
17 | resub -z
18 | refactor -z
19 | refactor -z
20 | resub -z
21 | refactor
22 | refactor -z
23 | resub -z
24 | refactor
25 | resub
26 | refactor
27 | resub -z
28 | resub -z
29 | balance
30 |
31 | write_verilog log2_synth_drills.v
32 |
33 | map -D 7500
34 | stime
35 |
--------------------------------------------------------------------------------
/scripts/adder_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read adder.v
5 |
6 | strash
7 | refactor
8 | rewrite -z
9 | rewrite -z
10 | rewrite -z
11 | resub -z
12 | refactor
13 | refactor
14 | rewrite -z
15 | refactor
16 | rewrite
17 | rewrite
18 | resub
19 | rewrite
20 | rewrite
21 | resub
22 | balance
23 | resub
24 | refactor -z
25 | balance
26 | rewrite
27 | rewrite -z
28 | balance
29 | resub -z
30 | rewrite -z
31 | refactor
32 | rewrite -z
33 | refactor -z
34 |
35 | write_verilog adder_synth_drills.v
36 |
37 | map -D 2000
38 | stime
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help DRiLLS improve
4 | labels:
5 | ---
6 |
7 | **Describe the bug**
8 | A clear and concise description of what the bug is.
9 |
10 | **To Reproduce**
11 | Steps to reproduce the behavior:
12 | 1. Clone the latest DRiLLS
13 | 2. Modify `data.yml` file to ...
14 | 3. Provide how you run the agent
15 | 4. Provide an output of the failure
16 |
17 | **Expected behavior**
18 | A clear and concise description of what you expected to happen.
19 |
20 | **Additional context**
21 | Add any other context about the problem here.
--------------------------------------------------------------------------------
/drills/fixed_optimization.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
def optimize_with_fixed_script(params, fixed_script_file):
    """
    Optimize the design with a fixed synthesis script.

    Rewrites `params.design_file` to point at the newly produced design file
    and returns the updated `params` object.
    """
    # TODO: run an scl session with the fixed script.
    optimized_design = None  # placeholder until the SCL session is wired up
    params.design_file = optimized_design
    return params
--------------------------------------------------------------------------------
/baseline/simulated-annealing/README.md:
--------------------------------------------------------------------------------
1 | # Combinatorial Optimization with Simulated Annealing
2 | The algorithm takes an initial input design and tries to optimize it using a simulated annealing approach, where the temperature changes over the iterations and the target metric (area/delay) is modified accordingly.
3 |
4 | ## How to run
5 | - Install dependencies: `pip3 install pyyaml joblib`
6 | - Edit `data.yml` file to specify your design file, library file, output directory and modify other parameters
7 | - Run using: `python3 simulated-annealing.py data.yml`
8 | - Logs and results are written to the `output_dir` specified in the `data.yml` file.
9 |
--------------------------------------------------------------------------------
/scripts/hyp_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read hyp.v
5 |
6 | strash
7 | rewrite
8 | resub
9 | resub
10 | resub
11 | resub
12 | resub
13 | resub
14 | balance
15 | balance
16 | rewrite -z
17 | resub
18 | resub
19 | resub
20 | refactor -z
21 | resub
22 | resub
23 | resub
24 | resub
25 | resub
26 | refactor -z
27 | resub
28 | resub -z
29 | resub
30 | resub
31 | resub -z
32 | resub
33 | resub
34 | resub
35 | resub
36 | balance
37 | balance
38 | resub
39 | resub -z
40 | resub
41 | rewrite -z
42 |
43 | write_verilog hyp_synth_drills.v
44 |
45 | map -D 1000000
46 | stime
47 |
--------------------------------------------------------------------------------
/baseline/greedy/README.md:
--------------------------------------------------------------------------------
1 | # Greedy Combinatorial Optimization
2 | The algorithm takes an initial input design and spawns parallel threads to perform each of the given transformations on the design. Then, it keeps the design with the minimum area for the next iteration, whether it meets the delay constraint or not. After that, it continues until no improvements in the design area are made.
3 |
4 | ## How to run
5 | - Install dependencies: `pip3 install pyyaml joblib`
6 | - Edit `data.yml` file to specify your design file, library file, output directory and modify other parameters
7 | - Run using: `python3 greedy.py data.yml`
8 | - Logs and results are written to the `output_dir` specified in the `data.yml` file.
9 |
10 |
--------------------------------------------------------------------------------
/drills/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019, SCALE Lab, Brown University
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | """
8 | A package to manage DRiLLS implementation; utilizing ABC and Tensorflow
9 | ...
10 |
11 | Classes:
12 | --------
13 | SCLSession: to manage the logic synthesis environment when using a standard cell library
14 | FPGASession: to manage the logic synthesis environment when using FPGAs
15 | A2C: contains the deep neural network model (Advantage Actor Critic)
16 |
17 | Helpers:
18 | --------
19 | yosys_stats: extract design metrics using yosys
20 | abc_stats: extract design metrics using ABC
21 | extract_features: extract design features used as input to the model
22 | """
--------------------------------------------------------------------------------
/scripts/bar_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read bar.v
5 |
6 | strash
7 | rewrite -z
8 | balance
9 | rewrite -z
10 | refactor
11 | refactor -z
12 | refactor
13 | resub
14 | refactor -z
15 | refactor
16 | rewrite
17 | refactor -z
18 | rewrite -z
19 | resub
20 | refactor -z
21 | rewrite -z
22 | refactor
23 | balance
24 | resub -z
25 | rewrite
26 | rewrite -z
27 | rewrite
28 | rewrite
29 | rewrite -z
30 | resub
31 | resub
32 | balance
33 | rewrite
34 | resub -z
35 | balance
36 | rewrite -z
37 | rewrite -z
38 | resub -z
39 | rewrite
40 | rewrite
41 | refactor
42 | refactor
43 | refactor
44 | balance
45 | resub -z
46 | rewrite -z
47 | resub -z
48 | resub -z
49 | balance
50 | refactor -z
51 | refactor -z
52 | rewrite -z
53 |
54 | write_verilog bar_synth_drills.v
55 |
56 | map -D 800
57 | stime
58 |
--------------------------------------------------------------------------------
/scripts/div_drills.tcl:
--------------------------------------------------------------------------------
1 | # Script generated by DRiLLS agent
2 |
3 | read asap7.lib
4 | read div.v
5 |
6 | strash
7 | rewrite
8 | refactor -z
9 | refactor -z
10 | refactor -z
11 | resub
12 | rewrite -z
13 | resub -z
14 | refactor -z
15 | rewrite
16 | resub -z
17 | rewrite -z
18 | rewrite
19 | rewrite -z
20 | resub
21 | refactor
22 | resub
23 | refactor -z
24 | refactor -z
25 | refactor -z
26 | refactor
27 | refactor
28 | rewrite
29 | resub -z
30 | refactor -z
31 | resub
32 | refactor -z
33 | resub -z
34 | resub -z
35 | refactor -z
36 | resub
37 | refactor -z
38 | resub -z
39 | refactor
40 | resub -z
41 | resub -z
42 | resub
43 | refactor -z
44 | rewrite
45 | refactor -z
46 | rewrite
47 | rewrite
48 | rewrite -z
49 | resub
50 | refactor
51 | rewrite -z
52 | refactor -z
53 | refactor
54 | refactor
55 | resub
56 |
57 | write_verilog div_synth_drills.v
58 |
59 | map -D 75000
60 | stime
61 |
--------------------------------------------------------------------------------
/baseline/simulated-annealing/data.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019, SCALE Lab, Brown University
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | # ======================================================================
8 | # This file holds parameters for running a simulated annealing algorithm
9 | # that optimizes a logic synthesis flow using ABC
10 |
11 | # path of the design file in one of the accepted formats by ABC
12 | design_file: my-design.blif
13 |
14 | # the directory to hold the output of the iterations
15 | output_dir: result
16 |
17 | mapping:
18 | clock_period: 150 # in pico seconds
19 | library_file: my-library.lib
20 |
21 | iterations: 100
22 |
23 | # add more optimization to the toolbox
24 | optimizations:
25 | - rewrite
26 | - rewrite -z
27 | - refactor
28 | - refactor -z
29 | - resub
30 | - resub -z
31 | - balance
32 |
33 | # Parameters for the simulated annealing algorithm
34 | simulated_annealing:
35 | initial_temp: 3
36 | cooling_rate: 0.9
37 |
--------------------------------------------------------------------------------
/baseline/greedy/data.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019, SCALE Lab, Brown University
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | # ========================================================================
8 | # This file holds parameters for running a greedy algorithm that optimizes
9 | # a logic synthesis flow using ABC
10 |
11 | # path of the design file in one of the accepted formats by ABC
12 | design_file: my-design.blif
13 |
14 | # the directory to hold the output of the iterations
15 | output_dir: result
16 |
17 | mapping:
18 | clock_period: 150 # in pico seconds
19 | library_file: tech.lib
20 |
21 | # the number of iterations for the greedy optimization
22 | iterations: 100
23 |
24 | # add more optimization to the toolbox
25 | optimizations:
26 | - rewrite
27 | - rewrite -z
28 | - refactor
29 | - refactor -z
30 | - resub
31 | - resub -z
32 | - balance
33 |
34 | # when the greedy algorithm get stuck, it applies one of the below post-mapping commands
35 | post_mapping_commands:
36 | - dnsize -D 150
37 | - upsize -D 150
38 | - buffer
39 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | message: "If this code helped your research, please cite it as below."
3 | authors:
4 | - family-names: "Hosny"
5 | given-names: "Abdelrahman"
6 | orcid: "https://orcid.org/0000-0003-4020-7973"
7 | - family-names: "Hashemi"
8 | given-names: "Soheil"
9 | - family-names: "Shalan"
10 | given-names: "Mohamed"
11 | - family-names: "Reda"
12 | given-names: "Sherief"
13 | title: "DRiLLS: Deep Reinforcement Learning for Logic Synthesis"
14 | version: 1.0.0
15 | doi: 10.1109/ASP-DAC47756.2020.9045559
16 | date-released: 2019-11-11
17 | url: "https://github.com/scale-lab/DRiLLS"
18 | preferred-citation:
19 | type: article
20 | authors:
21 | - family-names: "Hosny"
22 | given-names: "Abdelrahman"
23 | orcid: "https://orcid.org/0000-0003-4020-7973"
24 | - family-names: "Hashemi"
25 | given-names: "Soheil"
26 | - family-names: "Shalan"
27 | given-names: "Mohamed"
28 | - family-names: "Reda"
29 | given-names: "Sherief"
30 | doi: "10.1109/ASP-DAC47756.2020.9045559"
31 | journal: "2020 25th Asia and South Pacific Design Automation Conference (ASP-DAC)"
32 | month: 9
33 | start: 581 # First page number
34 | end: 586 # Last page number
35 | title: "DRiLLS: Deep Reinforcement Learning for Logic Synthesis"
36 | year: 2020
--------------------------------------------------------------------------------
/params.yml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019, SCALE Lab, Brown University
2 | # All rights reserved.
3 |
4 | # This source code is licensed under the BSD-style license found in the
5 | # LICENSE file in the root directory of this source tree.
6 |
7 | # ======================================================================
8 | # This file holds parameters for running a DRiLLS agent for training and
9 | # inference. It sets up the RL environment along with the logic synthesis
10 | # environment to train the RL agent.
11 |
12 | # change this to the abc binary path if the command is not recognized system-wide
13 | abc_binary: yosys-abc
14 | yosys_binary: yosys
15 |
16 | # path of the design file in one of the accepted formats by ABC
17 | design_file: design.v
18 |
19 | # standard cell library mapping
20 | mapping:
21 | clock_period: 150 # in pico seconds
22 | library_file: tech.lib
23 |
24 | # FPGA mapping - exclusive with the above
25 | fpga_mapping:
26 | levels: 100
27 | lut_inputs: 6
28 |
29 | # add more optimization to the toolbox
30 | optimizations:
31 | - rewrite
32 | - rewrite -z
33 | - refactor
34 | - refactor -z
35 | - resub
36 | - resub -z
37 | - balance
38 |
39 | # the directory to hold the playground an agent uses to practice
40 | playground_dir: playground
41 |
42 | # agent training parameters
43 | episodes: 100
44 | iterations: 50
45 | model_dir: /tmp/brain/model.ckpt # must be absolute path
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, SCALE Lab, Brown University
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 | .pytest_cache/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # IPython
79 | profile_default/
80 | ipython_config.py
81 |
82 | # pyenv
83 | .python-version
84 |
85 | # celery beat schedule file
86 | celerybeat-schedule
87 |
88 | # SageMath parsed files
89 | *.sage.py
90 |
91 | # Environments
92 | .env
93 | .venv
94 | env/
95 | venv/
96 | ENV/
97 | env.bak/
98 | venv.bak/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 | .dmypy.json
113 | dmypy.json
114 |
115 | # Pyre type checker
116 | .pyre/
--------------------------------------------------------------------------------
/drills.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import yaml
10 | import os
11 | import argparse
12 | import datetime
13 | import numpy as np
14 | import time
15 | from drills.model import A2C
16 | from drills.fixed_optimization import optimize_with_fixed_script
17 | from pyfiglet import Figlet
18 |
def log(message):
    """Print *message* prefixed with a timestamped DRiLLS tag."""
    stamp = datetime.datetime.now()
    print('[DRiLLS {:%Y-%m-%d %H:%M:%S}] {}'.format(stamp, message))
21 |
class CapitalisedHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter whose usage banner reads 'Usage:' instead of 'usage:'."""

    def add_usage(self, usage, actions, groups, prefix=None):
        # Only substitute the default prefix; an explicitly given one wins.
        if prefix is None:
            prefix = 'Usage: '
        return super().add_usage(usage, actions, groups, prefix)
27 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser(add_help=True, formatter_class=CapitalisedHelpFormatter,
                                     description='Performs logic synthesis optimization using RL')
    parser._positionals.title = 'Positional arguments'
    parser._optionals.title = 'Optional arguments'
    parser.add_argument('-v', '--version', action='version',
                        version='DRiLLS v0.1', help="Shows program's version number and exit")
    parser.add_argument("-l", "--load_model", action='store_true',
                        help="Loads a saved Tensorflow model")
    parser.add_argument("-s", "--fixed_script", type=open,
                        help="Executes a fixed optimization script before DRiLLS")
    parser.add_argument("mode", type=str, choices=['train', 'optimize'],
                        help="Use the design to train the model or only optimize it")
    parser.add_argument("mapping", type=str, choices=['scl', 'fpga'],
                        help="Map to standard cell library or FPGA")
    parser.add_argument("params", type=open, nargs='?', default='params.yml',
                        help="Path to the params.yml file")
    args = parser.parse_args()

    options = yaml.load(args.params, Loader=yaml.FullLoader)

    f = Figlet(font='slant')
    print(f.renderText('DRiLLS'))

    if args.fixed_script:
        # Bug fix: this previously read and assigned an undefined name `params`,
        # raising a NameError whenever -s/--fixed_script was supplied. The parsed
        # options are what the helper consumes and updates.
        options = optimize_with_fixed_script(options, args.fixed_script)

    # Map to FPGA LUTs only when explicitly requested; 'scl' targets a standard cell library.
    fpga_mapping = (args.mapping == 'fpga')

    if args.mode == 'train':
        log('Starting to train the agent ..')

        all_rewards = []
        learner = A2C(options, load_model=args.load_model, fpga_mapping=fpga_mapping)
        training_start_time = time.time()
        for i in range(options['episodes']):
            log('Episode: ' + str(i + 1))
            start = time.time()
            total_reward = learner.train_episode()
            end = time.time()
            all_rewards.append(total_reward)
            # Bug fixes: log the same 1-based episode number as the banner above,
            # and compute elapsed time as end - start (it was negated before).
            log('Episode: ' + str(i + 1) + ' - done with total reward = ' + str(total_reward))
            log('Episode ' + str(i + 1) + ' Run Time ~ ' + str((end - start) / 60) + ' minutes.')
            print('')
        training_end_time = time.time()
        log('Total Training Run Time ~ ' + str((training_end_time - training_start_time) / 60) + ' minutes.')

        # Mean of the last (up to) 100 episode rewards; kept for reporting/debugging.
        mean_reward = np.mean(all_rewards[-100:])
    elif args.mode == 'optimize':
        log('Starting agent to optimize')
        learner = A2C(options, load_model=True)
        for _ in range(options['iterations']):
            # TODO: iteratively run the optimizer
            pass
--------------------------------------------------------------------------------
/drills/features.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import re
10 | import numpy as np
11 | import datetime
12 | from multiprocessing import Process, Manager
13 | from subprocess import check_output
14 | from collections import defaultdict
15 |
def log(message):
    """Print *message* with a timestamped DRiLLS prefix."""
    now = datetime.datetime.now()
    print('[DRiLLS {:%Y-%m-%d %H:%M:%S}] {}'.format(now, message))
18 |
def yosys_stats(design_file, yosys_binary, stats):
    """
    Collect wire/cell statistics for a design via yosys' `stat` command.

    Fills the (possibly process-shared) `stats` mapping in place and returns
    it; returns None if yosys cannot be run or its output cannot be parsed.
    """
    yosys_script = "read_verilog " + design_file + "; stat"
    # marker substring in the `stat` report -> (stats key, value type)
    fields = {
        'Number of wires': ('number_of_wires', int),
        'Number of public wires': ('number_of_public_wires', int),
        'Number of cells': ('number_of_cells', float),
        '$and': ('ands', float),
        '$or': ('ors', float),
        '$not': ('nots', float),
    }
    try:
        raw = check_output([yosys_binary, '-QT', '-p', yosys_script])
        for line in raw.decode("utf-8").split('\n'):
            for marker, (key, cast) in fields.items():
                if marker in line:
                    # the count is the last whitespace-separated token
                    stats[key] = cast(line.strip().split()[-1])

        # designs without these gate types never print the markers; default to 0
        for gate_key in ('ands', 'ors', 'nots'):
            if gate_key not in stats:
                stats[gate_key] = 0.0
    except Exception as e:
        print(e)
        return None
    return stats
49 |
def abc_stats(design_file, abc_binary, stats):
    """
    Collect I/O, edge, level and latch counts for a design via ABC's print_stats.

    Fills the (possibly process-shared) `stats` mapping in place and returns
    it; returns None if ABC cannot be run or its output cannot be parsed.
    """
    abc_script = "read_verilog " + design_file + "; print_stats"

    def _rhs(pattern, line):
        # grab the text on the right-hand side of `name = value`
        return re.search(pattern, line).group().split('=')[1].strip()

    try:
        raw = check_output([abc_binary, '-c', abc_script])
        for line in raw.decode("utf-8").split('\n'):
            if 'i/o' not in line:
                continue
            pins = _rhs(r'i/o *= *[0-9]+ */ *[0-9]+', line).split('/')
            stats['input_pins'] = int(pins[0].strip())
            stats['output_pins'] = int(pins[1].strip())
            stats['edges'] = int(_rhs(r'edge *= *[0-9]+', line))
            stats['levels'] = int(_rhs(r'lev *= *[0-9]+', line))
            stats['latches'] = int(_rhs(r'lat *= *[0-9]+', line))
    except Exception as e:
        print(e)
        return None

    return stats
74 |
def extract_features(design_file, yosys_binary='yosys', abc_binary='abc'):
    '''
    Return the features of a given circuit as a numpy array.

    Runs yosys and ABC in parallel worker processes that populate a shared
    stats dictionary, then assembles the feature vector from it.
    '''
    manager = Manager()
    shared_stats = manager.dict()
    collectors = [
        Process(target=yosys_stats, args=(design_file, yosys_binary, shared_stats)),
        Process(target=abc_stats, args=(design_file, abc_binary, shared_stats)),
    ]
    for proc in collectors:
        proc.start()
    for proc in collectors:
        proc.join()

    # Feature vector layout:
    #   (1) input/output pin counts
    #   (2) node (cell) and edge counts
    #   (3) number of levels
    #   (4) number of latches
    #   (5) and/or/not gate fractions of the total cell count
    # NOTE(review): raises KeyError if either collector failed (stats keys
    # missing) and ZeroDivisionError for a zero-cell design — unchanged.
    return np.array([
        shared_stats['input_pins'],
        shared_stats['output_pins'],
        shared_stats['number_of_cells'],
        shared_stats['edges'],
        shared_stats['levels'],
        shared_stats['latches'],
        shared_stats['ands'] / shared_stats['number_of_cells'],
        shared_stats['ors'] / shared_stats['number_of_cells'],
        shared_stats['nots'] / shared_stats['number_of_cells'],
    ])
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DRiLLS
2 | Deep Reinforcement Learning for Logic Synthesis Optimization
3 |
4 | ## Abstract
Logic synthesis requires extensive tuning of the synthesis optimization flow, where the quality of results (QoR) depends on the sequence of optimizations used. Efficient design space exploration is challenging due to the exponential number of possible optimization permutations. Therefore, automating the optimization process is necessary. In this work, we propose a novel reinforcement learning-based methodology that navigates the optimization space without human intervention. We demonstrate the training of an Advantage Actor Critic (A2C) agent that seeks to minimize area subject to a timing constraint. Using the proposed framework, designs can be optimized autonomously with no humans in the loop.
6 |
7 | ## Paper
8 | DRiLLS has been presented at ASP-DAC 2020 and the manuscript is available on [IEEE Xplore](https://ieeexplore.ieee.org/abstract/document/9045559). A pre-print version is available on [arXiv](https://arxiv.org/abs/1911.04021).
9 |
10 | ## Setup
11 | DRiLLS requires `Python 3.6`, `pip3` and `virtualenv` installed on the system.
12 |
13 | 1. `virtualenv .venv --python=python3`
14 | 2. `source .venv/bin/activate`
15 | 3. `pip install -r requirements.txt`
16 |
17 | :warning: WARNING :warning:
18 |
19 | Since TensorFlow 2.x is not compatible with TensorFlow 1.x, this implementation is tested only on Python 3.6.
20 | If you have a newer version of Python, `pip` won't be able to find `tensorflow==1.x`.
21 |
22 |
23 | ## Run the agent
24 |
25 | 1. Edit `params.yml` file. Comments in the file illustrate the individual fields.
26 | 2. Run `python drills.py train scl`
27 |
28 | For help, `python drills.py -help`
29 |
30 | ## How It Works
31 |
32 |
33 | There are two major components in DRiLLS framework:
34 |
35 | * **Logic Synthesis** environment: a setup of the design space exploration problem as a reinforcement learning task. The logic synthesis environment is implemented as a session in [drills/scl_session.py](drills/scl_session.py) and [drills/fpga_session.py](drills/fpga_session.py).
36 | * **Reinforcement Learning** environment: it employs an *Advantage Actor Critic agent (A2C)* to navigate the environment searching for the best optimization at a given state. It is implemented in [drills/model.py](drills/model.py) and uses [drills/features.py](drills/features.py) to extract AIG features.
37 |
38 | DRiLLS agent exploring the design space of [Max](https://github.com/lsils/benchmarks/blob/master/arithmetic/max.v) design.
39 |
40 | 
41 |
42 | For more details on the inner-workings of the framework, see Section 4 in [the paper](https://github.com/scale-lab/DRiLLS/blob/drills-preprint/doc/preprint/DRiLLS_preprint_AH.pdf).
43 |
44 | ## Reporting Bugs
45 | Please, use [ISSUE_TEMPLATE/bug_report.md](.github/ISSUE_TEMPLATE/bug_report.md) to create an issue and describe your bug.
46 |
47 | ## Contributing
48 | Below is a list of suggested contributions you can make. Before you work on any, it is advised that you create an issue using the [ISSUE_TEMPLATE/contribution.md](.github/ISSUE_TEMPLATE/contribution.md) to tell us what you plan to work on. This ensures that your work can be merged to the `master` branch in a timely manner.
49 |
50 | ### Modernize Tensorflow Implementation
51 |
52 | Google has recently released [Dopamine](https://github.com/google/dopamine) which sets up a framework for researching reinforcement learning algorithms. A new version of DRiLLS would adopt Dopamine to make it easier to implement the model and session classes. If you are new to Dopamine and want to try it on a real use case, it would be a great fit for DRiLLS and will add a great value to our repository.
53 |
54 | ### Better Integration
55 | The current implementation interacts with the logic synthesis environment using files. This affects the run time of the agent training as it tries to extract features and statistics through files. A better integrations keeps a session of `yosys` and `abc` where the design is loaded once in the beginning and the feature extraction (and results extraction) are retrieved through this open session.
56 |
57 | ### Study An Enhanced Model
The goal is to enhance the model architecture used in [drills/model.py](drills/model.py). An enhancement should give better results (less area **AND** meets timing constraints):
* Deeper network architecture.
60 | * Changing gamma rate.
61 | * Changing learning rate.
62 | * Improve normalization.
63 |
64 | ## Citation
65 | ```
66 | @INPROCEEDINGS{9045559,
67 | author={A. {Hosny} and S. {Hashemi} and M. {Shalan} and S. {Reda}},
68 | booktitle={2020 25th Asia and South Pacific Design Automation Conference (ASP-DAC)},
69 | title={DRiLLS: Deep Reinforcement Learning for Logic Synthesis},
70 | year={2020},
71 | volume={},
72 | number={},
73 | pages={581-586},}
74 | ```
75 |
76 | ## License
77 | BSD 3-Clause License. See [LICENSE](LICENSE) file
78 |
--------------------------------------------------------------------------------
/baseline/greedy/greedy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import yaml
10 | import os
11 | import subprocess
12 | import sys
13 | import timeit
14 | import re
15 | from joblib import Parallel, delayed
16 |
# Path to the YAML configuration describing the run (see data.yml).
data_file = sys.argv[1]

# safe_load avoids arbitrary object construction on the config file and is
# required on PyYAML >= 6, where yaml.load without an explicit Loader raises.
with open(data_file, 'r') as f:
    options = yaml.safe_load(f)

start = timeit.default_timer()

# unpack the configuration into module-level settings used by the helpers
optimizations = options['optimizations']
iterations = options['iterations']
current_design_file = options['design_file']
library_file = options['mapping']['library_file']
clock_period = options['mapping']['clock_period']
post_mapping_optimizations = options['post_mapping_commands']

# Create directory if not exists
if not os.path.exists(options['output_dir']):
    os.makedirs(options['output_dir'])
34 |
def extract_results(stats):
    """
    Extract (delay, area) from the stats ABC printed on stdout.

    stats: raw bytes captured from the ABC subprocess; the metrics are
    parsed from the last non-empty line (the `stime` report).
    Returns a (delay, area) tuple of floats.
    """
    line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip()

    # [0-9] (not [1-9]) so values with a leading zero such as 0.5 still
    # match, and \. so the dot is a literal decimal point, not "any char".
    ob = re.search(r'Delay *= *[0-9]+\.?[0-9]*', line)
    delay = float(ob.group().split('=')[1].strip())
    ob = re.search(r'Area *= *[0-9]+\.?[0-9]*', line)
    area = float(ob.group().split('=')[1].strip())
    return delay, area
46 |
def run_optimization(output_dir, optimization, design_file, library):
    """
    Apply one ABC optimization to the design, then map it for timing.

    Returns (new_design_file, delay, area).
    """
    output_dir = output_dir.replace(' ', '_')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_design_file = output_dir + '/design.blif'

    # assemble the ABC command script: load, optimize, save, map, report
    commands = [
        'read ' + library,
        'read ' + design_file,
        'strash',
        optimization,
        'write ' + output_design_file,
        'map -D ' + str(clock_period),
        'topo; stime',
    ]
    abc_command = '; '.join(commands) + '; '

    proc = subprocess.check_output(['yosys-abc', '-c', abc_command])
    delay, area = extract_results(proc)
    return output_design_file, delay, area
67 |
def save_optimization_step(iteration, optimization, delay, area):
    """
    Append the winning optimization and its metrics to results.csv.
    """
    row = '{}, {}, {}, {}\n'.format(iteration, optimization, delay, area)
    with open(os.path.join(options['output_dir'], 'results.csv'), 'a') as f:
        f.write(row)
76 |
def log(message=''):
    """Echo *message* to stdout and append it to greedy.log."""
    print(message)
    log_path = os.path.join(options['output_dir'], 'greedy.log')
    with open(log_path, 'a') as f:
        f.write(message + '\n')
81 |
def run_post_mapping(output_dir, optimization, design_file, library):
    """
    Map the design first, then apply one post-mapping optimization.

    Returns (new_design_file, delay, area).
    """
    output_dir = output_dir.replace(' ', '_')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_design_file = output_dir + '/design.blif'

    # here the optimization runs AFTER mapping, unlike run_optimization
    abc_command = (
        f'read {library}; '
        f'read {design_file}; '
        'strash; '
        f'map -D {clock_period}; '
        f'{optimization};'
        f'write {output_design_file}; '
        'print_stats; '
    )
    proc = subprocess.check_output(['yosys-abc', '-c', abc_command])
    delay, area = extract_results(proc)
    return output_design_file, delay, area
101 |
def run_thread(iteration_dir, design_file, opt):
    """Worker: run one pre-mapping optimization and report its metrics."""
    opt_dir = os.path.join(iteration_dir, opt)
    opt_file, delay, area = run_optimization(opt_dir, opt, design_file, library_file)
    log('Optimization: {} -> delay: {}, area: {}'.format(opt, delay, area))
    return (opt, opt_file, delay, area)
109 |
def run_thread_post_mapping(iteration_dir, design_file, opt):
    """Worker: run one post-mapping optimization and report its metrics."""
    opt_dir = os.path.join(iteration_dir, opt)
    opt_file, delay, area = run_post_mapping(opt_dir, opt, design_file, library_file)
    log('Optimization: {} -> delay: {}, area: {}'.format(opt, delay, area))
    return (opt, opt_file, delay, area)
117 |
# main optimizing iteration
# Greedy search: each iteration runs every optimization in parallel on the
# current design and keeps the one with the smallest area.
previous_area = None
for i in range(iterations):
    # log
    log('Iteration: ' + str(i+1))
    log('-------------')

    # create a directory for this iteration
    iteration_dir = os.path.join(options['output_dir'], str(i))
    if not os.path.exists(iteration_dir):
        os.makedirs(iteration_dir)

    # in parallel, run ABC on each of the optimizations we have
    results = Parallel(n_jobs=len(optimizations))(delayed(run_thread)(iteration_dir, current_design_file, opt) for opt in optimizations)

    # get the minimum result of all threads
    best_thread = min(results, key = lambda t: t[3]) # getting minimum for delay (index=2) or area (index=3)

    # hold the best result in variables
    best_optimization = best_thread[0]
    best_optimization_file = best_thread[1]
    best_delay = best_thread[2]
    best_area = best_thread[3]


    if best_area == previous_area:
        # area plateaued: accept the step anyway and move on ("break for now")
        log('Looks like the best area is exactly the same as last iteration!')
        log('Continue anyway ..')
        log('Choosing Optimization: ' + best_optimization + ' -> delay: ' + str(best_delay) + ', area: ' + str(best_area))
        save_optimization_step(i, best_optimization, best_delay, best_area)

        log()

        # update design file for the next iteration
        current_design_file = best_optimization_file
        log('================')
        log()
        continue

        # NOTE(review): everything below this `continue` is unreachable dead
        # code — it looks like a disabled post-mapping escape path for the
        # plateau case; confirm before re-enabling or deleting.
        log()
        log('Looks like the best area is exactly the same as last iteration!')
        log('Performing post mapping optimizations ..')
        # run post mapping optimization
        results = Parallel(n_jobs=len(post_mapping_optimizations))(delayed(run_thread_post_mapping)(iteration_dir, current_design_file, opt) for opt in post_mapping_optimizations)

        # get the minimum result of all threads
        best_thread = min(results, key = lambda t: t[3]) # getting minimum for delay (index=2) or area (index=3)

        # hold the best result in variables
        best_optimization = best_thread[0]
        best_optimization_file = best_thread[1]
        best_delay = best_thread[2]
        best_area = best_thread[3]
        previous_area = None
    else:
        previous_area = best_area

    # save results
    log()
    log('Choosing Optimization: ' + best_optimization + ' -> delay: ' + str(best_delay) + ', area: ' + str(best_area))
    save_optimization_step(i, best_optimization, best_delay, best_area)

    # update design file for the next iteration
    current_design_file = best_optimization_file
    log('================')
    log()

stop = timeit.default_timer()

log('Total Optimization Time: ' + str(stop - start))
--------------------------------------------------------------------------------
/drills/scl_session.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import os
10 | import re
11 | import datetime
12 | import numpy as np
13 | from subprocess import check_output
14 | from .features import extract_features
15 |
def log(message):
    """Print *message* prefixed with a [DRiLLS <timestamp>] tag."""
    stamp = '[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
    print(stamp + "] " + message)
18 |
class SCLSession:
    """
    A logic synthesis optimization session for standard-cell flows, driven
    by ABC.

    Exposes a gym-like interface (reset / step): an action is an index into
    params['optimizations'], the state is the feature vector produced by
    features.extract_features, and the reward favors area reduction subject
    to the clock-period (delay) constraint.
    """
    def __init__(self, params):
        # params: experiment configuration dict — must provide
        # 'optimizations', 'iterations', 'design_file', 'playground_dir',
        # 'abc_binary', 'yosys_binary' and 'mapping' with 'library_file'
        # and 'clock_period'.
        self.params = params

        self.action_space_length = len(self.params['optimizations'])
        self.observation_space_size = 9 # number of features

        self.iteration = 0           # step count within the current episode
        self.episode = 0             # episode counter, bumped by reset()
        self.sequence = ['strash']   # ABC commands applied so far this episode
        self.delay, self.area = float('inf'), float('inf')

        # best results seen across all episodes, each stored as a
        # (area, delay, episode, iteration) tuple
        self.best_known_area = (float('inf'), float('inf'), -1, -1)
        self.best_known_delay = (float('inf'), float('inf'), -1, -1)
        self.best_known_area_meets_constraint = (float('inf'), float('inf'), -1, -1)

        # logging
        # per-episode CSV log file handle; opened lazily in reset()
        self.log = None

    def __del__(self):
        # ensure the episode log is closed on teardown
        if self.log:
            self.log.close()

    def reset(self):
        """
        resets the environment and returns the state
        """
        self.iteration = 0
        self.episode += 1
        self.delay, self.area = float('inf'), float('inf')
        self.sequence = ['strash']
        # each episode gets its own working directory for designs and logs
        self.episode_dir = os.path.join(self.params['playground_dir'], str(self.episode))
        if not os.path.exists(self.episode_dir):
            os.makedirs(self.episode_dir)

        # logging
        log_file = os.path.join(self.episode_dir, 'log.csv')
        if self.log:
            self.log.close()
        self.log = open(log_file, 'w')
        self.log.write('iteration, optimization, area, delay, best_area_meets_constraint, best_area, best_delay\n')

        # run the initial (strash-only) sequence to get the starting state
        state, _ = self._run()

        # logging
        self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(self.area), str(self.delay)]) + '\n')
        self.log.flush()

        return state

    def step(self, optimization):
        """
        accepts optimization index and returns (new state, reward, done, info)
        """
        self.sequence.append(self.params['optimizations'][optimization])
        new_state, reward = self._run()

        # update best-known records; tuples are (area, delay, episode, iteration)
        if self.area < self.best_known_area[0]:
            self.best_known_area = (self.area, self.delay, self.episode, self.iteration)
        if self.delay < self.best_known_delay[1]:
            self.best_known_delay = (self.area, self.delay, self.episode, self.iteration)
        # best area among results that also meet the timing constraint
        if self.delay <= self.params['mapping']['clock_period'] and self.area < self.best_known_area_meets_constraint[0]:
            self.best_known_area_meets_constraint = (self.area, self.delay, self.episode, self.iteration)
        self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(self.area), str(self.delay)]) + ', ' +
                       '; '.join(list(map(str, self.best_known_area_meets_constraint))) + ', ' +
                       '; '.join(list(map(str, self.best_known_area))) + ', ' +
                       '; '.join(list(map(str, self.best_known_delay))) + '\n')
        self.log.flush()

        # done when the configured number of iterations is reached
        return new_state, reward, self.iteration == self.params['iterations'], None

    def _run(self):
        """
        run ABC on the given design file with the sequence of commands
        """
        self.iteration += 1
        output_design_file = os.path.join(self.episode_dir, str(self.iteration) + '.v')
        output_design_file_mapped = os.path.join(self.episode_dir, str(self.iteration) + '-mapped.v')

        # replay the whole optimization sequence from the original design,
        # then map against the library and report timing with stime
        abc_command = 'read ' + self.params['mapping']['library_file'] + '; '
        abc_command += 'read ' + self.params['design_file'] + '; '
        abc_command += ';'.join(self.sequence) + '; '
        abc_command += 'write ' + output_design_file + '; '
        abc_command += 'map -D ' + str(self.params['mapping']['clock_period']) + '; '
        abc_command += 'write ' + output_design_file_mapped + '; '
        abc_command += 'topo; stime;'

        try:
            proc = check_output([self.params['abc_binary'], '-c', abc_command])
            # get reward
            delay, area = self._get_metrics(proc)
            # reward is computed BEFORE self.delay/self.area are updated,
            # so it compares the new metrics against the previous step's
            reward = self._get_reward(delay, area)
            self.delay, self.area = delay, area
            # get new state of the circuit
            state = self._get_state(output_design_file)
            return state, reward
        except Exception as e:
            # any failure (ABC error, parse error) is swallowed and
            # reported as a (None, None) step
            print(e)
            return None, None

    def _get_metrics(self, stats):
        """
        parse delay and area from the stats command of ABC
        """
        # the stime report is on the last non-empty stdout line
        line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip()

        ob = re.search(r'Delay *= *[0-9]+.?[0-9]*', line)
        delay = float(ob.group().split('=')[1].strip())

        ob = re.search(r'Area *= *[0-9]+.?[0-9]*', line)
        area = float(ob.group().split('=')[1].strip())

        return delay, area

    def _get_reward(self, delay, area):
        """
        Map the new (delay, area) against the previous step's metrics to a
        scalar reward via the reward table.
        """
        constraint_met = True
        optimization_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement)
        constraint_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement)

        # check optimizing parameter
        if area < self.area:
            optimization_improvement = 1
        elif area == self.area:
            optimization_improvement = 0
        else:
            optimization_improvement = -1

        # check constraint parameter
        # constraint_improvement is only graded when the constraint is
        # violated; when it is met it stays 0, matching the reward table's
        # True branch which only has the key 0
        if delay > self.params["mapping"]["clock_period"]:
            constraint_met = False
            if delay < self.delay:
                constraint_improvement = 1
            elif delay == self.delay:
                constraint_improvement = 0
            else:
                constraint_improvement = -1

        # now calculate the reward
        return self._reward_table(constraint_met, constraint_improvement, optimization_improvement)

    def _reward_table(self, constraint_met, contraint_improvement, optimization_improvement):
        # lookup: [constraint met?][constraint trend][area trend] -> reward;
        # rewards range from +3 (best) to -3 (worst)
        return {
            True: {
                0: {
                    1: 3,
                    0: 0,
                    -1: -1
                }
            },
            False: {
                1: {
                    1: 3,
                    0: 2,
                    -1: 1
                },
                0: {
                    1: 2,
                    0: 0,
                    -1: -2
                },
                -1: {
                    1: -1,
                    0: -2,
                    -1: -3
                }
            }
        }[constraint_met][contraint_improvement][optimization_improvement]

    def _get_state(self, design_file):
        # state = 9-element feature vector extracted from the written design
        return extract_features(design_file, self.params['yosys_binary'], self.params['abc_binary'])
193 |
--------------------------------------------------------------------------------
/drills/fpga_session.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import os
10 | import re
11 | import datetime
12 | import numpy as np
13 | from subprocess import check_output
14 | from .features import extract_features
15 |
def log(message):
    """Print *message* prefixed with a [DRiLLS <timestamp>] tag."""
    stamp = '[DRiLLS {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())
    print(stamp + "] " + message)
18 |
class FPGASession:
    """
    A logic synthesis optimization session for FPGA (LUT-mapping) flows,
    driven by ABC.

    Exposes a gym-like interface (reset / step): an action is an index into
    params['optimizations'], the state is the feature vector produced by
    features.extract_features, and the reward favors LUT-count reduction
    subject to a maximum-levels constraint.
    """
    def __init__(self, params):
        # params: experiment configuration dict — must provide
        # 'optimizations', 'iterations', 'design_file', 'playground_dir',
        # 'abc_binary', 'yosys_binary' and 'fpga_mapping' with 'lut_inputs'
        # and 'levels'.
        self.params = params

        self.action_space_length = len(self.params['optimizations'])
        self.observation_space_size = 9 # number of features

        self.iteration = 0           # step count within the current episode
        self.episode = 0             # episode counter, bumped by reset()
        self.sequence = ['strash']   # ABC commands applied so far this episode
        self.lut_6, self.levels = float('inf'), float('inf')

        # best results seen across all episodes, each stored as a
        # (lut_6, levels, episode, iteration) tuple
        self.best_known_lut_6 = (float('inf'), float('inf'), -1, -1)
        self.best_known_levels = (float('inf'), float('inf'), -1, -1)
        self.best_known_lut_6_meets_constraint = (float('inf'), float('inf'), -1, -1)

        # logging
        # per-episode CSV log file handle; opened lazily in reset()
        self.log = None

    def __del__(self):
        # ensure the episode log is closed on teardown
        if self.log:
            self.log.close()

    def reset(self):
        """
        resets the environment and returns the state
        """
        self.iteration = 0
        self.episode += 1
        self.lut_6, self.levels = float('inf'), float('inf')
        self.sequence = ['strash']
        # each episode gets its own working directory for designs and logs
        self.episode_dir = os.path.join(self.params['playground_dir'], str(self.episode))
        if not os.path.exists(self.episode_dir):
            os.makedirs(self.episode_dir)

        # logging
        log_file = os.path.join(self.episode_dir, 'log.csv')
        if self.log:
            self.log.close()
        self.log = open(log_file, 'w')
        self.log.write('iteration, optimization, LUT-6, Levels, best LUT-6 meets constraint, best LUT-6, best levels\n')

        # run the initial (strash-only) sequence to get the starting state
        state, _ = self._run()

        # logging
        self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(int(self.lut_6)), str(int(self.levels))]) + '\n')
        self.log.flush()

        return state

    def step(self, optimization):
        """
        accepts optimization index and returns (new state, reward, done, info)
        """
        self.sequence.append(self.params['optimizations'][optimization])
        new_state, reward = self._run()

        # update best-known records; tuples are (lut_6, levels, episode, iteration)
        if self.lut_6 < self.best_known_lut_6[0]:
            self.best_known_lut_6 = (int(self.lut_6), int(self.levels), self.episode, self.iteration)
        if self.levels < self.best_known_levels[1]:
            self.best_known_levels = (int(self.lut_6), int(self.levels), self.episode, self.iteration)
        # best LUT count among results that also meet the levels constraint
        if self.levels <= self.params['fpga_mapping']['levels'] and self.lut_6 < self.best_known_lut_6_meets_constraint[0]:
            self.best_known_lut_6_meets_constraint = (int(self.lut_6), int(self.levels), self.episode, self.iteration)
        self.log.write(', '.join([str(self.iteration), self.sequence[-1], str(int(self.lut_6)), str(int(self.levels))]) + ', ' +
                       '; '.join(list(map(str, self.best_known_lut_6_meets_constraint))) + ', ' +
                       '; '.join(list(map(str, self.best_known_lut_6))) + ', ' +
                       '; '.join(list(map(str, self.best_known_levels))) + '\n')
        self.log.flush()

        # done when the configured number of iterations is reached
        return new_state, reward, self.iteration == self.params['iterations'], None

    def _run(self):
        """
        run ABC on the given design file with the sequence of commands
        """
        self.iteration += 1
        output_design_file = os.path.join(self.episode_dir, str(self.iteration) + '.v')
        output_design_file_mapped = os.path.join(self.episode_dir, str(self.iteration) + '-mapped.v')

        # replay the whole optimization sequence from the original design,
        # then LUT-map with `if -K <lut_inputs>` and report with print_stats
        abc_command = 'read ' + self.params['design_file'] + '; '
        abc_command += ';'.join(self.sequence) + '; '
        abc_command += 'write ' + output_design_file + '; '
        abc_command += 'if -K ' + str(self.params['fpga_mapping']['lut_inputs']) + '; '
        abc_command += 'write ' + output_design_file_mapped + '; '
        abc_command += 'print_stats;'

        try:
            proc = check_output([self.params['abc_binary'], '-c', abc_command])
            # get reward
            lut_6, levels = self._get_metrics(proc)
            # reward is computed BEFORE self.lut_6/self.levels are updated,
            # so it compares the new metrics against the previous step's
            reward = self._get_reward(lut_6, levels)
            self.lut_6, self.levels = lut_6, levels
            # get new state of the circuit
            state = self._get_state(output_design_file)
            return state, reward
        except Exception as e:
            # any failure (ABC error, parse error) is swallowed and
            # reported as a (None, None) step
            print(e)
            return None, None

    def _get_metrics(self, stats):
        """
        parse LUT count and levels from the stats command of ABC
        """
        # the print_stats report is on the last non-empty stdout line
        line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip()

        ob = re.search(r'lev *= *[0-9]+', line)
        levels = int(ob.group().split('=')[1].strip())

        # 'nd' is the node count after mapping — treated here as the LUT count
        ob = re.search(r'nd *= *[0-9]+', line)
        lut_6 = int(ob.group().split('=')[1].strip())

        return lut_6, levels

    def _get_reward(self, lut_6, levels):
        """
        Map the new (lut_6, levels) against the previous step's metrics to a
        scalar reward via the reward table.
        """
        constraint_met = True
        optimization_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement)
        constraint_improvement = 0 # (-1, 0, 1) <=> (worse, same, improvement)

        # check optimizing parameter
        if lut_6 < self.lut_6:
            optimization_improvement = 1
        elif lut_6 == self.lut_6:
            optimization_improvement = 0
        else:
            optimization_improvement = -1

        # check constraint parameter
        # constraint_improvement is only graded when the constraint is
        # violated; when it is met it stays 0, matching the reward table's
        # True branch which only has the key 0
        if levels > self.params["fpga_mapping"]["levels"]:
            constraint_met = False
            if levels < self.levels:
                constraint_improvement = 1
            elif levels == self.levels:
                constraint_improvement = 0
            else:
                constraint_improvement = -1

        # now calculate the reward
        return self._reward_table(constraint_met, constraint_improvement, optimization_improvement)

    def _reward_table(self, constraint_met, contraint_improvement, optimization_improvement):
        # lookup: [constraint met?][constraint trend][LUT trend] -> reward;
        # rewards range from +3 (best) to -3 (worst)
        return {
            True: {
                0: {
                    1: 3,
                    0: 0,
                    -1: -1
                }
            },
            False: {
                1: {
                    1: 3,
                    0: 2,
                    -1: 1
                },
                0: {
                    1: 2,
                    0: 0,
                    -1: -2
                },
                -1: {
                    1: -1,
                    0: -2,
                    -1: -3
                }
            }
        }[constraint_met][contraint_improvement][optimization_improvement]

    def _get_state(self, design_file):
        # state = 9-element feature vector extracted from the written design
        return extract_features(design_file, self.params['yosys_binary'], self.params['abc_binary'])
192 |
--------------------------------------------------------------------------------
/baseline/simulated-annealing/simulated-annealing.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import yaml
10 | import os
11 | import subprocess
12 | import sys
13 | import timeit
14 | import re
15 | import random
16 | import math
17 | from joblib import Parallel, delayed
18 |
# Path to the YAML configuration describing the run (see data.yml).
data_file = sys.argv[1]

# safe_load avoids arbitrary object construction on the config file and is
# required on PyYAML >= 6, where yaml.load without an explicit Loader raises.
with open(data_file, 'r') as f:
    options = yaml.safe_load(f)

start = timeit.default_timer()

# unpack the configuration into module-level settings used by the helpers
optimizations = options['optimizations']
iterations = options['iterations']
current_design_file = options['design_file']
library_file = options['mapping']['library_file']
clock_period = options['mapping']['clock_period']
# post_mapping_optimizations = options['post_mapping_commands']

# annealing schedule parameters
temperature = options['simulated_annealing']['initial_temp']
cooling_rate = options['simulated_annealing']['cooling_rate']

# Create directory if not exists
if not os.path.exists(options['output_dir']):
    os.makedirs(options['output_dir'])
39 |
def extract_results(stats):
    """
    Extract (delay, area) from the stats ABC printed on stdout.

    stats: raw bytes captured from the ABC subprocess; the metrics are
    parsed from the last non-empty line (the `print_stats` report).
    Returns a (delay, area) tuple of floats.
    """
    line = stats.decode("utf-8").split('\n')[-2].split(':')[-1].strip()
    # [0-9] (not [1-9]) so values with a leading zero such as 0.5 still
    # match, \. makes the dot a literal decimal point, and the trailing *
    # accepts plain integers as well.
    ob = re.search(r'delay *= *[0-9]+\.?[0-9]*', line)
    delay = float(ob.group().split('=')[1].strip())
    ob = re.search(r'area *= *[0-9]+\.?[0-9]*', line)
    area = float(ob.group().split('=')[1].strip())
    return delay, area
50 |
def run_optimization(output_dir, optimization, design_file, library):
    """
    Apply one ABC optimization to the design, then map it for timing.

    Returns (new_design_file, delay, area).
    """
    output_dir = output_dir.replace(' ', '_')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_design_file = output_dir + '/design.blif'

    # assemble the ABC command script: load, optimize, save, map, report
    commands = [
        'read ' + library,
        'read ' + design_file,
        'strash',
        optimization,
        'write ' + output_design_file,
        'map -D ' + str(clock_period),
        'print_stats',
    ]
    abc_command = '; '.join(commands) + '; '

    proc = subprocess.check_output(['yosys-abc', '-c', abc_command])
    delay, area = extract_results(proc)
    return output_design_file, delay, area
71 |
def save_optimization_step(iteration, optimization, delay, area):
    """
    Append the accepted optimization and its metrics to results.csv.
    """
    row = '{}, {}, {}, {}\n'.format(iteration, optimization, delay, area)
    with open(os.path.join(options['output_dir'], 'results.csv'), 'a') as f:
        f.write(row)
80 |
def log(message=''):
    """
    Echo *message* to stdout and append it to the run's log file.

    Fixed: this script previously appended to 'greedy.log' — a copy-paste
    leftover from the greedy baseline — which made simulated-annealing runs
    look like greedy runs. Use a script-specific file name instead.
    """
    print(message)
    with open(os.path.join(options['output_dir'], 'simulated_annealing.log'), 'a') as f:
        f.write(message + '\n')
85 |
def run_post_mapping(output_dir, optimization, design_file, library):
    """
    Map the design first, then apply one post-mapping optimization.

    Returns (new_design_file, delay, area).
    """
    output_dir = output_dir.replace(' ', '_')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_design_file = output_dir + '/design.blif'

    # here the optimization runs AFTER mapping, unlike run_optimization
    abc_command = (
        f'read {library}; '
        f'read {design_file}; '
        'strash; '
        f'map -D {clock_period}; '
        f'{optimization};'
        f'write {output_design_file}; '
        'print_stats; '
    )
    proc = subprocess.check_output(['yosys-abc', '-c', abc_command])
    delay, area = extract_results(proc)
    return output_design_file, delay, area
105 |
def run_thread(iteration_dir, design_file, opt):
    """Worker: run one pre-mapping optimization and report its metrics."""
    opt_dir = os.path.join(iteration_dir, opt)
    opt_file, delay, area = run_optimization(opt_dir, opt, design_file, library_file)
    log('Optimization: {} -> delay: {}, area: {}'.format(opt, delay, area))
    return (opt, opt_file, delay, area)
113 |
def run_thread_post_mapping(iteration_dir, design_file, opt):
    """Worker: run one post-mapping optimization and report its metrics."""
    opt_dir = os.path.join(iteration_dir, opt)
    opt_file, delay, area = run_post_mapping(opt_dir, opt, design_file, library_file)
    log('Optimization: {} -> delay: {}, area: {}'.format(opt, delay, area))
    return (opt, opt_file, delay, area)
121 |
i = 0
# run the optimization once to set the initial energy (delay) of the system
log('Initializing annealing ..')
log('Current temperature: ' + str(temperature))
log('----------------')
iteration_dir = os.path.join(options['output_dir'], str(i))
if not os.path.exists(iteration_dir):
    os.makedirs(iteration_dir)
# Pick an optimization at random
random_optimization = 'strash' # a command that does no optimization
result = run_thread(iteration_dir, current_design_file, random_optimization)
opt_file = result[1]
delay = result[2]
area = result[3]
# accept it to set the energy of the system in the beginning
save_optimization_step(i, random_optimization, delay, area)
current_design_file = opt_file
previous_delay = delay
i += 1

log('System initialized with delay: ' + str(delay))
log('Starting annealing ..')
log()

# main optimizing iteration
# Simulated annealing over the optimization space: random moves, with worse
# moves accepted via the Metropolis criterion while the temperature is high.
while True:
    number_of_accepted_optimizations = 0

    for _ in range(100):
        # if we accept 10 optimizations, we cool down the system
        # otherwise, only continue up to 100 trials for this temperature

        # log
        log('Iteration: ' + str(i))
        log('Temperature: ' + str(temperature))
        log('----------------')

        # create a directory for this iteration
        iteration_dir = os.path.join(options['output_dir'], str(i))
        if not os.path.exists(iteration_dir):
            os.makedirs(iteration_dir)

        # Pick an optimization at random
        random_optimization = random.choice(optimizations)
        result = run_thread(iteration_dir, current_design_file, random_optimization)
        opt_file = result[1]
        delay = result[2]
        area = result[3]

        # if better than the previous delay, accept. Otherwise, accept with probability
        if delay < previous_delay:
            log('The optimization reduced the delay!')
            log('Accepting it ..')
            save_optimization_step(i, random_optimization, delay, area)
            current_design_file = opt_file
            previous_delay = delay
            number_of_accepted_optimizations += 1
        else:
            # Metropolis criterion: accept a worse move with probability
            # exp(-delta / T) — more likely while the system is still hot
            delta_delay = delay - previous_delay
            probability_of_acceptance = math.exp((- delta_delay) / temperature)
            log('The optimization didn\'t reduce the delay, the system looks to be still hot.')
            log('The probability of acceptance is: ' + str(probability_of_acceptance))
            log('Uniformly generating a number to see if we accept it ..')
            if random.uniform(0, 1.0) < probability_of_acceptance:
                log('Accepting it ..')
                save_optimization_step(i, random_optimization, delay, area)
                current_design_file = opt_file
                previous_delay = delay
                number_of_accepted_optimizations += 1
            else:
                log('Rejected ..')
                pass
        i += 1
        log()

        if number_of_accepted_optimizations == 10:
            break

    # stop once the system is cold enough
    if temperature <= 0.1:
        log('System has sufficiently cooled down ..')
        log('Shutting down simulation ..')
        log()
        break

    # geometric cooling schedule
    new_temperature = temperature * cooling_rate
    log('Cooling down system from ' + str(temperature) + ' to ' + str(new_temperature) + ' ..')
    temperature = new_temperature
    log('================')
    log()

stop = timeit.default_timer()

log('Total Optimization Time: ' + str(stop - start))
215 |
--------------------------------------------------------------------------------
/drills/model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | # Copyright (c) 2019, SCALE Lab, Brown University
4 | # All rights reserved.
5 |
6 | # This source code is licensed under the BSD-style license found in the
7 | # LICENSE file in the root directory of this source tree.
8 |
9 | import tensorflow as tf
10 | import numpy as np
11 | import datetime
12 | import time
13 | from .scl_session import SCLSession as SCLGame
14 | from .fpga_session import FPGASession as FPGAGame
15 |
def log(message):
    """Print *message* to stdout prefixed with a '[DRiLLS <timestamp>]' tag."""
    prefix = '[DRiLLS {:%Y-%m-%d %H:%M:%S}] '.format(datetime.datetime.now())
    print(prefix + message)
18 |
class Normalizer():
    """Online feature normalizer backed by TensorFlow tensors.

    Maintains a running mean and (clamped) variance over every observed
    state vector and normalizes inputs to zero mean / unit variance.
    """

    def __init__(self, num_inputs):
        # num_inputs: dimensionality of the state vectors being tracked.
        self.num_inputs = num_inputs
        self._zero_stats()

    def observe(self, x):
        """Fold one new observation into the running statistics."""
        self.n += 1.
        prev_mean = tf.identity(self.mean)
        self.mean += (x - prev_mean) / self.n
        self.mean_diff += (x - prev_mean) * (x - self.mean)
        # Clamp the variance away from zero so normalize() never divides by ~0.
        self.var = tf.clip_by_value(self.mean_diff / self.n,
                                    clip_value_min=1e-2,
                                    clip_value_max=1000000000)

    def normalize(self, inputs):
        """Return *inputs* shifted/scaled by the running mean and std-dev."""
        return (inputs - self.mean) / tf.sqrt(self.var)

    def reset(self):
        """Forget all previously observed statistics."""
        self._zero_stats()

    def _zero_stats(self):
        # (Re)initialize every running statistic to a zero vector.
        self.n = tf.zeros(self.num_inputs)
        self.mean = tf.zeros(self.num_inputs)
        self.mean_diff = tf.zeros(self.num_inputs)
        self.var = tf.zeros(self.num_inputs)
43 |
class A2C:
    """Advantage Actor-Critic agent that learns logic-synthesis flows.

    The agent plays a synthesis "game" (standard-cell or FPGA), choosing one
    optimization command per step. An actor network outputs a probability
    distribution over commands while a critic network estimates the state
    value; both are trained jointly, one full episode at a time. Written for
    TensorFlow 1.x graph mode.
    """

    def __init__(self, options, load_model=False, fpga_mapping=False):
        """Build the TF graph and session, optionally restoring saved weights.

        options: run-configuration dict; must contain 'model_dir'.
        load_model: restore weights from options['model_dir'] instead of
            initializing fresh variables.
        fpga_mapping: use the FPGA environment instead of standard-cell.
        """
        if fpga_mapping:
            self.game = FPGAGame(options)
        else:
            self.game = SCLGame(options)

        # Hyper-parameters. Assigned before the graph is built so that
        # optimizer() can read self.learning_rate (previously it duplicated
        # the hard-coded 0.01 and these fields were set too late to matter).
        self.gamma = 0.99
        self.learning_rate = 0.01

        self.num_actions = self.game.action_space_length
        self.state_size = self.game.observation_space_size
        self.normalizer = Normalizer(self.state_size)

        # Batch of (normalized) state vectors.
        self.state_input = tf.placeholder(tf.float32, [None, self.state_size])

        # Training placeholders: one-hot encodings of the actions actually
        # taken, and the discounted & normalized episode returns.
        self.actions = tf.placeholder(tf.float32, [None, self.num_actions])
        self.discounted_episode_rewards_ = tf.placeholder(tf.float32, [None, ])

        self.state_value = self.critic()
        self.actor_probs = self.actor()
        self.loss_val = self.loss()
        self.train_op = self.optimizer()
        self.session = tf.Session()

        # model saving/restoring
        self.model_dir = options['model_dir']
        self.saver = tf.train.Saver()

        if load_model:
            self.saver.restore(self.session, self.model_dir)
            log("Model restored.")
        else:
            self.session.run(tf.global_variables_initializer())

    def optimizer(self):
        """
        :return: Adam optimizer minimizing the combined actor/critic loss.
        """
        return tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss_val)

    def critic(self):
        """
        Calculates the estimated value for every state in self.state_input
        with a two-layer MLP (10 ReLU units -> 1 linear output). The critic
        depends on no tensors besides self.state_input.
        :return: A tensor of shape [num_states, 1] with the estimated values.
        """
        c_fc1 = tf.contrib.layers.fully_connected(inputs=self.state_input,
                                                  num_outputs=10,
                                                  activation_fn=tf.nn.relu,
                                                  weights_initializer=tf.contrib.layers.xavier_initializer())

        c_fc2 = tf.contrib.layers.fully_connected(inputs=c_fc1,
                                                  num_outputs=1,
                                                  activation_fn=None,
                                                  weights_initializer=tf.contrib.layers.xavier_initializer())

        return c_fc2

    def actor(self):
        """
        Calculates the action probabilities for every state in self.state_input
        with a three-layer MLP (20 ReLU -> 20 ReLU -> num_actions + softmax).
        The actor depends on no tensors besides self.state_input.
        :return: A tensor of shape [num_states, num_actions] holding the
        probability distribution over actions.
        """
        a_fc1 = tf.contrib.layers.fully_connected(inputs=self.state_input,
                                                  num_outputs=20,
                                                  activation_fn=tf.nn.relu,
                                                  weights_initializer=tf.contrib.layers.xavier_initializer())

        a_fc2 = tf.contrib.layers.fully_connected(inputs=a_fc1,
                                                  num_outputs=20,
                                                  activation_fn=tf.nn.relu,
                                                  weights_initializer=tf.contrib.layers.xavier_initializer())

        a_fc3 = tf.contrib.layers.fully_connected(inputs=a_fc2,
                                                  num_outputs=self.num_actions,
                                                  activation_fn=None,
                                                  weights_initializer=tf.contrib.layers.xavier_initializer())

        return tf.nn.softmax(a_fc3)

    def loss(self):
        """
        :return: A scalar tensor representing the combined actor and critic loss.
        """
        # critic loss: squared error between the predicted state value and
        # the observed discounted return
        advantage = self.discounted_episode_rewards_ - self.state_value
        critic_loss = tf.reduce_sum(tf.square(advantage))

        # actor loss: cross-entropy of the chosen actions weighted by the
        # advantage. NOTE(review): actor_probs are already softmax outputs,
        # so passing tf.log(actor_probs) as "logits" re-applies a softmax
        # inside the op — kept as-is to preserve the trained behavior, but
        # worth revisiting. (A second, unused cross-entropy/policy-gradient
        # computation that followed here was dead code and has been removed.)
        neg_log_prob = tf.nn.softmax_cross_entropy_with_logits_v2(logits=tf.log(self.actor_probs),
                                                                  labels=self.actions)
        actor_loss = tf.reduce_sum(neg_log_prob * advantage)

        return critic_loss + actor_loss

    def save_model(self):
        """Persist the current session variables to self.model_dir."""
        save_path = self.saver.save(self.session, self.model_dir)
        log("Model saved in path: %s" % str(save_path))

    def train_episode(self):
        """
        train_episode will be called several times by drills.py to train the
        agent. Runs the game to completion with the current policy, collecting
        (state, action, reward) triples, then performs one gradient update on
        the discounted, normalized returns and saves the model.
        :return: the undiscounted sum of rewards collected in the episode.
        """
        state = self.game.reset()
        self.normalizer.reset()
        self.normalizer.observe(state)
        state = self.normalizer.normalize(state).eval(session=self.session)
        done = False

        episode_states = []
        episode_actions = []
        episode_rewards = []

        while not done:
            log(' iteration: ' + str(self.game.iteration))
            # Sample an action from the actor's output distribution.
            action_probability_distribution = self.session.run(self.actor_probs, \
                feed_dict={self.state_input: state.reshape([1, self.state_size])})
            action = np.random.choice(range(action_probability_distribution.shape[1]), \
                p=action_probability_distribution.ravel())
            new_state, reward, done, _ = self.game.step(action)

            # append this step (the chosen action is stored one-hot)
            episode_states.append(state)
            action_ = np.zeros(self.num_actions)
            action_[action] = 1
            episode_actions.append(action_)
            episode_rewards.append(reward)

            state = new_state
            self.normalizer.observe(state)
            state = self.normalizer.normalize(state).eval(session=self.session)

        # Now that we have run the episode, we use this data to train the agent
        start = time.time()
        discounted_episode_rewards = self.discount_and_normalize_rewards(episode_rewards)

        _ = self.session.run(self.train_op, feed_dict={self.state_input: np.array(episode_states), \
                                                       self.actions: np.array(episode_actions), \
                                                       self.discounted_episode_rewards_: discounted_episode_rewards})
        end = time.time()
        # BUG FIX: was (start - end) / 60, which reported a negative duration.
        log('Episode Agent Training Time ~ ' + str((end - start) / 60) + ' minutes.')

        self.save_model()

        return np.sum(episode_rewards)

    def discount_and_normalize_rewards(self, episode_rewards):
        """
        used internally to calculate the discounted episode rewards

        episode_rewards: sequence of per-step rewards for one episode.
        :return: numpy array of the same length where each entry is the
        gamma-discounted return from that step, normalized to zero mean and
        (when possible) unit variance.
        """
        # Use an explicit float buffer: zeros_like() on an integer reward
        # list would silently truncate the discounted returns.
        discounted_episode_rewards = np.zeros(len(episode_rewards), dtype=np.float64)
        cumulative = 0.0
        for i in reversed(range(len(episode_rewards))):
            cumulative = cumulative * self.gamma + episode_rewards[i]
            discounted_episode_rewards[i] = cumulative

        mean = np.mean(discounted_episode_rewards)
        std = np.std(discounted_episode_rewards)

        # Guard against zero std (e.g. a single-step episode), which would
        # otherwise produce NaNs; fall back to mean-centering only.
        if std > 0:
            discounted_episode_rewards = (discounted_episode_rewards - mean) / std
        else:
            discounted_episode_rewards = discounted_episode_rewards - mean

        return discounted_episode_rewards
217 |
218 |
--------------------------------------------------------------------------------