├── nativerl
├── python
│ ├── tests
│ │ ├── __init__.py
│ │ ├── factory
│ │ │ ├── __init__.py
│ │ │ ├── util
│ │ │ │ ├── __init__.py
│ │ │ │ ├── masking.py
│ │ │ │ └── writer.py
│ │ │ ├── config.py
│ │ │ ├── controls.py
│ │ │ ├── config.yml
│ │ │ └── simulation.py
│ │ ├── game2048
│ │ │ ├── __init__.py
│ │ │ ├── obs.yaml
│ │ │ ├── print_play.py
│ │ │ ├── constants.py
│ │ │ ├── base_simple.py
│ │ │ ├── env.py
│ │ │ ├── base.py
│ │ │ └── visual_play.py
│ │ ├── mouse
│ │ │ ├── __init__.py
│ │ │ ├── reward.py
│ │ │ ├── obs.yaml
│ │ │ ├── mouse_env.py
│ │ │ ├── mouse_env_pathmind.py
│ │ │ ├── two_reward.py
│ │ │ └── multi_mouse_env_pathmind.py
│ │ ├── custom_callback.py
│ │ ├── test_nativerl_arrays.py
│ │ ├── test_training.py
│ │ ├── cartpole.py
│ │ └── gym_cartpole.py
│ ├── pytest.ini
│ ├── requirements-dev.txt
│ ├── requirements.txt
│ ├── pathmind_training
│ │ ├── __init__.py
│ │ ├── exports.py
│ │ ├── loggers.py
│ │ ├── scheduler.py
│ │ ├── pynativerl.py
│ │ ├── models.py
│ │ └── callbacks.py
│ ├── config.json
│ └── .gitignore
├── CMakeLists.txt.in
├── src
│ └── main
│ │ ├── assembly
│ │ ├── src.xml
│ │ └── bin.xml
│ │ ├── java
│ │ └── ai
│ │ │ └── skymind
│ │ │ └── nativerl
│ │ │ ├── ReleaseEnvironment.java
│ │ │ ├── CreateEnvironment.java
│ │ │ ├── NativeRLPresets.java
│ │ │ ├── Space.java
│ │ │ ├── Discrete.java
│ │ │ ├── Continuous.java
│ │ │ ├── LearningAgentHelper.java
│ │ │ ├── NativeRL.java
│ │ │ ├── Array.java
│ │ │ ├── SSizeTVector.java
│ │ │ ├── FloatVector.java
│ │ │ └── Environment.java
│ │ └── resources
│ │ └── ai
│ │ └── skymind
│ │ └── nativerl
│ │ └── PathmindLearningAgent.java.hbs
├── examples
│ ├── setup.sh
│ ├── traintraffic.sh
│ └── traincartpole.sh
├── pr_test
│ ├── errorCheck.sh
│ └── script.sh
└── CMakeLists.txt
├── .mvn
└── wrapper
│ ├── maven-wrapper.jar
│ └── maven-wrapper.properties
├── PathmindPolicyHelper
├── Assets
│ ├── pathmind-multi-16x16.png
│ ├── pathmind-multi-32x32.png
│ ├── pathmind-multi-source.psd
│ ├── pathmind-single-16x16.png
│ ├── pathmind-single-32x32.png
│ ├── pathmind-single-source.psd
│ ├── pathmind-multi-original.png
│ └── pathmind-single-original.png
├── bundle.sh
├── fixup.sh
└── README.md
├── nativerl-tests
├── src
│ └── test
│ │ ├── resources
│ │ └── ai
│ │ │ └── skymind
│ │ │ └── nativerl
│ │ │ └── trafficphases
│ │ │ └── database
│ │ │ └── db.properties
│ │ └── java
│ │ └── ai
│ │ └── skymind
│ │ └── nativerl
│ │ ├── PythonModelTest.java
│ │ ├── ModelTest.java
│ │ └── AnyLogicModelTest.java
└── pom.xml
├── nativerl-policy
├── src
│ ├── main
│ │ └── java
│ │ │ └── ai
│ │ │ └── skymind
│ │ │ └── nativerl
│ │ │ ├── exception
│ │ │ └── PathmindInvalidResponseException.java
│ │ │ ├── RewardFunction.java
│ │ │ ├── annotation
│ │ │ ├── Discrete.java
│ │ │ └── Continuous.java
│ │ │ ├── util
│ │ │ └── ObjectMapperHolder.java
│ │ │ ├── AnnotationProcessor.java
│ │ │ ├── ObservationFilter.java
│ │ │ ├── PolicyHelper.java
│ │ │ ├── ServerPolicyHelper.java
│ │ │ └── ActionMaskProcessor.java
│ └── test
│ │ └── java
│ │ └── ai
│ │ └── skymind
│ │ └── nativerl
│ │ ├── ActionMaskProcessorTest.java
│ │ ├── RewardProcessorTest.java
│ │ ├── ObservationProcessorTest.java
│ │ └── ActionProcessorTest.java
└── pom.xml
├── nativerl-analyzer
├── api
│ ├── src
│ │ ├── main
│ │ │ ├── java
│ │ │ │ └── io
│ │ │ │ │ └── skymind
│ │ │ │ │ └── pathmind
│ │ │ │ │ └── analyzer
│ │ │ │ │ ├── exception
│ │ │ │ │ ├── ProcessingException.java
│ │ │ │ │ ├── ZipExtractionException.java
│ │ │ │ │ ├── InvalidZipFileException.java
│ │ │ │ │ └── UnexpectedScriptResultException.java
│ │ │ │ │ ├── PathmindModelAnalyzerApplication.java
│ │ │ │ │ ├── config
│ │ │ │ │ └── swagger
│ │ │ │ │ │ ├── SwaggerProperties.java
│ │ │ │ │ │ └── SwaggerConfig.java
│ │ │ │ │ └── api
│ │ │ │ │ ├── exception
│ │ │ │ │ ├── dto
│ │ │ │ │ │ └── ApiErrorsResponse.java
│ │ │ │ │ └── handler
│ │ │ │ │ │ └── ControllerExceptionHandler.java
│ │ │ │ │ └── dto
│ │ │ │ │ └── AnalyzeRequestDTO.java
│ │ │ └── resources
│ │ │ │ └── application.yaml
│ │ └── test
│ │ │ └── java
│ │ │ └── io
│ │ │ └── skymind
│ │ │ └── pathmind
│ │ │ └── analyzer
│ │ │ └── PathmindModelAnalyzerApplicationTests.java
│ └── pom.xml
├── common
│ ├── src
│ │ └── main
│ │ │ └── java
│ │ │ └── io
│ │ │ └── skymind
│ │ │ └── pathmind
│ │ │ └── analyzer
│ │ │ └── dto
│ │ │ ├── SimulationParameter.java
│ │ │ └── HyperparametersDTO.java
│ └── pom.xml
├── scripts
│ ├── download_lib.sh
│ └── check_model.sh
├── generator
│ ├── src
│ │ └── main
│ │ │ ├── resources
│ │ │ └── templates
│ │ │ │ ├── PathmindLearningAgent.java.hbs
│ │ │ │ └── Training.java.hbs
│ │ │ └── java
│ │ │ └── io
│ │ │ └── skymind
│ │ │ └── pathmind
│ │ │ └── analyzer
│ │ │ └── code
│ │ │ └── CodeGenerator.java
│ └── pom.xml
├── Dockerfile
├── pom.xml
└── README.md
├── .gitignore
├── .pre-commit-config.yaml
├── Dockerfile
├── .github
└── workflows
│ └── python-package.yml
└── Jenkinsfile

/nativerl/python/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nativerl/python/tests/factory/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nativerl/python/tests/game2048/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nativerl/python/tests/mouse/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nativerl/python/tests/factory/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/nativerl/python/tests/mouse/reward.py:
--------------------------------------------------------------------------------
1 | def reward_function(rew: dict):
2 |     return rew["found_cheese"] * 2
3 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/.mvn/wrapper/maven-wrapper.jar
--------------------------------------------------------------------------------
/nativerl/python/tests/mouse/obs.yaml:
--------------------------------------------------------------------------------
1 | observations:
2 |   - mouse_row
3 |   - mouse_col
4 |   - mouse_row_dist
5 |   - mouse_col_dist
6 |
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-multi-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-multi-16x16.png
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-multi-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-multi-32x32.png
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-multi-source.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-multi-source.psd
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-single-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-single-16x16.png
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-single-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-single-32x32.png
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-single-source.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-single-source.psd
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-multi-original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-multi-original.png
--------------------------------------------------------------------------------
/PathmindPolicyHelper/Assets/pathmind-single-original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PathmindAI/nativerl/HEAD/PathmindPolicyHelper/Assets/pathmind-single-original.png
--------------------------------------------------------------------------------
/nativerl/python/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | markers =
3 |     integration: long-running tests
4 |     pynativerl: tests for pynativerl
5 |     nativerl: tests for nativerl
6 |
--------------------------------------------------------------------------------
/nativerl/python/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # Install everything the developer needs in addition to prod requirements
2 | -r requirements.txt
3 |
4 | flake8
5 | flake8-debugger
6 | pre-commit
7 | pytest
8 |
--------------------------------------------------------------------------------
/nativerl-tests/src/test/resources/ai/skymind/nativerl/trafficphases/database/db.properties:
--------------------------------------------------------------------------------
1 | #HSQL Database Engine 2.4.1
2 | #Tue Aug 25 10:17:48 JST 2020
3 | tx_timestamp=0
4 | modified=no
5 | version=2.4.1
6 |
--------------------------------------------------------------------------------
/nativerl/python/tests/game2048/obs.yaml:
--------------------------------------------------------------------------------
1 | observations:
2 |   - "0"
3 |   - "2"
4 |   - "4"
5 |   - "8"
6 |   - "16"
7 |   - "32"
8 |   - "64"
9 |   - "128"
10 |   - "256"
11 |   - "512"
12 |   - "1024"
13 |
--------------------------------------------------------------------------------
/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.2/apache-maven-3.6.2-bin.zip
2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar
3 |
--------------------------------------------------------------------------------
/nativerl/python/requirements.txt:
--------------------------------------------------------------------------------
1 | aioredis<2
2 | numpy==1.19.4
3 | gym==0.17.3
4 | or_gym==0.1.5
5 | dm_tree==0.1.5
6 | ray[rllib]==1.3.0
7 | fire==0.3.1
8 | matplotlib==3.3.3
9 | tensorflow==2.4.3
10 | Gpy==1.9.9
11 | scikit-learn==0.24.0
12 | pathmind==0.4
13 |
--------------------------------------------------------------------------------
/nativerl/python/pathmind_training/__init__.py:
--------------------------------------------------------------------------------
1 | from .environments import get_environment
2 | from .loggers import get_loggers
3 | from .scheduler import get_scheduler
4 | from .stopper import Stopper
5 | from .utils import modify_anylogic_db_properties, write_completion_report, write_file
6 |
--------------------------------------------------------------------------------
/nativerl-policy/src/main/java/ai/skymind/nativerl/exception/PathmindInvalidResponseException.java:
--------------------------------------------------------------------------------
1 | package ai.skymind.nativerl.exception;
2 |
3 | public class PathmindInvalidResponseException extends RuntimeException {
4 |     public PathmindInvalidResponseException(String message) {
5 |         super(message);
6 |     }
7 | }
8 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/exception/ProcessingException.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.exception;
2 |
3 | public class ProcessingException extends RuntimeException {
4 |     public ProcessingException(String message, Throwable cause) {
5 |         super(message, cause);
6 |     }
7 | }
8 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/exception/ZipExtractionException.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.exception;
2 |
3 | public class ZipExtractionException extends RuntimeException {
4 |     public ZipExtractionException(String message, Throwable cause) {
5 |         super(message, cause);
6 |     }
7 | }
8 |
--------------------------------------------------------------------------------
/nativerl-policy/src/main/java/ai/skymind/nativerl/RewardFunction.java:
--------------------------------------------------------------------------------
1 | package ai.skymind.nativerl;
2 |
3 | /**
4 |  * An interface that users can implement to compute the reward from before and after state values.
5 |  *
6 |  * @author saudet
7 |  */
8 | public interface RewardFunction<V> {
9 |     double reward(V before, V after);
10 | }
11 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/test/java/io/skymind/pathmind/analyzer/PathmindModelAnalyzerApplicationTests.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer;
2 |
3 | import org.junit.jupiter.api.Test;
4 | import org.springframework.boot.test.context.SpringBootTest;
5 |
6 | @SpringBootTest
7 | class PathmindModelAnalyzerApplicationTests {
8 |
9 |     @Test
10 |     void contextLoads() {
11 |     }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/PathmindPolicyHelper/bundle.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | mkdir -p target/classes/
3 | unzip -o PathmindHelper.jar -d target/classes/
4 | unzip -o ../nativerl-policy/target/nativerl-policy-*-SNAPSHOT.jar -d target/classes/
5 | cp -a Assets/pathmind-single-original.png Assets/pathmind-single-??x??.png target/classes/
6 | sed -i '/<ClassPathEntry>/,/<\/ClassPathEntry>/d' target/classes/library.xml
7 | cd target/classes/ && zip -r ../PathmindHelper.jar .
8 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/exception/InvalidZipFileException.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.exception;
2 |
3 | public class InvalidZipFileException extends RuntimeException {
4 |     public InvalidZipFileException(String message) {
5 |         super(message);
6 |     }
7 |
8 |     public InvalidZipFileException(String message, Throwable cause) {
9 |         super(message, cause);
10 |     }
11 | }
12 |
--------------------------------------------------------------------------------
/nativerl-analyzer/common/src/main/java/io/skymind/pathmind/analyzer/dto/SimulationParameter.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.dto;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Getter;
5 | import lombok.NoArgsConstructor;
6 |
7 | @AllArgsConstructor
8 | @NoArgsConstructor
9 | @Getter
10 | public class SimulationParameter {
11 |     private Integer index;
12 |     private String key;
13 |     private String value;
14 |     private Integer type;
15 | }
16 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/exception/UnexpectedScriptResultException.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.exception;
2 |
3 | public class UnexpectedScriptResultException extends RuntimeException {
4 |     public UnexpectedScriptResultException(String message) {
5 |         super(message);
6 |     }
7 |
8 |     public UnexpectedScriptResultException(String message, Throwable cause) {
9 |         super(message, cause);
10 |     }
11 | }
12 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/resources/application.yaml:
--------------------------------------------------------------------------------
1 | io:
2 |   skymind:
3 |     pathmind:
4 |       model-analyzer:
5 |         swagger:
6 |           title: Pathmind model analyzer
7 |           description: A service that processes an AnyLogic model to extract data needed to be used in Pathmind Platform
8 |           base-package: io.skymind.pathmind.analyzer
9 | spring:
10 |   servlet:
11 |     multipart:
12 |       max-file-size: 500MB
13 |       max-request-size: 500MB
14 |
--------------------------------------------------------------------------------
/nativerl/python/pathmind_training/exports.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 |
5 | def export_policy_from_checkpoint(trainer):
6 |     # Save to experiment root directory
7 |     checkpoint_model_dir = os.path.join(os.pardir, "checkpoint_model")
8 |     # If the model directory already exists, remove it first
9 |     if os.path.exists(checkpoint_model_dir):
10 |         shutil.rmtree(checkpoint_model_dir)
11 |     # Generate policy
12 |     trainer.export_policy_model(checkpoint_model_dir)
13 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/PathmindModelAnalyzerApplication.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | @SpringBootApplication
7 | public class PathmindModelAnalyzerApplication {
8 |
9 |     public static void main(String[] args) {
10 |         SpringApplication.run(PathmindModelAnalyzerApplication.class, args);
11 |     }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/nativerl/CMakeLists.txt.in:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.4)
2 | project(pybind11-download NONE)
3 |
4 | include(ExternalProject)
5 | ExternalProject_Add(pybind11
6 |     URL https://github.com/pybind/pybind11/archive/v2.6.1.tar.gz
7 |     SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/pybind11-src"
8 |     BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/pybind11-build"
9 |     CONFIGURE_COMMAND ""
10 |     CMAKE_ARGS ""
11 |     BUILD_COMMAND ""
12 |     INSTALL_COMMAND ""
13 |     TEST_COMMAND ""
14 | )
15 |
--------------------------------------------------------------------------------
/PathmindPolicyHelper/fixup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | mkdir -p target/classes/
3 | unzip -o PathmindHelper.jar -d target/classes/
4 | cp -a Assets/pathmind-single-original.png Assets/pathmind-single-??x??.png target/classes/
5 | cp -a ../nativerl-policy/target/nativerl-policy-*-SNAPSHOT.jar target/PathmindPolicy.jar
6 | cd target/classes/
7 | sed -i '/<ClassPathEntry>/,/<\/ClassPathEntry>/d' library.xml
8 | sed -i '/^\s*$/d' META-INF/MANIFEST.MF
9 | echo "Class-Path: PathmindPolicy.jar" >> META-INF/MANIFEST.MF
10 | zip -r ../PathmindHelper.jar .
11 |
--------------------------------------------------------------------------------
/nativerl-analyzer/scripts/download_lib.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | S3BUCKET=$1
3 | folder=$2
4 | default_file=$3
5 | alternative_file=$4
6 |
7 | aws s3api head-object --bucket ${S3BUCKET} --key ${folder}/${default_file} || not_exist=true
8 | if [ ${not_exist} ]; then
9 |     file=${alternative_file}
10 | else
11 |     file=${default_file}
12 | fi
13 |
14 | aws s3 cp s3://${S3BUCKET}/${folder}/${file} ./
15 |
16 | if [[ $file == *.tar.gz ]]; then
17 |     tar -xzf ${file}
18 |     rm ${file}
19 | fi
20 |
21 | if [[ $file == *.zip ]]; then
22 |     unzip ${file}
23 |     rm ${file}
24 | fi
25 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/config/swagger/SwaggerProperties.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.config.swagger;
2 |
3 | import lombok.Data;
4 | import org.springframework.boot.context.properties.ConfigurationProperties;
5 |
6 | @Data
7 | @ConfigurationProperties(prefix = "io.skymind.pathmind.model-analyzer.swagger")
8 | public class SwaggerProperties {
9 |
10 |     private String title;
11 |     private String description;
12 |     /**
13 |      * Fully qualified name of the APIs package
14 |      */
15 |     private String basePackage;
16 | }
17 |
--------------------------------------------------------------------------------
/nativerl-policy/src/main/java/ai/skymind/nativerl/annotation/Discrete.java:
--------------------------------------------------------------------------------
1 | package ai.skymind.nativerl.annotation;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | /**
10 |  * Defines a tuple of {@link #size()} discrete action spaces with {@link #n()} actions each.
11 |  *
12 |  * @author saudet
13 |  */
14 | @Documented
15 | @Retention(RetentionPolicy.RUNTIME)
16 | @Target({ElementType.FIELD})
17 | public @interface Discrete {
18 |     long n();
19 |     long size() default 1;
20 | }
21 |
--------------------------------------------------------------------------------
/nativerl-analyzer/common/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
4 |     <modelVersion>4.0.0</modelVersion>
5 |
6 |     <parent>
7 |         <groupId>io.skymind.pathmind</groupId>
8 |         <artifactId>model-analyzer-parent</artifactId>
9 |         <version>0.0.1-SNAPSHOT</version>
10 |     </parent>
11 |
12 |     <artifactId>model-analyzer-common</artifactId>
13 |
14 |     <build>
15 |         <finalName>ma-common</finalName>
16 |     </build>
17 |
18 | </project>
19 |
--------------------------------------------------------------------------------
/nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/api/exception/dto/ApiErrorsResponse.java:
--------------------------------------------------------------------------------
1 | package io.skymind.pathmind.analyzer.api.exception.dto;
2 |
3 | import io.swagger.annotations.ApiModelProperty;
4 | import lombok.AllArgsConstructor;
5 | import lombok.Getter;
6 | import lombok.Setter;
7 | import lombok.ToString;
8 |
9 | @Getter
10 | @Setter
11 | @ToString
12 | @AllArgsConstructor
13 | public class ApiErrorsResponse {
14 |     @ApiModelProperty(value = "Code", example = "Response status code")
15 |     private int code;
16 |     @ApiModelProperty(value = "Error", example = "Error message detected during the processing the request")
17 |     private String error;
18 | }
19 |
--------------------------------------------------------------------------------
/nativerl-policy/src/main/java/ai/skymind/nativerl/annotation/Continuous.java:
--------------------------------------------------------------------------------
1 | package ai.skymind.nativerl.annotation;
2 |
3 | import java.lang.annotation.Documented;
4 | import java.lang.annotation.ElementType;
5 | import java.lang.annotation.Retention;
6 | import java.lang.annotation.RetentionPolicy;
7 | import java.lang.annotation.Target;
8 |
9 | /**
10 |  * Defines a continuous space with given {@link #shape()} and with values between {@link #low()} and {@link #high()}.
11 |  *
12 |  * @author saudet
13 |  */
14 | @Documented
15 | @Retention(RetentionPolicy.RUNTIME)
16 | @Target({ElementType.FIELD})
17 | public @interface Continuous {
18 |     double[] low();
19 |     double[] high();
20 |     long[] shape();
21 | }
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dependency-reduced-pom.xml
2 | **/target/**
3 | **/*.class
4 | **/*.jar
5 | **/*.so
6 | **/*.dylib
7 | database
8 | lib
9 |
10 | .DS_Store
11 | *.autosave
12 | *.jar
13 | *.class
14 | *.lst
15 |
16 | HELP.md
17 | target/
18 | !.mvn/wrapper/maven-wrapper.jar
19 | !**/src/main/**
20 | !**/src/test/**
21 |
22 | ### STS ###
23 | .apt_generated
24 | .classpath
25 | .factorypath
26 | .project
27 | .settings
28 | .springBeans
29 | .sts4-cache
30 |
31 | ### IntelliJ IDEA ###
32 | .idea
33 | *.iws
34 | *.iml
35 | *.ipr
36 |
37 | ### NetBeans ###
38 | /nbproject/private/
39 | /nbbuild/
40 | /dist/
41 | /nbdist/
42 | /.nb-gradle/
43 | build/
44 |
45 | ### VS Code ###
46 | .vscode/
47 | pathmind-lib
48 |
49 | .venv
50 | .pyenv
51 |
52 | testoutputs/
53 |
--------------------------------------------------------------------------------
/nativerl/python/tests/game2048/print_play.py:
--------------------------------------------------------------------------------
1 | import logic
2 | from base import Game2048
3 |
4 |
5 | def render(matrix):
6 |     for row in matrix:
7 |         print(row)
8 |     print(f"Total reward: {game.total_reward}, steps: {game.steps}")
9 |
10 |
11 | game = Game2048(random_movements=True, human=True)
12 |
13 | while not game.is_done():
14 |     render(game.matrix)
15 |     print(game.get_observation())
16 |     if not game.random:
17 |         game.action = input("\nMake a move: Up: 0, Down: 1, Left: 2, Right: 3\n")
18 |     game.step()
19 |
20 | if logic.game_state(game.matrix) == "win":
21 |     print("You win!")
22 | else:
23 |     print("You lose :(")
24 | print(f"Total reward: {game.total_reward}, steps: {game.steps}")
25 |
--------------------------------------------------------------------------------
/nativerl/python/tests/factory/config.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import yaml
4 |
5 | full_dir_name = os.path.dirname(os.path.realpath(__file__))
6 | config_file_path = os.path.join(full_dir_name, "./config.yml")
7 |
8 | with open(config_file_path, "r") as f:
9 |     config = yaml.safe_load(f.read()).get("config")
10 |
11 | SIMULATION_CONFIG = config
12 | MASK_KEY = "action_mask"
13 | OBS_KEY = "observations"
14 |
15 |
16 | def get_observation_names():
17 |     return [
18 |         k for k, v in SIMULATION_CONFIG.items() if k.startswith("obs_") and v is True
19 |     ]
20 |
21 |
22 | def get_reward_names_and_weights():
23 |     return {
24 |         k: v.get("weight")
25 |         for k, v in SIMULATION_CONFIG.items()
26 |         if k.startswith("rew_") and v.get("value") is True
27 |     }
28 |
--------------------------------------------------------------------------------
/nativerl/python/config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "environment": "tests.cartpole.PathmindEnvironment",
3 |   "is_gym": false,
4 |   "algorithm": "PPO",
5 |   "output_dir": ".",
6 |   "multi_agent": true,
7 |   "max_memory_in_mb": 4096,
8 |   "num_cpus": 1,
9 |   "num_gpus": 0,
10 |   "num_workers": 1,
11 |   "num_hidden_layers": 2,
12 |   "num_hidden_nodes": 256,
13 |   "max_iterations": 500,
14 |   "max_time_in_sec": 43200,
15 |   "max_episodes": 50000,
16 |   "num_samples": 4,
17 |   "resume": false,
18 |   "checkpoint_frequency": 50,
19 |   "debug_metrics": false,
20 |   "user_log": false,
21 |   "autoregressive": false,
22 |   "episode_reward_range_th": 0.01,
23 |   "entropy_slope_th": 0.01,
24 |
"vf_loss_range_th": 0.1, 25 | "value_pred_th": 0.01, 26 | "action_masking": false, 27 | "freezing": false, 28 | "discrete": true 29 | } 30 | -------------------------------------------------------------------------------- /nativerl/python/pathmind_training/loggers.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from ray.tune.logger import DEFAULT_LOGGERS, CSVLogger 4 | from ray.tune.result import EXPR_PROGRESS_FILE 5 | 6 | 7 | class PathmindCSVLogger(CSVLogger): 8 | def _init(self): 9 | """CSV outputted with Headers as first set of results.""" 10 | progress_file = os.path.join(self.logdir, EXPR_PROGRESS_FILE) 11 | self._continuing = ( 12 | os.path.exists(progress_file) and os.path.getsize(progress_file) > 0 13 | ) 14 | self._file = open(progress_file, "a") 15 | self._csv_out = None 16 | 17 | 18 | def get_loggers(): 19 | # (ray.tune.logger.JsonLogger, 20 | # ray.tune.logger.CSVLogger, 21 | # ray.tune.logger.TBXLogger) 22 | loggers = list(DEFAULT_LOGGERS) 23 | loggers[1] = PathmindCSVLogger 24 | return loggers 25 | -------------------------------------------------------------------------------- /nativerl/src/main/assembly/src.xml: -------------------------------------------------------------------------------- 1 | 3 | src 4 | 5 | zip 6 | 7 | 8 | 9 | ${project.basedir} 10 | / 11 | true 12 | 13 | 14 | **/target/** 15 | **/cppbuild/** 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/ReleaseEnvironment.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import org.bytedeco.javacpp.*; 4 | import org.bytedeco.javacpp.annotation.*; 5 | 6 | /** 7 | * The factory method to release instances of arbitrary subclasses of Environment. 8 | * This gets exported to jniNativeRL.h as the C function releaseJavaEnvironment(). 
9 | * This must be used to release objects created with createJavaEnvironment(). 10 | */ 11 | @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 12 | public class ReleaseEnvironment extends FunctionPointer { 13 | public @Name("releaseJavaEnvironment") void call(Environment environment) throws Exception { 14 | Environment e = CreateEnvironment.instances.remove(new Pointer(environment)); 15 | if (e != null) { 16 | e.close(); 17 | e.deallocate(); 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /nativerl/src/main/resources/ai/skymind/nativerl/PathmindLearningAgent.java.hbs: -------------------------------------------------------------------------------- 1 | package com.pathmind.anylogic; 2 | 3 | import com.anylogic.engine.LearningAgentInterface; 4 | import com.anylogic.rl.data.Action; 5 | import com.anylogic.rl.data.Configuration; 6 | import com.anylogic.rl.data.Observation; 7 | 8 | public class PathmindLearningAgent implements LearningAgentInterface { 9 | 10 | @SuppressWarnings("unchecked") 11 | public Act takeAction(Obs obs, Act act) { 12 | // get obs extends Observaion via reflection 13 | 14 | // translate it to double[] 15 | 16 | // call this.computeActions(double[]) 17 | 18 | // convert float[] to act extends Action 19 | 20 | // return act 21 | 22 | throw new RuntimeException("takeAction is not allowed to run for now"); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /nativerl-analyzer/generator/src/main/resources/templates/PathmindLearningAgent.java.hbs: -------------------------------------------------------------------------------- 1 | package com.pathmind.anylogic; 2 | 3 | import com.anylogic.engine.LearningAgentInterface; 4 | import com.anylogic.rl.data.Action; 5 | import com.anylogic.rl.data.Configuration; 6 | import com.anylogic.rl.data.Observation; 7 | 8 | public class PathmindLearningAgent implements LearningAgentInterface { 9 | 10 
| @SuppressWarnings("unchecked") 11 | public Act takeAction(Obs obs, Act act) { 12 | // get obs extends Observation via reflection 13 | 14 | // translate it to double[] 15 | 16 | // call this.computeActions(double[]) 17 | 18 | // convert the returned double[] to act extends Action 19 | 20 | // return act 21 | 22 | throw new RuntimeException("takeAction is not allowed to run for now"); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /nativerl/examples/setup.sh: -------------------------------------------------------------------------------- 1 | 2 | export CLASS_SNIPPET='' ; # Unused, ignore 3 | export RESET_SNIPPET='' ; # Unused, ignore 4 | export REWARD_TERMS_SNIPPET='rewardTermsRaw[0] = after[0] - before[0];' ; # Write reward function here. 5 | export METRICS_SNIPPET='' ; # Unused, ignore 6 | export DISCRETE_ACTIONS='123' ; # Set num actions 7 | export CONTINUOUS_OBSERVATIONS='123' ; # Set num observations 8 | export MAX_ITERATIONS='250' ; # One of many stop criteria options 9 | export TEST_ITERATIONS='0' ; # Unused, ignore 10 | export STEP_TIME='1' ; # Unused, ignore 11 | export STOP_TIME='420' ; # Unused, ignore 12 | export TIME_UNIT='MINUTE' ; # Unused, ignore 13 | export MAX_TIME_IN_SEC='43200' ; # One of many stop criteria options 14 | export NUM_SAMPLES='4' ; # Num PBT samples 15 | export MULTIAGENT='false' ; # Experimental, do not use 16 | export RESUME=${RESUME:='false'} ; # Keep false 17 | export CHECKPOINT_FREQUENCY='50' ; # Up to you 18 | export USER_LOG='true' ; # Log verbosity. Up to you.
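The variables above are plain environment variables, so a Python training entry point can read them through `os.environ`. The helper below is a hypothetical sketch, not part of the repository: the variable names and defaults come from setup.sh, but this parser itself is illustrative only.

```python
import os


def read_training_config(env=None):
    """Hypothetical helper: collect the variables exported by setup.sh.

    Defaults mirror the values the script exports; the function and its
    return shape are assumptions for illustration, not the real API.
    """
    env = os.environ if env is None else env

    def as_bool(v):
        return v.strip().lower() == "true"

    return {
        "discrete_actions": int(env.get("DISCRETE_ACTIONS", "0")),
        "continuous_observations": int(env.get("CONTINUOUS_OBSERVATIONS", "0")),
        "max_iterations": int(env.get("MAX_ITERATIONS", "250")),
        "max_time_in_sec": int(env.get("MAX_TIME_IN_SEC", "43200")),
        "num_samples": int(env.get("NUM_SAMPLES", "4")),
        "checkpoint_frequency": int(env.get("CHECKPOINT_FREQUENCY", "50")),
        "multiagent": as_bool(env.get("MULTIAGENT", "false")),
        "resume": as_bool(env.get("RESUME", "false")),
        "user_log": as_bool(env.get("USER_LOG", "true")),
        "reward_terms_snippet": env.get("REWARD_TERMS_SNIPPET", ""),
    }
```

Passing a plain dict instead of `os.environ` makes the parsing easy to unit-test without mutating the process environment.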
19 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/CreateEnvironment.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import java.util.Collections; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | /** 10 | * The factory method to create instances of arbitrary subclasses of Environment. 11 | * This gets exported to jniNativeRL.h as the C function createJavaEnvironment(). 12 | * To release them we must call releaseJavaEnvironment(). 13 | */ 14 | @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 15 | public class CreateEnvironment extends FunctionPointer { 16 | 17 | static Map<Pointer, Environment> instances = Collections.synchronizedMap(new HashMap<Pointer, Environment>()); 18 | 19 | public @Name("createJavaEnvironment") Environment call(String name) throws Exception { 20 | Environment e = Class.forName(name).asSubclass(Environment.class).newInstance(); 21 | instances.put(e, e); 22 | return e; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /nativerl/python/tests/custom_callback.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from ray.rllib.agents.callbacks import DefaultCallbacks 4 | from ray.rllib.env import BaseEnv 5 | from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker 6 | from ray.rllib.policy import Policy 7 | 8 | 9 | def get_callback(): 10 | class Callbacks(DefaultCallbacks): 11 | def on_episode_start( 12 | self, 13 | worker: RolloutWorker, 14 | base_env: BaseEnv, 15 | policies: Dict[str, Policy], 16 | episode: MultiAgentEpisode, 17 | **kwargs 18 | ): 19 | pass 20 | 21 | def on_episode_end( 22 | self, 23 | worker: RolloutWorker, 24 | base_env: BaseEnv, 25 | policies: Dict[str, Policy], 26 | episode:
MultiAgentEpisode, 27 | **kwargs 28 | ): 29 | pass 30 | 31 | def on_train_result(self, trainer, result: dict, **kwargs): 32 | pass 33 | 34 | return Callbacks 35 | -------------------------------------------------------------------------------- /nativerl-policy/src/main/java/ai/skymind/nativerl/util/ObjectMapperHolder.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl.util; 2 | 3 | import com.fasterxml.jackson.annotation.JsonAutoDetect; 4 | import com.fasterxml.jackson.annotation.PropertyAccessor; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | 7 | /** 8 | * Holds a single shared {@link ObjectMapper} 9 | * instance for use 10 | * across the whole project. 11 | */ 12 | public class ObjectMapperHolder { 13 | private static final ObjectMapper objectMapper = getMapper(); 14 | 15 | private ObjectMapperHolder() { 16 | } 17 | 18 | /** 19 | * Get the single object mapper used 20 | * for reading and writing JSON. 21 | * 22 | * @return the shared {@link ObjectMapper} instance 23 | */ 24 | public static ObjectMapper getJsonMapper() { 25 | return objectMapper; 26 | } 27 | 28 | private static ObjectMapper getMapper() { 29 | ObjectMapper om = new ObjectMapper(); 30 | om.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); 31 | 32 | return om; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.4.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: https://github.com/myint/autoflake 12 | rev: "v1.4" 13 | hooks: 14 | - id: autoflake 15 | args: 16 | [ 17 | "--in-place", 18
"--remove-all-unused-imports", 19 | "--ignore-init-module-imports", 20 | ] 21 | - repo: https://github.com/pycqa/isort 22 | rev: 5.8.0 23 | hooks: 24 | - id: isort 25 | name: isort (python) 26 | args: ["--profile", "black", "--filter-files"] 27 | - repo: https://github.com/psf/black 28 | rev: 21.4b2 29 | hooks: 30 | - id: black 31 | - repo: https://github.com/pre-commit/mirrors-prettier 32 | rev: "v2.2.1" 33 | hooks: 34 | - id: prettier 35 | -------------------------------------------------------------------------------- /nativerl/python/tests/test_nativerl_arrays.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | if os.environ.get("USE_PY_NATIVERL"): 6 | import pathmind_training.pynativerl as nativerl 7 | else: 8 | import nativerl 9 | 10 | 11 | def test_nativerl_arrays(): 12 | np_arr = np.array([2.0, 3.0, 3.0, 4.0], dtype=np.float32) 13 | arr = nativerl.Array(np_arr) 14 | 15 | assert len(arr) == 4 16 | 17 | term_contributions_dict: dict = {} 18 | 19 | flat_arr = np.array([2.0, 4.0, 6.0, 8.0], dtype=np.float32) 20 | 21 | for i in range(0, 2): 22 | term_contributions_dict[str(i)] = flat_arr 23 | 24 | max_array = np.zeros(len(arr), dtype=np.float32) 25 | for values in term_contributions_dict.values(): 26 | max_array = np.array( 27 | [max(max_array[i], abs(values[i])) for i in range(len(arr))] 28 | ) 29 | term_contributions = nativerl.Array(max_array) 30 | if hasattr(nativerl, "FloatVector"): 31 | # only in c++ version 32 | assert term_contributions.values() == nativerl.FloatVector([2.0, 4.0, 6.0, 8.0]) 33 | -------------------------------------------------------------------------------- /nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/api/dto/AnalyzeRequestDTO.java: -------------------------------------------------------------------------------- 1 | package io.skymind.pathmind.analyzer.api.dto; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | 
import lombok.NoArgsConstructor; 6 | 7 | import com.fasterxml.jackson.annotation.JsonSetter; 8 | import com.fasterxml.jackson.annotation.Nulls; 9 | 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | @Data 13 | public class AnalyzeRequestDTO { 14 | public enum ModelType { 15 | ANY_LOGIC, 16 | PYTHON 17 | } 18 | 19 | private String id; 20 | @JsonSetter(nulls = Nulls.AS_EMPTY) 21 | private ModelType type = ModelType.ANY_LOGIC; 22 | @JsonSetter(nulls = Nulls.AS_EMPTY) 23 | private String mainAgent = ""; 24 | @JsonSetter(nulls = Nulls.AS_EMPTY) 25 | private String experimentClass = ""; 26 | @JsonSetter(nulls = Nulls.AS_EMPTY) 27 | private String experimentType = ""; 28 | @JsonSetter(nulls = Nulls.AS_EMPTY) 29 | private String pathmindHelperClass = ""; 30 | @JsonSetter(nulls = Nulls.AS_EMPTY) 31 | private String environment = ""; 32 | } 33 | -------------------------------------------------------------------------------- /nativerl/pr_test/errorCheck.sh: -------------------------------------------------------------------------------- 1 | grep -m 1 "python3: can't open file 'rllibtrain.py'" process_output.log >> errors.log ; 2 | grep -m 1 "SyntaxError: invalid syntax" process_output.log >> errors.log ; 3 | grep -m 1 "Fatal Python error: Segmentation fault" process_output.log >> errors.log ; 4 | grep -m 1 "Worker crashed during call to train()" process_output.log >> errors.log ; 5 | grep -m 1 "java.lang.ArrayIndexOutOfBoundsException" process_output.log >> errors.log ; 6 | grep -m 1 "RuntimeError: java.lang.NoSuchMethodError" process_output.log >> errors.log ; 7 | grep -m 1 "unzip: cannot find or open model.jar, model.jar.zip or model.jar.ZIP" process_output.log >> errors.log ; 8 | grep -m 1 "ray.memory_monitor.RayOutOfMemoryError" process_output.log >> errors.log ; 9 | grep -m 1 "FileNotFoundError: [Errno 2] No such file or directory: 'database/db.properties'" process_output.log >> errors.log ; 10 | grep -m 1 "killed training" process_output.log >> errors.log ; 11 | 
grep -m 1 "Job running for more than 24 hours, job is killed" process_output.log >> errors.log ; 12 | grep -m 1 "Job crashed more than 3 times, job is killed" process_output.log >> errors.log 13 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/NativeRLPresets.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import org.bytedeco.javacpp.*; 4 | import org.bytedeco.javacpp.annotation.*; 5 | import org.bytedeco.javacpp.tools.*; 6 | 7 | /** 8 | * This class contains the JavaCPP configuration that maps the API from nativerl.h to Java. 9 | */ 10 | @Properties( 11 | value = @Platform( 12 | compiler = "cpp11", 13 | define = "SHARED_PTR_NAMESPACE std", 14 | include = "nativerl.h" 15 | ), 16 | target = "ai.skymind.nativerl", 17 | global = "ai.skymind.nativerl.NativeRL" 18 | ) 19 | public class NativeRLPresets implements InfoMapper { 20 | public void map(InfoMap infoMap) { 21 | infoMap.put(new Info("NATIVERL_EXPORT").annotations().cppTypes()) 22 | .put(new Info("SSIZE_T").cast().valueTypes("long").pointerTypes("SizeTPointer")) 23 | .put(new Info("std::vector<float>").pointerTypes("FloatVector").define()) 24 | .put(new Info("std::vector<ssize_t>").pointerTypes("SSizeTVector").define()) 25 | .put(new Info("nativerl::Environment").virtualize()); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:7 2 | 3 | # Update centos and install basic dependencies 4 | RUN yum update -y 5 | RUN yum install centos-release-scl -y 6 | RUN yum install \ 7 | gcc-c++ \ 8 | java-1.8.0-openjdk-devel \ 9 | git \ 10 | wget \ 11 | devtoolset-7 \ 12 | bzip2 \ 13 | maven \ 14 | -y 15 | RUN scl enable devtoolset-7 bash 16 | 17 | # Install cmake3 18 | RUN wget
https://github.com/Kitware/CMake/releases/download/v3.21.3/cmake-3.21.3-linux-x86_64.sh \ 19 | && bash cmake-3.21.3-linux-x86_64.sh --prefix=/usr/local --exclude-subdir --skip-license 20 | 21 | # Install anaconda3 22 | RUN wget https://repo.anaconda.com/archive/Anaconda3-2019.03-Linux-x86_64.sh \ 23 | && bash Anaconda3-2019.03-Linux-x86_64.sh -b && \ 24 | echo "export PATH="/root/anaconda3/bin:$PATH"" >> ~/.bashrc && \ 25 | /bin/bash -c "source ~/.bashrc" 26 | ENV PATH /root/anaconda3/bin:$PATH 27 | 28 | # Install Python libraries 29 | RUN conda install pybind11 tensorflow \ 30 | && pip install -U pip \ 31 | && pip install ray[rllib]==1.3.0 32 | 33 | # Create working directory 34 | RUN mkdir -p app 35 | WORKDIR app 36 | 37 | # Set entry point 38 | CMD ["mvn", "clean", "install", "-Djavacpp.platform=linux-x86_64"] 39 | -------------------------------------------------------------------------------- /nativerl/python/.gitignore: -------------------------------------------------------------------------------- 1 | distribute* 2 | .noseids 3 | *.pyc 4 | *.swp 5 | *egg-info* 6 | build/ 7 | dist/ 8 | .cache 9 | venv 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | venv 21 | env/ 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *,cover 56 | 57 | # IDE 58 | .idea/ 59 | 60 | # MacOS 61 | .DS_Store 62 | 63 | # Generated documentation folders 64 | sources/ 65 | site/ 66 | 67 | .vscode 68 | 69 | # frozen models 70 | model/ 71 | 72 | 73 | # Algorithms 74 | PPO/ 75 | DQN/ 76 | MARWIL/ 77 | 78 | 79 | # JARs etc. 80 | *.jar 81 | *.so 82 | *.dylib 83 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/Space.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | 12 | /** Base class for the Continuous and Discrete classes. */ 13 | @Namespace("nativerl") @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 14 | public class Space extends Pointer { 15 | static { Loader.load(); } 16 | /** Default native constructor. */ 17 | public Space() { super((Pointer)null); allocate(); } 18 | /** Native array allocator. Access with {@link Pointer#position(long)}. */ 19 | public Space(long size) { super((Pointer)null); allocateArray(size); } 20 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ 21 | public Space(Pointer p) { super(p); } 22 | private native void allocate(); 23 | private native void allocateArray(long size); 24 | @Override public Space position(long position) { 25 | return (Space)super.position(position); 26 | } 27 | @Override public Space getPointer(long i) { 28 | return new Space(this).position(position + i); 29 | } 30 | 31 | public native Continuous asContinuous(); 32 | public native Discrete asDiscrete(); 33 | } 34 | -------------------------------------------------------------------------------- /nativerl-policy/src/test/java/ai/skymind/nativerl/ActionMaskProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertArrayEquals; 6 | import static org.junit.Assert.assertEquals; 7 | import static org.junit.Assert.fail; 8 | 9 | /** 10 | * 11 | * @author saudet 12 | */ 13 | public class ActionMaskProcessorTest { 14 | boolean data1 = true; 15 | boolean data2 = false; 16 | boolean data3[] = {false, true}; 17 | 18 | class TestActionMasks { 19 | boolean mask1 = data1; 20 | boolean mask2 = data2; 21 | boolean[] mask3 = data3; 22 | } 23 | 24 | void actionMasks(int agentId) { 25 | class DummyActionMasks extends TestActionMasks { 26 | boolean mask4 = agentId != 0; 27 | } 28 | } 29 | 30 | @Test public void testActionMasks() { 31 | try { 32 | ActionMaskProcessor ap = new ActionMaskProcessor(this.getClass()); 33 | assertEquals("DummyActionMasks", ap.getActionMaskClass().getSimpleName()); 34 | assertArrayEquals(new String[] {"mask1", "mask2", "mask3[0]", "mask3[1]", "mask4"}, ap.getActionMaskNames(this)); 35 | assertArrayEquals(new boolean[] {true, false, false, true, true}, ap.getActionMasks(this, 64)); 36 | } catch (ReflectiveOperationException ex) { 37 | fail(ex.getMessage()); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- 
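ActionMaskProcessor flattens the boolean mask fields above into a single boolean[] in declaration order (mask1, mask2, mask3[0], mask3[1], mask4). On the training side, such a flat mask is typically applied by pushing the logits of invalid actions to a large negative value before the softmax. The sketch below illustrates that idea in plain Python; the function name and values are illustrative, not part of the NativeRL API.

```python
import math


def apply_action_mask(logits, mask):
    """Zero out the probability of masked (False) actions.

    Invalid actions get a logit of -1e9, so the softmax assigns them
    effectively zero probability and the valid actions renormalize.
    """
    masked = [x if ok else -1e9 for x, ok in zip(logits, mask)]
    m = max(masked)  # subtract the max for numerical stability
    exps = [math.exp(x - m) for x in masked]
    total = sum(exps)
    return [e / total for e in exps]


# Mirrors the flattened mask from the test above: {true, false, false, true, true}
probs = apply_action_mask([0.3, 1.2, -0.5, 0.0, 0.7], [True, False, False, True, True])
```

With this shape, a policy can keep emitting a fixed-size logit vector while the environment decides per step which actions are legal.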
/nativerl-tests/src/test/java/ai/skymind/nativerl/PythonModelTest.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import java.io.File; 4 | import java.util.Arrays; 5 | import org.junit.Test; 6 | 7 | import static org.hamcrest.core.AnyOf.anyOf; 8 | import static org.hamcrest.core.Is.is; 9 | import static org.hamcrest.MatcherAssert.assertThat; 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertTrue; 12 | import static org.junit.Assert.fail; 13 | 14 | /** 15 | * 16 | * @author saudet 17 | */ 18 | public class PythonModelTest extends ModelTest { 19 | 20 | @Test public void testCartpole() throws Exception { 21 | File binDir = new File("target/dependency/nativerl-bin/"); 22 | File modelDir = folder.newFolder("Cartpole"); 23 | 24 | copy(binDir, modelDir); 25 | copy(new File(binDir, "examples/traincartpole.sh"), modelDir); 26 | execute(modelDir, "bash", "traincartpole.sh"); 27 | 28 | File[] savedModels = find(modelDir, "saved_model.pb"); 29 | assertTrue(savedModels.length > 0); 30 | for (File f : savedModels) { 31 | PolicyHelper h = PolicyHelper.load(f.getParentFile()); 32 | double[] o = h.computeActions(new double[] {0, 1, 2, 3}); 33 | System.out.println(Arrays.toString(o)); 34 | assertEquals(o.length, 1); 35 | assertThat(o[0], anyOf(is(0.0), is(1.0))); 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /nativerl-analyzer/generator/src/main/resources/templates/Training.java.hbs: -------------------------------------------------------------------------------- 1 | {{#if packageName}}package {{packageName}};{{/if}} 2 | import com.anylogic.engine.AgentConstants; 3 | import com.anylogic.engine.AnyLogicInternalCodegenAPI; 4 | import com.anylogic.engine.Engine; 5 | import com.anylogic.engine.ExperimentCustom; 6 | import com.anylogic.engine.Utilities; 7 | {{#if simulationClassName}}import 
{{simulationClassName}};{{/if}} 8 | 9 | public class Training extends ExperimentCustom { 10 | @AnyLogicInternalCodegenAPI 11 | public static String[] COMMAND_LINE_ARGUMENTS_xjal = new String[0]; 12 | 13 | public Training(Object parentExperiment) { 14 | super(parentExperiment); 15 | this.setCommandLineArguments_xjal(COMMAND_LINE_ARGUMENTS_xjal); 16 | } 17 | 18 | public void run() { 19 | } 20 | 21 | @AnyLogicInternalCodegenAPI 22 | public void setupEngine_xjal(Engine engine) { 23 | {{simulationClassName}} exp = new {{simulationClassName}}(); 24 | exp.setupEngine(engine); 25 | exp.initDefaultRandomNumberGenerator(engine); 26 | } 27 | 28 | @AnyLogicInternalCodegenAPI 29 | public static void main(String[] args) { 30 | COMMAND_LINE_ARGUMENTS_xjal = args; 31 | Utilities.prepareBeforeExperimentStart_xjal(Training.class); 32 | Training ex = new Training((Object)null); 33 | ex.setCommandLineArguments_xjal(args); 34 | ex.run(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /nativerl/python/tests/game2048/constants.py: -------------------------------------------------------------------------------- 1 | SIZE = 400 2 | GRID_LEN = 4 3 | GRID_PADDING = 10 4 | 5 | BACKGROUND_COLOR_GAME = "#92877d" 6 | BACKGROUND_COLOR_CELL_EMPTY = "#9e948a" 7 | 8 | BACKGROUND_COLOR_DICT = { 9 | 2: "#eee4da", 10 | 4: "#ede0c8", 11 | 8: "#f2b179", 12 | 16: "#f59563", 13 | 32: "#f67c5f", 14 | 64: "#f65e3b", 15 | 128: "#edcf72", 16 | 256: "#edcc61", 17 | 512: "#edc850", 18 | 1024: "#edc53f", 19 | 2048: "#edc22e", 20 | 4096: "#eee4da", 21 | 8192: "#edc22e", 22 | 16384: "#f2b179", 23 | 32768: "#f59563", 24 | 65536: "#f67c5f", 25 | } 26 | 27 | CELL_COLOR_DICT = { 28 | 2: "#776e65", 29 | 4: "#776e65", 30 | 8: "#f9f6f2", 31 | 16: "#f9f6f2", 32 | 32: "#f9f6f2", 33 | 64: "#f9f6f2", 34 | 128: "#f9f6f2", 35 | 256: "#f9f6f2", 36 | 512: "#f9f6f2", 37 | 1024: "#f9f6f2", 38 | 2048: "#f9f6f2", 39 | 4096: "#776e65", 40 | 8192: "#f9f6f2", 41 | 16384: "#776e65", 42 
| 32768: "#776e65", 43 | 65536: "#f9f6f2", 44 | } 45 | 46 | FONT = ("Verdana", 40, "bold") 47 | 48 | KEY_UP_ALT = "'\\uf700'" 49 | KEY_DOWN_ALT = "'\\uf701'" 50 | KEY_LEFT_ALT = "'\\uf702'" 51 | KEY_RIGHT_ALT = "'\\uf703'" 52 | 53 | KEY_UP = "'w'" 54 | KEY_DOWN = "'s'" 55 | KEY_LEFT = "'a'" 56 | KEY_RIGHT = "'d'" 57 | KEY_BACK = "'b'" 58 | 59 | KEY_J = "'j'" 60 | KEY_K = "'k'" 61 | KEY_L = "'l'" 62 | KEY_H = "'h'" 63 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/Discrete.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | 12 | /** 13 | * Describes a discrete space for action spaces. 14 | * Includes the number of actions supported by the Environment. 15 | */ 16 | @Namespace("nativerl") @NoOffset @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 17 | public class Discrete extends Space { 18 | static { Loader.load(); } 19 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ 20 | public Discrete(Pointer p) { super(p); } 21 | 22 | public native @Cast("ssize_t") long n(); public native Discrete n(long setter); 23 | public native @Cast("ssize_t") long size(); public native Discrete size(long setter); 24 | 25 | public Discrete(@Const @ByRef Discrete d) { super((Pointer)null); allocate(d); } 26 | private native void allocate(@Const @ByRef Discrete d); 27 | public Discrete(@Cast("ssize_t") long n, @Cast("ssize_t") long size/*=1*/) { super((Pointer)null); allocate(n, size); } 28 | private native void allocate(@Cast("ssize_t") long n, @Cast("ssize_t") long size/*=1*/); 29 | public Discrete(@Cast("ssize_t") long n) { super((Pointer)null); allocate(n); } 30 | private native void allocate(@Cast("ssize_t") long n); 31 | } 32 | -------------------------------------------------------------------------------- /nativerl-analyzer/Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.0-experimental 2 | 3 | FROM azul/zulu-openjdk:11.0.3 4 | 5 | #Define ENV 6 | ARG S3BUCKET 7 | ARG AWS_ACCESS_KEY_ID 8 | ARG AWS_SECRET_ACCESS_KEY 9 | ARG NATIVERL_FOLDER 10 | 11 | # Install all required tools and dependencies 12 | RUN apt-get update && apt-get install -y \ 13 | unzip \ 14 | curl \ 15 | maven \ 16 | python-pip \ 17 | && pip install awscli \ 18 | && rm -rf /var/lib/apt/lists/* 19 | 20 | ARG DOWNLOAD_LIB_SCRIPT=scripts/download_lib.sh 21 | COPY ${DOWNLOAD_LIB_SCRIPT} /bin/ 22 | 23 | WORKDIR /lib/pathmind/conda 24 | 25 | RUN bash /bin/download_lib.sh ${S3BUCKET} "conda/1_3_0" rllibpack.tar.gz rllibpack.tar.gz 26 | 27 | WORKDIR /lib/pathmind 28 | 29 | RUN bash /bin/download_lib.sh ${S3BUCKET} "pathmindhelper/1_7_0" PathmindPolicy.jar PathmindPolicy-1.7.0-SNAPSHOT.jar \ 30 | && bash /bin/download_lib.sh ${S3BUCKET} ${NATIVERL_FOLDER} nativerl-1.8.1-SNAPSHOT-bin.zip nativerl-bin.zip \ 31 | && bash /bin/download_lib.sh ${S3BUCKET} "anylogic/8_7_7" baseEnv.zip baseEnv-8.7.7.zip 32 | 33 | 
WORKDIR / 34 | 35 | #Build pathmind-model-analyzer.jar 36 | COPY . . 37 | 38 | RUN mvn clean install \ 39 | && cp common/target/ma-common.jar ./ \ 40 | && cp api/target/pathmind-ma-api-spring-boot.jar ./ \ 41 | && cp generator/target/pathmind-ma-code-generator-jar-with-dependencies.jar ./ 42 | 43 | ARG CHECK_MODEL_SCRIPT=scripts/check_model.sh 44 | COPY ${CHECK_MODEL_SCRIPT} bin 45 | 46 | EXPOSE 8080 47 | ENTRYPOINT ["java","-jar","/pathmind-ma-api-spring-boot.jar"] 48 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/Continuous.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | 12 | /** 13 | * Describes a continuous space for both state and action spaces. Includes low and high 14 | * values for all elements, as well as the shape of the Array required by the Environment. 15 | */ 16 | @Namespace("nativerl") @NoOffset @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 17 | public class Continuous extends Space { 18 | static { Loader.load(); } 19 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ 20 | public Continuous(Pointer p) { super(p); } 21 | 22 | public native @ByRef FloatVector low(); public native Continuous low(FloatVector setter); 23 | public native @ByRef FloatVector high(); public native Continuous high(FloatVector setter); 24 | public native @ByRef SSizeTVector shape(); public native Continuous shape(SSizeTVector setter); 25 | 26 | public Continuous(@Const @ByRef FloatVector low, 27 | @Const @ByRef FloatVector high, 28 | @Const @ByRef SSizeTVector shape) { super((Pointer)null); allocate(low, high, shape); } 29 | private native void allocate(@Const @ByRef FloatVector low, 30 | @Const @ByRef FloatVector high, 31 | @Const @ByRef SSizeTVector shape); 32 | } 33 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/LearningAgentHelper.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import com.github.jknack.handlebars.Handlebars; 4 | import com.github.jknack.handlebars.Template; 5 | import com.github.jknack.handlebars.helper.ConditionalHelpers; 6 | import com.github.jknack.handlebars.io.ClassPathTemplateLoader; 7 | import com.github.jknack.handlebars.io.TemplateLoader; 8 | 9 | import java.io.File; 10 | import java.io.IOException; 11 | import java.nio.file.Files; 12 | 13 | public class LearningAgentHelper { 14 | public void generateLearningAgent(File file) throws IOException { 15 | File directory = file.getParentFile(); 16 | if (directory != null) { 17 | directory.mkdirs(); 18 | } 19 | Files.write(file.toPath(), generateLearningAgent(file.getName()).getBytes()); 20 | 21 | } 22 | public String generateLearningAgent(String fileName) throws IOException { 23 | TemplateLoader loader = new ClassPathTemplateLoader("/ai/skymind/nativerl", ".hbs"); 24 | Handlebars handlebars = new Handlebars(loader); 25 | 26 | handlebars.registerHelpers(ConditionalHelpers.class); 27 | Template template = 
handlebars.compile(fileName); 28 | 29 | String learningAgent = template.apply(this); 30 | return learningAgent; 31 | } 32 | public static void main(String[] args) throws IOException { 33 | LearningAgentHelper learningAgentHelper = new LearningAgentHelper(); 34 | learningAgentHelper.generateLearningAgent(new File("com/pathmind/anylogic/PathmindLearningAgent.java")); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /nativerl-analyzer/pom.xml: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>io.skymind.pathmind</groupId>
    <artifactId>model-analyzer-parent</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>pathmind-model-analyzer-pom</name>
    <packaging>pom</packaging>

    <properties>
        <java.version>11</java.version>
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
    </properties>

    <modules>
        <module>common</module>
        <module>api</module>
        <module>generator</module>
    </modules>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>io.skymind.pathmind</groupId>
                <artifactId>model-analyzer-common</artifactId>
                <version>${project.version}</version>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <optional>true</optional>
            <scope>provided</scope>
        </dependency>
    </dependencies>
</project>
-------------------------------------------------------------------------------- /nativerl/src/main/assembly/bin.xml: --------------------------------------------------------------------------------
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
    <id>bin</id>
    <formats>
        <format>zip</format>
    </formats>
    <baseDirectory>${project.artifactId}-bin</baseDirectory>
    <fileSets>
        <fileSet>
            <directory>${project.basedir}</directory>
            <outputDirectory>/</outputDirectory>
            <includes>
                <include>examples/*</include>
                <include>python/*</include>
                <include>python/pathmind_training/*</include>
                <include>python/pathmind/*</include>
                <include>CHANGELOG*</include>
                <include>README*</include>
                <include>LICENSE*</include>
                <include>NOTICE*</include>
            </includes>
        </fileSet>
        <fileSet>
            <directory>${project.build.directory}</directory>
            <outputDirectory>/</outputDirectory>
            <includes>
                <include>*.jar</include>
                <include>*.dll</include>
                <include>*.dylib</include>
                <include>*.pyd</include>
                <include>*.so</include>
                <include>git.properties</include>
            </includes>
            <excludes>
                <exclude>*-javadoc.jar</exclude>
                <exclude>*-sources.jar</exclude>
            </excludes>
        </fileSet>
        <fileSet>
            <directory>${project.build.directory}/site</directory>
            <outputDirectory>docs</outputDirectory>
        </fileSet>
    </fileSets>
</assembly>
-------------------------------------------------------------------------------- /nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/config/swagger/SwaggerConfig.java:
-------------------------------------------------------------------------------- 1 | package io.skymind.pathmind.analyzer.config.swagger; 2 | 3 | import lombok.AllArgsConstructor; 4 | import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; 5 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 6 | import org.springframework.context.annotation.Bean; 7 | import org.springframework.context.annotation.Configuration; 8 | import springfox.documentation.builders.ApiInfoBuilder; 9 | import springfox.documentation.builders.RequestHandlerSelectors; 10 | import springfox.documentation.service.ApiInfo; 11 | import springfox.documentation.spi.DocumentationType; 12 | import springfox.documentation.spring.web.plugins.Docket; 13 | import springfox.documentation.swagger2.annotations.EnableSwagger2; 14 | 15 | 16 | @Configuration 17 | @EnableSwagger2 18 | @AllArgsConstructor 19 | @EnableConfigurationProperties(SwaggerProperties.class) 20 | @ConditionalOnProperty(name = "io.skymind.pathmind.model-analyzer.swagger.title") 21 | public class SwaggerConfig { 22 | 23 | private final SwaggerProperties swaggerProperties; 24 | 25 | @Bean 26 | public Docket docket() { 27 | return new Docket(DocumentationType.SWAGGER_2) 28 | .useDefaultResponseMessages(false) 29 | .select() 30 | .apis(RequestHandlerSelectors.basePackage(swaggerProperties.getBasePackage())) 31 | .build() 32 | .apiInfo(apiInfo()); 33 | } 34 | 35 | private ApiInfo apiInfo() { 36 | return new ApiInfoBuilder() 37 | .title(swaggerProperties.getTitle()) 38 | .description(swaggerProperties.getDescription()) 39 | .build(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /nativerl/python/tests/mouse/mouse_env.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from gym import Env, spaces 4 | 5 | 6 | class MouseAndCheese(Env): 7 | 8 | mouse = (0, 0) 9 | cheese = (4, 4) 10 | 
number_of_actions = 4 11 | number_of_observations = 4 12 | steps = 0 13 | 14 | def __init__(self): 15 | self.action_space = spaces.Discrete(self.number_of_actions) 16 | self.observation_space = spaces.Box(0, 1, (self.number_of_observations,)) 17 | 18 | def reset(self): 19 | self.mouse = (0, 0) 20 | self.cheese = (4, 4) 21 | self.steps = 0 22 | 23 | return self.get_observation() 24 | 25 | def step(self, action): 26 | self.steps += 1 27 | 28 | if action == 0: # move up 29 | self.mouse = (min(self.mouse[0] + 1, 5), self.mouse[1]) 30 | elif action == 1: # move right 31 | self.mouse = (self.mouse[0], min(self.mouse[1] + 1, 5)) 32 | elif action == 2: # move down 33 | self.mouse = (max(self.mouse[0] - 1, 0), self.mouse[1]) 34 | elif action == 3: # move left 35 | self.mouse = (self.mouse[0], max(self.mouse[1] - 1, 0)) 36 | else: 37 | raise ValueError("Invalid action") 38 | 39 | print(self.mouse) 40 | 41 | return self.get_observation(), self.get_reward(), self.is_done(), {} 42 | 43 | def get_observation(self) -> typing.List[float]: 44 | return [ 45 | float(self.mouse[0]) / 5.0, 46 | float(self.mouse[1]) / 5.0, 47 | abs(self.cheese[0] - self.mouse[0]) / 5.0, 48 | abs(self.cheese[1] - self.mouse[1]) / 5.0, 49 | ] 50 | 51 | def get_reward(self) -> float: 52 | return 1 if self.mouse == self.cheese else 0 53 | 54 | def is_done(self) -> bool: 55 | return self.mouse == self.cheese 56 | -------------------------------------------------------------------------------- /nativerl-policy/src/main/java/ai/skymind/nativerl/AnnotationProcessor.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import ai.skymind.nativerl.annotation.Continuous; 4 | import ai.skymind.nativerl.annotation.Discrete; 5 | import java.lang.annotation.Annotation; 6 | 7 | /** 8 | * Reads values off {@link Discrete} and {@link Continuous} annotations. 9 | * Values that do not apply are either false, -1 or null, depending on their types. 
10 | * 11 | * @author saudet 12 | */ 13 | public class AnnotationProcessor { 14 | public final Annotation annotation; 15 | public final boolean discrete, continuous; 16 | public final long n, size; 17 | public final double[] low, high; 18 | public final long[] shape; 19 | 20 | public AnnotationProcessor(Annotation annotation) throws ReflectiveOperationException { 21 | Class type = annotation.annotationType(); 22 | String name = type.getSimpleName(); 23 | this.annotation = annotation; 24 | this.discrete = name.equals("Discrete"); 25 | this.continuous = name.equals("Continuous"); 26 | 27 | if (discrete) { 28 | this.n = (Long)type.getMethod("n").invoke(annotation); 29 | this.size = (Long)type.getMethod("size").invoke(annotation); 30 | this.low = this.high = null; 31 | this.shape = null; 32 | } else if (continuous) { 33 | this.n = this.size = -1; 34 | this.low = (double[])type.getMethod("low").invoke(annotation); 35 | this.high = (double[])type.getMethod("high").invoke(annotation); 36 | this.shape = (long[])type.getMethod("shape").invoke(annotation); 37 | } else { 38 | this.n = this.size = -1; 39 | this.low = this.high = null; 40 | this.shape = null; 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/NativeRL.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | public class NativeRL extends ai.skymind.nativerl.NativeRLPresets { 10 | static { Loader.load(); } 11 | 12 | // Targeting FloatVector.java 13 | 14 | 15 | // Targeting SSizeTVector.java 16 | 17 | 18 | // Parsed from nativerl.h 19 | 20 | // #ifndef NATIVERL_H 21 | // #define NATIVERL_H 22 | 23 | // #ifdef _WIN32 24 | // #define NATIVERL_EXPORT __declspec(dllexport) 25 | 
// #include 26 | // #else 27 | // #define NATIVERL_EXPORT __attribute__((visibility("default"))) 28 | // #endif 29 | 30 | // #include 31 | 32 | /** 33 | * This is the main C++ interface implemented, for example, in Java via JavaCPP, 34 | * and used in Python by, for example, RLlib via pybind11. 35 | */ 36 | // Targeting Array.java 37 | 38 | 39 | // Targeting Space.java 40 | 41 | 42 | // Targeting Continuous.java 43 | 44 | 45 | // Targeting Discrete.java 46 | 47 | 48 | 49 | /** Helper method to cast dynamically a Space object into Continuous. */ 50 | 51 | /** Helper method to cast dynamically a Space object into Discrete. */ 52 | 53 | // Targeting Environment.java 54 | 55 | 56 | 57 | // #ifdef _WIN32 58 | // Windows does not support undefined symbols in DLLs, disallowing circular dependencies, 59 | // so we cannot call createEnvironment() defined in nativerl.cpp from Java... 60 | @Namespace("nativerl") public static native @SharedPtr Environment createEnvironment(@Cast("const char*") BytePointer name); 61 | @Namespace("nativerl") public static native @SharedPtr Environment createEnvironment(String name); 62 | // #else 63 | // #endif 64 | 65 | 66 | 67 | // #endif // NATIVERL_H 68 | 69 | 70 | } 71 | -------------------------------------------------------------------------------- /nativerl/python/tests/mouse/mouse_env_pathmind.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from pathmind.simulation import Continuous, Discrete, Simulation 4 | 5 | 6 | class MouseAndCheese(Simulation): 7 | 8 | mouse = (0, 0) 9 | cheese = (4, 4) 10 | steps = 0 11 | 12 | def number_of_agents(self) -> int: 13 | return 1 14 | 15 | def action_space(self, agent_id) -> typing.Union[Continuous, Discrete]: 16 | return Discrete(4) 17 | 18 | def reset(self) -> None: 19 | self.mouse = (0, 0) 20 | self.cheese = (4, 4) 21 | self.steps = 0 22 | 23 | def step(self) -> None: 24 | self.steps += 1 25 | 26 | action = self.action[0] 27 | 28 | if 
action == 0: # move up 29 | self.mouse = (min(self.mouse[0] + 1, 5), self.mouse[1]) 30 | elif action == 1: # move right 31 | self.mouse = (self.mouse[0], min(self.mouse[1] + 1, 5)) 32 | elif action == 2: # move down 33 | self.mouse = (max(self.mouse[0] - 1, 0), self.mouse[1]) 34 | elif action == 3: # move left 35 | self.mouse = (self.mouse[0], max(self.mouse[1] - 1, 0)) 36 | else: 37 | raise ValueError("Invalid action") 38 | 39 | def get_observation(self, agent_id) -> typing.Dict[str, float]: 40 | return { 41 | "mouse_row": float(self.mouse[0]) / 5.0, 42 | "mouse_col": float(self.mouse[1]) / 5.0, 43 | "distance_to_cheese_row": abs(self.cheese[0] - self.mouse[0]) / 5.0, 44 | "distance_to_cheese_col": abs(self.cheese[1] - self.mouse[1]) / 5.0, 45 | "cheese_row": float(self.cheese[0]) / 5.0, 46 | "cheese_col": float(self.cheese[1]) / 5.0, 47 | } 48 | 49 | def get_reward(self, agent_id) -> typing.Dict[str, float]: 50 | return {"found_cheese": 1 if self.mouse == self.cheese else 0} 51 | 52 | def is_done(self, agent_id) -> bool: 53 | return self.mouse == self.cheese 54 | -------------------------------------------------------------------------------- /nativerl-analyzer/scripts/check_model.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | workDir=$1 3 | mainAgent=$2 4 | experimentClass=$3 5 | EXPERIMENT_TYPE=$4 6 | PATHMIND_HELPER_CLASS=$5 7 | libDir="/lib" 8 | 9 | if [[ -z "$mainAgent" ]]; then 10 | echo "main agent missing" 11 | mainAgent="Main" 12 | fi 13 | if [[ -z "$experimentClass" ]]; then 14 | experimentClass="Simulation" 15 | fi 16 | if [[ -z "$EXPERIMENT_TYPE" ]]; then 17 | EXPERIMENT_TYPE="Simulation" 18 | fi 19 | if [[ -z "$PATHMIND_HELPER_CLASS" ]]; then 20 | PATHMIND_HELPER_CLASS="pathmindHelper" 21 | fi 22 | 23 | cd ${workDir} 24 | 25 | export CLASSPATH=$(find ${libDir}/pathmind -iname '*.jar' -print0 | sort -z | xargs --null -i printf "{}:") 26 | export CLASSPATH=$(find $PWD/lib -iname '*.jar' 
-print0 | sort -z | xargs --null -i printf "{}:"):${CLASSPATH} 27 | export CLASSPATH=$PWD:$PWD/model.jar:${CLASSPATH} 28 | export CLASSPATH=/ma-common.jar:${CLASSPATH} 29 | 30 | export MODEL_PACKAGE=$(for m in $(ls model.jar lib/model*.jar 2> /dev/null) ; do unzip -l $m | grep /${mainAgent}.class; done | awk '{print $4}' | xargs dirname) 31 | export MODEL_PACKAGE_NAME=$(echo ${MODEL_PACKAGE} | sed 's/\//\./g') 32 | export AGENT_CLASS="$MODEL_PACKAGE_NAME.${mainAgent}" 33 | export SIMULATION_PACKAGE=$(for m in $(ls model.jar lib/model*.jar 2> /dev/null) ; do unzip -l $m | grep /${experimentClass}.class | grep -v pathmind/policyhelper; done | awk '{print $4}' | xargs dirname) 34 | export SIMULATION_PACKAGE_NAME=$(echo $SIMULATION_PACKAGE | sed 's/\//\./g') 35 | export SIMULATION_CLASS="$SIMULATION_PACKAGE_NAME.${experimentClass}" 36 | 37 | java -jar /pathmind-ma-code-generator-jar-with-dependencies.jar \ 38 | --agent-class-name "$AGENT_CLASS" \ 39 | --simulation-class-name "$SIMULATION_CLASS" \ 40 | --package-name "$MODEL_PACKAGE_NAME" \ 41 | --pathmind-helper-class-name "$PATHMIND_HELPER_CLASS" \ 42 | --experiment-type "$EXPERIMENT_TYPE" 43 | 44 | javac $(find -iname '*.java') 45 | 46 | java ${MODEL_PACKAGE_NAME}.ModelAnalyzer 47 | -------------------------------------------------------------------------------- /nativerl/python/tests/mouse/two_reward.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from pathmind.simulation import Continuous, Discrete, Simulation 4 | 5 | 6 | class TwoRewardMouseAndCheese(Simulation): 7 | 8 | mouse = (0, 0) 9 | cheese = (4, 4) 10 | steps = 0 11 | reward_weights = [1.0, 0.5] 12 | auto_norm_reward = True 13 | 14 | def number_of_agents(self) -> int: 15 | return 1 16 | 17 | def action_space(self, agent_id) -> typing.Union[Continuous, Discrete]: 18 | return Discrete(4) 19 | 20 | def reset(self) -> None: 21 | self.mouse = (0, 0) 22 | self.cheese = (4, 4) 23 | self.steps = 0 24 | 
25 | def step(self) -> None: 26 | self.steps += 1 27 | 28 | action = self.action[0] 29 | 30 | if action == 0: # move up 31 | self.mouse = (min(self.mouse[0] + 1, 5), self.mouse[1]) 32 | elif action == 1: # move right 33 | self.mouse = (self.mouse[0], min(self.mouse[1] + 1, 5)) 34 | elif action == 2: # move down 35 | self.mouse = (max(self.mouse[0] - 1, 0), self.mouse[1]) 36 | elif action == 3: # move left 37 | self.mouse = (self.mouse[0], max(self.mouse[1] - 1, 0)) 38 | else: 39 | raise ValueError("Invalid action") 40 | 41 | def get_observation(self, agent_id) -> typing.Dict[str, float]: 42 | return { 43 | "mouse_row": float(self.mouse[0]) / 5.0, 44 | "mouse_col": float(self.mouse[1]) / 5.0, 45 | "distance_to_cheese_row": abs(self.cheese[0] - self.mouse[0]) / 5.0, 46 | "distance_to_cheese_col": abs(self.cheese[1] - self.mouse[1]) / 5.0, 47 | "cheese_row": float(self.cheese[0]) / 5.0, 48 | "cheese_col": float(self.cheese[1]) / 5.0, 49 | } 50 | 51 | def get_reward(self, agent_id) -> typing.Dict[str, float]: 52 | return {"found_cheese": 1 if self.mouse == self.cheese else 0, "took_step": -1} 53 | 54 | def is_done(self, agent_id) -> bool: 55 | return self.mouse == self.cheese 56 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [prod, staging, test, dev] 9 | pull_request: 10 | branches: [prod, staging, test, dev] 11 | types: [opened, ready_for_review, synchronize, reopened] 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | if: github.event.pull_request.draft == false 17 | strategy: 18 | matrix: 19 | python-version: 
[3.7] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install test dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install flake8 flake8-debugger 31 | - name: Lint with flake8 32 | run: | 33 | # stop the build if there are Python syntax errors or undefined names 34 | flake8 . --count --select=E9,F63,F7,F82,T100 --show-source --statistics 35 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 36 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 37 | - name: Run pre-commit checks 38 | run: | 39 | pip install pre-commit 40 | pre-commit run --all-files 41 | - name: Install dependencies 42 | run: | 43 | pip install -r nativerl/python/requirements-dev.txt 44 | - name: Test with pytest - Unit tests 45 | run: | 46 | cd nativerl/python 47 | USE_PY_NATIVERL=True pytest -m "not integration" 48 | - name: Test with pytest - Integration tests 49 | run: | 50 | cd nativerl/python 51 | USE_PY_NATIVERL=True pytest -m "integration" 52 | -------------------------------------------------------------------------------- /nativerl/python/pathmind_training/scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ray.tune.schedulers import PopulationBasedTraining 3 | 4 | 5 | def get_scheduler(scheduler_name, train_batch_size=None): 6 | if scheduler_name == "PBT": 7 | return PopulationBasedTraining( 8 | time_attr="training_iteration", 9 | metric="episode_reward_mean", 10 | mode="max", 11 | perturbation_interval=20, 12 | quantile_fraction=0.25, 13 | resample_probability=0.25, 14 | log_config=True, 15 | hyperparam_mutations={ 16 | "lambda": np.linspace(0.9, 1.0, 5).tolist(), 17 | "clip_param": np.linspace(0.01, 0.5, 5).tolist(), 18 | "entropy_coeff": np.linspace(0, 0.03, 
5).tolist(), 19 | "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], 20 | "num_sgd_iter": [5, 10, 15, 20, 30], 21 | "sgd_minibatch_size": [128, 256, 512, 1024, 2048], 22 | "train_batch_size": [train_batch_size] 23 | if train_batch_size 24 | else [4000, 6000, 8000, 10000, 12000], 25 | }, 26 | ) 27 | elif scheduler_name == "PB2": 28 | from ray.tune.schedulers.pb2 import PB2 29 | 30 | return PB2( 31 | time_attr="training_iteration", 32 | metric="episode_reward_mean", 33 | mode="max", 34 | perturbation_interval=20, 35 | quantile_fraction=0.25, 36 | log_config=True, 37 | hyperparam_bounds={ 38 | "lambda": [0.9, 1.0], 39 | "clip_param": [0.01, 0.5], 40 | "entropy_coeff": [0, 5], 41 | "lr": [1e-3, 1e-5], 42 | "num_sgd_iter": [5, 30], 43 | "sgd_minibatch_size": [128, 2048], 44 | "train_batch_size": [train_batch_size] 45 | if train_batch_size 46 | else [4000, 12000], 47 | }, 48 | ) 49 | else: 50 | raise ValueError(f"{scheduler_name} not supported") 51 | -------------------------------------------------------------------------------- /nativerl-policy/src/test/java/ai/skymind/nativerl/RewardProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertArrayEquals; 6 | import static org.junit.Assert.assertEquals; 7 | import static org.junit.Assert.fail; 8 | 9 | /** 10 | * 11 | * @author saudet 12 | */ 13 | public class RewardProcessorTest { 14 | static class TestFunction implements RewardFunction { 15 | public double reward(TestVariables before, TestVariables after) { 16 | return before.var1 + before.var2 + after.var3 + after.var4[4]; 17 | } 18 | } 19 | 20 | int data1 = 37; 21 | long data2 = 42; 22 | float data3 = 64; 23 | double[] data4 = {1, 2, 3, 4, 5}; 24 | boolean[] data5 = {true, false}; 25 | 26 | class TestVariables { 27 | int var1 = data1; 28 | long var2 = data2; 29 | float var3 = data3; 30 | double[] var4 = data4; 31 | boolean[] var5 = 
data5; 32 | } 33 | 34 | void rewardVariables(int agentId) { 35 | class DummyVariables extends TestVariables { 36 | float var6 = agentId; 37 | } 38 | } 39 | 40 | @Test public void testObservations() { 41 | try { 42 | RewardProcessor rp = new RewardProcessor(this.getClass()); 43 | assertEquals("DummyVariables", rp.getRewardClass().getSimpleName()); 44 | assertArrayEquals(new String[] {"var1", "var2", "var3", "var4[0]", "var4[1]", "var4[2]", "var4[3]", "var4[4]", "var5[0]", "var5[1]", "var6"}, rp.getVariableNames(this)); 45 | assertArrayEquals(new String[] {"int", "long", "float", "double", "double", "double", "double", "double", "boolean", "boolean", "float"}, rp.getVariableTypes(this)); 46 | assertArrayEquals(new double[] {37, 42, 64, 1, 2, 3, 4, 5, 1, 0, 24}, rp.getVariables(this, 24), 0.0); 47 | TestVariables v = rp.getRewardObject(this, 24); 48 | assertEquals(37 + 42 + 64 + 5, new TestFunction().reward(v, v), 0.0); 49 | } catch (ReflectiveOperationException ex) { 50 | fail(ex.getMessage()); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /nativerl/python/tests/factory/util/masking.py: -------------------------------------------------------------------------------- 1 | from gym.spaces import Box 2 | from ray.rllib.agents.dqn.distributional_q_tf_model import DistributionalQTFModel 3 | from ray.rllib.models.tf.fcnet_v2 import FullyConnectedNetwork 4 | from ray.rllib.utils.framework import try_import_tf 5 | 6 | from ..config import MASK_KEY, OBS_KEY, SIMULATION_CONFIG 7 | from ..features import get_observations 8 | from ..util.samples import factory_from_config 9 | 10 | tf = try_import_tf() 11 | 12 | MASKING_MODEL_NAME = "action_masking_tf_model" 13 | low = SIMULATION_CONFIG.get("low") 14 | high = SIMULATION_CONFIG.get("high") 15 | 16 | 17 | def get_num_obs(): 18 | factory = factory_from_config(SIMULATION_CONFIG) 19 | dummy_obs = get_observations(0, factory) 20 | del factory 21 | return len(dummy_obs) 22 | 23 
| 24 | num_obs = get_num_obs() 25 | num_actions = SIMULATION_CONFIG.get("actions") 26 | fcnet_hiddens = SIMULATION_CONFIG.get("fcnet_hiddens") 27 | 28 | 29 | class ActionMaskingTFModel(DistributionalQTFModel): 30 | """Custom TF Model that masks out illegal moves. Works for any 31 | RLlib algorithm (tested only on PPO and DQN so far, though). 32 | """ 33 | 34 | def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kw): 35 | super().__init__(obs_space, action_space, num_outputs, model_config, name, **kw) 36 | 37 | model_config["fcnet_hiddens"] = fcnet_hiddens 38 | 39 | self.base_model = FullyConnectedNetwork( 40 | Box(low, high, shape=(num_obs,)), 41 | action_space, 42 | num_actions, 43 | model_config, 44 | name, 45 | ) 46 | 47 | self.register_variables(self.base_model.variables()) 48 | 49 | def forward(self, input_dict, state, seq_lens): 50 | logits, _ = self.base_model({"obs": input_dict["obs"][OBS_KEY]}) 51 | action_mask = input_dict["obs"][MASK_KEY] 52 | inf_mask = tf.maximum(tf.log(action_mask), tf.float32.min) 53 | return logits + inf_mask, state 54 | 55 | def value_function(self): 56 | return self.base_model.value_function() 57 | 58 | def import_from_h5(self, h5_file): 59 | pass 60 | -------------------------------------------------------------------------------- /nativerl-analyzer/generator/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4 | <modelVersion>4.0.0</modelVersion> 5 | 6 | <parent> 7 | <groupId>io.skymind.pathmind</groupId> 8 | <artifactId>model-analyzer-parent</artifactId> 9 | <version>0.0.1-SNAPSHOT</version> 10 | </parent> 11 | 12 | <artifactId>model-analyzer-code-generator</artifactId> 13 | 14 | <dependencies> 15 | <dependency> 16 | <groupId>io.skymind.pathmind</groupId> 17 | <artifactId>model-analyzer-common</artifactId> 18 | </dependency> 19 | <dependency> 20 | <groupId>com.github.jknack</groupId> 21 | <artifactId>handlebars</artifactId> 22 | <version>4.2.0</version> 23 | </dependency> 24 | </dependencies> 25 | 26 | <build> 27 | <finalName>pathmind-ma-code-generator</finalName> 28 | <plugins> 29 | <plugin> 30 | <groupId>org.apache.maven.plugins</groupId> 31 | <artifactId>maven-assembly-plugin</artifactId> 32 | <executions> 33 | <execution> 34 | <phase>package</phase> 35 | <goals> 36 | <goal>single</goal> 37 | </goals> 38 | <configuration> 39 | <archive> 40 | <manifest> 41 | <mainClass>io.skymind.pathmind.analyzer.code.CodeGenerator</mainClass> 42 | </manifest> 43 | </archive> 44 | <descriptorRefs> 45 | <descriptorRef>jar-with-dependencies</descriptorRef> 46 | </descriptorRefs> 47 | </configuration> 48 | </execution> 49 | </executions> 50 | </plugin> 51 | </plugins> 52 | </build> 53 | </project> 54 | -------------------------------------------------------------------------------- /nativerl-tests/pom.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4 | <modelVersion>4.0.0</modelVersion> 5 | 6 | <parent> 7 | <groupId>ai.skymind</groupId> 8 | <artifactId>nativerl-parent</artifactId> 9 | <version>1.8.1-SNAPSHOT</version> 10 | </parent> 11 | 12 | <groupId>ai.skymind</groupId> 13 | <artifactId>nativerl-tests</artifactId> 14 | 15 | <name>NativeRL Tests</name> 16 | 17 | <dependencies> 18 | <dependency> 19 | <groupId>ai.skymind</groupId> 20 | <artifactId>nativerl</artifactId> 21 | <version>${project.version}</version> 22 | <classifier>bin</classifier> 23 | <type>zip</type> 24 | </dependency> 25 | <dependency> 26 | <groupId>ai.skymind</groupId> 27 | <artifactId>nativerl-policy</artifactId> 28 | <version>${project.version}</version> 29 | </dependency> 30 | <dependency> 31 | <groupId>junit</groupId> 32 | <artifactId>junit</artifactId> 33 | <version>4.13.1</version> 34 | <optional>true</optional> 35 | </dependency> 36 | </dependencies> 37 | 38 | <build> 39 | <plugins> 40 | <plugin> 41 | <artifactId>maven-dependency-plugin</artifactId> 42 | <executions> 43 | <execution> 44 | <id>unpack</id> 45 | <goals> 46 | <goal>unpack-dependencies</goal> 47 | </goals> 48 | <configuration> 49 | <includeGroupIds>ai.skymind</includeGroupIds> 50 | <includeArtifactIds>nativerl</includeArtifactIds> 51 | <includeClassifiers>bin</includeClassifiers> 52 | </configuration> 53 | </execution> 54 | </executions> 55 | </plugin> 56 | </plugins> 57 | </build> 58 | </project> 59 | -------------------------------------------------------------------------------- /nativerl-tests/src/test/java/ai/skymind/nativerl/ModelTest.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import java.io.File; 4 | import java.io.FilenameFilter; 5 | import java.nio.file.Files; 6 | import java.nio.file.StandardCopyOption; 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | import java.util.Map; 10 | import org.junit.Rule; 11 | import org.junit.rules.TemporaryFolder; 12 | 13 | import static org.junit.Assert.assertEquals; 14 | 15 | /** 16 | * 17 | * @author saudet 18 | */ 19 | public class ModelTest { 20 | @Rule public TemporaryFolder folder = new TemporaryFolder(); 21 | 22 | public static void copy(File src, File dst) throws Exception { 23 | if (src.isDirectory()) { 24 | for (File f : src.listFiles()) { 25 | Files.copy(f.toPath(), dst.toPath().resolve(f.getName()), StandardCopyOption.REPLACE_EXISTING); 26 | } 27 | } else { 28 | Files.copy(src.toPath(), dst.toPath().resolve(src.getName()), StandardCopyOption.REPLACE_EXISTING); 29 | } 30 | } 31 | 32 | public static void execute(File directory, String...
command) throws Exception { 33 | execute(directory, null, command); 34 | } 35 | public static void execute(File directory, Map<String, String> environment, String... command) throws Exception { 36 | ProcessBuilder pb = new ProcessBuilder(command).directory(directory).inheritIO(); 37 | if (environment != null) { 38 | pb.environment().putAll(environment); 39 | } 40 | assertEquals(0, pb.start().waitFor()); 41 | } 42 | 43 | public static File[] find(File root, String filename) { 44 | ArrayList<File> files = new ArrayList<File>(); 45 | FilenameFilter filter = (File dir, String name) -> { 46 | File f = new File(dir, name); 47 | if (name.equals(filename)) { 48 | files.add(f); 49 | } 50 | return f.isDirectory(); 51 | }; 52 | 53 | ArrayList<File> dirs = new ArrayList<File>(Arrays.asList(root.listFiles(filter))); 54 | while (!dirs.isEmpty()) { 55 | File d = dirs.remove(dirs.size() - 1); 56 | dirs.addAll(Arrays.asList(d.listFiles(filter))); 57 | } 58 | 59 | return files.toArray(new File[files.size()]); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /nativerl/python/tests/game2048/base_simple.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | import typing 4 | 5 | from . import constants as c 6 | from .
import logic 7 | 8 | 9 | class Game2048: 10 | 11 | action: float = None # Dynamically generated for each state by Pathmind 12 | 13 | number_of_actions: int = 4 # Needs to be provided 14 | number_of_observations: int = 176 # Needs to be provided 15 | number_of_metrics: int = 2 16 | 17 | score = 0 18 | rew = 0 19 | steps = 0 20 | 21 | def __init__(self, random_movements=False, human=False): 22 | self.random = random_movements 23 | self.human = human 24 | self.matrix = logic.new_game(c.GRID_LEN) 25 | 26 | self.commands = { 27 | "0": logic.up, 28 | "1": logic.down, 29 | "2": logic.left, 30 | "3": logic.right, 31 | } 32 | 33 | self.done = False 34 | 35 | def reset(self) -> None: 36 | self.matrix = logic.new_game(c.GRID_LEN) 37 | self.steps = 0 38 | self.score = 0 39 | self.rew = 0 40 | self.done = False 41 | 42 | def step(self) -> None: 43 | self.steps += 1 44 | if self.human and self.random: 45 | time.sleep(0.01) 46 | action = int(self.action) if not self.random else random.randint(0, 3) 47 | if self.human: 48 | print(f'Action: {["up", "down", "left", "right"][action]}') 49 | self.matrix, valid, self.rew = self.commands[str(action)](self.matrix) 50 | if valid: 51 | self.matrix = logic.add_two(self.matrix) 52 | else: 53 | self.rew -= 10 54 | state = logic.game_state(self.matrix) 55 | if state == "win": 56 | self.rew += 10000 57 | elif state == "lose": 58 | self.rew -= 1000 59 | self.score += self.rew 60 | 61 | def get_observation(self) -> typing.List[float]: 62 | return [ 63 | 1 if cell == num else 0 64 | for num in [0, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] 65 | for row in self.matrix 66 | for cell in row 67 | ] 68 | 69 | def get_reward(self) -> float: 70 | return self.rew 71 | 72 | def is_done(self) -> bool: 73 | return logic.game_state(self.matrix) in ["win", "lose"] 74 | 75 | def get_metrics(self) -> typing.List[float]: 76 | return [float(self.steps), float(self.score)] 77 | -------------------------------------------------------------------------------- 
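The game2048 and mouse listings above all drive a simulation through the same duck-typed contract: `reset()`, inject an `action`, call `step()`, then read `get_reward()` and `is_done()`. A minimal self-contained sketch of that driver loop follows; the `CountToThree` toy class and the `random_rollout` helper are illustrative stand-ins, not part of this repository:

```python
import random

# Toy stand-in with the same interface shape as Game2048 above:
# reset()/step()/get_reward()/is_done(), with `action` set from outside.
class CountToThree:
    number_of_actions = 2

    def __init__(self):
        self.action = None
        self.ones = 0

    def reset(self):
        self.ones = 0

    def step(self):
        # Count how many times action 1 was chosen; episode ends at three.
        if self.action == 1:
            self.ones += 1

    def get_reward(self):
        return 1.0 if self.action == 1 else 0.0

    def is_done(self):
        return self.ones >= 3


def random_rollout(sim, max_steps=1000):
    """Drive a simulation the same way the env wrappers above do."""
    sim.reset()
    total_reward = 0.0
    for _ in range(max_steps):
        sim.action = random.randint(0, sim.number_of_actions - 1)
        sim.step()
        total_reward += sim.get_reward()
        if sim.is_done():
            break
    return total_reward


print(random_rollout(CountToThree()))
```

The env wrappers in this repository (e.g. `Game2048Env`) perform essentially this loop under RLlib's control, with `setNextAction` playing the role of the `sim.action = ...` assignment.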
/nativerl/python/tests/mouse/multi_mouse_env_pathmind.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from pathmind.simulation import Continuous, Discrete, Simulation 4 | 5 | 6 | class MultiMouseAndCheese(Simulation): 7 | 8 | mouses = [(0, 0), (1, 1), (5, 5)] 9 | cheeses = [(4, 4), (3, 2), (0, 1)] 10 | moved = [False, False, False] 11 | steps = 0 12 | 13 | def number_of_agents(self) -> int: 14 | return 3 15 | 16 | def action_space(self, agent_id) -> typing.Union[Continuous, Discrete]: 17 | return Discrete(4) 18 | 19 | def reset(self) -> None: 20 | self.mouses = [(0, 0), (1, 1), (5, 5)] 21 | self.cheeses = [(4, 4), (3, 2), (0, 1)] 22 | self.steps = 0 23 | 24 | def step(self) -> None: 25 | self.steps += 1 26 | 27 | for i in range(self.number_of_agents()): 28 | if not self.is_done(i): 29 | self.moved[i] = True 30 | action = self.action[i][0] 31 | 32 | if action == 0: # move up 33 | self.mouses[i] = (min(self.mouses[i][0] + 1, 5), self.mouses[i][1]) 34 | elif action == 1: # move right 35 | self.mouses[i] = (self.mouses[i][0], min(self.mouses[i][1] + 1, 5)) 36 | elif action == 2: # move down 37 | self.mouses[i] = (max(self.mouses[i][0] - 1, 0), self.mouses[i][1]) 38 | elif action == 3: # move left 39 | self.mouses[i] = (self.mouses[i][0], max(self.mouses[i][1] - 1, 0)) 40 | else: 41 | raise ValueError("Invalid action") 42 | else: 43 | self.moved[i] = False 44 | 45 | def get_observation(self, agent_id) -> typing.Dict[str, float]: 46 | return { 47 | "mouse_row": float(self.mouses[agent_id][0]) / 5.0, 48 | "mouse_col": float(self.mouses[agent_id][1]) / 5.0, 49 | "mouse_row_dist": abs(self.cheeses[agent_id][0] - self.mouses[agent_id][0]) 50 | / 5.0, 51 | "mouse_col_dist": abs(self.cheeses[agent_id][1] - self.mouses[agent_id][1]) 52 | / 5.0, 53 | } 54 | 55 | def get_reward(self, agent_id) -> typing.Dict[str, float]: 56 | return { 57 | "found_cheese": 1 if self.is_done(agent_id) and self.moved[agent_id] else 0 58 
| } 59 | 60 | def is_done(self, agent_id) -> bool: 61 | return self.mouses[agent_id] == self.cheeses[agent_id] 62 | -------------------------------------------------------------------------------- /nativerl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4) 2 | project(nativerl) 3 | 4 | # Download and unpack pybind11 at configure time 5 | configure_file(CMakeLists.txt.in pybind11-download/CMakeLists.txt) 6 | execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 7 | RESULT_VARIABLE result 8 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/pybind11-download ) 9 | if(result) 10 | message(FATAL_ERROR "CMake step for pybind11 failed: ${result}") 11 | endif() 12 | execute_process(COMMAND ${CMAKE_COMMAND} --build . 13 | RESULT_VARIABLE result 14 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/pybind11-download ) 15 | if(result) 16 | message(FATAL_ERROR "Build step for pybind11 failed: ${result}") 17 | endif() 18 | 19 | # Add pybind11 directly to our build. This defines 20 | # the pybind11_add_module() function. 
21 | add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/pybind11-src 22 | ${CMAKE_CURRENT_BINARY_DIR}/pybind11-build 23 | EXCLUDE_FROM_ALL) 24 | 25 | # Work around MSVC creating "Debug" and "Release" subdirectories 26 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_CURRENT_BINARY_DIR}") 27 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_BINARY_DIR}") 28 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG "${CMAKE_CURRENT_BINARY_DIR}") 29 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_BINARY_DIR}") 30 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_DEBUG "${CMAKE_CURRENT_BINARY_DIR}") 31 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE "${CMAKE_CURRENT_BINARY_DIR}") 32 | 33 | # Indicate where to find the JNI library produced by JavaCPP, as required by the Python wrapper 34 | set(PLATFORMDIR "${CMAKE_CURRENT_BINARY_DIR}/classes/ai/skymind/nativerl/$ENV{PLATFORM_HOST}") 35 | set(CXXSOURCES "${PLATFORMDIR}/../nativerl.cpp") 36 | 37 | # Pick up the include and library directories provided by JavaCPP 38 | string(REPLACE "$ENV{PLATFORM_PATH_SEPARATOR}" ";" PLATFORM_INCLUDEPATH "$ENV{PLATFORM_INCLUDEPATH}") 39 | string(REPLACE "$ENV{PLATFORM_PATH_SEPARATOR}" ";" PLATFORM_LINKPATH "$ENV{PLATFORM_LINKPATH}") 40 | include_directories(${PLATFORM_INCLUDEPATH}) 41 | link_directories(${PLATFORMDIR} ${PLATFORM_LINKPATH}) 42 | 43 | # Compile the Python wrappers against the JNI library 44 | file(GLOB JNILIBS ${PLATFORMDIR}/*) 45 | file(COPY ${JNILIBS} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) 46 | pybind11_add_module(nativerl ${CXXSOURCES}) 47 | target_link_libraries(nativerl PRIVATE jniNativeRL) 48 | if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") 49 | # Tweak options to handle correctly C++ exceptions thrown from JavaCPP 50 | target_compile_options(nativerl PRIVATE /EHc-) 51 | endif() 52 | -------------------------------------------------------------------------------- /nativerl-tests/src/test/java/ai/skymind/nativerl/AnyLogicModelTest.java: 
-------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import java.io.File; 4 | import java.util.Arrays; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import org.bytedeco.javacpp.Loader; 8 | import org.junit.Test; 9 | 10 | import static org.hamcrest.core.AnyOf.anyOf; 11 | import static org.hamcrest.core.Is.is; 12 | import static org.hamcrest.MatcherAssert.assertThat; 13 | import static org.junit.Assert.assertEquals; 14 | import static org.junit.Assert.assertTrue; 15 | import static org.junit.Assert.fail; 16 | 17 | /** 18 | * 19 | * @author saudet 20 | */ 21 | public class AnyLogicModelTest extends ModelTest { 22 | 23 | @Test public void testTrafficPhases() throws Exception { 24 | File binDir = new File("target/dependency/nativerl-bin/"); 25 | File helperDir = new File("../PathmindPolicyHelper/target/"); 26 | File exportDir = folder.newFolder("TrafficPhases"); 27 | File modelDir = new File(getClass().getResource("trafficphases").toURI()); 28 | File simulationDir = new File(exportDir, "TrafficPhases_Simulation"); 29 | 30 | execute(modelDir, "anylogic", "-e", "-o", exportDir.getAbsolutePath(), modelDir.getAbsolutePath() + "/TrafficPhases.alp"); 31 | 32 | copy(binDir, simulationDir); 33 | copy(helperDir, new File(simulationDir, "lib")); 34 | copy(new File(binDir, "examples/traintraffic.sh"), simulationDir); 35 | execute(simulationDir, "bash", "traintraffic.sh"); 36 | 37 | File[] savedModels = find(simulationDir, "saved_model.pb"); 38 | assertTrue(savedModels.length > 0); 39 | for (File f : savedModels) { 40 | File d = f.getParentFile(); 41 | PolicyHelper h = PolicyHelper.load(d); 42 | double[] o = h.computeActions(new double[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); 43 | System.out.println(Arrays.toString(o)); 44 | assertEquals(1, o.length); 45 | assertThat(o[0], anyOf(is(0.0), is(1.0))); 46 | 47 | Map<String, String> e = new HashMap<String, String>(); 48 | e.put("NATIVERL_POLICY", d.getAbsolutePath()); 49 | String p =
Loader.getPlatform(); 50 | if (p.startsWith("linux")) { 51 | execute(simulationDir, e, "bash", "TrafficPhases_linux.sh"); 52 | } else if (p.startsWith("macosx")) { 53 | execute(simulationDir, e, "bash", "TrafficPhases_mac"); 54 | } else if (p.startsWith("windows")) { 55 | execute(simulationDir, e, "cmd.exe", "/c", "TrafficPhases_windows.bat"); 56 | } else { 57 | fail("Unsupported platform: " + p); 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /nativerl-policy/src/main/java/ai/skymind/nativerl/ObservationFilter.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.lang.reflect.Constructor; 6 | import java.lang.reflect.Method; 7 | import java.net.URL; 8 | import java.net.URLClassLoader; 9 | 10 | /** 11 | * An interface that users can implement to filter observations somehow. 12 | * The loading mechanism currently needs java arguments "--add-opens java.base/jdk.internal.loader=ALL-UNNAMED" for Java 9+. 13 | * 14 | * @author saudet 15 | */ 16 | public interface ObservationFilter<O> { 17 | public static final String POLICY_CLASS_NAME = "PolicyObservationFilter"; 18 | 19 | /** Returns {@code load(directory, POLICY_CLASS_NAME)}. */ 20 | public static ObservationFilter load(File directory) throws IOException, ReflectiveOperationException { 21 | return load(directory, POLICY_CLASS_NAME); 22 | } 23 | /** Returns an instance of an implementation of ObservationFilter found in the given directory with the given class name.
*/ 24 | public static ObservationFilter load(File directory, String className) throws IOException, ReflectiveOperationException { 25 | if (!new File(directory, className.replace('.', '/') + ".class").exists()) { 26 | return null; 27 | } 28 | ClassLoader classLoader = ObservationFilter.class.getClassLoader(); 29 | try { 30 | // Java 8- 31 | Method method = classLoader.getClass().getDeclaredMethod("addURL", new Class[]{URL.class}); 32 | method.setAccessible(true); 33 | method.invoke(classLoader, new Object[]{directory.toURI().toURL()}); 34 | } catch (NoSuchMethodException e) { 35 | // Java 9+, but requires java arguments "--add-opens java.base/jdk.internal.loader=ALL-UNNAMED" 36 | try { 37 | Method method = classLoader.getClass().getDeclaredMethod("appendToClassPathForInstrumentation", String.class); 38 | method.setAccessible(true); 39 | method.invoke(classLoader, directory.getPath()); 40 | } catch (RuntimeException e2) { 41 | throw new RuntimeException("Java arguments missing: \"--add-opens java.base/jdk.internal.loader=ALL-UNNAMED\"", e2); 42 | } 43 | } 44 | try { 45 | Class<? extends ObservationFilter> cls = Class.forName(className, true, classLoader).asSubclass(ObservationFilter.class); 46 | Constructor<? extends ObservationFilter> c = cls.getDeclaredConstructor(); 47 | c.setAccessible(true); 48 | return c.newInstance(); 49 | } catch (ClassNotFoundException e) { 50 | return null; 51 | } 52 | } 53 | 54 | double[] filter(O observations); 55 | } 56 | -------------------------------------------------------------------------------- /nativerl/python/tests/game2048/env.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import math 3 | import os 4 | import typing 5 | from collections import OrderedDict 6 | 7 | import yaml 8 | from pathmind_training import pynativerl as nativerl 9 | from pathmind_training.pynativerl import Continuous 10 | 11 | from .base import Game2048 12 | 13 | dir_path = os.path.dirname(os.path.realpath(__file__)) 14 | with open(os.path.join(dir_path,
"obs.yaml"), "r") as f: 15 | schema: OrderedDict = yaml.safe_load(f.read()) 16 | OBS = schema.get("observations") 17 | 18 | 19 | class Game2048Env(nativerl.Environment): 20 | def __init__(self, simulation=Game2048()): 21 | nativerl.Environment.__init__(self) 22 | self.simulation = simulation 23 | 24 | def getActionSpace(self, agent_id=0): 25 | return ( 26 | nativerl.Discrete(self.simulation.number_of_actions) 27 | if agent_id == 0 28 | else None 29 | ) 30 | 31 | def getObservationSpace(self): 32 | obs_shape = [self.simulation.number_of_observations] 33 | return nativerl.Continuous([-math.inf], [math.inf], obs_shape) 34 | 35 | def getNumberOfAgents(self): 36 | return 1 37 | 38 | def getActionMask(self, agent_id=0): 39 | return None 40 | 41 | def getActionMaskSpace(self): 42 | return None 43 | 44 | def getObservation(self, agent_id=0): 45 | obs_dict = self.simulation.get_observation() 46 | 47 | lists = [ 48 | [obs_dict[obs]] 49 | if not isinstance(obs_dict[obs], typing.List) 50 | else obs_dict[obs] 51 | for obs in OBS 52 | ] 53 | observations = list(itertools.chain(*lists)) 54 | 55 | return nativerl.Array(observations) 56 | 57 | def reset(self): 58 | self.simulation.reset() 59 | 60 | def setNextAction(self, action, agent_id=0): 61 | self.simulation.action = action 62 | 63 | def isSkip(self, agent_id=0): 64 | return False 65 | 66 | def step(self): 67 | return self.simulation.step() 68 | 69 | def isDone(self, agent_id=0): 70 | return self.simulation.is_done() 71 | 72 | def getReward(self, agent_id=0): 73 | # TODO: if reward snippet, call it here 74 | reward_sum = sum(self.simulation.get_reward().values()) 75 | return reward_sum 76 | 77 | def getMetrics(self, agent_id=0): 78 | if self.simulation.get_metrics(): 79 | return self.simulation.get_metrics() 80 | else: 81 | return list(self.simulation.get_observation().values()) 82 | 83 | def getMetricsSpace(self) -> Continuous: 84 | num_metrics = len(self.getMetrics()) 85 | return nativerl.Continuous( 86 | low=[-math.inf], 
high=[math.inf], shape=[num_metrics] 87 | ) 88 | -------------------------------------------------------------------------------- /nativerl/examples/traintraffic.sh: -------------------------------------------------------------------------------- 1 | OUTPUT_DIR="$(pwd)" 2 | MODEL_PACKAGE="traffic_light_opt" 3 | ENVIRONMENT_CLASS="$MODEL_PACKAGE.PathmindEnvironment" 4 | SIMULATION_CLASS="$MODEL_PACKAGE.Simulation" 5 | AGENT_CLASS="$MODEL_PACKAGE.Main" 6 | 7 | CLASS_SNIPPET=' 8 | int simCount = 0; 9 | String combinations[][] = { 10 | {"constant_moderate", "constant_moderate"}, 11 | {"none_til_heavy_afternoon_peak", "constant_moderate"}, 12 | {"constant_moderate", "none_til_heavy_afternoon_peak"}, 13 | {"peak_afternoon", "peak_morning"}, 14 | {"peak_morning", "peak_afternoon"} 15 | }; 16 | ' 17 | 18 | RESET_SNIPPET=' 19 | simCount++; 20 | agent.schedNameNS = combinations[simCount % combinations.length][0]; 21 | agent.schedNameEW = combinations[simCount % combinations.length][1]; 22 | ' 23 | 24 | OBSERVATION_SNIPPET=' 25 | out = in.obs; 26 | ' 27 | 28 | REWARD_TERMS_SNIPPET=' 29 | if (before == null) return 0; 30 | double[] s0 = before.vars, s1 = after.vars; 31 | // change in forward + intersection delay 32 | double delay0 = s0[0] + s0[2] + s0[4] + s0[6] + s0[8]; 33 | double delay1 = s1[0] + s1[2] + s1[4] + s1[6] + s1[8]; 34 | reward = delay0 - delay1; 35 | if (delay0 > 0 || delay1 > 0) { 36 | rewardTermsRaw[0] /= Math.max(delay0, delay1); 37 | } 38 | ' 39 | 40 | METRICS_SNIPPET=' 41 | metrics = new double[] { agent.tisDS.getYMean() }; 42 | ' 43 | 44 | mkdir -p $MODEL_PACKAGE 45 | 46 | export CLASSPATH=$(find .
-iname '*.jar' | tr '\n' :) 47 | 48 | if which cygpath; then 49 | export CLASSPATH=$(cygpath --path --windows "$CLASSPATH") 50 | export PATH=$PATH:$(find "$(cygpath "$JAVA_HOME")" -name 'jvm.dll' -printf '%h:') 51 | fi 52 | 53 | java ai.skymind.nativerl.AnyLogicHelper \ 54 | --environment-class-name "$ENVIRONMENT_CLASS" \ 55 | --simulation-class-name "$SIMULATION_CLASS" \ 56 | --agent-class-name "$AGENT_CLASS" \ 57 | --class-snippet "$CLASS_SNIPPET" \ 58 | --reset-snippet "$RESET_SNIPPET" \ 59 | --observation-snippet "$OBSERVATION_SNIPPET" \ 60 | --reward-terms-snippet "$REWARD_TERMS_SNIPPET" \ 61 | --metrics-snippet "$METRICS_SNIPPET" \ 62 | --policy-helper RLlibPolicyHelper \ 63 | --multi-agent \ 64 | --named-variables 65 | 66 | javac $(find -iname '*.java') 67 | 68 | PYTHON=$(which python.exe) || PYTHON=$(which python3) 69 | 70 | "$PYTHON" run.py training \ 71 | --algorithm "PPO" \ 72 | --output-dir "$OUTPUT_DIR" \ 73 | --environment "$ENVIRONMENT_CLASS" \ 74 | --num-workers 4 \ 75 | --random-seed 42 \ 76 | --max-iterations 10 \ 77 | --max-reward-mean 100 \ 78 | --multi-agent \ 79 | rllibtrain.py 80 | 81 | # Execute the simulation with all models to get test metrics 82 | #find "$OUTPUT_DIR" -iname model -type d -exec java "$ENVIRONMENT_CLASS" {} \; 83 | -------------------------------------------------------------------------------- /nativerl-analyzer/api/src/main/java/io/skymind/pathmind/analyzer/api/exception/handler/ControllerExceptionHandler.java: -------------------------------------------------------------------------------- 1 | package io.skymind.pathmind.analyzer.api.exception.handler; 2 | 3 | import io.skymind.pathmind.analyzer.api.exception.dto.ApiErrorsResponse; 4 | import io.skymind.pathmind.analyzer.dto.SimulationParameter; 5 | import io.skymind.pathmind.analyzer.exception.InvalidZipFileException; 6 | import io.skymind.pathmind.analyzer.exception.ProcessingException; 7 | import io.skymind.pathmind.analyzer.exception.UnexpectedScriptResultException; 
8 | import io.skymind.pathmind.analyzer.exception.ZipExtractionException; 9 | import lombok.extern.slf4j.Slf4j; 10 | import org.springframework.web.bind.annotation.ExceptionHandler; 11 | import org.springframework.web.bind.annotation.ResponseStatus; 12 | import org.springframework.web.bind.annotation.RestControllerAdvice; 13 | import org.springframework.web.multipart.MultipartException; 14 | 15 | import static org.springframework.http.HttpStatus.BAD_REQUEST; 16 | import static org.springframework.http.HttpStatus.INTERNAL_SERVER_ERROR; 17 | 18 | @RestControllerAdvice 19 | @Slf4j 20 | public class ControllerExceptionHandler { 21 | 22 | @ExceptionHandler(ZipExtractionException.class) 23 | @ResponseStatus(BAD_REQUEST) 24 | public ApiErrorsResponse handleZipExtractionException(final ZipExtractionException ex) { 25 | log.error(ex.getMessage()); 26 | return new ApiErrorsResponse(BAD_REQUEST.value(), ex.getMessage()); 27 | } 28 | 29 | @ExceptionHandler(ProcessingException.class) 30 | @ResponseStatus(INTERNAL_SERVER_ERROR) 31 | public ApiErrorsResponse handleProcessingException(final ProcessingException ex) { 32 | log.error(ex.getMessage()); 33 | return new ApiErrorsResponse(INTERNAL_SERVER_ERROR.value(), ex.getMessage()); 34 | } 35 | 36 | @ExceptionHandler(UnexpectedScriptResultException.class) 37 | @ResponseStatus(INTERNAL_SERVER_ERROR) 38 | public ApiErrorsResponse handleUnexpectedScriptResultException(final UnexpectedScriptResultException ex) { 39 | log.error(ex.getMessage()); 40 | return new ApiErrorsResponse(INTERNAL_SERVER_ERROR.value(), ex.getMessage()); 41 | } 42 | 43 | @ExceptionHandler(InvalidZipFileException.class) 44 | @ResponseStatus(BAD_REQUEST) 45 | public ApiErrorsResponse handleInvalidZipFileException(final InvalidZipFileException ex) { 46 | log.error(ex.getMessage()); 47 | return new ApiErrorsResponse(BAD_REQUEST.value(), ex.getMessage()); 48 | } 49 | 50 | @ExceptionHandler(MultipartException.class) 51 | @ResponseStatus(BAD_REQUEST) 52 | public 
ApiErrorsResponse handleMultipartException(final MultipartException ex) { 53 | log.error(ex.getMessage()); 54 | return new ApiErrorsResponse(BAD_REQUEST.value(), ex.getMessage()); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /nativerl-policy/src/test/java/ai/skymind/nativerl/ObservationProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import org.junit.Test; 5 | 6 | import static org.junit.Assert.*; 7 | 8 | /** 9 | * 10 | * @author saudet 11 | */ 12 | public class ObservationProcessorTest { 13 | static class TestFilter implements ObservationFilter { 14 | public double[] filter(TestObservations observations) { 15 | return observations.obs4; 16 | } 17 | } 18 | 19 | int data1 = 37; 20 | int[] data2 = {11, 15}; 21 | double data3 = 42; 22 | double[] data4 = {1, 2, 3, 4, 5}; 23 | boolean data5 = true; 24 | boolean data6 = false; 25 | 26 | class TestObservations { 27 | int obs1 = data1; 28 | int[] obs2 = data2; 29 | double obs3 = data3; 30 | double[] obs4 = data4; 31 | boolean obs5 = data5; 32 | boolean obs6 = data6; 33 | } 34 | 35 | void observations(int agentId) { 36 | class DummyObservations extends TestObservations { 37 | float obs7 = agentId; 38 | } 39 | } 40 | 41 | @Test public void testObservations() { 42 | try { 43 | ObservationProcessor op = new ObservationProcessor(this.getClass()); 44 | assertEquals("DummyObservations", op.getObservationClass().getSimpleName()); 45 | assertArrayEquals(new String[] {"obs1", "obs2[0]", "obs2[1]", "obs3", "obs4[0]", "obs4[1]", "obs4[2]", "obs4[3]", "obs4[4]", "obs5", "obs6", "obs7"}, 46 | op.getObservationNames(this)); 47 | assertArrayEquals(new String[] {"int", "int", "int", "double", "double", "double", "double", "double", "double", "boolean", "boolean", "float"}, 48 | op.getObservationTypes(this)); 49 | 
assertEquals(op.getObservationNames(this).length, op.getObservationTypes(this).length); 50 | assertArrayEquals(new double[] {37, 11, 15, 42, 1, 2, 3, 4, 5, 1, 0, 64}, op.getObservations(this, 64), 0.0); 51 | TestObservations o = op.getObservationObject(this, 64); 52 | assertArrayEquals(new double[] {1, 2, 3, 4, 5}, new TestFilter().filter(o), 0.0); 53 | 54 | // json test without action masking array 55 | assertEquals("{\"obs1\":37,\"obs2\":[11,15],\"obs3\":42.0,\"obs4\":[1.0,2.0,3.0,4.0,5.0],\"obs5\":true,\"obs6\":false,\"obs7\":64.0}", op.toJsonString(o, null)); 56 | 57 | // json test with action masking array 58 | boolean[] actMasks = {true, true, false, true}; 59 | assertEquals("{\"actionMask\":[1.0,1.0,0.0,1.0],\"obs1\":37,\"obs2\":[11,15],\"obs3\":42.0,\"obs4\":[1.0,2.0,3.0,4.0,5.0],\"obs5\":true,\"obs6\":false,\"obs7\":64.0}", op.toJsonString(o, actMasks)); 60 | } catch (ReflectiveOperationException ex) { 61 | fail(ex.getMessage()); 62 | } catch (JsonProcessingException e) { 63 | fail(e.getMessage()); 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /nativerl/python/tests/factory/controls.py: -------------------------------------------------------------------------------- 1 | """Controls specify how objects change state.""" 2 | import enum 3 | import random 4 | 5 | from .models import ActionResult, Direction, Node, Rail, Table 6 | from .simulation import Factory 7 | 8 | 9 | class Action(enum.IntEnum): 10 | """Move in a direction or stay where you are.""" 11 | 12 | up = 0 13 | right = 1 14 | down = 2 15 | left = 3 16 | none = 4 17 | 18 | @staticmethod 19 | def random_action(): 20 | return Action(random.randrange(0, 5)) 21 | 22 | 23 | def do_action(table: Table, factory: Factory, action: Action): 24 | return TableAndRailController(factory).do_action(table, action) 25 | 26 | 27 | class TableAndRailController: 28 | def __init__(self, factory: Factory, name=None): 29 | self.factory = factory 30 |
self.name = name 31 | 32 | @staticmethod 33 | def _move_table(table: Table, to: Node) -> ActionResult: 34 | """Move table to an adjacent node. Cores are moved automatically. 35 | If we move on a rail, also move the shuttle. If the destination 36 | completes a phase, mark it as such. 37 | """ 38 | start = table.node 39 | 40 | # Remove table from "start" node 41 | start.remove_table() 42 | 43 | # Put table on "to" node 44 | table.set_node(to) 45 | to.set_table(table) 46 | 47 | if table.get_target() is to: 48 | table.phase_completed() 49 | table.is_at_target = True 50 | else: 51 | table.is_at_target = False 52 | 53 | return ActionResult.MOVED 54 | 55 | def _move_to_rail(self, table: Table, rail: Rail, neighbour: Node) -> ActionResult: 56 | raise NotImplementedError 57 | 58 | def do_action(self, table: Table, action: Action) -> ActionResult: 59 | """Attempt to carry out a specified action.""" 60 | table.is_at_target = False # Reset target 61 | if action.value == 4: 62 | return ActionResult.NONE 63 | direction = Direction(action.value) 64 | has_neighbour = table.node.has_neighbour(direction) 65 | if not has_neighbour: 66 | return ActionResult.INVALID 67 | else: 68 | neighbour = table.node.get_neighbour(direction) 69 | if neighbour.has_table(): 70 | return ActionResult.COLLISION 71 | if neighbour.is_rail: # node -> rail or rail -> rail 72 | # can we hop on the rail? 73 | rail = self.factory.get_rail(node=neighbour) 74 | assert rail.num_tables() <= 1, "A rail can have at most one table" 75 | if rail.is_free() or table.node in rail.nodes: 76 | return self._move_table(table, neighbour) 77 | else: 78 | # target is blocked with a table. 
79 | return ActionResult.INVALID_RAIL_ENTERING 80 | else: # Move table from a) node -> node or b) rail -> node 81 | return self._move_table(table, neighbour) 82 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/Array.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | 12 | /** 13 | * A generic multidimensional array of 32-bit floating point elements with a very simple interface 14 | * such that it can be mapped and used easily with tools like JavaCPP and pybind11. 15 | */ 16 | @Namespace("nativerl") @NoOffset @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 17 | public class Array extends Pointer { 18 | static { Loader.load(); } 19 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ 20 | public Array(Pointer p) { super(p); } 21 | /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ 22 | public Array(long size) { super((Pointer)null); allocateArray(size); } 23 | private native void allocateArray(long size); 24 | @Override public Array position(long position) { 25 | return (Array)super.position(position); 26 | } 27 | @Override public Array getPointer(long i) { 28 | return new Array(this).position(position + i); 29 | } 30 | 31 | public native FloatPointer allocated(); public native Array allocated(FloatPointer setter); 32 | public native FloatPointer data(); public native Array data(FloatPointer setter); 33 | public native @ByRef SSizeTVector shape(); public native Array shape(SSizeTVector setter); 34 | 35 | public Array() { super((Pointer)null); allocate(); } 36 | private native void allocate(); 37 | public Array(@Const @ByRef Array a) { super((Pointer)null); allocate(a); } 38 | private native void allocate(@Const @ByRef Array a); 39 | public Array(FloatPointer data, @Const @ByRef SSizeTVector shape) { super((Pointer)null); allocate(data, shape); } 40 | private native void allocate(FloatPointer data, @Const @ByRef SSizeTVector shape); 41 | public Array(FloatBuffer data, @Const @ByRef SSizeTVector shape) { super((Pointer)null); allocate(data, shape); } 42 | private native void allocate(FloatBuffer data, @Const @ByRef SSizeTVector shape); 43 | public Array(float[] data, @Const @ByRef SSizeTVector shape) { super((Pointer)null); allocate(data, shape); } 44 | private native void allocate(float[] data, @Const @ByRef SSizeTVector shape); 45 | public Array(@Const @ByRef FloatVector values) { super((Pointer)null); allocate(values); } 46 | private native void allocate(@Const @ByRef FloatVector values); 47 | public Array(@Const @ByRef SSizeTVector shape) { super((Pointer)null); allocate(shape); } 48 | private native void allocate(@Const @ByRef SSizeTVector shape); 49 | public native @ByRef @Name("operator +=") Array addPut(@Const @ByRef Array a); 50 | 51 | public native @ByVal FloatVector values(); 52 | 53 | public native @Cast("ssize_t") long 
length(); 54 | 55 | public native @Cast("ssize_t") long py_len(); 56 | 57 | public native float get_item(int i); 58 | 59 | } 60 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/SSizeTVector.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | @Name("std::vector") @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 12 | public class SSizeTVector extends Pointer { 13 | static { Loader.load(); } 14 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ 15 | public SSizeTVector(Pointer p) { super(p); } 16 | public SSizeTVector(long ... array) { this(array.length); put(array); } 17 | public SSizeTVector() { allocate(); } 18 | public SSizeTVector(long n) { allocate(n); } 19 | private native void allocate(); 20 | private native void allocate(@Cast("size_t") long n); 21 | public native @Name("operator =") @ByRef SSizeTVector put(@ByRef SSizeTVector x); 22 | 23 | public boolean empty() { return size() == 0; } 24 | public native long size(); 25 | public void clear() { resize(0); } 26 | public native void resize(@Cast("size_t") long n); 27 | 28 | @Index(function = "at") public native @Cast("ssize_t") long get(@Cast("size_t") long i); 29 | public native SSizeTVector put(@Cast("size_t") long i, long value); 30 | 31 | public native @ByVal Iterator insert(@ByVal Iterator pos, @Cast("ssize_t") long value); 32 | public native @ByVal Iterator erase(@ByVal Iterator pos); 33 | public native @ByVal Iterator begin(); 34 | public native @ByVal Iterator end(); 35 | @NoOffset @Name("iterator") public static class Iterator extends Pointer { 36 | public Iterator(Pointer p) { 
super(p); } 37 | public Iterator() { } 38 | 39 | public native @Name("operator ++") @ByRef Iterator increment(); 40 | public native @Name("operator ==") boolean equals(@ByRef Iterator it); 41 | public native @Name("operator *") @Cast("ssize_t") long get(); 42 | } 43 | 44 | public long[] get() { 45 | long[] array = new long[size() < Integer.MAX_VALUE ? (int)size() : Integer.MAX_VALUE]; 46 | for (int i = 0; i < array.length; i++) { 47 | array[i] = get(i); 48 | } 49 | return array; 50 | } 51 | @Override public String toString() { 52 | return java.util.Arrays.toString(get()); 53 | } 54 | 55 | public long pop_back() { 56 | long size = size(); 57 | long value = get(size - 1); 58 | resize(size - 1); 59 | return value; 60 | } 61 | public SSizeTVector push_back(long value) { 62 | long size = size(); 63 | resize(size + 1); 64 | return put(size, value); 65 | } 66 | public SSizeTVector put(long value) { 67 | if (size() != 1) { resize(1); } 68 | return put(0, value); 69 | } 70 | public SSizeTVector put(long ... array) { 71 | if (size() != array.length) { resize(array.length); } 72 | for (int i = 0; i < array.length; i++) { 73 | put(i, array[i]); 74 | } 75 | return this; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /nativerl/python/tests/game2048/base.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import time 4 | import typing 5 | 6 | from . import constants as c 7 | from . 
import logic 8 | 9 | 10 | class Game2048: 11 | 12 | action: float = None # Dynamically generated for each state by Pathmind 13 | 14 | scores = ["0", "2", "4", "8", "16", "32", "64", "128", "256", "512", "1024"] 15 | 16 | number_of_actions: int = 4 # Needs to be provided 17 | grid_cells = 4 * 4 18 | number_of_observations: int = grid_cells * len(scores) # Needs to be provided 19 | number_of_metrics: int = 2 20 | 21 | obs = dict.fromkeys(scores, [0] * grid_cells) 22 | 23 | reward_terms = { 24 | "log_game_score": 0, 25 | "invalid_move": 0, 26 | } 27 | prev_rew_terms = reward_terms 28 | total_reward = 0 29 | steps = 0 30 | 31 | def __init__(self, random_movements=False, human=False): 32 | self.random = random_movements 33 | self.human = human 34 | self.matrix = logic.new_game(c.GRID_LEN) 35 | 36 | self.commands = { 37 | "0": logic.up, 38 | "1": logic.down, 39 | "2": logic.left, 40 | "3": logic.right, 41 | } 42 | 43 | self.done = False 44 | self.obs = self.get_observation() 45 | self.reward_terms = self.get_reward() 46 | 47 | def reset(self) -> None: 48 | self.matrix = logic.new_game(c.GRID_LEN) 49 | self.steps = 0 50 | self.total_reward = 0 51 | self.done = False 52 | self.obs = self.get_observation() 53 | self.reward_terms = self.get_reward() 54 | 55 | def step(self) -> None: 56 | 57 | self.prev_rew_terms = self.reward_terms 58 | 59 | self.steps += 1 60 | if self.human and self.random: 61 | time.sleep(0.01) 62 | action = int(self.action) if not self.random else random.randint(0, 3) 63 | if self.human: 64 | print(f'Action: {["up", "down", "left", "right"][action]}') 65 | self.matrix, valid, rew = self.commands[str(action)](self.matrix) 66 | 67 | self.reward_terms["log_game_score"] = ( 68 | math.log(rew, 2) if rew > 0 else 0 69 | ) - self.prev_rew_terms["log_game_score"] 70 | if valid: 71 | self.matrix = logic.add_two(self.matrix) 72 | else: 73 | self.reward_terms["invalid_move"] = -1 - self.prev_rew_terms["invalid_move"] 74 | 75 | self.total_reward += 
sum(self.reward_terms.values()) 76 | 77 | def get_observation(self) -> typing.Dict: 78 | for score in self.scores: 79 | self.obs[score] = [ 80 | 1 if value == score else 0 for row in self.matrix for value in row 81 | ] 82 | 83 | return self.obs 84 | 85 | def get_reward(self) -> typing.Dict: 86 | return self.reward_terms 87 | 88 | def is_done(self) -> bool: 89 | return logic.game_state(self.matrix) in ["win", "lose"] 90 | 91 | def get_metrics(self) -> typing.List[float]: 92 | return [float(self.steps), float(self.total_reward)] 93 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/FloatVector.java: -------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | @Name("std::vector") @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 12 | public class FloatVector extends Pointer { 13 | static { Loader.load(); } 14 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ 15 | public FloatVector(Pointer p) { super(p); } 16 | public FloatVector(float value) { this(1); put(0, value); } 17 | public FloatVector(float ... 
array) { this(array.length); put(array); } 18 | public FloatVector() { allocate(); } 19 | public FloatVector(long n) { allocate(n); } 20 | private native void allocate(); 21 | private native void allocate(@Cast("size_t") long n); 22 | public native @Name("operator =") @ByRef FloatVector put(@ByRef FloatVector x); 23 | 24 | public boolean empty() { return size() == 0; } 25 | public native long size(); 26 | public void clear() { resize(0); } 27 | public native void resize(@Cast("size_t") long n); 28 | 29 | @Index(function = "at") public native float get(@Cast("size_t") long i); 30 | public native FloatVector put(@Cast("size_t") long i, float value); 31 | 32 | public native @ByVal Iterator insert(@ByVal Iterator pos, float value); 33 | public native @ByVal Iterator erase(@ByVal Iterator pos); 34 | public native @ByVal Iterator begin(); 35 | public native @ByVal Iterator end(); 36 | @NoOffset @Name("iterator") public static class Iterator extends Pointer { 37 | public Iterator(Pointer p) { super(p); } 38 | public Iterator() { } 39 | 40 | public native @Name("operator ++") @ByRef Iterator increment(); 41 | public native @Name("operator ==") boolean equals(@ByRef Iterator it); 42 | public native @Name("operator *") float get(); 43 | } 44 | 45 | public float[] get() { 46 | float[] array = new float[size() < Integer.MAX_VALUE ? 
(int)size() : Integer.MAX_VALUE]; 47 | for (int i = 0; i < array.length; i++) { 48 | array[i] = get(i); 49 | } 50 | return array; 51 | } 52 | @Override public String toString() { 53 | return java.util.Arrays.toString(get()); 54 | } 55 | 56 | public float pop_back() { 57 | long size = size(); 58 | float value = get(size - 1); 59 | resize(size - 1); 60 | return value; 61 | } 62 | public FloatVector push_back(float value) { 63 | long size = size(); 64 | resize(size + 1); 65 | return put(size, value); 66 | } 67 | public FloatVector put(float value) { 68 | if (size() != 1) { resize(1); } 69 | return put(0, value); 70 | } 71 | public FloatVector put(float ... array) { 72 | if (size() != array.length) { resize(array.length); } 73 | for (int i = 0; i < array.length; i++) { 74 | put(i, array[i]); 75 | } 76 | return this; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /nativerl-policy/src/main/java/ai/skymind/nativerl/PolicyHelper.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.Arrays; 6 | 7 | /** 8 | * This is an interface that needs to be implemented by helper classes 9 | * that help users execute already trained reinforcement learning policies. 10 | * We can disable it at runtime by setting the "ai.skymind.nativerl.disablePolicyHelper" 11 | * system property to true, for example, during training. 12 | */ 13 | public interface PolicyHelper { 14 | static final boolean disablePolicyHelper = Boolean.getBoolean("ai.skymind.nativerl.disablePolicyHelper"); 15 | static final String[] policyHelpers = {"ai.skymind.nativerl.RLlibPolicyHelper", "ai.skymind.nativerl.RLlibV2PolicyHelper"}; 16 | 17 | /** Returns a new instance of the first PolicyHelper found within {@link #policyHelpers}. 
*/ 18 | public static PolicyHelper load(File savedModel) throws IOException, ReflectiveOperationException { 19 | Class c = null; 20 | for (String name : policyHelpers) { 21 | try { 22 | c = Class.forName(name).asSubclass(PolicyHelper.class); 23 | break; 24 | } catch (ClassNotFoundException e) { 25 | // continue 26 | } 27 | } 28 | if (c == null) { 29 | throw new ClassNotFoundException(Arrays.deepToString(policyHelpers)); 30 | } 31 | return c.getConstructor(File.class).newInstance(savedModel); 32 | } 33 | 34 | /** Adapter from float to double array for {@link #computeActions(float[])}. */ 35 | default public double[] computeActions(double[] state) { 36 | float[] s = new float[state.length]; 37 | for (int i = 0; i < state.length; i++) { 38 | s[i] = (float)state[i]; 39 | } 40 | float[] a = computeActions(s); 41 | double[] action = new double[a.length]; 42 | for (int i = 0; i < action.length; i++) { 43 | action[i] = a[i]; 44 | } 45 | return action; 46 | } 47 | 48 | /** Adapter from float to double array for {@link #computeDiscreteAction(float[])}. */ 49 | default public long[] computeDiscreteAction(double[] state) { 50 | float[] s = new float[state.length]; 51 | for (int i = 0; i < state.length; i++) { 52 | s[i] = (float)state[i]; 53 | } 54 | return computeDiscreteAction(s); 55 | } 56 | 57 | /** Returns the continuous or discrete actions that should be performed in the given state. (Single Policy, Continuous or Discrete Actions) 58 | * Continuous values should be clipped to always fall in the range [0, 1]. */ 59 | float[] computeActions(float[] state); 60 | 61 | /** Returns the discrete actions that should be performed in the given state. 
(Single Policy, Tuple Decisions) */ 62 | long[] computeDiscreteAction(float[] state); 63 | 64 | /** 65 | * 66 | * @param baseUrl policy server url 67 | * ex) https://api.test.devpathmind.com/policy/id4350 68 | * @param token 69 | * @param postBody 70 | * @return 71 | */ 72 | double[] computeActions(String baseUrl, String token, String postBody); 73 | } 74 | -------------------------------------------------------------------------------- /nativerl/python/pathmind_training/pynativerl.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from abc import ABC, abstractmethod 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | 7 | 8 | def init(args): 9 | pass 10 | 11 | 12 | class Space: 13 | pass 14 | 15 | 16 | class Discrete(Space): 17 | def __init__(self, n: int, size: int = 1): 18 | self.n = n 19 | self.size = size 20 | 21 | 22 | class Continuous(Space): 23 | def __init__(self, low: List[float], high: List[float], shape: List[int]): 24 | self.low = low 25 | self.high = high 26 | self.shape = shape 27 | 28 | 29 | # Smart hack: use a pass-through function to act as Array constructor (already have numpy) 30 | def Array(arr: Union[np.array, List]): 31 | return np.asarray(arr) 32 | 33 | 34 | class Environment(ABC): 35 | @abstractmethod 36 | def getActionSpace(self, agent_id: int = 0) -> Optional[Space]: 37 | return NotImplemented 38 | 39 | # TODO: why is this not per agent if action space is? 
40 | @abstractmethod 41 | def getActionMaskSpace(self) -> Continuous: 42 | return NotImplemented 43 | 44 | # TODO: Going forward this should be per agent, too 45 | @abstractmethod 46 | def getObservationSpace(self) -> Continuous: 47 | return NotImplemented 48 | 49 | @abstractmethod 50 | def getMetricsSpace(self) -> Continuous: 51 | return NotImplemented 52 | 53 | @abstractmethod 54 | def getNumberOfAgents(self) -> int: 55 | return NotImplemented 56 | 57 | @abstractmethod 58 | def getActionMask(self, agent_id: int = 0) -> Array: 59 | return NotImplemented 60 | 61 | @abstractmethod 62 | def getObservation(self, agent_id: int = 0) -> Array: 63 | return NotImplemented 64 | 65 | @abstractmethod 66 | def reset(self) -> None: 67 | pass 68 | 69 | @abstractmethod 70 | def setNextAction(self, action: Array, agent_id: int = 0) -> None: 71 | pass 72 | 73 | @abstractmethod 74 | def step(self) -> None: 75 | pass 76 | 77 | @abstractmethod 78 | def isSkip(self, agent_id: int = 0) -> bool: 79 | return NotImplemented 80 | 81 | @abstractmethod 82 | def isDone(self, agent_id: int = 0) -> bool: 83 | return NotImplemented 84 | 85 | @abstractmethod 86 | def getReward(self, agent_id: int = 0) -> float: 87 | return NotImplemented 88 | 89 | @abstractmethod 90 | def getMetrics(self, agent_id: int = 0) -> Array: 91 | return NotImplemented 92 | 93 | @abstractmethod 94 | def getRewardTerms(self, agent_id: int = 0) -> Array: 95 | return NotImplemented 96 | 97 | 98 | def get_environment_class(env_name): 99 | """Get environment class instance from a string, interpreted as Python module 100 | :param env_name: 101 | :return: 102 | """ 103 | class_name = env_name.split(".")[-1] 104 | module = env_name.replace(f".{class_name}", "") 105 | lib = importlib.import_module(module) 106 | return getattr(lib, class_name) 107 | 108 | 109 | def createEnvironment(env_name): 110 | clazz = get_environment_class(env_name) 111 | obj = clazz() 112 | return obj 113 | 
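The `get_environment_class` helper above imports the module part of a dotted path and looks the class up by name. The following self-contained sketch shows the same resolution step; the `collections.OrderedDict` target is just a stand-in for an environment class such as `tests.cartpole.PathmindEnvironment`, which is not importable outside this repository:

```python
import importlib


def resolve_class(dotted_name: str):
    # Split "package.module.ClassName" on the last "." only. Using
    # rpartition avoids mangling paths where the class name substring
    # also appears earlier in the module path, e.g. "tests.mouse.mouse".
    module_path, _, class_name = dotted_name.rpartition(".")
    module = importlib.import_module(module_path)
    return getattr(module, class_name)


# Stand-in target; a real call would pass an environment class path.
cls = resolve_class("collections.OrderedDict")
instance = cls()
```

Note that `get_environment_class` derives the module path with `str.replace`, which removes every occurrence of `.ClassName`; splitting on the last dot, as above, sidesteps that edge case.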
-------------------------------------------------------------------------------- /nativerl/python/tests/test_training.py: -------------------------------------------------------------------------------- 1 | from random import randint 2 | 3 | import pytest 4 | import ray 5 | import run 6 | 7 | 8 | @pytest.mark.integration 9 | def test_gym_training(): 10 | ray.shutdown() 11 | output_dir = f"testoutputs/test-gym-training-{randint(0,1000)}" 12 | run.main( 13 | environment="CartPole-v0", is_gym=True, max_episodes=1, output_dir=output_dir 14 | ) 15 | 16 | 17 | @pytest.mark.integration 18 | def test_or_gym_training(): 19 | ray.shutdown() 20 | output_dir = f"testoutputs/test-or-gym-training-{randint(0,1000)}" 21 | run.main( 22 | environment="Knapsack-v0", is_gym=True, max_episodes=1, output_dir=output_dir 23 | ) 24 | 25 | 26 | @pytest.mark.integration 27 | def test_freezing(): 28 | ray.shutdown() 29 | output_dir = f"testoutputs/test-freezing-{randint(0,1000)}" 30 | run.main( 31 | environment="Knapsack-v0", 32 | is_gym=True, 33 | max_episodes=1, 34 | freezing=True, 35 | output_dir=output_dir, 36 | ) 37 | 38 | 39 | @pytest.mark.integration 40 | def test_pathmind_env_module(): 41 | ray.shutdown() 42 | output_dir = f"testoutputs/test-pathmind-env-module-{randint(0,1000)}" 43 | run.main( 44 | environment="tests.cartpole.PathmindEnvironment", 45 | max_episodes=1, 46 | output_dir=output_dir, 47 | ) 48 | 49 | 50 | @pytest.mark.integration 51 | def test_pathmind_sim_module(): 52 | ray.shutdown() 53 | output_dir = f"testoutputs/test-pathmind-sim-module-{randint(0,1000)}" 54 | run.main( 55 | is_pathmind_simulation=True, 56 | environment="tests.mouse.two_reward.TwoRewardMouseAndCheese", 57 | max_episodes=1, 58 | output_dir=output_dir, 59 | ) 60 | 61 | 62 | @pytest.mark.integration 63 | def test_pathmind_alphas_module(): 64 | ray.shutdown() 65 | output_dir = f"testoutputs/test-pathmind-alphas-module-{randint(0,1000)}" 66 | run.main( 67 | is_pathmind_simulation=True, 68 | 
environment="tests.mouse.two_reward.TwoRewardMouseAndCheese", 69 | max_episodes=1, 70 | output_dir=output_dir, 71 | alphas=[1.0, 5.0], 72 | num_reward_terms=2, 73 | ) 74 | 75 | 76 | @pytest.mark.integration 77 | def test_pathmind_bad_alphas_module(): 78 | ray.shutdown() 79 | output_dir = f"testoutputs/test-pathmind-bad-alphas-module-{randint(0,1000)}" 80 | with pytest.raises(AssertionError) as execinfo: 81 | run.main( 82 | is_pathmind_simulation=True, 83 | environment="tests.mouse.two_reward.TwoRewardMouseAndCheese", 84 | max_episodes=1, 85 | output_dir=output_dir, 86 | alphas=[1.0, 1.0, 1.0, 1.0], 87 | num_reward_terms=2, 88 | ) 89 | assert "alphas array size (4) must be == num_reward_terms (2)" in str( 90 | execinfo.value 91 | ) 92 | 93 | 94 | @pytest.mark.integration 95 | def test_gym_module(): 96 | ray.shutdown() 97 | output_dir = f"testoutputs/test-gym-module-{randint(0,1000)}" 98 | run.main( 99 | environment="tests.gym_cartpole.CartPoleEnv", 100 | is_gym=True, 101 | max_episodes=1, 102 | output_dir=output_dir, 103 | ) 104 | -------------------------------------------------------------------------------- /nativerl-analyzer/README.md: -------------------------------------------------------------------------------- 1 | # pathmind-model-analyzer 2 | 3 | A service that processes an AnyLogic model to extract the data Pathmind needs. 4 | 5 | ## API 6 | 7 | The service exposes one endpoint that receives a ZIP file containing `model.jar` and all other dependencies needed to perform the hyperparameter extraction process.
8 | The API specification is also available via [Swagger](https://swagger.io/), accessible at the `/swagger-ui.html` path. 9 | 10 | ### POST `/extract-hyperparametrs` 11 | 12 | It requires a single input, `file`, which must be a valid ZIP file. The server returns JSON containing the hyperparameters, the reward function, and whether the model is single-agent or multi-agent (plus a list of errors, if any occurred): 13 | 14 | ``` 15 | { 16 | "actions": "4", 17 | "observations": "5", 18 | "rewardFunction": "new double[]{this.kitchenCleanliness, this.payBill.out.count(), this.custFailExit.countPeds(), this.serviceTime_min.mean()}", 19 | "mode": "single" 20 | } 21 | ``` 22 | 23 | ## Setting up local env 24 | 25 | ### Docker container 26 | 27 | Build the `pathmind-model-analyzer` Docker image using [these instructions](https://github.com/SkymindIO/nativerl/tree/dev/nativerl-analyzer#building-docker-image).
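The JSON response shown in the API section above can be consumed with any JSON library. A minimal sketch follows; the field names are taken from the example response, not from a formal schema:

```python
import json

# Example response body from the extraction endpoint (as shown above).
response_text = """
{
  "actions": "4",
  "observations": "5",
  "rewardFunction": "new double[]{this.kitchenCleanliness, this.payBill.out.count()}",
  "mode": "single"
}
"""

payload = json.loads(response_text)
# Note: numeric fields are encoded as strings in the example response.
num_actions = int(payload["actions"])
num_observations = int(payload["observations"])
is_multi_agent = payload["mode"] == "multi"
print(num_actions, num_observations, is_multi_agent)  # → 4 5 False
```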
28 | To run the service in a Docker container, run: 29 | 30 | ```bash 31 | $ docker run -p <port>:8080 pathmind-model-analyzer 32 | ``` 33 | 34 | where `<port>` is the host port on which you want to communicate with the container.
35 | To check whether the service has started (or is still running), use the [actuator](https://docs.spring.io/spring-boot/docs/current/reference/html/production-ready-features.html) health-check endpoint: 36 | 37 | ```bash 38 | $ curl localhost:<port>/actuator/health 39 | % Total % Received % Xferd Average Speed Time Time Time Current 40 | Dload Upload Total Spent Left Speed 41 | 100 15 0 15 0 0 238 0 --:--:-- --:--:-- --:--:-- 238{"status":"UP"} 42 | ``` 43 | 44 | ### IDE 45 | 46 | **NOTE: The current implementation uses the shared library `jniNativeRL.so`, which is built for Unix systems; it is therefore impossible to run it locally on Windows without containerizing or using a virtual machine.**
47 | 48 | To run a local service instance using an IDE: 49 | 50 | - prepare a `/lib/pathmind` directory containing: 51 | - the unzipped content of `nativerl-1.8.1-SNAPSHOT-bin.zip` 52 | - the unzipped content of `baseEnv.zip` 53 | - `cfr-0.148.jar` (`curl -s https://www.benf.org/other/cfr/cfr-0.148.jar -o cfr-0.148.jar`) 54 | - prepare a `/lib/policy` directory containing (naming is important): 55 | - PathmindPolicy_single.jar 56 | - PathmindPolicy_multi.jar 57 | - copy both `check_model.sh` and `check_single_or_multi.sh` from `/resources/scripts` to `/bin` 58 | - copy both `multi_extractor.jar` and `single_extractor.jar` from `/resources/` to `/bin` 59 | 60 | Alternatively, modify the hardcoded paths in the scripts and in the `FileService#CHECK_MODEL_SCRIPT` and `FileService#SINGLE_OR_MULTI_SCRIPT` variables to match your local ones. 61 | 62 | ### Building docker image 63 | 64 | - At the `Dockerfile` directory level, run 65 | ``` 66 | $ docker build -t <image-name> \ 67 | --build-arg S3BUCKET='<s3-bucket>' \ 68 | --build-arg AWS_ACCESS_KEY_ID='<aws-access-key-id>' \ 69 | --build-arg AWS_SECRET_ACCESS_KEY='<aws-secret-access-key>' \ 70 | --build-arg NATIVERL_FOLDER='nativerl/1_8_1' \ 71 | .
72 | ``` 73 | -------------------------------------------------------------------------------- /nativerl/pr_test/script.sh: -------------------------------------------------------------------------------- 1 | set -eo pipefail ; 2 | aws s3 cp s3://${ENVIRONMENT}-training-static-files.pathmind.com/jdk/8_222/OpenJDK8U-jdk_x64_linux_hotspot_8u222b10.tar.gz OpenJDK8U-jdk_x64_linux_hotspot_8u222b10.tar.gz > /dev/null ; 3 | aws s3 cp s3://${ENVIRONMENT}-training-static-files.pathmind.com/conda/0_8_7/rllibpack.tar.gz rllibpack.tar.gz > /dev/null ; 4 | aws s3 cp s3://${ENVIRONMENT}-training-static-files.pathmind.com/nativerl/1_8_1/nativerl-1.8.1-SNAPSHOT-bin.zip nativerl-1.8.1-SNAPSHOT-bin.zip > /dev/null ; 5 | aws s3 cp s3://${ENVIRONMENT}-training-static-files.pathmind.com/anylogic/8_6_1/baseEnv.zip baseEnv.zip > /dev/null ; 6 | aws s3 cp s3://${ENVIRONMENT}-training-static-files.pathmind.com/pathmindhelper/1_2_0/PathmindPolicy.jar PathmindPolicy.jar > /dev/null ; 7 | aws s3 cp s3://dh-training-dynamic-files.pathmind.com/model_file/386 model.zip > /dev/null ; 8 | tar xf OpenJDK8U-jdk_x64_linux_hotspot_8u222b10.tar.gz > /dev/null ; 9 | rm -rf OpenJDK8U-jdk_x64_linux_hotspot_8u222b10.tar.gz ; 10 | export JAVA_HOME=`pwd`/jdk8u222-b10 ; 11 | export JDK_HOME=$JAVA_HOME ; 12 | export JRE_HOME=$JAVA_HOME/jre ; 13 | export PATH=$JAVA_HOME/bin:$PATH ; 14 | export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server:$JAVA_HOME/jre/lib/amd64/:$LD_LIBRARY_PATH ; 15 | mkdir -p conda ; 16 | cd conda ; 17 | tar xf ../rllibpack.tar.gz > /dev/null ; 18 | rm ../rllibpack.tar.gz ; 19 | source bin/activate ; 20 | cd .. ; 21 | mkdir -p work ; 22 | cd work ; 23 | unzip ../nativerl-1.8.1-SNAPSHOT-bin.zip > /dev/null ; 24 | rm ../nativerl-1.8.1-SNAPSHOT-bin.zip ; 25 | mv nativerl-bin/* . ; 26 | mv examples/train.sh . ; 27 | cd .. 
; 28 | unzip baseEnv.zip > /dev/null ; 29 | rm baseEnv.zip ; 30 | mv baseEnv/* work/ ; 31 | rm -r baseEnv ; 32 | mv PathmindPolicy.jar work/lib/ ; 33 | cd work ; 34 | unzip ../model.zip > /dev/null ; 35 | rm ../model.zip ; 36 | export CLASS_SNIPPET='' ; 37 | export RESET_SNIPPET='' ; 38 | export REWARD_SNIPPET='reward += after.fuelRemaining - before.fuelRemaining; 39 | reward += Math.abs(before.distanceToX) - Math.abs(after.distanceToX); 40 | reward += Math.abs(before.distanceToY) - Math.abs(after.distanceToY); 41 | reward += before.distanceToZ - after.distanceToZ; 42 | 43 | reward += after.landed == 1 ? 3 : 0; 44 | reward -= after.crashed == 1 ? 0.3 : 0; 45 | reward -= after.gotAway == 1 ? 1 : 0; 46 | 47 | reward -= before.distanceToZ <= 100. / 1500. && Math.abs(after.speedX) > 200 ? 0.01 : 0; 48 | reward -= before.distanceToZ <= 100. / 1500. && Math.abs(after.speedY) > 200 ? 0.01 : 0; 49 | reward -= before.distanceToZ <= 100. / 1500. && Math.abs(after.speedZ) > 200 ? 0.01 : 0;' ; 50 | export OBSERVATION_SNIPPET='out = new double[9]; 51 | out[0] = in.powerXYZ[0]; 52 | out[1] = in.powerXYZ[1]; 53 | out[2] = in.powerXYZ[2]; 54 | out[3] = in.moduleXYZ[0]; 55 | out[4] = in.moduleXYZ[1]; 56 | out[5] = in.moduleXYZ[2]; 57 | out[6] = in.distanceXYZ[0]; 58 | out[7] = in.distanceXYZ[1]; 59 | out[8] = in.distanceXYZ[2];' ; 60 | export METRICS_SNIPPET='' ; 61 | export MAX_ITERATIONS='500' ; 62 | export TEST_ITERATIONS='0' ; 63 | export MAX_TIME_IN_SEC='43200' ; 64 | export NUM_SAMPLES='4' ; 65 | export MULTIAGENT='false' ; 66 | export RESUME=${RESUME:='false'} ; 67 | export CHECKPOINT_FREQUENCY='50' ; 68 | export EPISODE_REWARD_RANGE='0.01' ; 69 | export ENTROPY_SLOPE='0.01' ; 70 | export VF_LOSS_RANGE='0.1' ; 71 | export VALUE_PRED='1' ; 72 | export USER_LOG='false' ; 73 | export DEBUGMETRICS='true' ; 74 | export NAMED_VARIABLE='true' ; 75 | echo > setup.sh ; 76 | mkdir -p database ; 77 | touch database/db.properties ; 78 | source train.sh ; 79 | exit $? 
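The `REWARD_SNIPPET` exported above encodes a shaped landing reward in Java. Its arithmetic can be sketched in plain Python for readability; the `before`/`after` dicts below simply mirror the fields referenced by the snippet, and this is an illustration, not part of the training pipeline:

```python
def shaped_reward(before, after):
    """Python mirror of the Java REWARD_SNIPPET above, using dicts for agent state."""
    reward = 0.0
    # Conserve fuel and close the distance on each axis.
    reward += after["fuelRemaining"] - before["fuelRemaining"]
    reward += abs(before["distanceToX"]) - abs(after["distanceToX"])
    reward += abs(before["distanceToY"]) - abs(after["distanceToY"])
    reward += before["distanceToZ"] - after["distanceToZ"]
    # Terminal bonuses and penalties.
    reward += 3 if after["landed"] == 1 else 0
    reward -= 0.3 if after["crashed"] == 1 else 0
    reward -= 1 if after["gotAway"] == 1 else 0
    # Penalise high speed on any axis when close to the ground.
    if before["distanceToZ"] <= 100.0 / 1500.0:
        for axis in ("speedX", "speedY", "speedZ"):
            if abs(after[axis]) > 200:
                reward -= 0.01
    return reward


before = {"fuelRemaining": 10.0, "distanceToX": 5.0, "distanceToY": -3.0, "distanceToZ": 2.0}
after = {"fuelRemaining": 9.0, "distanceToX": 4.0, "distanceToY": -2.0, "distanceToZ": 1.0,
         "landed": 0, "crashed": 0, "gotAway": 0, "speedX": 0.0, "speedY": 0.0, "speedZ": 0.0}
print(shaped_reward(before, after))  # → 2.0
```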
80 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | def SLACK_URL = "https://hooks.slack.com/services/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" 2 | def icon = ":heavy_check_mark:" 3 | /* 4 | nativerl pipeline 5 | The pipeline is made up of the following steps: 6 | 1. Git clone and setup 7 | 2. Build and s3 push 8 | 3. Optionally deploy to production and test 9 | */ 10 | 11 | /* 12 | Build a docker image 13 | */ 14 | def buildNativerl(image_name) { 15 | echo "Building the nativerl Docker Image for branch ${env.BRANCH_NAME}" 16 | sh """ 17 | set +x 18 | docker image ls | grep nativerl | awk '{print \$3}' | xargs -I {} docker rmi {} -f 19 | docker build -t ${image_name} -f ${WORKSPACE}/Dockerfile ${WORKSPACE} 20 | """ 21 | sh "docker run --mount \"src=${WORKSPACE}/,target=/app,type=bind\" nativerl mvn clean install -Djavacpp.platform=linux-x86_64" 22 | sh "aws s3 cp ${WORKSPACE}/nativerl/target/nativerl-1.8.1-SNAPSHOT-bin.zip s3://${env.BRANCH_NAME}-training-static-files.pathmind.com/nativerl/1_8_1/nativerl-1.8.1-SNAPSHOT-bin.zip" 23 | } 24 | 25 | /* 26 | This is the main pipeline section with the stages of the CI/CD 27 | */ 28 | pipeline { 29 | 30 | options { 31 | // Build auto timeout 32 | timeout(time: 60, unit: 'MINUTES') 33 | disableConcurrentBuilds() 34 | } 35 | 36 | // Some global default variables 37 | environment { 38 | IMAGE_NAME = 'nativerl' 39 | DEPLOY_PROD = false 40 | } 41 | 42 | //all is built and run from the master 43 | agent { node { label 'master' } } 44 | 45 | // Pipeline stages 46 | stages { 47 | stage('Git clone and setup') { 48 | when { 49 | anyOf { 50 | environment name: 'GIT_BRANCH', value: 'dev' 51 | environment name: 'GIT_BRANCH', value: 'test' 52 | environment name: 'GIT_BRANCH', value: 'staging' 53 | environment name: 'GIT_BRANCH', value: 'prod' 54 | } 55 | } 56 | steps { 57 | echo "Notifying slack" 58 | sh
"set +x; curl -X POST -H 'Content-type: application/json' --data '{\"text\":\":building_construction: Starting Jenkins Job\nBranch: ${env.BRANCH_NAME}\nUrl: ${env.RUN_DISPLAY_URL}\"}' ${SLACK_URL}" 59 | echo "Check out code" 60 | checkout scm 61 | } 62 | } 63 | 64 | stage('Build Docker Images') { 65 | parallel { 66 | stage('Build nativerl image') { 67 | when { 68 | anyOf { 69 | environment name: 'GIT_BRANCH', value: 'dev' 70 | environment name: 'GIT_BRANCH', value: 'test' 71 | environment name: 'GIT_BRANCH', value: 'staging' 72 | environment name: 'GIT_BRANCH', value: 'prod' 73 | } 74 | } 75 | steps { 76 | buildNativerl("${IMAGE_NAME}") 77 | } 78 | } 79 | } 80 | } 81 | } 82 | post { 83 | always { 84 | echo 'Notifying Slack' 85 | script { 86 | if (currentBuild.result != "SUCCESS") { 87 | icon = ":x:" 88 | } 89 | } 90 | echo "Notifying slack" 91 | sh "set +x; curl -X POST -H 'Content-type: application/json' --data '{\"text\":\"${icon} Jenkins Job Finished\nBranch: ${env.BRANCH_NAME}\nUrl: ${env.RUN_DISPLAY_URL}\nStatus: ${currentBuild.result}\"}' ${SLACK_URL}" 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /nativerl-policy/src/test/java/ai/skymind/nativerl/ActionProcessorTest.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import ai.skymind.nativerl.annotation.Discrete; 4 | import ai.skymind.nativerl.annotation.Continuous; 5 | import java.util.Random; 6 | import org.junit.Test; 7 | 8 | import static org.junit.Assert.assertArrayEquals; 9 | import static org.junit.Assert.assertEquals; 10 | import static org.junit.Assert.assertTrue; 11 | import static org.junit.Assert.fail; 12 | 13 | /** 14 | * 15 | * @author saudet 16 | */ 17 | public class ActionProcessorTest { 18 | boolean didIt = false; 19 | 20 | class TestActions { 21 | @Discrete(n = 50) int action1; 22 | @Discrete(n = 50, size = 2) long[] action2; 23 | @Continuous(low = {10, 
20}, high = {30, 40}, shape = 2) float[] action3; 24 | @Continuous(low = 0, high = 1, shape = {2, 2}) double[] action4; 25 | 26 | public void doIt() { 27 | if (didIt) { 28 | // random values 29 | assertTrue(action1 >= 0 && action1 < 50); 30 | assertTrue(action2[0] >= 0 && action2[0] < 50); 31 | assertTrue(action2[1] >= 0 && action2[1] < 50); 32 | assertTrue(action3[0] >= 10 && action3[0] < 30); 33 | assertTrue(action3[1] >= 20 && action3[1] < 40); 34 | assertTrue(action4[0] >= 0.0 && action4[0] < 1.0); 35 | assertTrue(action4[1] >= 0.0 && action4[1] < 1.0); 36 | assertTrue(action4[2] >= 0.0 && action4[2] < 1.0); 37 | assertTrue(action4[3] >= 0.0 && action4[3] < 1.0); 38 | return; 39 | } 40 | assertEquals(37, action1); 41 | assertArrayEquals(new long[] {42, 64}, action2); 42 | assertArrayEquals(new float[] {20, 30}, action3, 0); 43 | assertArrayEquals(new double[] {0.1, 0.2, 0.3, 0.4}, action4, 0); 44 | didIt = true; 45 | } 46 | } 47 | 48 | void actions(int agentId) { 49 | class DummyActions extends TestActions { 50 | @Discrete(n = 25) long action5 = agentId; 51 | private boolean ignoreMe = true; 52 | } 53 | } 54 | 55 | @Test public void testActions() { 56 | try { 57 | ActionProcessor ap = new ActionProcessor(this.getClass()); 58 | assertEquals("DummyActions", ap.getActionClass().getSimpleName()); 59 | assertArrayEquals(new String[] {"action1", "action2[0]", "action2[1]", "action3[0]", "action3[1]", 60 | "action4[0]", "action4[1]", "action4[2]", "action4[3]", "action5"}, ap.getActionNames(this)); 61 | AnnotationProcessor[] spaces = ap.getActionSpaces(); 62 | assertEquals(5, spaces.length); 63 | assertEquals(50, spaces[0].n); 64 | assertEquals(50, spaces[1].n); 65 | assertArrayEquals(new long[] {2}, spaces[2].shape); 66 | assertArrayEquals(new long[] {2, 2}, spaces[3].shape); 67 | ap.doActions(this, new double[] {37, 42, 64, 20, 30, 0.1, 0.2, 0.3, 0.4, 24}, false, 24); 68 | assertTrue(didIt); 69 | didIt = false; 70 | ap.doActions(this, new double[] {37, 42, 64, 0.5, 
0.5, 0.1, 0.2, 0.3, 0.4, 24}, true, 24); 71 | assertTrue(didIt); 72 | for (int i = 0; i < 100; i++) { 73 | double[] a = ap.getActions(this, new Random(i), 24); 74 | ap.doActions(this, a, false, 24); 75 | } 76 | } catch (ReflectiveOperationException ex) { 77 | fail(ex.getMessage()); 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /nativerl-policy/src/main/java/ai/skymind/nativerl/ServerPolicyHelper.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import ai.skymind.nativerl.exception.PathmindInvalidResponseException; 4 | import ai.skymind.nativerl.util.ObjectMapperHolder; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import okhttp3.*; 7 | 8 | import java.util.List; 9 | 10 | import static java.net.HttpURLConnection.*; 11 | 12 | public class ServerPolicyHelper implements PolicyHelper { 13 | private static class Action { 14 | private List<Integer> actions; 15 | private Double probability; 16 | } 17 | 18 | private ObjectMapper objectMapper = ObjectMapperHolder.getJsonMapper(); 19 | private OkHttpClient client = null; 20 | 21 | @Override 22 | public float[] computeActions(float[] state) { 23 | throw new UnsupportedOperationException("Unsupported method for ServerPolicyHelper"); 24 | } 25 | 26 | @Override 27 | public long[] computeDiscreteAction(float[] state) { 28 | throw new UnsupportedOperationException("Unsupported method for ServerPolicyHelper"); 29 | } 30 | 31 | @Override 32 | public double[] computeActions(String baseUrl, String token, String postBody) { 33 | if (disablePolicyHelper) { 34 | return null; 35 | } 36 | 37 | try { 38 | if (client == null) { 39 | client = new OkHttpClient(); 40 | } 41 | 42 | RequestBody requestBody = RequestBody.create( 43 | MediaType.parse("application/json; charset=utf-8"), postBody); 44 | 45 | Request.Builder builder = new Request.Builder().url(buildPredictPath(baseUrl)) 46 |
.addHeader("access-token", token) 47 | .post(requestBody); 48 | Request request = builder.build(); 49 | 50 | Response response = client.newCall(request).execute(); 51 | if (response.isSuccessful()) { 52 | ResponseBody body = response.body(); 53 | if (body != null) { 54 | String bodyStr = body.string(); 55 | int k = 0; 56 | Action action = objectMapper.readValue(bodyStr, Action.class); 57 | double[] actionArray = new double[action.actions.size()]; 58 | for (Integer a : action.actions) { 59 | actionArray[k++] = (double)a; 60 | } 61 | return actionArray; 62 | } 63 | } else { 64 | switch (response.code()) { 65 | case HTTP_UNAUTHORIZED: 66 | throw new PathmindInvalidResponseException("Make sure your Policy Server is up and Policy Server URL is valid."); 67 | case HTTP_FORBIDDEN: 68 | throw new PathmindInvalidResponseException("Make sure your token is valid."); 69 | case HTTP_NOT_FOUND: 70 | throw new PathmindInvalidResponseException("You reached out to wrong path. Please contact Pathmind team."); 71 | case 422: // observation mismatch 72 | throw new PathmindInvalidResponseException("Make sure your AL model's Observation is the same with Policy Server's expected Observation"); 73 | default: 74 | throw new PathmindInvalidResponseException("Error Occurred " + response); 75 | } 76 | } 77 | } catch (Exception e) { 78 | e.printStackTrace(); 79 | } 80 | return null; 81 | } 82 | 83 | public static String buildPredictPath(String baseURL) { 84 | baseURL = baseURL.replaceAll("/$", ""); 85 | 86 | return baseURL + "/predict/"; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /nativerl/python/tests/factory/util/writer.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | from ..models import Node 5 | from ..simulation import Factory 6 | 7 | SCALE_X, SCALE_Y = 4, 2 8 | 9 | 10 | def load_image(file_name="../assets/large_factory.jpg"): 11 | image =
cv2.imread(file_name, cv2.IMREAD_COLOR) 12 | # image.astype(np.uint8) 13 | return image 14 | 15 | 16 | def node_type(node: Node) -> str: 17 | """Node, Table or Core?""" 18 | text = "N" 19 | if node.has_table(): 20 | text = "T" 21 | if node.table.has_core(): 22 | text = "C" 23 | return text 24 | 25 | 26 | def draw_boxes(factory: Factory, original_img: np.array): 27 | img = np.copy(original_img) 28 | for node in factory.nodes: 29 | coords = node.coordinates 30 | pos = (coords[0] * 100, coords[1] * 100) 31 | text = node.name 32 | nd_type = node_type(node) 33 | img = draw_box(img, pos, text, nd_type) 34 | return img 35 | 36 | 37 | def draw_box(img: np.array, pos=(0, 0), text: str = "pt_01", nd_type: str = "N"): 38 | top_left = pos 39 | bottom_right = (pos[0] + 100, pos[1] + 100) 40 | if nd_type == "C": 41 | thickness = -1 42 | color = (255, 0, 0) 43 | elif nd_type == "T": 44 | thickness = -1 45 | color = (0, 255, 0) 46 | else: 47 | thickness = 2 48 | color = (0, 0, 0) 49 | 50 | img = cv2.rectangle(img, top_left, bottom_right, color, int(thickness)) 51 | 52 | top_left = (pos[0] + 2, pos[1] + 10) 53 | bottom_right = (pos[0] + 98, pos[1] + 40) 54 | img = cv2.rectangle(img, top_left, bottom_right, (255, 255, 255), cv2.FILLED) 55 | position = (pos[0] + 10, pos[1] + 30) 56 | img = cv2.putText( 57 | img, 58 | text, 59 | position, 60 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 61 | 1, 62 | (0, 0, 0), 63 | 2, 64 | cv2.LINE_AA, 65 | ) 66 | 67 | if nd_type != "N": 68 | position = (pos[0] + 43, pos[1] + 70) 69 | img = cv2.putText( 70 | img, 71 | nd_type, 72 | position, 73 | cv2.FONT_HERSHEY_SIMPLEX, 74 | 1, 75 | (0, 0, 0), 76 | 4, 77 | cv2.LINE_AA, 78 | ) 79 | 80 | return img 81 | 82 | 83 | def factory_string(factory: Factory, fill_char="·", line_break="\n") -> str: 84 | nodes = factory.nodes 85 | max_x = max([n.coordinates[0] for n in nodes]) + 1 86 | max_y = max([n.coordinates[1] for n in nodes]) + 1 87 | 88 | grid = [[fill_char] * max_x * SCALE_X for _ in range(max_y * SCALE_Y)] 89 | 90 
| for node in nodes: 91 | x, y = node.coordinates 92 | text = node_type(node) 93 | grid[y * SCALE_Y][x * SCALE_X] = text 94 | for direction, nb in node.neighbours.items(): 95 | if nb: 96 | if direction == "left": 97 | grid[y * SCALE_Y][x * SCALE_X - SCALE_X + 1 : x * SCALE_X] = [ 98 | "=" 99 | ] * (SCALE_X - 1) 100 | elif direction == "right": 101 | grid[y * SCALE_Y][x * SCALE_X + 1 : x * SCALE_X + SCALE_X] = [ 102 | "=" 103 | ] * (SCALE_X - 1) 104 | elif direction == "up": 105 | grid[y * SCALE_Y - 1][x * SCALE_X] = "║" 106 | elif direction == "down": 107 | grid[y * SCALE_Y + 1][x * SCALE_X] = "║" 108 | grid = ["".join(line) for line in grid] 109 | return line_break.join(grid) 110 | 111 | 112 | def print_factory(factory: Factory, clear=True): 113 | if clear: 114 | clear_screen() 115 | print(factory_string(factory)) 116 | 117 | 118 | def clear_screen(): 119 | print(chr(27) + "[2J") 120 | -------------------------------------------------------------------------------- /nativerl/python/pathmind_training/models.py: -------------------------------------------------------------------------------- 1 | from ray.rllib.models import MODEL_DEFAULTS, ModelCatalog 2 | from ray.rllib.models.tf.fcnet import FullyConnectedNetwork 3 | from ray.rllib.models.tf.tf_modelv2 import TFModelV2 4 | from ray.rllib.utils import try_import_tf 5 | 6 | tf1_module, tf, version = try_import_tf() 7 | 8 | 9 | def get_custom_model( 10 | num_hidden_nodes: int, 11 | num_hidden_layers: int, 12 | autoregressive: bool, 13 | action_masking: bool, 14 | discrete: bool, 15 | ): 16 | model = MODEL_DEFAULTS.copy() 17 | 18 | if action_masking and not discrete: 19 | msg = "Action masking only supported for discrete actions." 20 | raise ValueError(msg) 21 | 22 | if action_masking and autoregressive: 23 | msg = "Action masking and auto-regression can't be enabled simultaneously." 
24 | raise ValueError(msg) 25 | 26 | hidden_layers = [num_hidden_nodes for _ in range(num_hidden_layers)] 27 | model["fcnet_hiddens"] = hidden_layers 28 | model["vf_share_layers"] = False 29 | 30 | if autoregressive: 31 | from pathmind_training.autoregression import ( 32 | get_autoregressive_action_distribution, 33 | get_autoregressive_actions_model, 34 | ) 35 | 36 | # TODO: need input arguments on run in general 37 | num_actions = 2 38 | tuple_length = 3 39 | 40 | model = get_autoregressive_actions_model( 41 | num_actions=num_actions, tuple_length=tuple_length 42 | ) 43 | distro = get_autoregressive_action_distribution(tuple_length=tuple_length) 44 | 45 | ModelCatalog.register_custom_model("autoregressive_model", model) 46 | ModelCatalog.register_custom_action_dist("n_ary_autoreg_output", distro) 47 | 48 | model = { 49 | "custom_model": "autoregressive_model", 50 | "custom_action_dist": "n_ary_autoreg_output", 51 | } 52 | 53 | if action_masking: 54 | masking_model = get_action_masking_model(hidden_layers) 55 | ModelCatalog.register_custom_model("action_masking_tf_model", masking_model) 56 | model = {"custom_model": "action_masking_tf_model"} 57 | 58 | return model 59 | 60 | 61 | def get_action_masking_model(hidden_layers): 62 | class ActionMaskingTFModel(TFModelV2): 63 | """Custom TF Model that masks out illegal moves. Works for any 64 | RLlib algorithm (tested only on PPO and DQN so far, though). 65 | """ 66 | 67 | def __init__( 68 | self, obs_space, action_space, num_outputs, model_config, name, **kw 69 | ): 70 | super().__init__( 71 | obs_space, action_space, num_outputs, model_config, name, **kw 72 | ) 73 | 74 | model_config["fcnet_hiddens"] = hidden_layers 75 | model_config["vf_share_layers"] = False 76 | 77 | self.base_model = FullyConnectedNetwork( 78 | obs_space.original_space["real_obs"], 79 | action_space, 80 | num_outputs, 81 | model_config, 82 | name, 83 | ) 84 | 85 | # Necessary for Ray 1.0.0. Remove for Ray 1.3.0+.
86 | # self.register_variables(self.base_model.variables()) 87 | 88 | def forward(self, input_dict, state, seq_lens): 89 | logits, _ = self.base_model({"obs": input_dict["obs"]["real_obs"]}) 90 | action_mask = input_dict["obs"]["action_mask"] 91 | inf_mask = tf.math.maximum(tf.math.log(action_mask), tf.float32.min) 92 | return logits + inf_mask, state 93 | 94 | def value_function(self): 95 | return self.base_model.value_function() 96 | 97 | def import_from_h5(self, h5_file): 98 | pass 99 | 100 | return ActionMaskingTFModel 101 | -------------------------------------------------------------------------------- /nativerl-analyzer/common/src/main/java/io/skymind/pathmind/analyzer/dto/HyperparametersDTO.java: -------------------------------------------------------------------------------- 1 | package io.skymind.pathmind.analyzer.dto; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | import java.util.List; 8 | import java.util.Set; 9 | import java.util.stream.Collectors; 10 | 11 | @NoArgsConstructor 12 | @AllArgsConstructor 13 | @Data 14 | public class HyperparametersDTO { 15 | 16 | private final static Set<String> KNOWN_OUTPUT = Set.of( 17 | "model-analyzer-mode", 18 | "model-analyzer-error", 19 | "DTOPath" 20 | ); 21 | 22 | // @ApiModelProperty(value = "Whether the pathmind helper is enabled or not", example = "true") 23 | private boolean isEnabled = false; 24 | 25 | // @ApiModelProperty(value = "Flag for when old model versions are found", example = "true") 26 | private boolean oldVersionFound = false; 27 | 28 | private List<String> agentParams; 29 | 30 | // @ApiModelProperty(value = "Number of observations extracted from model", example = "10", required = true) 31 | // @NotBlank(message = "Number of observations cannot be blank") 32 | private String observations; 33 | 34 | // @ApiModelProperty(value = "Observations names extracted from model", example = "[\"orderQueueSize\", \"collectQueueSize\"]", required = true) 35 | //
@NotBlank(message = "Observation names cannot be empty") 36 | private List<String> observationNames; 37 | 38 | // @NotBlank(message = "Observation types cannot be empty") 39 | private List<String> observationTypes; 40 | 41 | // @ApiModelProperty(value = "Number of actions extracted from model", example = "5", required = true) 42 | // @NotBlank(message = "Number of actions cannot be blank") 43 | private String actions; 44 | 45 | // @ApiModelProperty(value = "Whether the action mask is enabled or not", example = "true") 46 | private boolean isActionMask; 47 | 48 | // @ApiModelProperty(value = "Length of reward variables array extracted from model", example = "7", required = true) 49 | // @NotBlank(message = "Reward variables count cannot be blank") 50 | private String rewardVariablesCount; 51 | 52 | // @ApiModelProperty(value = "Reward variable names extracted from model", example = "[\"var1\", \"var2\"]", required = true) 53 | // @NotNull(message = "Reward variable names is required") 54 | // @NotEmpty(message = "Reward variable names cannot be empty") 55 | private List<String> rewardVariableNames; 56 | 57 | // @ApiModelProperty(value = "Reward variable types extracted from model", example = "[\"int\", \"boolean\"]", required = true) 58 | // @NotNull(message = "Reward variable names is required") 59 | // @NotEmpty(message = "Reward variable types cannot be empty") 60 | private List<String> rewardVariableTypes; 61 | 62 | // @ApiModelProperty(value = "Reward function definition", required = true) 63 | // @NotBlank(message = "Reward function definition cannot be blank") TODO: validate not empty reward function 64 | private String rewardFunction; 65 | 66 | // @JsonInclude(JsonInclude.Include.NON_EMPTY) 67 | // @ApiModelProperty(value = "Steps which failed while extracting hyperparameters") 68 | private String failedSteps; 69 | 70 | // @ApiModelProperty(value = "the number of agents", required = true) 71 | // @NotBlank(message = "Agents cannot be blank") 72 | private String agents; 73 | 74 | //
@ApiModelProperty(value = "Extraction mode (single/multi)", required = true) 75 | // @NotBlank(message = "Mode cannot be blank") 76 | private String mode; 77 | 78 | private static List<String> filterOutEmpty(List<String> source) { 79 | return source.stream().filter(s -> !s.isEmpty()).collect(Collectors.toList()); 80 | } 81 | 82 | public static boolean isHyperparameters(String parameterCandidate) { 83 | return KNOWN_OUTPUT.contains(parameterCandidate); 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /nativerl/python/tests/factory/config.yml: -------------------------------------------------------------------------------- 1 | config: 2 | # Fixed settings (don't touch) 3 | env_name: factory 4 | actions: 5 5 | low: -1 6 | high: 11 7 | 8 | # Factory settings 9 | layout: big # either "big", "medium", or "small" 10 | scenario: "fixed_6" # either random, random_fixed_targets, fixed_2, fixed_4, fixed_6, fixed_8, fixed_10, or fixed_12 11 | num_tables: 6 12 | num_cores: 6 13 | num_phases: 1 14 | with_rails: true # If "false", remove all rails from the grid 15 | random_init: true # if random initialization is False, the factory will reset to the same state after each episode. 16 | # seed: 1337 # Optional random seed to have full control over initial factory state. 17 | 18 | # RL-specific configuration 19 | env: "TupleFactoryEnv" # "FactoryEnv", "RoundRobinFactoryEnv", "MultiAgentFactoryEnv" and "TupleFactoryEnv" 20 | max_num_steps: 10000 # NOTE: need to set this high enough for the big factory 21 | masking: false # whether to use action masking TODO: does not work with TupleFactoryEnv (ray issue?)
22 | autoregressive: false # whether to use autoregressive model TODO: only works with 6-tuple 23 | algorithm: PPO # Choose from PPO, DQN, MARWIL 24 | use_offline_data: false # Use previously generated offline data (don't use for now, experimental) 25 | offline_data_ratio: 0.5 26 | num_samples: 4 # Ray rllib's "num_samples" extracted for convenience 27 | multi_policy: false # Using multiple policies or not. This only works for "MultiAgentEnv" 28 | fcnet_hiddens: [256, 256, 128, 128, 64] # [512, 512] 29 | use_lstm: false 30 | 31 | # Observation selection 32 | ## Agent & core obs 33 | obs_agent_id: false 34 | obs_agent_coordinates: false 35 | obs_agent_has_core: false 36 | obs_agent_core_target_coordinates: false 37 | obs_all_table_coordinates: false 38 | 39 | ## Neighbour obs (unnecessary with action masking) 40 | obs_agent_has_neighbour: false 41 | obs_agent_free_neighbour: false 42 | 43 | ## One-hot representation obs: current id and target, plus all tables, cores and targets 44 | obs_agent_id_one_hot: false 45 | obs_agent_core_target_one_hot: false 46 | 47 | obs_all_tables_one_hot: false 48 | obs_all_cores_one_hot: false 49 | obs_all_targets_one_hot: false 50 | 51 | ## For using round-robin with tuple observations 52 | obs_agent_table_id_one_hot: false 53 | 54 | ## Overall layout observations 55 | obs_all_node_target_pairs_one_hot: false 56 | 57 | ## Tuple observations 58 | obs_all_table_node_pairs_one_hot: true 59 | obs_all_table_target_pairs_one_hot: true 60 | 61 | ## Reward selection 62 | 63 | ## Positive rewards 64 | rew_found_target: 65 | value: false 66 | weight: 100 67 | rew_found_target_squared: 68 | value: false 69 | weight: 10 70 | 71 | rew_found_target_physical: 72 | value: true 73 | weight: 100 74 | rew_found_target_physical_squared: 75 | value: false 76 | weight: 10 77 | 78 | ## Negative rewards 79 | rew_collisions: 80 | # it seems for multi-agent there isn't much effect here anyways, masking works well already. 
81 | # collisions can be avoided altogether with tuple + auto-regression, making this term obsolete. 82 | value: true 83 | weight: .1 84 | 85 | # Those two will *always* happen in a "crowded" factory, no matter how smart the algorithm. 86 | # If these terms are useful, we should set the positive rewards high enough to counteract this. 87 | rew_blocking_path: 88 | value: false 89 | weight: 1 90 | rew_blocking_target: 91 | value: false 92 | weight: 5 93 | 94 | rew_avoid_cores: # does not seem very useful in the current formulation, superseded by "rew_blocking_path" 95 | value: false 96 | weight: 1 97 | rew_punish_slow_tables: 98 | value: true 99 | weight: 300 100 | tighten_max_steps: false # Set to "false" if you don't want to allow less and less steps per episode 101 | discount_episodes_by: 400 102 | discount_episodes_until: 0.25 103 | -------------------------------------------------------------------------------- /nativerl-analyzer/generator/src/main/java/io/skymind/pathmind/analyzer/code/CodeGenerator.java: -------------------------------------------------------------------------------- 1 | package io.skymind.pathmind.analyzer.code; 2 | 3 | import com.github.jknack.handlebars.Handlebars; 4 | import com.github.jknack.handlebars.Template; 5 | import com.github.jknack.handlebars.helper.ConditionalHelpers; 6 | import com.github.jknack.handlebars.io.ClassPathTemplateLoader; 7 | import com.github.jknack.handlebars.io.TemplateLoader; 8 | import lombok.Builder; 9 | import lombok.Getter; 10 | import lombok.Setter; 11 | 12 | import java.io.File; 13 | import java.io.IOException; 14 | import java.nio.file.Files; 15 | 16 | @Getter 17 | @Builder 18 | public class CodeGenerator { 19 | 20 | @Builder.Default 21 | String agentClassName; 22 | @Setter 23 | String packageName; 24 | @Setter 25 | String simulationClassName; 26 | @Setter 27 | String pathmindHelperClassName; 28 | @Setter 29 | String experimentType; 30 | @Setter 31 | boolean isRLExperiment; 32 | 33 | public boolean 
getIsRLExperiment() { 34 | return isRLExperiment; 35 | } 36 | 37 | private final static String MODEL_ANALYZER_NAME = "ModelAnalyzer.java"; 38 | private final static String TRAINING_NAME = "Training.java"; 39 | private final static String LEARNING_AGENT = "PathmindLearningAgent.java"; 40 | 41 | public void generateEnvironment(File file) throws IOException { 42 | if (!file.exists()) { 43 | file.mkdirs(); 44 | } 45 | 46 | File modelAnalyzer = new File(file, MODEL_ANALYZER_NAME); 47 | Files.write(modelAnalyzer.toPath(), generateEnvironment(MODEL_ANALYZER_NAME).getBytes()); 48 | 49 | if (!isRLExperiment) { 50 | File training = new File(file, TRAINING_NAME); 51 | Files.write(training.toPath(), generateEnvironment(TRAINING_NAME).getBytes()); 52 | } 53 | 54 | File pathmindLearningAgentPath = new File("com/pathmind/anylogic"); 55 | if (!pathmindLearningAgentPath.exists()) { 56 | pathmindLearningAgentPath.mkdirs(); 57 | } 58 | File learningAgent = new File(pathmindLearningAgentPath, LEARNING_AGENT); 59 | Files.write(learningAgent.toPath(), generateEnvironment(LEARNING_AGENT).getBytes()); 60 | } 61 | 62 | public String generateEnvironment(String fileName) throws IOException { 63 | this.setRLExperiment(experimentType.equals("RLExperiment")); 64 | 65 | TemplateLoader loader = new ClassPathTemplateLoader(); 66 | loader.setPrefix("/templates/"); 67 | loader.setSuffix(".hbs"); 68 | Handlebars handlebars = new Handlebars(loader); 69 | 70 | handlebars.registerHelpers(ConditionalHelpers.class); 71 | Template template = handlebars.compile(fileName); 72 | 73 | String env = template.apply(this); 74 | return env; 75 | } 76 | 77 | public static void main(String[] args) throws IOException { 78 | CodeGenerator.CodeGeneratorBuilder builder = CodeGenerator.builder(); 79 | 80 | for (int i = 0; i < args.length; i++) { 81 | if ("--agent-class-name".equals(args[i])) { 82 | builder.agentClassName(args[++i]); 83 | } else if ("--package-name".equals(args[i])) { 84 | builder.packageName(args[++i]); 
85 | } else if ("--simulation-class-name".equals(args[i])) { 86 | builder.simulationClassName(args[++i]); 87 | } else if ("--pathmind-helper-class-name".equals(args[i])) { 88 | builder.pathmindHelperClassName(args[++i]); 89 | } else if ("--experiment-type".equals(args[i])) { 90 | builder.experimentType(args[++i]); 91 | } 92 | } 93 | 94 | CodeGenerator codeGenerator = builder.build(); 95 | String path = codeGenerator.getPackageName(); 96 | if (path == null || path.isEmpty()) { 97 | path = "."; 98 | } else { 99 | path = path.replaceAll("\\.", File.separator); 100 | } 101 | codeGenerator.generateEnvironment(new File(path)); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /nativerl-policy/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | 7 | ai.skymind 8 | nativerl-parent 9 | 1.8.1-SNAPSHOT 10 | 11 | 12 | ai.skymind 13 | nativerl-policy 14 | 15 | NativeRL Policy 16 | 17 | 18 | 19 | org.bytedeco 20 | mkl-dnn-platform 21 | ${mkldnn.version} 22 | 23 | 24 | junit 25 | junit 26 | 4.13.1 27 | true 28 | 29 | 30 | com.squareup.okhttp3 31 | okhttp 32 | 3.10.0 33 | 34 | 35 | com.fasterxml.jackson.core 36 | jackson-databind 37 | 2.10.5.1 38 | 39 | 40 | 41 | 42 | 43 | 44 | maven-compiler-plugin 45 | 46 | 47 | maven-javadoc-plugin 48 | 49 | 50 | maven-shade-plugin 51 | 52 | 53 | 54 | 55 | 56 | 57 | tfv1 58 | 59 | 60 | tfv2 61 | false 62 | 63 | 64 | 65 | 66 | org.bytedeco 67 | tensorflow-platform 68 | ${tensorflow.version} 69 | 70 | 71 | 72 | 73 | 74 | maven-compiler-plugin 75 | 76 | 77 | ai/skymind/nativerl/RLlibV2PolicyHelper.java 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | tfv2 86 | 87 | 88 | tfv2 89 | !false 90 | 91 | 92 | 93 | 94 | org.tensorflow 95 | tensorflow-core-platform 96 | ${tensorflow2.version} 97 | 98 | 99 | 100 | 101 | 102 | maven-compiler-plugin 103 | 104 | 105 | ai/skymind/nativerl/RLlibPolicyHelper.java 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 
| 114 | -------------------------------------------------------------------------------- /nativerl/python/tests/cartpole.py: -------------------------------------------------------------------------------- 1 | # Have you ever seen a more complicated way to massage a gym env into something 2 | # else, only to make it a gym env again internally? You're welcome. 3 | 4 | import os 5 | 6 | if os.environ.get("USE_PY_NATIVERL"): 7 | import pathmind_training.pynativerl as nativerl 8 | else: 9 | import nativerl 10 | 11 | import math 12 | import random 13 | 14 | import numpy as np 15 | 16 | 17 | class PathmindEnvironment(nativerl.Environment): 18 | def __init__(self): 19 | self.state = None 20 | self.steps = 0 21 | self.steps_beyond_done = None 22 | self.action = None 23 | 24 | self.gravity = 9.8 25 | self.mass_cart = 1.0 26 | self.mass_pole = 0.1 27 | self.total_mass = self.mass_pole + self.mass_cart 28 | self.length = 0.5 # actually half the pole's length 29 | self.pole_mass_length = self.mass_pole * self.length 30 | self.force_mag = 10.0 31 | self.tau = 0.02 # seconds between state updates 32 | self.kinematics_integrator = "euler" 33 | 34 | # Angle at which to fail the episode 35 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 36 | self.x_threshold = 2.4 37 | 38 | def getActionSpace(self, agent_id=0): 39 | return nativerl.Discrete(n=2) if agent_id == 0 else None 40 | 41 | def getActionMaskSpace(self): 42 | return None 43 | 44 | def getObservationSpace(self): 45 | return nativerl.Continuous([-math.inf], [math.inf], [4]) 46 | 47 | def getMetricsSpace(self): 48 | return nativerl.Continuous([-math.inf], [math.inf], [1]) 49 | 50 | def getNumberOfAgents(self): 51 | return 1 52 | 53 | def getActionMask(self, agent_id=0): 54 | return None 55 | 56 | def getObservation(self, agent_id=0): 57 | return np.asarray(self.state) 58 | 59 | def reset(self): 60 | self.state = [ 61 | random.uniform(-0.05, 0.05), 62 | random.uniform(-0.05, 0.05), 63 | random.uniform(-0.05, 0.05), 64 | 
random.uniform(-0.05, 0.05), 65 | ] 66 | self.steps = 0 67 | self.steps_beyond_done = None 68 | 69 | def setNextAction(self, action, agent_id=0): 70 | self.action = action 71 | 72 | def step(self): 73 | x, x_dot, theta, theta_dot = self.state 74 | force = self.force_mag if self.action == 1 else -self.force_mag 75 | cos_theta = math.cos(theta) 76 | sin_theta = math.sin(theta) 77 | 78 | temp = ( 79 | force + self.pole_mass_length * theta_dot ** 2 * sin_theta 80 | ) / self.total_mass 81 | theta_acc = (self.gravity * sin_theta - cos_theta * temp) / ( 82 | self.length 83 | * (4.0 / 3.0 - self.mass_pole * cos_theta ** 2 / self.total_mass) 84 | ) 85 | x_acc = temp - self.pole_mass_length * theta_acc * cos_theta / self.total_mass 86 | 87 | x = x + self.tau * x_dot 88 | x_dot = x_dot + self.tau * x_acc 89 | theta = theta + self.tau * theta_dot 90 | theta_dot = theta_dot + self.tau * theta_acc 91 | 92 | self.state = [x, x_dot, theta, theta_dot] 93 | self.steps += 1 94 | 95 | def isSkip(self, agent_id=0): 96 | return False 97 | 98 | def isDone(self, agent_id=0): 99 | x, x_dot, theta, theta_dot = self.state 100 | return bool( 101 | x < -self.x_threshold 102 | or x > self.x_threshold 103 | or theta < -self.theta_threshold_radians 104 | or theta > self.theta_threshold_radians 105 | or self.steps > 1000 106 | ) 107 | 108 | def getReward(self, agent_id=0): 109 | if not self.isDone(agent_id): 110 | reward = 1.0 111 | elif self.steps_beyond_done is None: 112 | # Pole just fell! 
113 | self.steps_beyond_done = 0 114 | reward = 1.0 115 | else: 116 | self.steps_beyond_done += 1 117 | reward = 0.0 118 | return reward 119 | 120 | def getRewardTerms(self): 121 | return np.asarray([])  # no separate reward terms defined for this environment 122 | 123 | def getMetrics(self, agent_id=0): 124 | return ( 125 | np.asarray([self.steps_beyond_done]) 126 | if self.steps_beyond_done is not None 127 | else np.asarray([]) 128 | ) 129 | -------------------------------------------------------------------------------- /nativerl/python/pathmind_training/callbacks.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from typing import Dict 3 | 4 | import ray 5 | from pathmind_training.exports import export_policy_from_checkpoint 6 | from ray.rllib.agents.callbacks import DefaultCallbacks 7 | from ray.rllib.env import BaseEnv 8 | from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker 9 | from ray.rllib.policy import Policy 10 | 11 | 12 | def get_callback_function(callback_function_name): 13 | """Get a callback function from a dotted string naming its Python module and attribute. 14 | :param callback_function_name: dotted name of the Python module and function, as a string 15 | :return: callback function 16 | """ 17 | class_name = callback_function_name.split(".")[-1] 18 | module = callback_function_name.replace(f".{class_name}", "") 19 | lib = importlib.import_module(module) 20 | return getattr(lib, class_name) 21 | 22 | 23 | def get_callbacks(debug_metrics, use_reward_terms, is_gym, checkpoint_frequency): 24 | class Callbacks(DefaultCallbacks): 25 | def on_episode_start( 26 | self, 27 | worker: RolloutWorker, 28 | base_env: BaseEnv, 29 | policies: Dict[str, Policy], 30 | episode: MultiAgentEpisode, 31 | **kwargs, 32 | ): 33 | episode.hist_data["metrics_raw"] = [] 34 | 35 | def on_episode_end( 36 | self, 37 | worker: RolloutWorker, 38 | base_env: BaseEnv, 39 | policies: Dict[str, Policy], 40 | episode: MultiAgentEpisode, 41 | **kwargs, 42 | ): 43 | if not is_gym: 44 | metrics =
worker.env.getMetrics().tolist() 45 | if debug_metrics: 46 | episode.hist_data["metrics_raw"] = metrics 47 | 48 | for i, val in enumerate(metrics): 49 | episode.custom_metrics[f"metrics_{str(i)}"] = metrics[i] 50 | 51 | if use_reward_terms: 52 | term_contributions = ( 53 | worker.env.getRewardTermContributions().tolist() 54 | ) 55 | for i, val in enumerate(term_contributions): 56 | episode.custom_metrics[ 57 | f"metrics_term_{str(i)}" 58 | ] = term_contributions[i] 59 | 60 | def on_train_result(self, trainer, result: dict, **kwargs): 61 | if not is_gym: 62 | results = ray.get( 63 | [ 64 | w.apply.remote(lambda worker: worker.env.getMetrics()) 65 | for w in trainer.workers.remote_workers() 66 | ] 67 | ) 68 | 69 | use_auto_norm = trainer.config["env_config"]["use_auto_norm"] 70 | 71 | if use_auto_norm: 72 | period = trainer.config["env_config"]["reward_balance_period"] 73 | num_reward_terms = trainer.config["env_config"]["num_reward_terms"] 74 | 75 | if result["training_iteration"] % period == 0: 76 | # First "num_reward_terms" amount of custom metrics will be reserved for raw reward term contributions 77 | betas = [ 78 | 1.0 79 | / abs( 80 | result["custom_metrics"][f"metrics_term_{str(i)}_mean"] 81 | ) 82 | if result["custom_metrics"][f"metrics_term_{str(i)}_mean"] 83 | != 0.0 84 | else 0.0 85 | for i in range(num_reward_terms) 86 | ] 87 | for w in trainer.workers.remote_workers(): 88 | w.apply.remote(lambda worker: worker.env.updateBetas(betas)) 89 | 90 | if ( 91 | result["training_iteration"] % checkpoint_frequency == 0 92 | and result["training_iteration"] > 1 93 | ): 94 | export_policy_from_checkpoint(trainer) 95 | 96 | result["last_metrics"] = ( 97 | results[0].tolist() 98 | if results is not None and len(results) > 0 99 | else -1 100 | ) 101 | 102 | return Callbacks 103 | -------------------------------------------------------------------------------- /nativerl/src/main/java/ai/skymind/nativerl/Environment.java: 
-------------------------------------------------------------------------------- 1 | // Targeted by JavaCPP version 1.5.4: DO NOT EDIT THIS FILE 2 | 3 | package ai.skymind.nativerl; 4 | 5 | import java.nio.*; 6 | import org.bytedeco.javacpp.*; 7 | import org.bytedeco.javacpp.annotation.*; 8 | 9 | import static ai.skymind.nativerl.NativeRL.*; 10 | 11 | 12 | /** 13 | * The pure virtual (abstract) interface of a "native" environment. This gets mapped, 14 | * for example, with JavaCPP and implemented by a Java class. The implementation needs 15 | * to export functions to create and release Environment objects. In the case of JavaCPP, 16 | * the createJavaEnvironment() and releaseJavaEnvironment() are available in the generated 17 | * jniNativeRL.h header file. 18 | *

19 | * However, we can just as well implement it in pure C++, which we would do in the case of, 20 | * for example, ROS or MATLAB Simulink. 21 | *

22 | * On the Python side, these functions are picked up by, for example, pybind11 and used 23 | * to implement Python interfaces of environments, such as gym.Env, for RLlib, etc. 24 | */ 25 | @Namespace("nativerl") @Properties(inherit = ai.skymind.nativerl.NativeRLPresets.class) 26 | public class Environment extends Pointer { 27 | static { Loader.load(); } 28 | /** Default native constructor. */ 29 | public Environment() { super((Pointer)null); allocate(); } 30 | /** Native array allocator. Access with {@link Pointer#position(long)}. */ 31 | public Environment(long size) { super((Pointer)null); allocateArray(size); } 32 | /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ 33 | public Environment(Pointer p) { super(p); } 34 | private native void allocate(); 35 | private native void allocateArray(long size); 36 | @Override public Environment position(long position) { 37 | return (Environment)super.position(position); 38 | } 39 | @Override public Environment getPointer(long i) { 40 | return new Environment(this).position(position + i); 41 | } 42 | 43 | // /** Passes a new random seed that should be used for reproducibility. */ 44 | // virtual void setSeed(long long seed) = 0; 45 | /** Returns the i-th action Space supported. */ 46 | @Virtual(true) public native @Const Space getActionSpace(@Cast("ssize_t") long i/*=0*/); 47 | /** Returns the action mask Space supported. */ 48 | @Virtual(true) public native @Const Space getActionMaskSpace(); 49 | /** Returns the observation Space supported. */ 50 | @Virtual(true) public native @Const Space getObservationSpace(); 51 | /** Returns the metrics Space supported. */ 52 | @Virtual(true) public native @Const Space getMetricsSpace(); 53 | /** Returns the number of agents in this environment. */ 54 | @Virtual(true) public native @Cast("ssize_t") long getNumberOfAgents(); 55 | /** Returns the current state of the possible actions for the given agent. 
*/ 56 | @Virtual(true) public native @Const @ByRef Array getActionMask(@Cast("ssize_t") long agentId/*=0*/); 57 | /** Returns the current state of the simulation for the given agent. */ 58 | @Virtual(true) public native @Const @ByRef Array getObservation(@Cast("ssize_t") long agentId/*=0*/); 59 | /** Indicates when the given agent is not available to have its state queried, do actions, etc. */ 60 | @Virtual(true) public native @Cast("bool") boolean isSkip(@Cast("ssize_t") long agentId/*=-1*/); 61 | /** Indicates when a simulation episode is over for the given agent, or -1 for all. */ 62 | @Virtual(true) public native @Cast("bool") boolean isDone(@Cast("ssize_t") long agentId/*=-1*/); 63 | /** Used to reset the simulation, preferably starting a new random sequence. */ 64 | @Virtual(true) public native void reset(); 65 | /** Sets the next action for the given agent to be done during the next step. */ 66 | @Virtual(true) public native void setNextAction(@Const @ByRef Array action, @Cast("ssize_t") long agentId/*=0*/); 67 | /** Used to advance the simulation by a single step. */ 68 | @Virtual(true) public native void step(); 69 | /** Returns the reward based on variables for the given agent before and after the last step. */ 70 | @Virtual(true) public native float getReward(@Cast("ssize_t") long agentId/*=0*/); 71 | /** Returns the last values of observationForReward() */ 72 | @Virtual(true) public native @Const @ByRef Array getMetrics(@Cast("ssize_t") long agentId/*=0*/); 73 | /** Returns the reward terms */ 74 | @Virtual(true) public native @Const @ByRef Array getRewardTerms(@Cast("ssize_t") long agentId/*=0*/); 75 | } 76 | -------------------------------------------------------------------------------- /nativerl/python/tests/gym_cartpole.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classic cart-pole system implemented by Rich Sutton et al. 
3 | Copied from http://incompleteideas.net/sutton/book/code/pole.c 4 | permalink: https://perma.cc/C9ZM-652R 5 | """ 6 | import math 7 | 8 | import gym 9 | import numpy as np 10 | from gym import logger, spaces 11 | from gym.utils import seeding 12 | 13 | 14 | class CartPoleEnv(gym.Env): 15 | def __init__(self): 16 | self.gravity = 9.8 17 | self.masscart = 1.0 18 | self.masspole = 0.1 19 | self.total_mass = self.masspole + self.masscart 20 | self.length = 0.5 # actually half the pole's length 21 | self.polemass_length = self.masspole * self.length 22 | self.force_mag = 10.0 23 | self.tau = 0.02 # seconds between state updates 24 | self.kinematics_integrator = "euler" 25 | 26 | # Angle at which to fail the episode 27 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 28 | self.x_threshold = 2.4 29 | 30 | # Angle limit set to 2 * theta_threshold_radians so failing observation 31 | # is still within bounds. 32 | high = np.array( 33 | [ 34 | self.x_threshold * 2, 35 | np.finfo(np.float32).max, 36 | self.theta_threshold_radians * 2, 37 | np.finfo(np.float32).max, 38 | ], 39 | dtype=np.float32, 40 | ) 41 | 42 | self.action_space = spaces.Discrete(2) 43 | self.observation_space = spaces.Box(-high, high, dtype=np.float32) 44 | 45 | self.seed() 46 | self.viewer = None 47 | self.state = None 48 | 49 | self.steps_beyond_done = None 50 | 51 | def seed(self, seed=None): 52 | self.np_random, seed = seeding.np_random(seed) 53 | return [seed] 54 | 55 | def step(self, action): 56 | err_msg = "%r (%s) invalid" % (action, type(action)) 57 | assert self.action_space.contains(action), err_msg 58 | 59 | x, x_dot, theta, theta_dot = self.state 60 | force = self.force_mag if action == 1 else -self.force_mag 61 | costheta = math.cos(theta) 62 | sintheta = math.sin(theta) 63 | 64 | # For the interested reader: 65 | # https://coneural.org/florian/papers/05_cart_pole.pdf 66 | temp = ( 67 | force + self.polemass_length * theta_dot ** 2 * sintheta 68 | ) / self.total_mass 69 | thetaacc = 
(self.gravity * sintheta - costheta * temp) / ( 70 | self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass) 71 | ) 72 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 73 | 74 | if self.kinematics_integrator == "euler": 75 | x = x + self.tau * x_dot 76 | x_dot = x_dot + self.tau * xacc 77 | theta = theta + self.tau * theta_dot 78 | theta_dot = theta_dot + self.tau * thetaacc 79 | else: # semi-implicit euler 80 | x_dot = x_dot + self.tau * xacc 81 | x = x + self.tau * x_dot 82 | theta_dot = theta_dot + self.tau * thetaacc 83 | theta = theta + self.tau * theta_dot 84 | 85 | self.state = (x, x_dot, theta, theta_dot) 86 | 87 | done = bool( 88 | x < -self.x_threshold 89 | or x > self.x_threshold 90 | or theta < -self.theta_threshold_radians 91 | or theta > self.theta_threshold_radians 92 | ) 93 | 94 | if not done: 95 | reward = 1.0 96 | elif self.steps_beyond_done is None: 97 | # Pole just fell! 98 | self.steps_beyond_done = 0 99 | reward = 1.0 100 | else: 101 | if self.steps_beyond_done == 0: 102 | logger.warn( 103 | "You are calling 'step()' even though this " 104 | "environment has already returned done = True. You " 105 | "should always call 'reset()' once you receive 'done = " 106 | "True' -- any further steps are undefined behavior." 
107 | ) 108 | self.steps_beyond_done += 1 109 | reward = 0.0 110 | 111 | return np.array(self.state), reward, done, {} 112 | 113 | def reset(self): 114 | self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) 115 | self.steps_beyond_done = None 116 | return np.array(self.state) 117 | 118 | def render(self, mode="human"): 119 | pass 120 | -------------------------------------------------------------------------------- /nativerl/examples/traincartpole.sh: -------------------------------------------------------------------------------- 1 | OUTPUT_DIR="$(pwd)" 2 | MODEL_MODULE="cartpole" 3 | ENVIRONMENT_CLASS="PathmindEnvironment" 4 | 5 | cat <<EOF > $MODEL_MODULE.py 6 | import math 7 | import nativerl 8 | import random 9 | 10 | # based on: https://github.com/openai/gym/blob/master/gym/envs/classic_control/cartpole.py 11 | class $ENVIRONMENT_CLASS(nativerl.Environment): 12 | def __init__(self): 13 | nativerl.Environment.__init__(self) 14 | 15 | self.gravity = 9.8 16 | self.masscart = 1.0 17 | self.masspole = 0.1 18 | self.total_mass = (self.masspole + self.masscart) 19 | self.length = 0.5 # actually half the pole's length 20 | self.polemass_length = (self.masspole * self.length) 21 | self.force_mag = 10.0 22 | self.tau = 0.02 # seconds between state updates 23 | self.kinematics_integrator = 'euler' 24 | 25 | # Angle at which to fail the episode 26 | self.theta_threshold_radians = 12 * 2 * math.pi / 360 27 | self.x_threshold = 2.4 28 | 29 | def getActionSpace(self, i): 30 | return nativerl.Discrete(2) if i == 0 else None 31 | 32 | def getActionMaskSpace(self): 33 | return None 34 | 35 | def getObservationSpace(self): 36 | return nativerl.Continuous(nativerl.FloatVector([-math.inf]), nativerl.FloatVector([math.inf]), nativerl.SSizeTVector([4])) 37 | 38 | def getMetricsSpace(self): 39 | return nativerl.Continuous(nativerl.FloatVector([-math.inf]), nativerl.FloatVector([math.inf]), nativerl.SSizeTVector([1])) 40 | 41 | def getNumberOfAgents(self): 42 | return 1 43
| 44 | def getActionMask(self, agentId): 45 | return None; 46 | 47 | def getObservation(self, agentId): 48 | return nativerl.Array(nativerl.FloatVector(self.state)); 49 | 50 | def reset(self): 51 | self.state = [random.uniform(-0.05, 0.05), random.uniform(-0.05, 0.05), random.uniform(-0.05, 0.05), random.uniform(-0.05, 0.05)] 52 | self.steps = 0 53 | self.steps_beyond_done = None 54 | 55 | def setNextAction(self, action, agentId): 56 | self.action = action.values()[0] 57 | 58 | def step(self): 59 | x, x_dot, theta, theta_dot = self.state 60 | force = self.force_mag if self.action == 1 else -self.force_mag 61 | costheta = math.cos(theta) 62 | sintheta = math.sin(theta) 63 | 64 | temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass 65 | thetaacc = (self.gravity * sintheta - costheta * temp) / (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)) 66 | xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass 67 | 68 | x = x + self.tau * x_dot 69 | x_dot = x_dot + self.tau * xacc 70 | theta = theta + self.tau * theta_dot 71 | theta_dot = theta_dot + self.tau * thetaacc 72 | 73 | self.state = [x, x_dot, theta, theta_dot] 74 | self.steps += 1 75 | 76 | def isSkip(self, agentId): 77 | return False 78 | 79 | def isDone(self, agentId): 80 | x, x_dot, theta, theta_dot = self.state 81 | return bool( 82 | x < -self.x_threshold 83 | or x > self.x_threshold 84 | or theta < -self.theta_threshold_radians 85 | or theta > self.theta_threshold_radians 86 | or self.steps > 1000 87 | ) 88 | 89 | def getReward(self, agentId): 90 | if not self.isDone(agentId): 91 | reward = 1.0 92 | elif self.steps_beyond_done is None: 93 | # Pole just fell! 
94 | self.steps_beyond_done = 0 95 | reward = 1.0 96 | else: 97 | self.steps_beyond_done += 1 98 | reward = 0.0 99 | return reward 100 | 101 | def getMetrics(self, agentId): 102 | return nativerl.Array(nativerl.FloatVector([] if self.steps_beyond_done is None else [self.steps_beyond_done])); 103 | EOF 104 | 105 | export CLASSPATH=$(find . -iname '*.jar' | tr '\n' :) 106 | 107 | 108 | if which cygpath; then 109 | export CLASSPATH=$(cygpath --path --windows "$CLASSPATH") 110 | export PATH=$PATH:$(find "$(cygpath "$JAVA_HOME")" -name 'jvm.dll' -printf '%h:') 111 | fi 112 | 113 | PYTHON=$(which python.exe) || PYTHON=$(which python3) 114 | 115 | "$PYTHON" run.py training \ 116 | --algorithm "PPO" \ 117 | --output-dir "$OUTPUT_DIR" \ 118 | --environment "$MODEL_MODULE.$ENVIRONMENT_CLASS" \ 119 | --num-workers 4 \ 120 | --max-iterations 10 \ 121 | --multi-agent 122 | -------------------------------------------------------------------------------- /nativerl/python/tests/factory/simulation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pprint 3 | from collections import Counter 4 | from csv import writer 5 | from typing import Dict, List, Optional, TypeVar 6 | 7 | from .models import * 8 | 9 | PRINTER = pprint.PrettyPrinter(indent=2) 10 | VERBOSE = True 11 | 12 | F = TypeVar("F", bound="Factory") 13 | 14 | 15 | class Factory: 16 | """A Factory sets up all components (nodes, rails, tables) needed to 17 | solve the problem of delivering cores to their destinations. 
Note that 18 | this is just a "model", the application logic and agent interaction is 19 | separated""" 20 | 21 | def __init__( 22 | self, 23 | nodes: List[Node], 24 | rails: List[Rail], 25 | tables: List[Table], 26 | max_num_steps: int = 1000, 27 | name: str = None, 28 | ): 29 | self.nodes = nodes 30 | self.rails = rails 31 | self.tables = tables 32 | self.name = name 33 | self.cores = [t.core for t in self.tables if t.has_core()] 34 | self.max_num_steps = max_num_steps 35 | self.initial_max_num_steps = max_num_steps 36 | 37 | # Stats counter 38 | self.step_count = 0 39 | self.agent_step_counter: Dict[int, int] = { 40 | t: 0 for t in range(len(self.tables)) 41 | } 42 | self.moves: Dict[int, List[ActionResult]] = { 43 | t: [] for t in range(len(self.tables)) 44 | } 45 | self.move_counter = Counter() 46 | self.action_counter = Counter() 47 | self.step_completion_counter: Dict[int, List[int]] = { 48 | t: [] for t in range(len(self.tables)) 49 | } 50 | 51 | def is_solved(self): 52 | """A factory is solved if no table has a core anymore.""" 53 | return len([t for t in self.tables if t.has_core()]) == 0 54 | 55 | def done(self): 56 | return all([c.done() for c in self.cores]) 57 | 58 | def set_tables(self, tables: List[Table]): 59 | self.tables = tables 60 | 61 | def get_rail(self, node: Node) -> Optional[Rail]: 62 | for rail in self.rails: 63 | if node in rail.nodes: 64 | return rail 65 | return None 66 | 67 | def add_move(self, agent_id: int, action, move: ActionResult): 68 | self.step_count += 1 69 | self.moves.get(agent_id).append(move) 70 | self.agent_step_counter[agent_id] += 1 71 | self.move_counter[move.name] += 1 72 | self.action_counter[action.name] += 1 73 | 74 | def add_completed_step_count(self): 75 | for agent_id in range(len(self.tables)): 76 | counter = self.step_completion_counter.get(agent_id) 77 | counter.append(self.agent_step_counter[agent_id]) 78 | 79 | def print_stats(self, episodes=None): 80 | """Print statistics to stdout for quick sanity 
checks.""" 81 | if VERBOSE: 82 | PRINTER.pprint(">>> Completed an episode") 83 | PRINTER.pprint(" >>> Number of episodes completed:") 84 | PRINTER.pprint(episodes) 85 | PRINTER.pprint(" >>> Number of cores left to deliver:") 86 | cores_left = len([t for t in self.tables if t.has_core()]) 87 | PRINTER.pprint(cores_left) 88 | PRINTER.pprint(" >>> Move counter") 89 | PRINTER.pprint(dict(self.move_counter)) 90 | PRINTER.pprint(" >>> Action counter") 91 | PRINTER.pprint(dict(self.action_counter)) 92 | PRINTER.pprint(" >>> Steps taken to completion") 93 | PRINTER.pprint(self.step_completion_counter) 94 | 95 | def record_stats(self): 96 | """Record statistics in a CSV file for later visualisation.""" 97 | move_dict = dict(self.move_counter) 98 | move_dict["CORES_REMAIN"] = len([t for t in self.tables if t.has_core()]) 99 | key_list = sorted(move_dict) 100 | elements = [] 101 | for item in key_list: 102 | elements.append(move_dict[item]) 103 | if os.path.exists(os.path.join(os.path.abspath("PPO/"), "Move_Stats.csv")): 104 | with open( 105 | os.path.join(os.path.abspath("PPO/"), "Move_Stats.csv"), "a", newline="" 106 | ) as f: 107 | writer(f).writerow(elements) 108 | else: 109 | with open( 110 | os.path.join(os.path.abspath("PPO/"), "Move_Stats.csv"), 111 | "w+", 112 | newline="", 113 | ) as f: 114 | writer(f).writerow(key_list) 115 | writer(f).writerow(elements) 116 | -------------------------------------------------------------------------------- /nativerl-policy/src/main/java/ai/skymind/nativerl/ActionMaskProcessor.java: -------------------------------------------------------------------------------- 1 | package ai.skymind.nativerl; 2 | 3 | import ai.skymind.nativerl.util.Reflect; 4 | import java.lang.reflect.Constructor; 5 | import java.lang.reflect.Field; 6 | 7 | /** 8 | * Finds the class containing values for the action masks within the agent class, 9 | * and provides a few methods to obtain information about its fields as well as to access them. 
10 | * 11 | * @author saudet 12 | */ 13 | public class ActionMaskProcessor { 14 | /** The name of the method where the local inner class needs to be defined in. */ 15 | public static final String METHOD_NAME = "actionMasks"; 16 | 17 | Class agentClass; 18 | Class actionMaskClass; 19 | Field[] actionMaskFields; 20 | Constructor actionMaskConstructor; 21 | boolean usesAgentId; 22 | 23 | /** Calls {@code this(Class.forName(agentClassName, false, this.getClassLoader()))}. */ 24 | public ActionMaskProcessor(String agentClassName) throws ReflectiveOperationException { 25 | this(Class.forName(agentClassName, false, ActionMaskProcessor.class.getClassLoader())); 26 | } 27 | /** Looks inside the {@link #METHOD_NAME} method of the agent class given. */ 28 | public ActionMaskProcessor(Class agentClass) throws ReflectiveOperationException { 29 | this.agentClass = agentClass; 30 | this.actionMaskClass = Reflect.findLocalClass(agentClass, METHOD_NAME); 31 | this.actionMaskFields = Reflect.getFields(actionMaskClass); 32 | try { 33 | this.actionMaskConstructor = actionMaskClass.getDeclaredConstructor(agentClass, long.class); 34 | this.usesAgentId = true; 35 | } catch (NoSuchMethodException e) { 36 | try { 37 | this.actionMaskConstructor = actionMaskClass.getDeclaredConstructor(agentClass, int.class); 38 | this.usesAgentId = true; 39 | } catch (NoSuchMethodException e2) { 40 | this.actionMaskConstructor = actionMaskClass.getDeclaredConstructor(agentClass); 41 | this.usesAgentId = false; 42 | } 43 | } 44 | this.actionMaskConstructor.setAccessible(true); 45 | } 46 | 47 | /** Returns the class we found within the {@link #METHOD_NAME} method of the agent class. */ 48 | public Class getActionMaskClass() { 49 | return actionMaskClass; 50 | } 51 | 52 | /** Returns the fields of the class we found within the {@link #METHOD_NAME} method of the agent class. 
*/ 53 | public Field[] getActionMaskFields() { 54 | return actionMaskFields; 55 | } 56 | 57 | /** Returns {@code getActionMaskNames(agent, 0)}. */ 58 | public String[] getActionMaskNames(Object agent) throws ReflectiveOperationException { 59 | return getActionMaskNames(agent, 0); 60 | } 61 | /** Returns {@code toNames(getActionMaskObject(agent, agentId))}. */ 62 | public String[] getActionMaskNames(Object agent, int agentId) throws ReflectiveOperationException { 63 | return toNames(getActionMaskObject(agent, agentId)); 64 | } 65 | 66 | /** Returns {@code getActionMasks(agent, 0)}. */ 67 | public boolean[] getActionMasks(Object agent) throws ReflectiveOperationException { 68 | return getActionMasks(agent, 0); 69 | } 70 | /** Returns {@code toBooleans(getActionMaskObject(agent, agentId))}. */ 71 | public boolean[] getActionMasks(Object agent, int agentId) throws ReflectiveOperationException { 72 | return toBooleans(getActionMaskObject(agent, agentId)); 73 | } 74 | 75 | /** Returns {@code getActionMaskObject(agent, 0)}. */ 76 | public <M> M getActionMaskObject(Object agent) throws ReflectiveOperationException { 77 | return getActionMaskObject(agent, 0); 78 | } 79 | /** Returns a new instance of the action mask class, passing the given agentId to the constructor. */ 80 | public <M> M getActionMaskObject(Object agent, int agentId) throws ReflectiveOperationException { 81 | return usesAgentId ? (M)actionMaskConstructor.newInstance(agent, agentId) : (M)actionMaskConstructor.newInstance(agent); 82 | } 83 | 84 | /** Returns the values that were assigned to the fields, with arrays flattened to booleans. */ 85 | public <M> boolean[] toBooleans(M actionMaskObject) throws ReflectiveOperationException { 86 | return Reflect.getFieldBooleans(actionMaskFields, actionMaskObject); 87 | } 88 | /** Returns the names of the fields in the order listed within the class found, with arrays flattened and suffixed with [0], [1], etc.
*/ 89 | public String[] toNames(M actionMaskObject) throws ReflectiveOperationException { 90 | return Reflect.getFieldNames(actionMaskFields, actionMaskObject); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /nativerl-analyzer/api/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | 7 | io.skymind.pathmind 8 | model-analyzer-parent 9 | 0.0.1-SNAPSHOT 10 | 11 | 12 | model-analyzer-api 13 | 14 | 15 | 11 16 | 2.7 17 | 2.2.4 18 | 2.9.2 19 | 20 | 21 | 22 | 23 | 24 | org.springframework.boot 25 | spring-boot-dependencies 26 | 2.2.0.RELEASE 27 | pom 28 | import 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | io.skymind.pathmind 37 | model-analyzer-common 38 | 39 | 40 | 41 | org.springframework.boot 42 | spring-boot-starter-web 43 | 44 | 45 | org.springframework.boot 46 | spring-boot-starter-actuator 47 | 48 | 49 | org.projectlombok 50 | lombok 51 | true 52 | 53 | 54 | commons-io 55 | commons-io 56 | ${commons-io.version} 57 | 58 | 59 | net.lingala.zip4j 60 | zip4j 61 | ${zip4j.version} 62 | 63 | 64 | io.springfox 65 | springfox-swagger2 66 | ${swagger2.version} 67 | 68 | 69 | io.springfox 70 | springfox-swagger2 71 | ${swagger2.version} 72 | 73 | 74 | io.springfox 75 | springfox-swagger-ui 76 | ${swagger2.version} 77 | 78 | 79 | org.springframework.boot 80 | spring-boot-configuration-processor 81 | 82 | 83 | org.springframework.boot 84 | spring-boot-starter-test 85 | test 86 | 87 | 88 | org.junit.vintage 89 | junit-vintage-engine 90 | 91 | 92 | 93 | 94 | 95 | 96 | pathmind-ma-api 97 | 98 | 99 | org.springframework.boot 100 | spring-boot-maven-plugin 101 | 102 | 103 | 104 | repackage 105 | 106 | 107 | spring-boot 108 | 109 | io.skymind.pathmind.analyzer.PathmindModelAnalyzerApplication 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- 
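The agentId constructor fallback in `ActionMaskProcessor` above can be sketched stand-alone. This is a minimal, hypothetical example (the `Agent` and `Masks` classes are stand-ins for what AnyLogic generates, not real NativeRL types): it tries the `(agent, long)` constructor first, then `(agent, int)`, then falls back to the no-agentId form. Note that for a non-static inner class, javac prepends the enclosing instance to every constructor signature, which is why the agent class appears as the first parameter.

```java
import java.lang.reflect.Constructor;

public class ConstructorFallback {
    // Hypothetical stand-ins for the agent and its action-masks inner class.
    public static class Agent {
        public class Masks {
            public Masks(long agentId) {}  // compiled signature is (Agent, long)
        }
    }

    // Mirrors the try/catch cascade in ActionMaskProcessor's constructor:
    // prefer (agent, long), then (agent, int), then the no-agentId constructor.
    public static Constructor<?> findConstructor(Class<?> maskClass, Class<?> agentClass)
            throws NoSuchMethodException {
        try {
            return maskClass.getDeclaredConstructor(agentClass, long.class);
        } catch (NoSuchMethodException e) {
            try {
                return maskClass.getDeclaredConstructor(agentClass, int.class);
            } catch (NoSuchMethodException e2) {
                return maskClass.getDeclaredConstructor(agentClass);
            }
        }
    }

    public static void main(String[] args) throws ReflectiveOperationException {
        Constructor<?> ctor = findConstructor(Agent.Masks.class, Agent.class);
        ctor.setAccessible(true);  // the inner class may be private in practice
        Object masks = ctor.newInstance(new Agent(), 0L);
        System.out.println(ctor.getParameterCount());  // 2: enclosing instance + agent id
    }
}
```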
/nativerl/python/tests/game2048/visual_play.py:
--------------------------------------------------------------------------------
import random
from tkinter import CENTER, Frame, Label

import constants as c
import logic


def gen():
    return random.randint(0, c.GRID_LEN - 1)


class GameGrid(Frame):
    def __init__(self):
        Frame.__init__(self)

        self.grid()
        self.master.title("2048")
        self.master.bind("<Key>", self.key_down)

        self.commands = {
            c.KEY_UP: logic.up,
            c.KEY_DOWN: logic.down,
            c.KEY_LEFT: logic.left,
            c.KEY_RIGHT: logic.right,
            c.KEY_UP_ALT: logic.up,
            c.KEY_DOWN_ALT: logic.down,
            c.KEY_LEFT_ALT: logic.left,
            c.KEY_RIGHT_ALT: logic.right,
            c.KEY_H: logic.left,
            c.KEY_L: logic.right,
            c.KEY_K: logic.up,
            c.KEY_J: logic.down,
        }

        self.grid_cells = []
        self.init_grid()
        self.matrix = logic.new_game(c.GRID_LEN)
        self.history_matrixs = []
        self.update_grid_cells()

        self.mainloop()

    def init_grid(self):
        background = Frame(
            self, bg=c.BACKGROUND_COLOR_GAME, width=c.SIZE, height=c.SIZE
        )
        background.grid()

        for i in range(c.GRID_LEN):
            grid_row = []
            for j in range(c.GRID_LEN):
                cell = Frame(
                    background,
                    bg=c.BACKGROUND_COLOR_CELL_EMPTY,
                    width=c.SIZE / c.GRID_LEN,
                    height=c.SIZE / c.GRID_LEN,
                )
                cell.grid(row=i, column=j, padx=c.GRID_PADDING, pady=c.GRID_PADDING)
                t = Label(
                    master=cell,
                    text="",
                    bg=c.BACKGROUND_COLOR_CELL_EMPTY,
                    justify=CENTER,
                    font=c.FONT,
                    width=5,
                    height=2,
                )
                t.grid()
                grid_row.append(t)

            self.grid_cells.append(grid_row)

    def update_grid_cells(self):
        for i in range(c.GRID_LEN):
            for j in range(c.GRID_LEN):
                new_number = self.matrix[i][j]
                if new_number == 0:
                    self.grid_cells[i][j].configure(
                        text="", bg=c.BACKGROUND_COLOR_CELL_EMPTY
                    )
                else:
                    self.grid_cells[i][j].configure(
                        text=str(new_number),
                        bg=c.BACKGROUND_COLOR_DICT[new_number],
                        fg=c.CELL_COLOR_DICT[new_number],
                    )
        self.update_idletasks()

    def key_down(self, event):
        key = repr(event.char)
        if key == c.KEY_BACK and len(self.history_matrixs) > 1:
            self.matrix = self.history_matrixs.pop()
            self.update_grid_cells()
            print("back one step, total steps:", len(self.history_matrixs))
        elif key in self.commands:
            self.matrix, done, rew = self.commands[key](self.matrix)
            if done:
                self.matrix = logic.add_two(self.matrix)
                # record last move
                self.history_matrixs.append(self.matrix)
                self.update_grid_cells()
                if logic.game_state(self.matrix) == "win":
                    self.grid_cells[1][1].configure(
                        text="You", bg=c.BACKGROUND_COLOR_CELL_EMPTY
                    )
                    self.grid_cells[1][2].configure(
                        text="Win!", bg=c.BACKGROUND_COLOR_CELL_EMPTY
                    )
                if logic.game_state(self.matrix) == "lose":
                    self.grid_cells[1][1].configure(
                        text="You", bg=c.BACKGROUND_COLOR_CELL_EMPTY
                    )
                    self.grid_cells[1][2].configure(
                        text="Lose!", bg=c.BACKGROUND_COLOR_CELL_EMPTY
                    )

    def generate_next(self):
        index = (gen(), gen())
        while self.matrix[index[0]][index[1]] != 0:
            index = (gen(), gen())
        self.matrix[index[0]][index[1]] = 2


game_grid = GameGrid()
--------------------------------------------------------------------------------
/PathmindPolicyHelper/README.md:
--------------------------------------------------------------------------------
# PathmindHelper

## Introduction

This is the "PathmindHelper" palette item that users can import into their AnyLogic models to make them ready to be uploaded to the Pathmind Web App. It assumes they will get processed by NativeRL.

## Required Software

- AnyLogic on Linux, Mac, or Windows

## Build Instructions

1. Run the build for NativeRL to get `../nativerl-policy/target/nativerl-policy-1.8.1-SNAPSHOT.jar`
2. Launch AnyLogic and inside it:
    1. Open the `PathmindPolicyHelper.alp` model
    2. Make sure `../nativerl-policy/target/nativerl-policy-1.8.1-SNAPSHOT.jar` is found as a dependency
    3. Click on "Pathmind" in the Projects view
    4. Go to Exporting -> Export the Library -> Finish

By default, this outputs a `PathmindHelper.jar` file and a copy of its dependencies. We can further add the files from `../nativerl-policy/target/nativerl-policy-1.8.1-SNAPSHOT.jar` to that JAR to simplify the end user experience, but this also requires manually editing the `library.xml` file to remove the dependency on the JAR file.

- The `bundle.sh` script automates this process and outputs the final archive to `target/PathmindHelper.jar`.
- We can also call `fixup.sh` instead to rename the NativeRL Policy JAR to `PathmindPolicy.jar` and fix up the class path in `PathmindHelper.jar` accordingly. This way, however, AnyLogic won't copy `PathmindPolicy.jar` or its content on export.

## End User Workflow

This is the overall end user workflow, as a reference for how the user experience is meant to work, in this case for the Traffic Light Phases example:

1. Drag PathmindHelper from the palette to the Main Agent's drawing space
2. Fill in Observations, Reward Variables, Actions, Action Masks, etc. (among other options), like this:

```java
class Observations {
    double obs[] = getObservation(false);
}
class Reward {
    double vars[] = getObservation(true);
}
class Actions {
    @Discrete(n = 2) long action;
    void doIt() { doAction(action); }
}
class ActionMasks {
    boolean[] mask = getMask();
}
```

- Here we're asking users to define _private_ inner classes since AnyLogic doesn't offer any way to let them define _public_ inner classes. **(This is something they need to fix.)**

3. Export the model via the dummy Simulation and upload it to the Pathmind Web App
4. Write code snippets like this in the web app:

```java
CLASS_SNIPPET='
int simCount = 0;
String combinations[][] = {
    {"constant_moderate", "constant_moderate"},
    {"none_til_heavy_afternoon_peak", "constant_moderate"},
    {"constant_moderate", "none_til_heavy_afternoon_peak"},
    {"peak_afternoon", "peak_morning"},
    {"peak_morning", "peak_afternoon"}
};
'

RESET_SNIPPET='
simCount++;
agent.schedNameNS = combinations[simCount % combinations.length][0];
agent.schedNameEW = combinations[simCount % combinations.length][1];
'

OBSERVATION_SNIPPET='
out = in.obs;
'

REWARD_SNIPPET='
double[] s0 = before.vars, s1 = after.vars;
// change in forward + intersection delay
double delay0 = s0[0] + s0[2] + s0[4] + s0[6] + s0[8];
double delay1 = s1[0] + s1[2] + s1[4] + s1[6] + s1[8];
reward = delay0 - delay1;
if (delay0 > 0 || delay1 > 0) {
    reward /= Math.max(delay0, delay1);
}
'

METRICS_SNIPPET='
metrics = new double[] { agent.tisDS.getYMean() };
'
```

5. Perform training, etc. (the web app doesn't need to do anything more than it's already doing for RLlib)
6. Export and download policies back to AnyLogic
    - The web app here needs to take the `PolicyObservationFilter.class` file generated and compiled by NativeRL, which contains the `OBSERVATION_SNIPPET` and implements `ObservationFilter`, and bundle it into the zip file along with the TensorFlow SavedModel. Right now, because AnyLogic doesn't support _public_ inner classes, we need to make do with _private_ inner classes, and that involves a couple of ugly hacks, but it's workable for now. **(Again, this is something they need to fix!)**
7. Run the Simulation as usual and everything just works!

## End User Guide

Currently maintained on [Basecamp](https://3.basecamp.com/3684163/buckets/11875773/messages/2017431518).
--------------------------------------------------------------------------------
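The `REWARD_SNIPPET` in the workflow above computes a normalized improvement in delay: the even-indexed reward variables are summed before and after the action, and the difference is divided by the larger of the two totals so the reward stays roughly in [-1, 1]. A stand-alone sketch of the same arithmetic (the `reward` method name is ours; the real snippet runs inside the reward class NativeRL generates):

```java
public class RewardSketch {
    // Same arithmetic as REWARD_SNIPPET: reward is the drop in total delay
    // between the before (s0) and after (s1) reward variables, normalized
    // by the larger total so the magnitude stays bounded.
    public static double reward(double[] s0, double[] s1) {
        double delay0 = s0[0] + s0[2] + s0[4] + s0[6] + s0[8];
        double delay1 = s1[0] + s1[2] + s1[4] + s1[6] + s1[8];
        double reward = delay0 - delay1;
        if (delay0 > 0 || delay1 > 0) {
            reward /= Math.max(delay0, delay1);
        }
        return reward;
    }

    public static void main(String[] args) {
        double[] before = {4, 0, 2, 0, 2, 0, 1, 0, 1};  // total delay 10
        double[] after  = {2, 0, 1, 0, 1, 0, 1, 0, 0};  // total delay 5
        System.out.println(reward(before, after));      // (10 - 5) / 10 = 0.5
    }
}
```

Halving the total delay therefore yields a reward of 0.5, and the guard clause avoids a division by zero when both totals are zero.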