├── .dockerignore ├── .github └── workflows │ ├── base_py38.yml │ ├── gpu.yml │ └── python-publish.yml ├── .gitignore ├── 3rd └── winehq.key ├── CHANGELOG.md ├── GAMECORE.LICENSE ├── LICENSE ├── README.md ├── aiarena ├── 1v1 │ ├── __init__.py │ ├── actor │ │ ├── __init__.py │ │ ├── actor.py │ │ ├── agent.py │ │ ├── agent_demo.py │ │ ├── config.json │ │ ├── custom.py │ │ ├── entry.py │ │ ├── model.py │ │ ├── rl_data_info.py │ │ ├── sample_manager.py │ │ └── server.py │ ├── common │ │ ├── algorithm_tf.py │ │ ├── algorithm_torch.py │ │ └── config.py │ └── learner │ │ ├── __init__.py │ │ ├── config │ │ └── common.conf │ │ └── train.py ├── 3v3 │ ├── __init__.py │ ├── actor │ │ ├── __init__.py │ │ ├── actor.py │ │ ├── agent │ │ │ ├── __init__.py │ │ │ ├── agent.py │ │ │ ├── agent_demo.py │ │ │ ├── common_ai_agent.py │ │ │ └── random_agent.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ └── model_config.py │ │ ├── entry.py │ │ ├── frozen.py │ │ ├── kaiwu.py │ │ ├── model │ │ │ ├── pytorch │ │ │ │ └── model.py │ │ │ └── tensorflow │ │ │ │ └── model.py │ │ ├── rl_data_info.py │ │ ├── sample_manager.py │ │ └── server.py │ └── learner │ │ ├── __init__.py │ │ ├── config │ │ ├── Config.py │ │ ├── DimConfig.py │ │ ├── __init__.py │ │ └── common.conf │ │ ├── kaiwu.py │ │ ├── networkmodel │ │ ├── pytorch │ │ │ └── NetworkModel.py │ │ └── tensorflow │ │ │ └── NetworkModel.py │ │ └── train.py ├── __init__.py ├── battle │ └── battle.py ├── grafana │ ├── dashboards │ │ ├── 1v1-cpu.json │ │ ├── 1v1-gpu.json │ │ └── hok.json │ └── etc │ │ ├── grafana.ini │ │ └── provisioning │ │ ├── dashboards │ │ └── 3v3.yaml │ │ └── datasources │ │ └── 3v3.yaml ├── process │ ├── __init__.py │ ├── actor_process.py │ ├── config_process.py │ ├── learner.iplist │ ├── learner.py │ ├── model_pool.py │ ├── monitor.py │ ├── process_base.py │ ├── run.py │ ├── send_model.py │ └── sshd.py ├── remote-gc-server │ ├── monitor_defunct.sh │ ├── process.py │ ├── run_and_monitor_gamecore_server.sh │ 
├── run_gamecore_server.sh │ ├── sgame_simulator_remote_zmq │ ├── sgame_simulator_repeated_zmq │ ├── start_gamecore_server.sh │ └── test_client.py └── scripts │ ├── actor │ ├── kill.sh │ ├── learner.iplist │ ├── monitor_actor.sh │ ├── parse_iplist.py │ └── start_actor.sh │ ├── build_code.sh │ ├── check_file_update.sh │ ├── kill_all.sh │ ├── learner │ ├── kill.sh │ ├── learner.iplist │ ├── parse_iplist.py │ ├── ssh-copy-id.expect │ ├── start_learner.sh │ └── start_monitor.sh │ ├── start_dev.sh │ ├── start_test.sh │ └── stop_dev.sh ├── archive └── 3v3 │ ├── cppo │ └── NetworkModel.py │ ├── mappo │ └── NetworkModel.py │ └── ppo │ └── NetworkModel.py ├── build.sh ├── dockerfile ├── dockerfile.base ├── dockerfile.base.cpu ├── dockerfile.base.gpu ├── dockerfile.base.torch ├── dockerfile.dev └── dockerfile.gamecore ├── docs ├── cluster.md ├── hok_1v1.png ├── replay-tool.gif ├── run_windows_gamecore_on_linux.md ├── run_with_prebuilt_image.md └── sgame_folder.png ├── hok_env ├── MANIFEST.in ├── __init__.py ├── env.yaml ├── hok │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── camp.py │ │ ├── gamecore_client.py │ │ ├── log.py │ │ └── server_process.py │ ├── hok1v1 │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── config.dat │ │ ├── config.json │ │ ├── default_hero_config.json │ │ ├── env1v1.py │ │ ├── hero_config.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── interface.cpython-36m-x86_64-linux-gnu.so │ │ │ ├── interface.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── interface.cpython-38-x86_64-linux-gnu.so │ │ │ └── interface.cpython-39-x86_64-linux-gnu.so │ │ ├── server.py │ │ ├── server_delay.py │ │ ├── unit_test │ │ │ ├── __init__.py │ │ │ ├── config.json │ │ │ └── test_env.py │ │ └── version.py │ ├── hok3v3 │ │ ├── __init__.py │ │ ├── action_space.py │ │ ├── action_space_test.py │ │ ├── config.dat │ │ ├── default_hero_config.json │ │ ├── env.py │ │ ├── hero_config.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── lib3v3.cpython-36m-x86_64-linux-gnu.so │ │ │ ├── 
lib3v3.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── lib3v3.cpython-38-x86_64-linux-gnu.so │ │ │ ├── lib3v3.cpython-39-x86_64-linux-gnu.so │ │ │ └── libinterface.so │ │ ├── reward.py │ │ ├── server.py │ │ └── unit_test │ │ │ ├── __init__.py │ │ │ └── test_env.py │ └── version.py ├── pyproject.toml └── setup.py └── rl_framework ├── common ├── rl_framework │ ├── __init__.py │ └── common │ │ ├── __init__.py │ │ ├── algorithms │ │ ├── __init__.py │ │ └── base │ │ │ ├── __init__.py │ │ │ ├── algorithm.py │ │ │ └── model.py │ │ ├── lib_socket │ │ ├── __init__.py │ │ ├── tcp_socket.py │ │ ├── tcp_socket_noblocking.py │ │ ├── utils.py │ │ └── zmq_socket.py │ │ ├── logging │ │ └── __init__.py │ │ ├── stat │ │ ├── __init__.py │ │ ├── sys_stat.py │ │ └── sys_stat_test.py │ │ └── utils │ │ ├── __init__.py │ │ ├── cmd_argparser.py │ │ ├── cmd_argparser_test.py │ │ ├── common_func.py │ │ ├── config_control.py │ │ ├── config_control_test.py │ │ ├── trace_malloc.py │ │ └── trace_malloc_test.py └── setup.py ├── learner ├── rl_framework │ ├── __init__.py │ └── learner │ │ ├── __init__.py │ │ ├── algorithms │ │ ├── __init__.py │ │ └── base │ │ │ ├── __init__.py │ │ │ ├── algorithm.py │ │ │ └── model.py │ │ ├── dataset │ │ ├── __init__.py │ │ ├── lock_free_queue │ │ │ ├── __init__.py │ │ │ └── lock_free_queue_shallow.py │ │ ├── network_dataset │ │ │ ├── __init__.py │ │ │ ├── common │ │ │ │ ├── __init__.py │ │ │ │ ├── batch_process.py │ │ │ │ └── sample_manager.py │ │ │ ├── pytorch │ │ │ │ ├── __init__.py │ │ │ │ ├── network_dataset_random.py │ │ │ │ └── network_dataset_zmq.py │ │ │ └── tensorflow │ │ │ │ ├── __init__.py │ │ │ │ ├── network_dataset_random.py │ │ │ │ ├── network_dataset_socket_async.py │ │ │ │ └── network_dataset_zmq.py │ │ └── sample_generation │ │ │ ├── __init__.py │ │ │ └── offline_rlinfo_adapter.py │ │ ├── example │ │ └── __init__.py │ │ └── framework │ │ ├── __init__.py │ │ ├── common │ │ ├── __init__.py │ │ ├── config_control.py │ │ └── log_manager.py │ │ ├── pytorch 
│ │ ├── __init__.py │ │ ├── apd_benchmark.py │ │ ├── apd_datasets.py │ │ ├── model_manager.py │ │ ├── node_info_ddp.py │ │ ├── node_info_hvd.py │ │ └── step_context.py │ │ └── tensorflow │ │ ├── __init__.py │ │ ├── apd_benchmark.py │ │ ├── apd_datasets.py │ │ ├── apd_model.py │ │ ├── gradient_fusion.py │ │ └── model_manager.py └── setup.py ├── mem_pool ├── rl_framework │ ├── __init__.py │ └── mem_pool │ │ ├── __init__.py │ │ ├── mem_pool_api │ │ ├── __init__.py │ │ ├── mem_pool_apis.py │ │ └── mem_pool_protocol.py │ │ └── zmq_mem_pool_server │ │ ├── __init__.py │ │ └── zmq_mem_pool.py ├── setup.py └── test │ ├── test_mem_pool_apis.py │ └── test_mem_pool_apis_short.py ├── model_pool ├── pkg │ └── model_pool_pkg │ │ ├── bin │ │ ├── modelpool │ │ ├── modelpool_proxy │ │ └── mv.sh │ │ ├── config │ │ ├── trpc_go.yaml.cpu │ │ └── trpc_go.yaml.gpu │ │ └── op │ │ ├── set_cpu_config.sh │ │ ├── set_gpu_config.sh │ │ ├── start.sh │ │ └── stop.sh ├── rl_framework │ ├── __init__.py │ └── model_pool │ │ ├── __init__.py │ │ └── model_pool_api │ │ ├── __init__.py │ │ ├── modelPoolClient.py │ │ └── model_pool_apis.py └── setup.py ├── monitor ├── MANIFEST.in ├── pyproject.toml ├── rl_framework │ ├── __init__.py │ └── monitor │ │ ├── __init__.py │ │ └── loglib │ │ ├── __init__.py │ │ ├── influxdb_handler.py │ │ └── loglib.conf └── setup.cfg ├── predictor ├── README.md ├── rl_framework │ ├── __init__.py │ └── predictor │ │ ├── __init__.py │ │ ├── predictor │ │ ├── __init__.py │ │ ├── base_predictor.py │ │ ├── infer_input_output.py │ │ ├── local_predictor.py │ │ ├── local_torch_predictor.py │ │ └── remote_predictor.py │ │ └── utils │ │ └── __init__.py └── setup.py └── send_model ├── check_and_send_checkpoint.py ├── model_no_syn.py ├── model_syn_base.py └── model_syn_model_pool.py /.dockerignore: -------------------------------------------------------------------------------- 1 | **/.git/ 2 | **/*.egg-info/ 3 | **/__pycache__/ 4 | **/GameAiMgr_*.txt 5 | aiarena/checkpoints 6 | 
aiarena/logs 7 | aiarena/scripts/actor/learner.iplist.new 8 | aiarena/scripts/learner/learner.iplist.new 9 | aiarena/code 10 | -------------------------------------------------------------------------------- /.github/workflows/base_py38.yml: -------------------------------------------------------------------------------- 1 | name: py38base 2 | on: 3 | push: 4 | tags: 5 | - 'base_v*.*.*' 6 | 7 | jobs: 8 | docker: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - 12 | name: Checkout 13 | uses: actions/checkout@v3 14 | - 15 | name: Set up Docker Buildx 16 | uses: docker/setup-buildx-action@v2 17 | - 18 | name: Login to DockerHub 19 | if: github.event_name != 'pull_request' 20 | uses: docker/login-action@v2 21 | with: 22 | username: ${{ secrets.DOCKERHUB_USERNAME }} 23 | password: ${{ secrets.DOCKERHUB_TOKEN }} 24 | - 25 | name: Docker meta 26 | id: meta_base_common_py38 27 | uses: docker/metadata-action@v4 28 | with: 29 | images: | 30 | tencentailab/hok_env 31 | flavor: | 32 | prefix=base_ 33 | latest=false 34 | tags: | 35 | type=sha,prefix=base_common_py38_ 36 | type=raw,value={{tag}},prefix=base_common_py38_ 37 | - 38 | name: Build and push 39 | uses: docker/build-push-action@v3 40 | with: 41 | context: . 
42 | push: ${{ github.event_name != 'pull_request' }} 43 | tags: ${{ steps.meta_base_common_py38.outputs.tags }} 44 | labels: ${{ steps.meta_base_common_py38.outputs.labels }} 45 | file: dockerfile/dockerfile.base 46 | cache-from: type=registry,ref=tencentailab/hok_env:base_common_py38_buildcache 47 | cache-to: type=registry,ref=tencentailab/hok_env:base_common_py38_buildcache,mode=max 48 | -------------------------------------------------------------------------------- /.github/workflows/gpu.yml: -------------------------------------------------------------------------------- 1 | name: gpu 2 | on: 3 | push: 4 | branches: 5 | - 'master' 6 | tags: 7 | - 'v*.*.*' 8 | pull_request: 9 | branches: 10 | - 'master' 11 | 12 | jobs: 13 | docker: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - 17 | name: Checkout 18 | uses: nschloe/action-cached-lfs-checkout@v1 19 | - 20 | name: Set up Docker Buildx 21 | uses: docker/setup-buildx-action@v2 22 | - 23 | name: Login to DockerHub 24 | if: github.event_name != 'pull_request' 25 | uses: docker/login-action@v2 26 | with: 27 | username: ${{ secrets.DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.DOCKERHUB_TOKEN }} 29 | - 30 | name: Docker meta 31 | id: meta_gpu_base 32 | uses: docker/metadata-action@v4 33 | with: 34 | images: | 35 | tencentailab/hok_env 36 | flavor: | 37 | prefix=gpu_base_ 38 | tags: | 39 | type=semver,pattern={{raw}} 40 | type=sha,prefix=gpu_base_ 41 | - 42 | name: Build and push 43 | uses: docker/build-push-action@v3 44 | with: 45 | context: . 
46 | push: ${{ github.event_name != 'pull_request' }} 47 | tags: ${{ steps.meta_gpu_base.outputs.tags }} 48 | labels: ${{ steps.meta_gpu_base.outputs.labels }} 49 | file: dockerfile/dockerfile.base.torch 50 | target: gpu 51 | cache-from: type=registry,ref=tencentailab/hok_env:buildcache_gpu_base 52 | cache-to: type=registry,ref=tencentailab/hok_env:buildcache_gpu_base,mode=max 53 | build-args: | 54 | BASE_IMAGE=tencentailab/hok_env:base_common_py38_base_v2.0.2 55 | - 56 | name: Docker meta 57 | id: meta_gpu 58 | uses: docker/metadata-action@v4 59 | with: 60 | images: | 61 | tencentailab/hok_env 62 | flavor: | 63 | prefix=gpu_ 64 | tags: | 65 | type=semver,pattern={{raw}} 66 | type=sha,prefix=gpu_ 67 | - 68 | name: Build and push 69 | uses: docker/build-push-action@v3 70 | with: 71 | context: . 72 | push: ${{ github.event_name != 'pull_request' }} 73 | tags: ${{ steps.meta_gpu.outputs.tags }} 74 | labels: ${{ steps.meta_gpu.outputs.labels }} 75 | file: dockerfile/dockerfile.dev 76 | cache-from: type=registry,ref=tencentailab/hok_env:buildcache_gpu 77 | cache-to: type=registry,ref=tencentailab/hok_env:buildcache_gpu,mode=max 78 | target: battle 79 | build-args: | 80 | BASE_IMAGE=tencentailab/hok_env:gpu_base_v2.0.2 81 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | runs-on: ubuntu-latest 21 | defaults: 22 | run: 23 | working-directory: ./hok_env 24 | steps: 25 | - uses: actions/checkout@v3 26 | - name: Set up Python 27 | uses: actions/setup-python@v3 28 | with: 29 | python-version: '3.x' 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install build 34 | - name: Build package 35 | run: python -m build 36 | - name: Publish package 37 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 38 | with: 39 | packages_dir: hok_env/dist 40 | user: ${{ secrets.PYPI_API_USER }} 41 | password: ${{ secrets.PYPI_API_TOKEN }} 42 | 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | license.dat 2 | hok_env.zip 3 | dist 4 | *.egg-info 5 | __pycache__ 6 | GameAiMgr_*.txt 7 | aiarena/checkpoints 8 | aiarena/logs 9 | aiarena/scripts/actor/learner.iplist.new 10 | aiarena/scripts/learner/learner.iplist.new 11 | aiarena/code 12 | -------------------------------------------------------------------------------- /3rd/winehq.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PGP PUBLIC KEY BLOCK----- 2 | 3 | mQGNBFwOmrgBDAC9FZW3dFpew1hwDaqRfdQQ1ABcmOYu1NKZHwYjd+bGvcR2LRGe 4 | R5dfRqG1Uc/5r6CPCMvnWxFprymkqKEADn8eFn+aCnPx03HrhA+lNEbciPfTHylt 5 | NTTuRua7YpJIgEOjhXUbxXxnvF8fhUf5NJpJg6H6fPQARUW+5M//BlVgwn2jhzlW 6 | U+uwgeJthhiuTXkls9Yo3EoJzmkUih+ABZgvaiBpr7GZRw9GO1aucITct0YDNTVX 7 | KA6el78/udi5GZSCKT94yY9ArN4W6NiOFCLV7MU5d6qMjwGFhfg46NBv9nqpGinK 8 | 3NDjqCevKouhtKl2J+nr3Ju3Spzuv6Iex7tsOqt+XdZCoY+8+dy3G5zbJwBYsMiS 9 | rTNF55PHtBH1S0QK5OoN2UR1ie/aURAyAFEMhTzvFB2B2v7C0IKIOmYMEG+DPMs9 10 | 
FQs/vZ1UnAQgWk02ZiPryoHfjFO80+XYMrdWN+RSo5q9ODClloaKXjqI/aWLGirm 11 | KXw2R8tz31go3NMAEQEAAbQnV2luZUhRIHBhY2thZ2VzIDx3aW5lLWRldmVsQHdp 12 | bmVocS5vcmc+iQHOBBMBCgA4AhsDBQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEE 13 | 1D9kAUU2nFHXht3qdvGiD/mHZy8FAlwOmyUACgkQdvGiD/mHZy/zkwv7B+nKFlDY 14 | Bzz/7j0gqIODbs5FRZRtuf/IuPP3vZdWlNfAW/VyaLtVLJCM/mmaf/O6/gJ+D+E9 15 | BBoSmHdHzBBOQHIj5IbRedynNcHT5qXsdBeU2ZPR50sdE+jmukvw3Wa5JijoDgUu 16 | LGLGtU48Z3JsBXQ54OlnTZXQ2SMFhRUa10JANXSJQ+QY2Wo2Pi2+MEAHcrd71A2S 17 | 0mT2DQSSBQ92c6WPfUpOSBawd8P0ipT7rVFNLJh8HVQGyEWxPl8ecDEHoVfG2rdV 18 | D0ADbNLx9031UUwpUicO6vW/2Ec7c3VNG1cpOtyNTw/lEgvsXOh3GQs/DvFvMy/h 19 | QzaeF3Qq6cAPlKuxieJe4lLYFBTmCAT4iB1J8oeFs4G7ScfZH4+4NBe3VGoeCD/M 20 | Wl+qxntAroblxiFuqtPJg+NKZYWBzkptJNhnrBxcBnRinGZLw2k/GR/qPMgsR2L4 21 | cP+OUuka+R2gp9oDVTZTyMowz+ROIxnEijF50pkj2VBFRB02rfiMp7q6iQIzBBAB 22 | CgAdFiEE2iNXmnTUrZr50/lFzvrI6q8XUZ0FAlwOm3AACgkQzvrI6q8XUZ3KKg/+ 23 | MD8CgvLiHEX90fXQ23RZQRm2J21w3gxdIen/N8yJVIbK7NIgYhgWfGWsGQedtM7D 24 | hMwUlDSRb4rWy9vrXBaiZoF3+nK9AcLvPChkZz28U59Jft6/l0gVrykey/ERU7EV 25 | w1Ie1eRu0tRSXsKvMZyQH8897iHZ7uqoJgyk8U8CvSW+V80yqLB2M8Tk8ECZq34f 26 | HqUIGs4Wo0UZh0vV4+dEQHBh1BYpmmWl+UPf7nzNwFWXu/EpjVhkExRqTnkEJ+Ai 27 | OxbtrRn6ETKzpV4DjyifqQF639bMIem7DRRf+mkcrAXetvWkUkE76e3E9KLvETCZ 28 | l4SBfgqSZs2vNngmpX6Qnoh883aFo5ZgVN3v6uTS+LgTwMt/XlnDQ7+Zw+ehCZ2R 29 | CO21Y9Kbw6ZEWls/8srZdCQ2LxnyeyQeIzsLnqT/waGjQj35i4exzYeWpojVDb3r 30 | tvvOALYGVlSYqZXIALTx2/tHXKLHyrn1C0VgHRnl+hwv7U49f7RvfQXpx47YQN/C 31 | PWrpbG69wlKuJptr+olbyoKAWfl+UzoO8vLMo5njWQNAoAwh1H8aFUVNyhtbkRuq 32 | l0kpy1Cmcq8uo6taK9lvYp8jak7eV8lHSSiGUKTAovNTwfZG2JboGV4/qLDUKvpa 33 | lPp2xVpF9MzA8VlXTOzLpSyIVxZnPTpL+xR5P9WQjMS5AY0EXA6auAEMAMReKL89 34 | 0z0SL+/i/geB/agfG/k6AXiG2a9kVWeIjAqFwHKl9W/DTNvOqCDgAt51oiHGRRjt 35 | 1Xm3XZD4p+GM1uZWn9qIFL49Gt5x94TqdrsKTVCJr0Kazn2mKQc7aja0zac+WtZG 36 | OFn7KbniuAcwtC780cyikfmmExLI1/Vjg+NiMlMtZfpK6FIW+ulPiDQPdzIhVppx 37 | w9/KlR2Fvh4TbzDsUqkFQSSAFdQ65BWgvzLpZHdKO/ILpDkThLbipjtvbBv/pHKM 38 | 
O/NFTNoYkJ3cNW/kfcynwV+4AcKwdRz2A3Mez+g5TKFYPZROIbayOo01yTMLfz2p 39 | jcqki/t4PACtwFOhkAs+MYPPyZDUkTFcEJQCPDstkAgmJWI3K2qELtDOLQyps3WY 40 | Mfp+mntOdc8bKjFTMcCEk1zcm14K4Oms+w6dw2UnYsX1FAYYhPm8HUYwE4kP8M+D 41 | 9HGLMjLqqF/kanlCFZs5Avx3mDSAx6zS8vtNdGh+64oDNk4x4A2j8GTUuQARAQAB 42 | iQG8BBgBCgAmFiEE1D9kAUU2nFHXht3qdvGiD/mHZy8FAlwOmrgCGwwFCQPCZwAA 43 | CgkQdvGiD/mHZy9FnAwAgfUkxsO53Pm2iaHhtF4+BUc8MNJj64Jvm1tghr6PBRtM 44 | hpbvvN8SSOFwYIsS+2BMsJ2ldox4zMYhuvBcgNUlix0G0Z7h1MjftDdsLFi1DNv2 45 | J9dJ9LdpWdiZbyg4Sy7WakIZ/VvH1Znd89Imo7kCScRdXTjIw2yCkotE5lK7A6Ns 46 | NbVuoYEN+dbGioF4csYehnjTdojwF/19mHFxrXkdDZ/V6ZYFIFxEsxL8FEuyI4+o 47 | LC3DFSA4+QAFdkjGFXqFPlaEJxWt5d7wk0y+tt68v+ulkJ900BvR+OOMqQURwrAi 48 | iP3I28aRrMjZYwyqHl8i/qyIv+WRakoDKV+wWteR5DmRAPHmX2vnlPlCmY8ysR6J 49 | 2jUAfuDFVu4/qzJe6vw5tmPJMdfvy0W5oogX6sEdin5M5w2b3WrN8nXZcjbWymqP 50 | 6jCdl6eoCCkKNOIbr/MMSkd2KqAqDVM5cnnlQ7q+AXzwNpj3RGJVoBxbS0nn9JWY 51 | QNQrWh9rAcMIGT+b1le0 52 | =4lsa 53 | -----END PGP PUBLIC KEY BLOCK----- 54 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 20231228 v2.0.4 2 | rl_framework: 3 | 1. refactor(logger): utilize logurus as logger 4 | `rl_framework.common.logging` should be replaced by `from rl_framework.common.logging import logger as LOG`. 5 | 2. feat(model_manager): support `save_model_seconds`. 6 | 3. feat(model_manager): send checkpoints without optimizer state to reduce disk usage cost. 7 | 4. feat(send_model): support `backup_ckpt_only`. 8 | 9 | aiarena: 10 | 1. fix(1v1/agent_demo): typos 11 | 2. feat(1v1/agent_demo): return home if ego_hp_rate is less than 0.5. 12 | 3. refactor(1v1/3v3): improve code and remove redundant configurations. 13 | 4. feat(actor): support `auto_bind_cpu` to bind cpu_id for each actor process according to actor_id. 14 | 5. feat(learner): support `load_optimizer_state`. 15 | 6. fix(3v3/model): typos 16 | 17 | hok_env: 18 | 1. 
feat(3v3): support reward configuration. 19 | 20 | Others: 21 | 1. Introduce GitHub workflow to upload Python package hok to pypi for every release. 22 | 2. Archive network.py for the 3v3 paper (cppo, mappo, ppo). 23 | 3. Use a torch-only image, tensorflow training code is now deprecated. 24 | 4. Update README.md. 25 | 26 | # 20230817 27 | 28 | 1. Refactor aiarena/hok_env/rl_framework 29 | 2. Support Python 3.6/3.8/3.9 for hok3v3 30 | 3. Update config.dat for hok1v1/hok3v3 to support more heroes 31 | 4. Add aiarena/process to run the rl_framework with python 32 | 5. Fix bugs (hok1v1/lib, hok3v3/lib) 33 | 34 | # 20230607 35 | 36 | 3v3 mode now available (python 3.7 required) 37 | 38 | 1. Refactor hok_env: renamed hok -> hok1v1 39 | 2. Support pytorch in rl_framework 40 | 3. Support 3v3 mode: hok3v3 added 41 | 1. Run the unit test 42 | ``` 43 | python -c "from hok.hok3v3.unit_test.test_env import run_test; run_test()" 44 | ``` 45 | 4. Example: 3v3 dev image 46 | 1. build image 47 | ``` 48 | docker build -t test -f dockerfile/dockerfile.dev.3v3 . 49 | ``` 50 | 2. run train test (start gamecore server on `127.0.0.1:23432` before the test) 51 | ``` 52 | docker run -it --network host test bash 53 | sh /aiarena/scripts/start_test.sh 54 | ``` 55 | 56 | # 20230110 57 | 58 | Support running Windows gamecore on Linux using Wine: 59 | 60 | 1. Update gamecore to fix the compatibility with the Wine 61 | 62 | 2. Add gamecore-server-linux-amd64 to the gamecore package 63 | 64 | 3. Add remote gamecore server 65 | ``` 66 | export SIMULATOR_USE_WINE=1 67 | nohup sh /rl_framework/remote-gc-server/run_and_monitor_gamecore_server.sh & 68 | ``` 69 | 70 | 4. Update dockerfile: use ubuntu as the base image 71 | ``` 72 | sh ./build.sh 73 | ``` 74 | See also [Github Action](./.github/workflows/) 75 | 76 | 5. Sync codes 77 | 1. Support `SLOW_TIME` 78 | 2. Fix `NET_CARD_NAME` 79 | 3. remote check_and_send 80 | 4. Fix typos 81 | 5. Fix the zmq server bind error 82 | 6. 
Wait for the gamecore process done after `gameover` 83 | 84 | 6. Update hok_env/hok/lib/interface 85 | 1. Remove the init move 86 | 2. Support Python3.8 and Python3.9 87 | -------------------------------------------------------------------------------- /GAMECORE.LICENSE: -------------------------------------------------------------------------------- 1 | 腾讯王者荣耀AI与机器学习协议 2 | 重要提醒: 3 | 在安装或使用腾讯的AI和机器学习软件和环境(以下简称"本软件")之前,您应仔细阅读《腾讯王者荣耀AI与机器学习协议》(以下简称"本协议")。 如果您不同意本协议的所有条款,您不得安装或以其他方式访问本软件。 4 | 5 | 根据本协议的条款,您对本软件的使用受腾讯的《腾讯游戏许可及服务协议》管辖,该协议通过引用纳入本文,可在此查阅(https://jiazhang.qq.com/zk/smallProtocol.html)。在安装或使用本软件之前,请仔细阅读《腾讯游戏许可及服务协议》和本协议。 如果不同意《腾讯游戏许可及服务协议》和本协议的条款,您将不允许安装、复制或使用本软件。 6 | 7 | 1. 本软件的使用 8 | a. 仅限用于人工智能测试和机器学习: 在您遵守本协议的前提下, 腾讯授予您有限的、可撤销的、不可转授的许可以使用本软件, 并且仅允许使用本软件用于人工智能测试、机器学习和相关研究工作。 9 | b. 例外情况: 您对本软件的使用受腾讯《腾讯游戏许可及服务协议》条款的约束,但有以下几种例外情况 10 | i. 《腾讯游戏许可及服务协议》第6.4.(4)不应被理解为禁止使用本软件或从这种使用中产生或收集的数据。但本协议的任何部分都不赋予您创建、分发或以其他方式利用本软件的未经授权的衍生作品的权利。 11 | ii. 《腾讯游戏许可及服务协议》第6.4.(6)不应被理解为禁止对本软件使用自动化的程序。但本协议并不赋予您任何制作、发布、传播第三方软件、插件、外挂、系统的权利。 12 | 2. 所有权 13 | a. 《腾讯游戏许可及服务协议》第7条规定完全适用于本软件(包括其任何衍生产品),但您可以拥有并使用由授权使用本软件产生或收集的人工智能或机器学习数据。 14 | 15 | 16 | 17 | 18 | 19 | 20 | Tencent's Hornor of King AI And Machine Learning License 21 | IMPORTANT NOTICE: 22 | YOU SHOULD CAREFULLY READ THIS AGREEMENT (THE “AGREEMENT”) BEFORE INSTALLING OR USING TENCENT'S AI AND MACHINE LEARNING SOFTWARE AND ENVIRONMENT (THE “SOFTWARE”). IF YOU DO NOT AGREE WITH ALL OF THE TERMS OF THIS AGREEMENT, YOU MAY NOT INSTALL OR OTHERWISE ACCESS THE SOFTWARE. 23 | Subject to the terms of this Agreement, your use of the Software is governed by Tencent Game License and Service Agreement ("GLSA"), which is incorporated by reference herein and is available for review here(https://jiazhang.qq.com/zk/smallProtocol.html?lang=en). Please carefully review the GLSA and this Agreement prior to installing or using the Software. 
IF YOU DO NOT AGREE TO THE TERMS OF THE GLSA AND THIS AGREEMENT, YOU ARE NOT PERMITTED TO INSTALL, COPY, OR USE THE SOFTWARE. 24 | 25 | 1. Use Of The Software 26 | a. AI Testing And Machine Learning Use Only: Subject to your compliance with this Agreement, Tencent grants you a limited, revocable, non-sublicensable license to use the Software for purposes of AI testing, machine learning, and related research only. 27 | b. GLSA Exceptions: The terms of the Tencent Game License and Service Agreement govern your use of the Software, subject to the following narrow exceptions: 28 | i. The provisions of Section 6.4.(4) of the GLSA shall not be read to prohibit the authorized use of the Software or data generated or collected from such use. However, no portion of this Agreement shall give you the right to create, distribute, or otherwise exploit unauthorized derivative works of the Software. 29 | ii. The provisions of Section 6.4.(6) of the GLSA shall not be read to prohibit the use of automation processes for the Software. However, no portion of this Agreement shall give you the right to create, publish and propagate third-party software, plug-in, cheating tool and system that is not developed nor authorized by Tencent. 30 | 2. Ownership 31 | a. The provisions of Section 7 of the GLSA apply in full force to the Software (including any derivatives thereof), except that you may own and use the AI or machine learning data generated by or collected through the authorized use of the Software. 
32 | -------------------------------------------------------------------------------- /aiarena/1v1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/aiarena/1v1/__init__.py -------------------------------------------------------------------------------- /aiarena/1v1/actor/__init__.py: -------------------------------------------------------------------------------- 1 | from aiarena.code.actor.actor import Actor 2 | -------------------------------------------------------------------------------- /aiarena/1v1/actor/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "reward_money": "0.006", 3 | "reward_exp": "0.006" , 4 | "reward_hp_point": "2.0", 5 | "reward_ep_rate": "0.75", 6 | "reward_kill": "-0.6", 7 | "reward_dead": "-1.0", 8 | "reward_tower_hp_point": "5.0", 9 | "reward_last_hit": "0.5", 10 | "log_level": "8" 11 | } -------------------------------------------------------------------------------- /aiarena/1v1/actor/custom.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rl_framework.common.logging import logger as LOG 3 | 4 | from agent import Agent as AgentBase 5 | 6 | HERO_ID_INDEX_DICT = { 7 | 112: 0, 8 | 121: 1, 9 | 123: 2, 10 | 131: 3, 11 | 132: 4, 12 | 133: 5, 13 | 140: 6, 14 | 141: 7, 15 | 146: 8, 16 | 150: 9, 17 | 154: 10, 18 | 157: 11, 19 | 163: 12, 20 | 169: 13, 21 | 175: 14, 22 | 182: 15, 23 | 193: 16, 24 | 199: 17, 25 | 502: 18, 26 | 513: 19, 27 | } 28 | 29 | 30 | class Agent(AgentBase): 31 | def append_hero_identity(self, state_dict): 32 | # hero identity feature (ont-hot) 33 | runtime_id = state_dict["player_id"] 34 | hero_id = None 35 | for hero in state_dict["req_pb"].hero_list: 36 | if hero.runtime_id == runtime_id: 37 | hero_id = hero.config_id 38 | 39 | if hero_id is None: 40 | raise 
def get_model_class(backend):
    """Return the process-wide Model class for the requested backend.

    Args:
        backend: either "tensorflow" or "pytorch"; selects which Algorithm
            implementation is imported.

    Returns:
        A Singleton-decorated ``Model`` class derived from the chosen
        Algorithm, configured for single-step, single-sample inference.

    Raises:
        NotImplementedError: if ``backend`` is not one of the two
            supported values.
    """
    if backend == "tensorflow":
        from common.algorithm_tf import Algorithm
    elif backend == "pytorch":
        from common.algorithm_torch import Algorithm
        import torch

        # Many actor processes share one machine: keep each torch runtime
        # single-threaded so they do not contend for cores.
        torch.set_num_threads(1)
        torch.set_num_interop_threads(1)
    else:
        raise NotImplementedError(
            "check ModelConfig, backend=['tensorflow', 'pytorch']"
        )

    @Singleton
    class Model(Algorithm):
        """One shared Algorithm instance per process (singleton pattern)."""

        def __init__(self):
            super().__init__()
            # Online inference feeds one timestep of one sample at a time.
            self.lstm_time_steps = 1
            self.batch_size = 1

    return Model
    def struct_to_pb(self, off_policy_rl_info):
        """Copy this sample's fields onto the off-policy RL info protobuf.

        Straight field-for-field serialization of the RLDataInfo struct; the
        only renamed mapping is ``task_uuid`` -> ``uuid``, and ``action_list``
        is appended via the repeated-field ``extend``.
        """
        off_policy_rl_info.frame_no = self.frame_no
        off_policy_rl_info.feature = self.feature
        off_policy_rl_info.next_feature = self.next_feature
        off_policy_rl_info.reward_sum = self.reward_sum
        off_policy_rl_info.reward = self.reward
        off_policy_rl_info.done = self.done
        off_policy_rl_info.value = self.value
        # NOTE(review): self.neg_log_pis is commented out in __init__, so it
        # must be assigned by the sample pipeline before this call — confirm,
        # otherwise this line raises AttributeError.
        off_policy_rl_info.neg_log_pis = self.neg_log_pis
        off_policy_rl_info.action = self.action
        off_policy_rl_info.action_list.extend(self.action_list)
        off_policy_rl_info.advantage = self.advantage
        off_policy_rl_info.game_id = self.game_id
        off_policy_rl_info.is_train = self.is_train
        off_policy_rl_info.is_game_over = self.is_game_over
        off_policy_rl_info.uuid = self.task_uuid
        off_policy_rl_info.next_Q_value = self.next_Q_value
        off_policy_rl_info.gamma_pow = self.gamma_pow
import Config 20 | from rl_framework.common.logging import setup_logger 21 | 22 | DEFAULT_MODEL_PATH = os.path.join(work_dir, "model", "init") 23 | 24 | flags.DEFINE_string("server_addr", "tcp://0.0.0.0:35400", "address of server") 25 | flags.DEFINE_string("model_path", DEFAULT_MODEL_PATH, "path to checkpoint") 26 | 27 | 28 | def server(_): 29 | setup_logger(filename=None, level="INFO") 30 | 31 | FLAGS = flags.FLAGS 32 | Model = get_model_class(Config.backend) 33 | 34 | agent = Agent( 35 | Model(), 36 | model_pool_addr=None, 37 | config=Config, 38 | ) 39 | agent.reset(model_path=FLAGS.model_path) 40 | 41 | server = AIServer(agent, FLAGS.server_addr) 42 | server.run() 43 | 44 | 45 | if __name__ == "__main__": 46 | absl_app.run(server) 47 | -------------------------------------------------------------------------------- /aiarena/1v1/common/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class DimConfig: 5 | # main camp soldier 6 | DIM_OF_SOLDIER_1_10 = [18, 18, 18, 18] 7 | # enemy camp soldier 8 | DIM_OF_SOLDIER_11_20 = [18, 18, 18, 18] 9 | # main camp organ 10 | DIM_OF_ORGAN_1_2 = [18, 18] 11 | # enemy camp organ 12 | DIM_OF_ORGAN_3_4 = [18, 18] 13 | # main camp hero 14 | DIM_OF_HERO_FRD = [235] 15 | # enemy camp hero 16 | DIM_OF_HERO_EMY = [235] 17 | # public hero info 18 | DIM_OF_HERO_MAIN = [14] # main_hero_vec 19 | 20 | DIM_OF_GLOBAL_INFO = [25] 21 | 22 | 23 | class Config: 24 | backend = os.getenv("AIARENA_BACKEND", "pytorch") 25 | actor_num = int(os.getenv("ACTOR_NUM", "1")) 26 | auto_bind_cpu = os.getenv("AUTO_BIND_CPU", "0") == "1" 27 | 28 | # TODO refactor: learner only config 29 | use_init_model = os.getenv("AIARENA_USE_INIT_MODEL", "0") == "1" 30 | init_model_path = os.getenv( 31 | "AIARENA_INIT_MODEL_PATH", "/aiarena/code/learner/model/init/" 32 | ) 33 | load_optimizer_state = os.getenv("AIARENA_LOAD_OPTIMIZER_STATE", "1") == "1" 34 | NETWORK_NAME = "network" 35 | LSTM_TIME_STEPS = 16 
    LSTM_UNIT_SIZE = 512
    # Per-frame flat sample layout; entries map one-to-one onto `data_keys`
    # below: observation(809), reward(1), advantage(1), label0..5 (6x1),
    # prob0..5 (12,16,16,16,16,8), weight0..5 (6x1), is_train(1),
    # lstm_cell(512), lstm_hidden_state(512).
    DATA_SPLIT_SHAPE = [
        809,
        1,
        1,
        1,
        1,
        1,
        1,
        1,
        1,
        12,
        16,
        16,
        16,
        16,
        8,
        1,
        1,
        1,
        1,
        1,
        1,
        1,
        512,
        512,
    ]
    # Observation vector split: 725 flat features + 84 (spatial part).
    SERI_VEC_SPLIT_SHAPE = [(725,), (84,)]
    INIT_LEARNING_RATE_START = 0.0001
    BETA_START = 0.025
    LOG_EPSILON = 1e-6
    # Output sizes of the six action heads; prob0..5 above use these sizes.
    LABEL_SIZE_LIST = [12, 16, 16, 16, 16, 8]
    IS_REINFORCE_TASK_LIST = [
        True,
        True,
        True,
        True,
        True,
        True,
    ]  # means each task whether need reinforce

    RMSPROP_DECAY = 0.9
    RMSPROP_MOMENTUM = 0.0
    RMSPROP_EPSILON = 0.01
    CLIP_PARAM = 0.2

    MIN_POLICY = 0.00001
    TASK_ID = 15428
    TASK_UUID = "a2dbb49f-8a67-4bd4-9dc5-69e78422e72e"

    TARGET_EMBED_DIM = 32

    # Comma-joined names for the 24 fields of DATA_SPLIT_SHAPE / data_shapes.
    # NOTE(review): the last three names are written with a space after the
    # comma ("is_train, lstm_cell, ..."); a consumer splitting on "," without
    # strip() would see keys with a leading space — confirm against the
    # sample parser before normalizing.
    data_keys = (
        "observation,reward,advantage,"
        "label0,label1,label2,label3,label4,label5,"
        "prob0,prob1,prob2,prob3,prob4,prob5,"
        "weight0,weight1,weight2,weight3,weight4,weight5,"
        "is_train, lstm_cell, lstm_hidden_state"
    )
    # Per-key flattened sizes over LSTM_TIME_STEPS=16 frames:
    # each DATA_SPLIT_SHAPE entry x 16 (809*16=12944, 12*16=192, 16*16=256,
    # 8*16=128), except the final two LSTM states which are stored once (512).
    data_shapes = [
        [12944],
        [16],
        [16],
        [16],
        [16],
        [16],
        [16],
        [16],
        [16],
        [192],
        [256],
        [256],
        [256],
        [256],
        [128],
        [16],
        [16],
        [16],
        [16],
        [16],
        [16],
        [16],
        [512],
        [512],
    ]
    # Element type per key: int32 for the 6 labels, float32 elsewhere.
    key_types = (
        "tf.float32,tf.float32,tf.float32,"
        "tf.int32,tf.int32,tf.int32,tf.int32,tf.int32,tf.int32,"
        "tf.float32,tf.float32,tf.float32,tf.float32,tf.float32,tf.float32,"
        "tf.float32,tf.float32,tf.float32,tf.float32,tf.float32,"
        "tf.float32,tf.float32,tf.float32,tf.float32"
    )

    # Legal-action mask sizes: copy of LABEL_SIZE_LIST with the last head
    # expanded to (last * first) entries.
    LEGAL_ACTION_SIZE_LIST = LABEL_SIZE_LIST.copy()
    LEGAL_ACTION_SIZE_LIST[-1] = LEGAL_ACTION_SIZE_LIST[-1] * LEGAL_ACTION_SIZE_LIST[0]
    slow_time = float(os.getenv("SLOW_TIME", "0").strip())
    # Opponent type; overridden by the ENEMY_TYPE env var:
    # 0 -> random, 1 -> common_ai, 2 -> network (default "network").
    ENEMY_TYPE = "network"
    if os.getenv("ENEMY_TYPE") is not None:
        enemy_type = int(os.getenv("ENEMY_TYPE"))
        if enemy_type == 0:
            ENEMY_TYPE = "random"
        elif enemy_type == 1:
            ENEMY_TYPE = "common_ai"
        elif enemy_type == 2:
            ENEMY_TYPE = "network"
    EVAL_FREQ = 5
    GAMMA = 0.995
    LAMDA = 0.95
    IS_TRAIN = True
from agent.agent import Agent as BaseAgent
from numpy.random import rand
import numpy as np

# Shapes of the prediction tensors and LSTM state returned by
# _predict_process: three (1, 162) outputs and two (1, 16) state tensors.
pred_ret_shape = [(1, 162)] * 3
lstm_cell_shape = [(1, 16), (1, 16)]


class Agent(BaseAgent):
    """Rule-only agent that returns uniformly random predictions."""

    def __init__(self, *args, **kwargs):
        # Force rule_only mode; no network is ever loaded.
        kwargs["rule_only"] = True
        super().__init__(*args, **kwargs)

    def _predict_process(self, features, frame_state, runtime_ids):
        """Ignore the observation and emit random float32 outputs with
        zeroed LSTM state tensors."""
        random_preds = [rand(*shape).astype("float32") for shape in pred_ret_shape]
        zero_state = [np.zeros(shape, dtype="float32") for shape in lstm_cell_shape]
        return random_preds, zero_state
"deadCnt": -1, 41 | "assistCnt": 1, 42 | "total_hurt_to_hero": 0.1, 43 | "atk_monster": 0.1, 44 | "win_crystal": 1, 45 | "atk_crystal": 1, 46 | }, 47 | "hero_2": { 48 | "hp_rate_sqrt_sqrt": 1, 49 | "money": 0.001, 50 | "exp": 0.001, 51 | "tower": 1, 52 | "killCnt": 1, 53 | "deadCnt": -1, 54 | "assistCnt": 1, 55 | "total_hurt_to_hero": 0.1, 56 | "atk_monster": 0.1, 57 | "win_crystal": 1, 58 | "atk_crystal": 1, 59 | }, 60 | }, 61 | "policy_heroes": { 62 | "hero_0": [169, 112, 174], 63 | "hero_1": [176, 119, 157], 64 | "hero_2": [128, 163, 167], 65 | }, 66 | } 67 | -------------------------------------------------------------------------------- /aiarena/3v3/actor/config/model_config.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import numpy as np 3 | 4 | 5 | class ModelConfig: 6 | LSTM_TIME_STEPS = 16 7 | LSTM_UNIT_SIZE = 16 8 | # HERO_NUM = 3 9 | HERO_DATA_SPLIT_SHAPE = [ 10 | 4586, # feature 11 | 13, # legal_action 12 | 25, 13 | 42, 14 | 42, 15 | 39, 16 | 1, # reward 17 | 1, # advantage 18 | 1, # action 19 | 1, 20 | 1, 21 | 1, 22 | 1, 23 | 13, # probs 24 | 25, 25 | 42, 26 | 42, 27 | 39, 28 | 1, # is_train 29 | 1, # sub_action 30 | 1, 31 | 1, 32 | 1, 33 | 1, 34 | ] 35 | HERO_SERI_VEC_SPLIT_SHAPE = [(6, 17, 17), (2852,)] 36 | HERO_FEATURE_IMG_CHANNEL = 6 # feature image channel for each hero 37 | HERO_LABEL_SIZE_LIST = [13, 25, 42, 42, 39] 38 | 39 | DIM_OF_SOLDIER_1_10 = [25] * 10 40 | DIM_OF_SOLDIER_11_20 = [25] * 10 41 | DIM_OF_ORGAN_1_3 = [29] * 3 42 | DIM_OF_ORGAN_4_6 = [29] * 3 43 | DIM_OF_MONSTER_1_20 = [28] * 20 44 | DIM_OF_HERO_FRD = [251] * 3 45 | DIM_OF_HERO_EMY = [251] * 3 46 | DIM_OF_HERO_MAIN = [44] 47 | DIM_OF_GLOBAL_INFO = [68] 48 | 49 | sample_one_size = np.sum(HERO_DATA_SPLIT_SHAPE) 50 | 51 | # tensorflow only 52 | use_xla = True 53 | -------------------------------------------------------------------------------- /aiarena/3v3/actor/frozen.py: 
import os

import sys


import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.tools import freeze_graph
from model.tensorflow.model import Model
from config.model_config import ModelConfig


# Restrict TF to a single CPU device and one thread per op pool.
cpu_num = 1
sess_config = tf.ConfigProto(
    device_count={"CPU": cpu_num},
    inter_op_parallelism_threads=cpu_num,
    intra_op_parallelism_threads=cpu_num,
    log_device_placement=False,
)


def save_as_pb(
    graph, checkpoint_path, output_tensors, directory="checkpoints", filename="frozen"
):
    """Write `graph` as <directory>/<filename>.pbtxt, then freeze it with the
    variables from `checkpoint_path` into <directory>/<filename>.pb.

    Returns the path of the frozen .pb file.
    """
    os.makedirs(directory, exist_ok=True)

    pbtxt_filename = filename + ".pbtxt"
    pbtxt_filepath = os.path.join(directory, pbtxt_filename)
    pb_filepath = os.path.join(directory, filename + ".pb")

    # Dump the (unfrozen) graph definition as text first; freeze_graph
    # reloads it from that file below.
    with tf.Session(graph=graph, config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.write_graph(
            graph_or_graph_def=sess.graph_def,
            logdir=directory,
            name=pbtxt_filename,
            as_text=True,
        )

        freeze_graph.freeze_graph(
            input_graph=pbtxt_filepath,
            input_saver="",
            input_binary=False,
            input_checkpoint=checkpoint_path,
            # Keep every listed output node reachable in the frozen graph.
            output_node_names=",".join([t.op.name for t in output_tensors]),
            restore_op_name="Unused",
            filename_tensor_name="Unused",
            output_graph=pb_filepath,
            clear_devices=True,
            initializer_nodes="",
        )

    return pb_filepath


# Script body: build the inference graph, restore the latest checkpoint,
# and export a frozen .pb next to the checkpoints.
model = Model(ModelConfig)
graph = model.build_infer_graph()

saver = tf.train.Saver(
    graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES),
    allow_empty=True,
)

checkpoint_path = "/aiarena/checkpoints/"
with tf.Session(graph=graph, config=sess_config) as sess:
    ckpt = tf.train.get_checkpoint_state(checkpoint_path)
    # NOTE(review): ckpt is None when no checkpoint exists under
    # checkpoint_path, which would raise AttributeError here — confirm the
    # directory is always populated before this script runs.
    saver.restore(sess, ckpt.model_checkpoint_path)

output_tensors = model.get_output_tensors()
# NOTE(review): variables were restored from ckpt.model_checkpoint_path above,
# but freeze_graph reloads from checkpoint_path + "model.ckpt" — confirm both
# refer to the same checkpoint file.
save_as_pb(graph, checkpoint_path + "model.ckpt", output_tensors, directory=checkpoint_path)
f"model_{model_id}") 55 | if not os.path.exists(ckpt_dir): 56 | LOG.info("{} not exists, ignore.", ckpt_dir) 57 | continue 58 | 59 | LOG.info("List {}: {}", ckpt_dir, os.listdir(ckpt_dir)) 60 | ckpt_dir_list.append(ckpt_dir) 61 | 62 | return ckpt_dir_list 63 | 64 | 65 | def _kaiwu_info_example(): 66 | ( 67 | ego_camp, 68 | lineup_id, 69 | team_id, 70 | enemy_camp, 71 | enemy_lineup_id, 72 | enemy_team_id, 73 | ) = get_kaiwu_battle_info() 74 | LOG.info( 75 | "Get kaiwu battle info - camp:{} lineup_id:{} team_id:{}, enemy_camp:{}, enemy_lineup_id:{}, enemy_team_id:{}", 76 | ego_camp, 77 | lineup_id, 78 | team_id, 79 | enemy_camp, 80 | enemy_lineup_id, 81 | enemy_team_id, 82 | ) 83 | 84 | ckpt_dir_list = get_ckpt_list(DEFAULT_CKPT_LIST_FILE) 85 | LOG.info("Get ckpt dir list: {}", ckpt_dir_list) 86 | 87 | 88 | def kaiwu_info_example(): 89 | try: 90 | _kaiwu_info_example() 91 | except: 92 | LOG.exception("kaiwu_info_example failed, ignore") 93 | 94 | 95 | if __name__ == "__main__": 96 | kaiwu_info_example() 97 | -------------------------------------------------------------------------------- /aiarena/3v3/actor/rl_data_info.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | class RLDataInfo: 3 | def __init__(self): 4 | self.frame_no = -1 5 | self.feature = b"" 6 | self.next_feature = b"" 7 | self.reward = 0 8 | self.reward2 = 0 9 | self.reward_sum = 0 10 | self.reward_sum2 = 0 11 | self.action = 0 12 | self.action_list = [] 13 | self.done = 0 14 | self.info = None 15 | self.value = 0 16 | self.value2 = 0 17 | # self.neg_log_pis = 0 18 | self.advantage = 0 19 | self.game_id = b"" 20 | self.is_train = False 21 | self.is_game_over = 0 22 | self.task_uuid = b"" 23 | self.next_Q_value = b"" 24 | self.gamma_pow = 1 25 | 26 | self.prob = None 27 | self.sub_action = None 28 | self.next_value = 0 29 | self.next_value2 = 0 30 | self.lstm_info = None 31 | 32 | def struct_to_pb(self, off_policy_rl_info): 33 | 
off_policy_rl_info.frame_no = self.frame_no 34 | off_policy_rl_info.feature = self.feature 35 | off_policy_rl_info.next_feature = self.next_feature 36 | off_policy_rl_info.reward_sum = self.reward_sum 37 | off_policy_rl_info.reward = self.reward 38 | off_policy_rl_info.done = self.done 39 | off_policy_rl_info.value = self.value 40 | off_policy_rl_info.neg_log_pis = self.neg_log_pis 41 | off_policy_rl_info.action = self.action 42 | off_policy_rl_info.action_list.extend(self.action_list) 43 | off_policy_rl_info.advantage = self.advantage 44 | off_policy_rl_info.game_id = self.game_id 45 | off_policy_rl_info.is_train = self.is_train 46 | off_policy_rl_info.is_game_over = self.is_game_over 47 | off_policy_rl_info.uuid = self.task_uuid 48 | off_policy_rl_info.next_Q_value = self.next_Q_value 49 | off_policy_rl_info.gamma_pow = self.gamma_pow 50 | -------------------------------------------------------------------------------- /aiarena/3v3/actor/server.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from absl import app as absl_app 4 | from absl import flags 5 | 6 | from hok.hok3v3.hero_config import interface_default_config 7 | from hok.hok3v3.lib import lib3v3 as interface 8 | from hok.hok3v3.server import BattleServer 9 | from hok.common.camp import HERO_DICT 10 | 11 | from rl_framework.common.logging import setup_logger 12 | from rl_framework.common.logging import logger as LOG 13 | 14 | from agent.agent import Agent 15 | from config.model_config import ModelConfig 16 | from config.config import Config 17 | from kaiwu import kaiwu_info_example 18 | 19 | 20 | def get_model_class(backend): 21 | if backend == "tensorflow": 22 | from model.tensorflow.model import Model 23 | elif backend == "pytorch": 24 | from model.pytorch.model import Model 25 | import torch 26 | 27 | torch.set_num_threads(1) 28 | torch.set_num_interop_threads(1) 29 | else: 30 | raise NotImplementedError("backend=['tensorflow', 'pytorch']") 31 
| return Model 32 | 33 | 34 | work_dir = os.path.dirname(os.path.abspath(__file__)) 35 | DEFAULT_MODEL_PATH = os.path.join(work_dir, "model", "init") 36 | 37 | flags.DEFINE_string("server_addr", "tcp://0.0.0.0:35400", "address of server") 38 | flags.DEFINE_string("model_path", DEFAULT_MODEL_PATH, "path to checkpoint") 39 | flags.DEFINE_string( 40 | "config_path", interface_default_config, "config file for interface" 41 | ) 42 | 43 | 44 | def server(_): 45 | kaiwu_info_example() 46 | 47 | # 未设置默认为随机, -1表示随机选择一套装备, 0表示index为0的装备列表, 以此类推 48 | equip_config = { 49 | "houyi": 0, 50 | "yangyuhuan": 0, 51 | "caocao": 0, 52 | "jvyoujing": 0, 53 | "luban": 0, 54 | "bianque": 0, 55 | "buzhihuowu": 0, 56 | "yuji": 0, 57 | "sunwukong": 0, 58 | } 59 | 60 | model_config, config = ModelConfig, Config 61 | 62 | setup_logger(filename=None, level="INFO") 63 | Model = get_model_class(config.backend) 64 | 65 | FLAGS = flags.FLAGS 66 | agent = Agent( 67 | Model(model_config), 68 | None, 69 | backend=config.backend, 70 | ) 71 | agent.reset(model_path=FLAGS.model_path) 72 | 73 | lib_processor = interface.Interface() 74 | lib_processor.Init(FLAGS.config_path) 75 | lib_processor.SetEvalMode(True) 76 | 77 | for hero_name, equip_index in equip_config.items(): 78 | lib_processor.SetHeroEquipIndex(HERO_DICT.get(hero_name, 0), equip_index) 79 | LOG.info("equip_config: {}", lib_processor.m_hero_equip_idx) 80 | 81 | server = BattleServer(agent, FLAGS.server_addr, lib_processor) 82 | server.run() 83 | 84 | 85 | if __name__ == "__main__": 86 | absl_app.run(server) 87 | -------------------------------------------------------------------------------- /aiarena/3v3/learner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/aiarena/3v3/learner/__init__.py -------------------------------------------------------------------------------- 
/aiarena/3v3/learner/config/Config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class Config: 5 | slow_time = float(os.getenv("SLOW_TIME", "0").strip()) 6 | backend = os.getenv("AIARENA_BACKEND", "pytorch") 7 | use_init_model = os.getenv("AIARENA_USE_INIT_MODEL", "0") == "1" 8 | init_model_path = os.getenv( 9 | "AIARENA_INIT_MODEL_PATH", "/aiarena/code/learner/model/init/" 10 | ) 11 | load_optimizer_state = os.getenv("AIARENA_LOAD_OPTIMIZER_STATE", "1") == "1" 12 | 13 | LSTM_TIME_STEPS = 16 14 | LSTM_UNIT_SIZE = 16 15 | HERO_DATA_SPLIT_SHAPE = [ 16 | 4586, # feature 17 | 13, # legal_action 18 | 25, 19 | 42, 20 | 42, 21 | 39, 22 | 1, # reward 23 | 1, # advantage 24 | 1, # action 25 | 1, 26 | 1, 27 | 1, 28 | 1, 29 | 13, # probs 30 | 25, 31 | 42, 32 | 42, 33 | 39, 34 | 1, # is_train 35 | 1, # sub_action 36 | 1, 37 | 1, 38 | 1, 39 | 1, 40 | ] 41 | HERO_SERI_VEC_SPLIT_SHAPE = [(6, 17, 17), (2852,)] 42 | HERO_FEATURE_IMG_CHANNEL = 6 # feature image channel for each hero 43 | HERO_LABEL_SIZE_LIST = [13, 25, 42, 42, 39] 44 | 45 | HERO_NUM = 3 46 | HERO_IS_REINFORCE_TASK_LIST = [[True] * len(HERO_LABEL_SIZE_LIST)] * HERO_NUM 47 | INIT_LEARNING_RATE_START = 0.0006 48 | BETA_START = 0.008 49 | CLIP_PARAM = 0.2 50 | MIN_POLICY = 0.00001 51 | data_shapes = [ 52 | [sum(HERO_DATA_SPLIT_SHAPE) * LSTM_TIME_STEPS + LSTM_UNIT_SIZE * 2] 53 | ] * HERO_NUM 54 | -------------------------------------------------------------------------------- /aiarena/3v3/learner/config/DimConfig.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | 4 | class DimConfig: 5 | DIM_OF_SOLDIER_1_10 = [25] * 10 6 | DIM_OF_SOLDIER_11_20 = [25] * 10 7 | DIM_OF_ORGAN_1_3 = [29] * 3 8 | DIM_OF_ORGAN_4_6 = [29] * 3 9 | DIM_OF_MONSTER_1_20 = [28] * 20 10 | DIM_OF_HERO_FRD = [251] * 3 11 | DIM_OF_HERO_EMY = [251] * 3 12 | DIM_OF_HERO_MAIN = [44] # main_hero_vec 13 | DIM_OF_GLOBAL_INFO = [68] 14 | 
-------------------------------------------------------------------------------- /aiarena/3v3/learner/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/aiarena/3v3/learner/config/__init__.py -------------------------------------------------------------------------------- /aiarena/3v3/learner/config/common.conf: -------------------------------------------------------------------------------- 1 | [main] 2 | backend=pytorch 3 | ips = 127.0.0.1 4 | ports = [35100,35101,35102,35103,35104,35105,35106,35107,35108,35109,35110,35111,35112,35113,35114,35115] 5 | save_model_steps = 500 6 | save_model_seconds = 120 7 | save_model_dir = /aiarena/checkpoints 8 | variable_update = horovod 9 | display_every = 100 10 | max_steps = 100000000 11 | train_dir = /aiarena/logs/learner/ 12 | distributed_backend = ddp 13 | print_variables = False 14 | print_timeline = False 15 | dump_profile = False 16 | batch_size = 288 17 | mem_process_num = 4 18 | push_to_modelpool = True 19 | mempool_path = /rl_framework/mem_pool_server_pkg/ 20 | send_model_dir = /rl_framework/send_model/model 21 | 22 | [model] 23 | use_init_model = False 24 | init_model_path = /aiarena/code/learner/model/init/ 25 | load_optimizer_state = True 26 | use_xla = True 27 | use_jit = False 28 | use_compile = False 29 | use_mix_precision = False 30 | use_fp16 = False 31 | channels_last = False 32 | has_unused_params = True 33 | 34 | [grads] 35 | check_values = False 36 | use_fusion = False 37 | piecewise_fusion_schedule = 8;9;24 38 | use_grad_clip = True 39 | grad_clip_range = 0.5 40 | sparse_as_dense = True 41 | to_fp16 = False 42 | use_xla_fusion = True 43 | 44 | [dataset] 45 | store_max_sample = 5000 46 | sample_process = 8 47 | batch_process = 2 48 | -------------------------------------------------------------------------------- /aiarena/3v3/learner/kaiwu.py: 
import os
import json

from rl_framework.common.logging import logger as LOG

work_dir = os.path.dirname(os.path.abspath(__file__))
DEFAULT_CKPT_LIST_FILE = os.path.join(work_dir, "models.json")


def get_kaiwu_battle_info():
    """Parse the kaiwu competition battle info from environment variables.

    Reads CAMP_TYPE ("RED"/"BLUE", case-insensitive) and
    CAMP_{BLUE,RED}_LINEUP_ID / CAMP_{BLUE,RED}_TEAM_ID.

    Returns:
        (ego_camp, lineup_id, team_id, enemy_camp, enemy_lineup_id,
        enemy_team_id) — camps default to "NOTSET" and ids to "-1" when
        the variables are absent.
    """

    ego_camp = os.getenv("CAMP_TYPE", "NOTSET").upper()

    enemy_camp = "NOTSET"
    if ego_camp == "BLUE":
        enemy_camp = "RED"
    elif ego_camp == "RED":
        enemy_camp = "BLUE"
    else:
        LOG.warning("Unknown camp info: {}", ego_camp)

    # Ego side parameters
    lineup_id = os.getenv("CAMP_" + ego_camp + "_LINEUP_ID", "-1")
    team_id = os.getenv("CAMP_" + ego_camp + "_TEAM_ID", "-1")

    # Enemy side parameters
    enemy_lineup_id = os.getenv("CAMP_" + enemy_camp + "_LINEUP_ID", "-1")
    enemy_team_id = os.getenv("CAMP_" + enemy_camp + "_TEAM_ID", "-1")
    return ego_camp, lineup_id, team_id, enemy_camp, enemy_lineup_id, enemy_team_id


def get_ckpt_list(ckpt_list_file):
    """Parse the ckpt list file injected by kaiwu and return the list of
    existing model directories under ../actor/model/."""
    if not os.path.exists(ckpt_list_file):
        LOG.info("{} not exists, ignore.", ckpt_list_file)
        return []

    with open(ckpt_list_file) as f:
        data = json.load(f)
        LOG.debug("load ckpt list file: {}", data)

    ckpt_dir_list = []
    for model_id in data.get("ids", []):
        ckpt_dir = os.path.join(work_dir, "..", "actor", "model", f"model_{model_id}")
        if not os.path.exists(ckpt_dir):
            LOG.info("{} not exists, ignore.", ckpt_dir)
            continue

        LOG.info("List {}: {}", ckpt_dir, os.listdir(ckpt_dir))
        ckpt_dir_list.append(ckpt_dir)

    return ckpt_dir_list


def _kaiwu_info_example():
    # Demonstrate both helpers by logging their results.
    (
        ego_camp,
        lineup_id,
        team_id,
        enemy_camp,
        enemy_lineup_id,
        enemy_team_id,
    ) = get_kaiwu_battle_info()
    LOG.info(
        "Get kaiwu battle info - camp:{} lineup_id:{} team_id:{}, enemy_camp:{}, enemy_lineup_id:{}, enemy_team_id:{}",
        ego_camp,
        lineup_id,
        team_id,
        enemy_camp,
        enemy_lineup_id,
        enemy_team_id,
    )

    ckpt_dir_list = get_ckpt_list(DEFAULT_CKPT_LIST_FILE)
    LOG.info("Get ckpt dir list: {}", ckpt_dir_list)


def kaiwu_info_example():
    """Best-effort demo of the kaiwu battle info; logs failures, never raises."""
    try:
        _kaiwu_info_example()
    # BUGFIX: a bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so process-control signals
    # still propagate.
    except Exception:
        LOG.exception("kaiwu_info_example failed, ignore")


if __name__ == "__main__":
    kaiwu_info_example()
import sys
from multiprocessing import Process

sys.path.append("/")
sys.path.append("/aiarena/code/actor/")  # TODO refactor


from aiarena.code.actor.entry import run
from aiarena.process.process_base import PyProcessBase


class ActorProcess(PyProcessBase):
    """Runs the actor entry point (`entry.run`) in a child process."""

    def __init__(
        self,
        actor_id=0,
        config_path=None,
        model_pool_addr="localhost:10016",
        single_test=False,
        port_begin=35300,
        gc_server_addr="127.0.0.1:23432",
        gamecore_req_timeout=30000,
        max_frame_num=20000,
        runtime_id_prefix="actor",
        aiserver_ip="127.0.0.1",
        mem_pool_addr_list=None,
        max_episode=-1,
        monitor_server_addr="127.0.0.1:8086",
        config=None,
        model_config=None,
    ) -> None:
        super().__init__()

        # Identity / model source
        self.actor_id = actor_id
        self.config_path = config_path
        self.model_pool_addr = model_pool_addr
        self.single_test = single_test
        # Gamecore connection
        self.port_begin = port_begin
        self.gc_server_addr = gc_server_addr
        self.gamecore_req_timeout = gamecore_req_timeout
        self.max_frame_num = max_frame_num
        self.runtime_id_prefix = runtime_id_prefix
        self.aiserver_ip = aiserver_ip
        # Training / monitoring targets (keep `or`: an empty list also
        # falls back to the default address)
        self.mem_pool_addr_list = mem_pool_addr_list or ["localhost:35200"]
        self.max_episode = max_episode
        self.monitor_server_addr = monitor_server_addr
        self.config = config
        self.model_config = model_config

    def start(self):
        """Spawn the actor loop as a separate process."""
        run_args = (
            self.actor_id,
            self.config_path,
            self.model_pool_addr,
            self.single_test,
            self.port_begin,
            self.gc_server_addr,
            self.gamecore_req_timeout,
            self.max_frame_num,
            self.runtime_id_prefix,
            self.aiserver_ip,
            self.mem_pool_addr_list,
            self.max_episode,
            self.monitor_server_addr,
            self.config,
            self.model_config,
        )
        self.proc = Process(target=run, args=run_args)
        self.proc.start()


if __name__ == "__main__":
    actor = ActorProcess(single_test=True, max_frame_num=100, max_episode=2)
    actor.start()
    actor.wait()
    def _get_config(self):
        """Build the learner ConfigParser: read the default config file, then
        overwrite the fields controlled by this process's constructor args.

        NOTE(review): assumes the default file already defines the "main",
        "dataset" and "model" sections — verify against common.conf.
        """
        config = configparser.ConfigParser()
        config.read(self.default_config_path)

        # overwrite config
        config.set("main", "ports", json.dumps(self.mem_pool_port_list))
        config.set("main", "backend", self.model_config.backend)
        if self.model_config.backend == "pytorch":  # TODO
            # pytorch path runs with no distributed backend here
            config.set("main", "distributed_backend", "none")

        # ConfigParser stores strings only, hence the str() conversions.
        config.set("main", "display_every", str(self.display_every))
        config.set("main", "save_model_steps", str(self.save_model_steps))
        config.set("main", "max_steps", str(self.max_steps))
        config.set("main", "batch_size", str(self.batch_size))
        config.set("dataset", "store_max_sample", str(self.store_max_sample))
        config.set("model", "use_xla", str(self.use_xla))
        config.set("model", "use_init_model", str(self.model_config.use_init_model))

        return config
class ModelPoolProcess(ProcessBase):
    """Launch the model_pool binary with a generated trpc_go config.

    The config template ``<pkg_path>/config/trpc_go.yaml.<role>`` is loaded,
    patched (log path, master address, identity), dumped to a temp file and
    passed to ``./modelpool`` via ``-conf``.
    """

    # Roles that have a matching trpc_go.yaml.<role> template in the pkg.
    VALID_ROLES = ("cpu", "gpu")

    def __init__(
        self,
        role="gpu",
        master_ip="127.0.0.1",
        log_file="/aiarena/logs/model_pool.log",
        pkg_path=default_pkg_path,
    ):
        """
        pkg_path: path of the model_pool pkg, used to launch the binary
        log_file: log output path for the started model_pool process
        role: "gpu" (master) or "cpu" (worker connecting to master_ip)
        master_ip: address of the master model_pool (used by the cpu role)
        """
        super().__init__(log_file)
        self.pkg_path = pkg_path
        self.ip = "127.0.0.1"  # TODO: confirm whether this should be the host's real IP
        self.cluster_context = "default"
        self.role = role
        self.master_ip = master_ip

    def _get_config(self, role, master_ip):
        """Load the role's YAML template, patch it, and return it as a dict.

        Raises ValueError for an unknown role.
        """
        # Fail fast on a bad role instead of surfacing a confusing
        # FileNotFoundError for the missing template file.
        # (Also fixes the "Unknow role" typo in the original message.)
        if role not in self.VALID_ROLES:
            raise ValueError(f"Unknown role: {role}")

        # load default config from file
        config_file = os.path.join(self.pkg_path, "config", f"trpc_go.yaml.{role}")
        with open(config_file) as f:
            config = yaml.load(f, Loader=Loader)

        # Redirect every configured log writer to this process's log file.
        for _, log_plugin in config.get("plugins", {}).get("log", {}).items():
            for _config in log_plugin:
                if _config.get("writer_config", {}).get("filename"):
                    _config["writer_config"]["filename"] = self.log_file

        # overwrite default config
        if role == "cpu":
            # cpu workers dial the master model_pool service
            config["client"]["service"][0]["target"] = f"dns://{master_ip}:10013"
            config["modelpool"]["ip"] = self.ip
            config["modelpool"]["name"] = self.ip
            config["modelpool"]["cluster"] = self.cluster_context
        else:  # role == "gpu", guaranteed by the check above
            config["modelpool"]["ip"] = self.ip
        return config

    def _generate_config_file(self, role, master_ip):
        """Dump the patched config to a unique temp file; return its path."""
        config = self._get_config(role, master_ip)
        fd, file = tempfile.mkstemp()
        with os.fdopen(fd, "w") as f:
            yaml.dump(config, f)
        return file

    def get_cmd_cwd(self):
        """Return (argv, cwd) consumed by ProcessBase.start()."""
        config_file = self._generate_config_file(self.role, self.master_ip)
        full_cmd = ["./modelpool", "-conf", config_file]
        cwd = os.path.join(self.pkg_path, "bin")
        return full_cmd, cwd
class GrafanaServerProcess(ProcessBase):
    """Run the grafana-server binary from its home directory with the system config."""

    def __init__(self, log_file="/aiarena/logs/grafana.log") -> None:
        super().__init__(log_file)

    def get_cmd_cwd(self):
        """Return (argv, cwd) consumed by ProcessBase.start()."""
        binary = "/usr/sbin/grafana-server"
        config_args = ["--config", "/etc/grafana/grafana.ini"]
        provisioning = "cfg:default.paths.provisioning=/etc/grafana/provisioning"
        return [binary, *config_args, provisioning], "/usr/share/grafana"
class PyProcessBase:
    """Thin lifecycle wrapper around a multiprocessing.Process.

    Subclasses assign ``self.proc`` in their own ``start()``; every method
    here is a safe no-op until that happens.
    """

    def __init__(self) -> None:
        # Set by subclasses once they spawn the child process.
        self.proc = None

    def stop(self):
        """Forcibly kill the child, if one was started."""
        proc = self.proc
        if proc is not None:
            proc.kill()

    def wait(self, timeout=None):
        """Join the child; returns immediately when nothing was started."""
        proc = self.proc
        if proc is not None:
            proc.join(timeout=timeout)

    def terminate(self):
        """Request child termination, if one was started."""
        proc = self.proc
        if proc is not None:
            proc.terminate()

    def exitcode(self):
        """Exit code of the finished child, or None when absent/still running."""
        proc = self.proc
        return proc.exitcode if proc is not None else None
# TODO impl check_and_send
class CheckAndSendProcess(ProcessBase):
    """Placeholder for a checkpoint check-and-send worker.

    Only the log destination is customized; the command is still
    ProcessBase's default until the real implementation lands.
    """

    def __init__(self, log_file="/aiarena/logs/send.log") -> None:
        super().__init__(log_file)

    def get_cmd_cwd(self):
        # Placeholder: inherits ProcessBase's default no-op command.
        return super().get_cmd_cwd()
#!/usr/bin/bash
# Wrapper the gamecore server invokes as the "remote" simulator binary:
# rewrites the simulator config, then runs the Windows simulator under wine.

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

conf=$1
new_conf=${conf}.new
work_dir=$(pwd)

set -ex

# Preprocess the config file (process.py writes the rewritten copy to $new_conf).
python3 ${SCRIPT_DIR}/process.py $conf $new_conf ${work_dir}

GAMECORE_PATH=${GAMECORE_PATH:-"/rl_framework/gamecore/"}
# WINEPATH uses ';' as separator (Windows-style path list).
export WINEPATH="${GAMECORE_PATH}/lib/;${GAMECORE_PATH}/bin/"

# One wine prefix per runtime id so concurrent simulators don't collide.
mkdir -p /.wine/
export WINEPREFIX=/.wine/$KAIWU_RUNTIME_ID

wine ${GAMECORE_PATH}/bin/sgame_simulator_remote_zmq.exe $new_conf
"""Manual smoke test: ask a local gamecore server to start a new 1v1 game."""
import requests

url = "http://127.0.0.1:23432/v2/newGame"
headers = {
    "Content-Type": "application/json",
}
data = {
    "simulator_type": "remote_repeat",
    "runtime_id": "test-runtime-id-0",
    "simulator_config": {
        "game_mode": "1v1",
        "hero_conf": [
            {"hero_id": 139},
            {"hero_id": 139},
        ],
    },
}

resp = requests.post(url=url, json=data, headers=headers, verify=False)
if not resp.ok:
    print("Failed", resp)
else:
    print("Success", resp.json())
| -------------------------------------------------------------------------------- /aiarena/scripts/actor/learner.iplist: -------------------------------------------------------------------------------- 1 | 127.0.0.1 root 36000 1 2 | -------------------------------------------------------------------------------- /aiarena/scripts/actor/monitor_actor.sh: -------------------------------------------------------------------------------- 1 | function log() { 2 | now=$(date +"%Y-%m-%d %H:%M:%S") 3 | echo "[$now] $1" 4 | } 5 | 6 | ############################ 7 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 8 | learner_list=${learner_list:-"${SCRIPT_DIR}/learner.iplist.new"} 9 | 10 | log "parse mem pool" 11 | idx=0 12 | mem_pool_addr="" 13 | while read ip user port gpu_card_num; do 14 | let end_num=gpu_card_num-1 15 | for i in $(seq 0 $end_num); do 16 | let port=35200+i 17 | log "mem_pool_$idx $ip:$port" 18 | if [ $idx -eq 0 ]; then 19 | mem_pool_addr="$ip:$port" 20 | else 21 | mem_pool_addr="${mem_pool_addr};$ip:$port" 22 | fi 23 | let idx+=1 24 | done 25 | done <$learner_list 26 | log ${mem_pool_addr} 27 | 28 | ############################ 29 | 30 | monitor_server_addr=$(cat ${learner_list} | head -n 1 | awk '{print $1}'):8086 31 | log "monitor_server_addr: $monitor_server_addr" 32 | 33 | ############################ 34 | 35 | ACTOR_CODE_DIR=${ACTOR_CODE_DIR:-"/aiarena/code/actor"} 36 | ACTOR_NUM=${ACTOR_NUM:-${CPU_NUM:-"1"}} 37 | MAX_EPISODE=${MAX_EPISODE-"-1"} 38 | LOG_DIR="/aiarena/logs/actor/" 39 | mkdir -p $LOG_DIR 40 | 41 | let actor_end=ACTOR_NUM-1 42 | while [ "1" == "1" ]; do 43 | cd ${ACTOR_CODE_DIR} 44 | for i in $(seq 0 $actor_end); do 45 | actor_cnt=$(ps -elf | grep "python entry.py --actor_id=$i " | grep -v grep | wc -l) 46 | log "actor_id:$i actor_cnt:$actor_cnt" 47 | if [ $actor_cnt -lt 1 ]; then 48 | log "restart actor_id:$i" 49 | nohup python entry.py --actor_id=$i \ 50 | --mem_pool_addr=$mem_pool_addr \ 51 | 
--model_pool_addr="localhost:10016" \ 52 | --max_episode=${MAX_EPISODE} \ 53 | --monitor_server_addr=${monitor_server_addr} \ 54 | >>${LOG_DIR}/actor_$i.log 2>&1 & 55 | sleep 1 56 | fi 57 | done # for 58 | 59 | sleep 30 60 | 61 | done # while 62 | -------------------------------------------------------------------------------- /aiarena/scripts/actor/parse_iplist.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import sys 3 | import time 4 | 5 | if __name__ == "__main__": 6 | src = sys.argv[1] 7 | dst = sys.argv[2] 8 | is_strict = int(sys.argv[3]) 9 | iplist = [] 10 | with open(src, "r") as f: 11 | for line in f.readlines(): 12 | line = line.rstrip() 13 | vec = line.split() 14 | hostname = vec[0] 15 | print(hostname) 16 | while True: 17 | try: 18 | ip = socket.gethostbyname(hostname) 19 | break 20 | except socket.error as error: 21 | print("ip not found: %s" % (hostname)) 22 | sys.stdout.flush() 23 | ip = hostname 24 | if not is_strict: 25 | break 26 | time.sleep(1) 27 | vec[0] = ip 28 | iplist.append(" ".join(vec).rstrip()) 29 | print(iplist) 30 | with open(dst, "w") as f: 31 | for ip in iplist: 32 | f.write("%s\n" % ip) 33 | -------------------------------------------------------------------------------- /aiarena/scripts/actor/start_actor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function log() { 4 | now=$(date +"%Y-%m-%d %H:%M:%S") 5 | echo "[$now] $1" 6 | } 7 | 8 | ############################ 9 | MODEL_POOL_PKG_DIR=${MODEL_POOL_PKG_DIR:-"/rl_framework/model_pool/pkg/model_pool_pkg/"} 10 | LOG_DIR="/aiarena/logs/actor/" 11 | 12 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 13 | input_learner_list=${input_learner_list:-"${SCRIPT_DIR}/learner.iplist"} 14 | export learner_list=${learner_list:-"${SCRIPT_DIR}/learner.iplist.new"} 15 | 16 | mkdir -p ${LOG_DIR} 17 | 18 | ############################ 19 | log "parse ip list" 20 | cd 
${SCRIPT_DIR} 21 | python parse_iplist.py ${input_learner_list} ${learner_list} 1 22 | 23 | ############################ 24 | # 等待的 learner 启动,通过探测 model pool 端口实现 25 | ip=$(cat ${learner_list} | awk '{print $1}' | sed -n '1p') 26 | log "learner ip: $ip" 27 | while true; do 28 | code=$(curl -sIL -w "%{http_code}\n" -o /dev/null http://$ip:10016) 29 | if [ $code -gt 200 ]; then 30 | log "learner is ok" 31 | break 32 | fi 33 | log "learner is not ok, wait for ready" 34 | sleep 1 35 | done 36 | 37 | ############################ 38 | if [ -z "$NO_ACTOR_MODEL_POOL" ]; then 39 | log "start model_pool" 40 | master_ip=$(head -n 1 ${learner_list} | awk '{print $1}') 41 | cd ${MODEL_POOL_PKG_DIR}/op && bash stop.sh && bash start.sh cpu $master_ip $LOG_DIR 42 | fi 43 | 44 | if [ "$DEPLOY_GAMECORE" = "1" ]; then 45 | sh /rl_framework/remote-gc-server/start_gamecore_server.sh 46 | fi 47 | 48 | ############################ 49 | log "start actor" 50 | bash ${SCRIPT_DIR}/kill.sh 51 | nohup bash ${SCRIPT_DIR}/monitor_actor.sh >>${LOG_DIR}/monitor.log 2>&1 & 52 | -------------------------------------------------------------------------------- /aiarena/scripts/build_code.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # usage: build_code.sh 4 | # env: OUTPUT_DIR, default is ./build 5 | # env: OUTPUT_FILENAME, default is code-$version.tgz 6 | 7 | version=2.3.3-$(date +"%Y%m%d%H%M") 8 | filename=code-$version.tgz 9 | 10 | # current shell script directory 11 | SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) 12 | ROOT_DIR=$(dirname $SCRIPT_DIR) 13 | TMP_DIR="/tmp/train/" 14 | 15 | output_dir=${OUTPUT_DIR:-"$SCRIPT_DIR/../build"} 16 | mkdir -p $output_dir 17 | output_dir=$(cd -- "$output_dir" &>/dev/null && pwd) 18 | filename=${OUTPUT_FILENAME:-"code-$version.tgz"} 19 | 20 | # reset build code dir 21 | rm -rf $TMP_DIR && mkdir -p $TMP_DIR 22 | 23 | # build code 24 | 25 | # 在集群训练会给环境注入TASK_ID, 
#!/bin/bash
#
# Health check: exit non-zero when the watched file is missing or stale.
# Args (both optional): file path and max allowed age in seconds. Env vars
# CHECK_FILE_UPDATE_FILE / CHECK_FILE_UPDATE_MAX_TIME_DIFF provide defaults.

if [ "$#" -gt 2 ]; then
    # Fix: the usage text had lost its placeholders ("[] []").
    echo "Usage: $0 [file] [max_time_diff_seconds]"
    exit 1
fi

# Positional arg wins, then the env var, then the built-in default.
file=${1-${CHECK_FILE_UPDATE_FILE-"/aiarena/logs/learner/train.log"}}
max_time_diff=${2-${CHECK_FILE_UPDATE_MAX_TIME_DIFF-"300"}}

if [ ! -e "$file" ]; then
    echo "File not found: $file"
    exit 2
fi

current_time=$(date +%s)
file_mod_time=$(stat -c %Y "$file")  # mtime, seconds since epoch (GNU stat)
time_diff=$((current_time - file_mod_time))

if [ "$time_diff" -gt "${max_time_diff}" ]; then
    echo "The file ${file} has not been modified in the last ${max_time_diff} s."
    exit 3
fi
# Start the monitoring stack on the learner host: either the lightweight
# influxdb_exporter, or a full influxdb + grafana pair.

function log(){
    now=`date +"%Y-%m-%d %H:%M:%S"`
    echo "[$now] $1"
}

log Start service...

# dev scenario does not use influxdb
if [[ -n "$KAIWU_DEV" ]]; then
    # Fix: the message was a bare string, which the shell tried to run as a command.
    log "Disable influxdb"
    exit 0
fi

if [[ -z "$NOT_USE_INFLUXDB_EXPORTER" ]];
then
    nohup influxdb_exporter --web.listen-address=":8086" --udp.bind-address=":8086" > /dev/null 2>&1 &
else
    # Start influxd only when it is not already running, then wait for the
    # port before creating the database.
    pgrep influxdb
    if [ $? == 1 ];then
        nohup /usr/bin/influxd > /dev/null 2>&1 &
        while true; do
            lsof -i :8086 && break
            sleep 1
        done
        curl -i -XPOST http://localhost:8086/query --data-urlencode "q=CREATE DATABASE monitordb"
    fi

    pgrep grafana
    if [ $? == 1 ];then
        cd /usr/share/grafana
        nohup /usr/sbin/grafana-server --config=/etc/grafana/grafana.ini cfg:default.paths.provisioning=/etc/grafana/provisioning > /dev/null 2>&1 &
    fi
fi

log Complete!
| debconf-set-selections && \ 15 | DEBCONF_NONINTERACTIVE_SEEN=true DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata && \ 16 | rm -rf /var/lib/apt/lists/* 17 | 18 | # python 19 | RUN apt update && \ 20 | apt install -y python3 python3-pip python3-venv && \ 21 | rm -rf /var/lib/apt/lists/* 22 | 23 | RUN ln -sf $(which python3) /usr/bin/python 24 | 25 | ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 26 | RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel 27 | 28 | # sshd 29 | RUN apt update && \ 30 | apt install -y --no-install-recommends openssh-server expect telnet && \ 31 | rm -rf /var/lib/apt/lists/* 32 | 33 | RUN sed -i 's|^#*PasswordAuthentication.*|PasswordAuthentication yes|g' /etc/ssh/sshd_config && \ 34 | sed -i 's|^#*UsePAM.*|UsePAM no|g' /etc/ssh/sshd_config && \ 35 | sed -i 's|^#*PermitRootLogin.*|PermitRootLogin yes|g' /etc/ssh/sshd_config && \ 36 | sed -i 's|^#*Port.*|Port 36001|g' /etc/ssh/sshd_config && \ 37 | sed -i 's|^#* *StrictHostKeyChecking.*|StrictHostKeyChecking no|g' /etc/ssh/ssh_config && \ 38 | sed -i 's|^#* *UserKnownHostsFile.*|SUserKnownHostsFile=/dev/null|g' /etc/ssh/ssh_config 39 | 40 | RUN apt update && \ 41 | apt install -y net-tools tree rsync lrzsz && \ 42 | rm -rf /var/lib/apt/lists/* 43 | 44 | RUN apt update && \ 45 | apt install -y curl && \ 46 | rm -rf /var/lib/apt/lists/* 47 | 48 | RUN curl -LO https://github.com/neovim/neovim/releases/latest/download/nvim.appimage && \ 49 | chmod u+x nvim.appimage && \ 50 | ./nvim.appimage --appimage-extract && \ 51 | cp -r ./squashfs-root/usr/ / && \ 52 | rm -r squashfs-root nvim.appimage && \ 53 | ln -sf /usr/bin/nvim /usr/bin/vim 54 | 55 | RUN mkdir -p ~/.config/nvim/ && echo "set mouse=" >> ~/.config/nvim/init.vim 56 | -------------------------------------------------------------------------------- /dockerfile/dockerfile.base.cpu: -------------------------------------------------------------------------------- 1 | # 
py3.7 + cpu 2 | ARG BASE_IMAGE=common_base_py37 3 | FROM ${BASE_IMAGE} as cpu 4 | 5 | # tensorflow 6 | ARG TENSORFLOW_VERSION=1.15.5 7 | RUN pip3 install --no-cache-dir protobuf~=3.20 tensorflow==${TENSORFLOW_VERSION} 8 | RUN pip3 install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu 9 | 10 | RUN apt update && \ 11 | apt install -y cmake libopenmpi-dev build-essential && \ 12 | rm -rf /var/lib/apt/lists/* 13 | 14 | RUN HOROVOD_WITH_MPI=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 pip3 install --no-cache-dir horovod==0.28.1 15 | -------------------------------------------------------------------------------- /dockerfile/dockerfile.base.gpu: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=common_base_py38 2 | ARG BUILD_PYTHON_IMAGE=build_python_py38 3 | ARG NV_TENSORFLOW_VERSION=1.15.5+nv22.8 4 | 5 | FROM ${BUILD_PYTHON_IMAGE} as build_python 6 | 7 | FROM nvidia/cuda:11.6.2-devel-ubuntu20.04 as build_horovod 8 | 9 | RUN apt update && \ 10 | apt install -y wget && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | RUN wget -O /etc/apt/sources.list https://mirrors.tencent.com/repo/ubuntu20_sources.list 14 | 15 | ARG NV_TENSORFLOW_VERSION 16 | 17 | COPY --from=build_python /python/ / 18 | 19 | # python 20 | ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 PYTHONIOENCODING=UTF-8 LANG=C.UTF-8 21 | RUN python3 -m ensurepip --altinstall --upgrade && \ 22 | python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel 23 | 24 | RUN pip3 install --no-cache-dir nvidia-pyindex 25 | RUN pip3 install --no-cache-dir nvidia-tensorflow==${NV_TENSORFLOW_VERSION} 26 | RUN pip3 install --no-cache-dir torch --extra-index-url https://download.pytorch.org/whl/cu116 27 | 28 | # tzdata 29 | RUN apt update && \ 30 | echo -e "tzdata tzdata/Areas select Asia\ntzdata tzdata/Zones/Asia select Chongqing" | debconf-set-selections && \ 31 | DEBCONF_NONINTERACTIVE_SEEN=true DEBIAN_FRONTEND=noninteractive apt-get 
install -y tzdata 32 | 33 | # openmpi 34 | RUN apt update && \ 35 | apt install -y libopenmpi-dev && \ 36 | rm -rf /var/lib/apt/lists/* 37 | 38 | RUN pip3 install --no-cache-dir cloudpickle psutil pyyaml cffi pycparser numpy~=1.23.0 39 | RUN export HOROVOD_NCCL_INCLUDE=$(python3 -c "import os; import nvidia.nccl.include; print(os.path.dirname(nvidia.nccl.include.__file__))") && \ 40 | export HOROVOD_NCCL_LIB=$(python3 -c "import os; import nvidia.nccl.lib; print(os.path.dirname(nvidia.nccl.lib.__file__))") && \ 41 | MPI_HOME=/usr/local/ HOROVOD_WITH_MPI=1 HOROVOD_NCCL_LINK=SHARED HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_TENSORFLOW=1 pip3 install horovod==0.25.0 -i https://mirrors.tencent.com/pypi/simple/ 42 | 43 | # py3.8 + gpu + nv-tf 1.15.5+nv22.8 44 | FROM ${BASE_IMAGE} as gpu 45 | 46 | ARG NV_TENSORFLOW_VERSION 47 | 48 | # nvidia-tensorflow 49 | RUN pip3 install --no-cache-dir nvidia-pyindex 50 | RUN pip3 install --no-cache-dir nvidia-dali-cuda110==1.16.0 51 | RUN pip3 install --no-cache-dir nvidia-cublas-cu11~=11.10 52 | RUN pip3 install --no-cache-dir nvidia-cusolver-cu11~=11.4 53 | RUN pip3 install --no-cache-dir nvidia-cuda-nvcc-cu11~=11.7 54 | RUN pip3 install --no-cache-dir nvidia-nccl-cu11~=2.12 55 | RUN pip3 install --no-cache-dir nvidia-curand-cu11~=10.2 56 | RUN pip3 install --no-cache-dir nvidia-cufft-cu11~=10.7 57 | RUN pip3 install --no-cache-dir nvidia-cudnn-cu11~=8.5 58 | RUN pip3 install --no-cache-dir nvidia-cusparse-cu11~=11.7 59 | RUN pip3 install --no-cache-dir nvidia-tensorflow==${NV_TENSORFLOW_VERSION} 60 | 61 | # pytorch 62 | RUN pip3 install --no-cache-dir torch --extra-index-url https://download.pytorch.org/whl/cu116 63 | 64 | # horovod 65 | RUN pip3 install --no-cache-dir cloudpickle psutil pyyaml cffi pycparser numpy~=1.23.0 66 | COPY --from=build_horovod /usr/lib/python3.8/site-packages/horovod /usr/lib/python3.8/site-packages/horovod 67 | COPY --from=build_horovod 
/usr/lib/python3.8/site-packages/horovod-0.25.0.dist-info /usr/lib/python3.8/site-packages/horovod-0.25.0.dist-info 68 | COPY --from=build_horovod /usr/bin/horovodrun /usr/bin/horovodrun 69 | 70 | # nccl 71 | RUN echo $(python3 -c "import os; import nvidia.nccl.lib; print(nvidia.nccl.lib.__path__._path[0])") > /etc/ld.so.conf.d/nccl.conf && ldconfig 72 | 73 | # libcudart.so.11.0 74 | RUN echo $(python3 -c "import os; import nvidia.cuda_runtime.lib; print(nvidia.cuda_runtime.lib.__path__._path[0])") > /etc/ld.so.conf.d/cuda_runtime.conf && ldconfig 75 | -------------------------------------------------------------------------------- /dockerfile/dockerfile.base.torch: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=common_base_py38 2 | 3 | FROM ${BASE_IMAGE} as cpu 4 | RUN pip3 install --no-cache-dir torch==2.0.1 --index-url https://download.pytorch.org/whl/cpu 5 | 6 | FROM ${BASE_IMAGE} as gpu 7 | # torch 8 | RUN pip3 install --no-cache-dir nvidia-nvtx-cu11==11.7.91 9 | RUN pip3 install --no-cache-dir nvidia-nccl-cu11==2.14.3 10 | RUN pip3 install --no-cache-dir nvidia-cusparse-cu11==11.7.4.91 11 | RUN pip3 install --no-cache-dir nvidia-cublas-cu11==11.10.3.66 12 | RUN pip3 install --no-cache-dir nvidia-cusolver-cu11==11.4.0.1 13 | RUN pip3 install --no-cache-dir nvidia-curand-cu11==10.2.10.91 14 | RUN pip3 install --no-cache-dir nvidia-cufft-cu11==10.9.0.58 15 | RUN pip3 install --no-cache-dir nvidia-cudnn-cu11==8.5.0.96 16 | RUN pip3 install --no-cache-dir nvidia-cuda-cupti-cu11==11.7.101 17 | RUN pip3 install --no-cache-dir nvidia-cuda-runtime-cu11==11.7.99 18 | RUN pip3 install --no-cache-dir nvidia-cuda-nvrtc-cu11==11.7.99 19 | RUN pip3 install --no-cache-dir torch==2.0.1 20 | -------------------------------------------------------------------------------- /dockerfile/dockerfile.dev: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=cpu_base 2 | ARG 
DEV_BASE=dev_base 3 | 4 | FROM ${BASE_IMAGE} as dev_base 5 | 6 | RUN wget https://github.com/prometheus/influxdb_exporter/releases/download/v0.10.0/influxdb_exporter-0.10.0.linux-amd64.tar.gz \ 7 | && tar -xf influxdb_exporter-0.10.0.linux-amd64.tar.gz \ 8 | && cp influxdb_exporter-0.10.0.linux-amd64/influxdb_exporter /usr/bin/ \ 9 | && rm -rf influxdb_exporter-0.10.0.linux-amd64* 10 | 11 | RUN apt update && \ 12 | apt install -y lsof && \ 13 | rm -rf /var/lib/apt/lists/* 14 | 15 | # Install monitor 16 | RUN wget -q -O /usr/share/keyrings/grafana.key https://packages.grafana.com/gpg.key && \ 17 | echo "deb [signed-by=/usr/share/keyrings/grafana.key] https://packages.grafana.com/oss/deb stable main" >> /etc/apt/sources.list.d/grafana.list && \ 18 | apt update && \ 19 | apt install -y grafana influxdb && \ 20 | rm -rf /var/lib/apt/lists/* && \ 21 | rm -rf /etc/apt/sources.list.d/grafana.list 22 | 23 | RUN pip3 install --no-cache-dir absl-py 24 | RUN pip3 install --no-cache-dir protobuf numpy 25 | # For aiarena 26 | RUN pip3 install --no-cache-dir pyyaml psutil h5py 27 | 28 | # rl_framework 29 | COPY ./rl_framework/ /rl_framework/ 30 | 31 | RUN cd /rl_framework/common && python3 setup.py bdist_wheel && pip3 install --no-cache-dir dist/*.whl && python3 setup.py clean --all && rm -rf dist/ 32 | RUN cd /rl_framework/learner && python3 setup.py bdist_wheel && pip3 install --no-cache-dir dist/*.whl && python3 setup.py clean --all && rm -rf dist/ 33 | RUN cd /rl_framework/mem_pool && python3 setup.py bdist_wheel && pip3 install --no-cache-dir dist/*.whl && python3 setup.py clean --all && rm -rf dist/ 34 | RUN cd /rl_framework/model_pool && python3 setup.py bdist_wheel && pip3 install --no-cache-dir dist/*.whl && python3 setup.py clean --all && rm -rf dist/ 35 | RUN cd /rl_framework/predictor && python3 setup.py bdist_wheel && pip3 install --no-cache-dir dist/*.whl && python3 setup.py clean --all && rm -rf dist/ 36 | 37 | RUN pip3 install --no-cache-dir --upgrade build && \ 
38 | cd /rl_framework/monitor && python3 -m build && \ 39 | pip3 install --no-cache-dir dist/*.whl && rm -rf dist/ 40 | 41 | # hok_env 42 | COPY hok_env /hok_env 43 | RUN pip3 install --no-cache-dir -e /hok_env 44 | 45 | # training code 46 | COPY ./aiarena/remote-gc-server /rl_framework/remote-gc-server 47 | COPY ./aiarena/battle /aiarena/battle 48 | COPY ./aiarena/process /aiarena/process 49 | COPY ./aiarena/__init__.py /aiarena/ 50 | COPY ./aiarena/scripts /aiarena/scripts 51 | COPY ./aiarena/grafana/etc /etc/grafana 52 | COPY ./aiarena/grafana/dashboards /var/lib/grafana/dashboards 53 | 54 | WORKDIR / 55 | 56 | ENV GAMECORE_SERVER_BIND_ADDR=:23432 57 | ENV GAMECORE_SERVER_ADDR="127.0.0.1:23432" 58 | 59 | FROM dev_base as dev_base_v83 60 | ENV KAIWU_V83_GAMECORE=1 61 | 62 | # runtime /aiarena/code 3v3 dir 63 | FROM ${DEV_BASE} as code3v3 64 | ENV CAMP_DEFAULT_MODE=3v3 65 | COPY ./aiarena/3v3/ /aiarena/code/ 66 | 67 | # runtime /aiarena/code 1v1 dir 68 | FROM ${DEV_BASE} as code1v1 69 | ENV CAMP_DEFAULT_MODE=1v1 70 | COPY ./aiarena/1v1/ /aiarena/code/ 71 | 72 | FROM ${DEV_BASE} as battle 73 | COPY ./aiarena/1v1/ /aiarena/1v1/ 74 | COPY ./aiarena/3v3/ /aiarena/3v3/ 75 | -------------------------------------------------------------------------------- /dockerfile/dockerfile.gamecore: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE=cpu_base 2 | 3 | ARG INTERNAL_IMAGE=wine_base 4 | 5 | # Stage1: copy and unzip hok_env_gamecore.zip 6 | FROM ${BASE_IMAGE} as gamecore 7 | RUN apt update && \ 8 | apt install -y unzip && \ 9 | rm -rf /var/lib/apt/lists/* 10 | 11 | COPY ./hok_env_gamecore.zip ./ 12 | 13 | RUN mkdir -p =/rl_framework && \ 14 | unzip ./hok_env_gamecore.zip -d /rl_framework 15 | 16 | # Stage2-1: copy gamecore only 17 | FROM ${BASE_IMAGE} as gamecore_only 18 | COPY --from=gamecore /rl_framework/gamecore/ /rl_framework/gamecore/ 19 | 20 | # Stage2-2: install wine and copy gamecore/license.dat 21 | FROM 
${BASE_IMAGE} as wine_base 22 | 23 | RUN dpkg --add-architecture i386 24 | 25 | COPY ./3rd/winehq.key /usr/share/keyrings/winehq-archive.key 26 | 27 | RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/winehq-archive.key] https://mirrors.tuna.tsinghua.edu.cn/wine-builds/ubuntu/ focal main" >> /etc/apt/sources.list.d/winehq.list 28 | 29 | RUN apt update && \ 30 | apt install --install-recommends -y winehq-stable && \ 31 | rm -rf /var/lib/apt/lists/* 32 | 33 | ENV WINEDEBUG=-all 34 | 35 | COPY --from=gamecore /rl_framework/gamecore/ /rl_framework/gamecore/ 36 | COPY ./license.dat /rl_framework/gamecore/core_assets/ 37 | 38 | # Stage3: copy gamecore-server scripts to final image 39 | FROM ${INTERNAL_IMAGE} 40 | 41 | RUN pip3 install --no-cache-dir requests 42 | 43 | COPY ./aiarena/remote-gc-server /rl_framework/remote-gc-server 44 | RUN chmod a+x /rl_framework/gamecore/* /rl_framework/gamecore/bin/* 45 | ENV GAMECORE_PATH=/rl_framework/gamecore 46 | -------------------------------------------------------------------------------- /docs/hok_1v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/docs/hok_1v1.png -------------------------------------------------------------------------------- /docs/replay-tool.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/docs/replay-tool.gif -------------------------------------------------------------------------------- /docs/sgame_folder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/docs/sgame_folder.png -------------------------------------------------------------------------------- /hok_env/MANIFEST.in: 
-------------------------------------------------------------------------------- 1 | recursive-include hok/hok3v3/lib * 2 | include hok/hok3v3/default_hero_config.json 3 | include hok/hok3v3/config.dat 4 | include hok/hok1v1/config.dat 5 | include hok/hok1v1/config.json 6 | include hok/hok1v1/default_hero_config.json 7 | include hok/hok1v1/lib/interface.cpython-36m-x86_64-linux-gnu.so 8 | include hok/hok1v1/lib/interface.cpython-37m-x86_64-linux-gnu.so 9 | include hok/hok1v1/lib/interface.cpython-38-x86_64-linux-gnu.so 10 | include hok/hok1v1/lib/interface.cpython-39-x86_64-linux-gnu.so 11 | include hok/hok1v1/unit_test/config.json 12 | -------------------------------------------------------------------------------- /hok_env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/__init__.py -------------------------------------------------------------------------------- /hok_env/env.yaml: -------------------------------------------------------------------------------- 1 | name: hok_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - _openmp_mutex=4.5=1_gnu 7 | - ca-certificates=2021.7.5=h06a4308_1 8 | - certifi=2021.5.30=py36h06a4308_0 9 | - ld_impl_linux-64=2.35.1=h7274673_9 10 | - libffi=3.3=he6710b0_2 11 | - libgcc-ng=9.3.0=h5101ec6_17 12 | - libgomp=9.3.0=h5101ec6_17 13 | - libstdcxx-ng=9.3.0=hd4cf53a_17 14 | - ncurses=6.2=he6710b0_1 15 | - openssl=1.1.1k=h27cfd23_0 16 | - pip=21.0.1=py36h06a4308_0 17 | - python=3.6.13=h12debd9_1 18 | - readline=8.1=h27cfd23_0 19 | - setuptools=52.0.0=py36h06a4308_0 20 | - sqlite=3.36.0=hc218d9a_0 21 | - tk=8.6.10=hbc83047_0 22 | - wheel=0.37.0=pyhd3eb1b0_0 23 | - xz=5.2.5=h7b6447c_0 24 | - zlib=1.2.11=h7b6447c_3 25 | - pip: 26 | - charset-normalizer==2.0.4 27 | - idna==3.2 28 | - numpy==1.19.5 29 | - protobuf==3.9.1 30 | - pyzmq==22.2.1 31 | - 
import functools

# Log-level name constants (the duplicate INFO binding in the original
# has been removed).
INFO = "INFO"
CRITICAL = "CRITICAL"
ERROR = "ERROR"
WARNING = "WARNING"
DEBUG = "DEBUG"

# Global registry of timing samples, keyed by stage name.  Each finished
# measurement is stored as a float in milliseconds; log_time_func()
# temporarily stores a datetime start marker in the last slot until the
# matching end call converts it.
g_log_time = {
    "feature_process": [],
    "reward_process": [],
    "result_process": [],
    "predict_process": [],
    "aiprocess_process": [],
    "gamecore_process": [],
    "sample_manger_format_data": [],
    "send_data": [],
    "agent_process": [],
    "step": [],
    "save_sample": [],
    "one_frame": [],
    "one_episode": [],
    "reset": [],
    "step_af": [],
    # add new more
}


def setup_logger(filename=None, level=None):
    """Configure the global loguru logger.

    With a filename: stdout gets level (default ERROR) and the file gets
    level (default INFO) with 50 MB rotation; the log directory is created
    if missing.  Without a filename: stdout only, default INFO.
    """
    logger.remove()
    if filename:
        logger.add(sys.stdout, level=(level or "ERROR"))
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        logger.add(filename, rotation="50 MB", level=(level or "INFO"))
    else:
        logger.add(sys.stdout, level=(level or "INFO"))


# log_time
def log_time(text):
    """Decorator: record each call's wall-clock duration (ms) under
    g_log_time[text]."""

    def decorator(func):
        @functools.wraps(func)  # preserve the wrapped function's metadata
        def wrapper(*args, **kws):
            start = datetime.datetime.now()
            result = func(*args, **kws)
            elapsed_ms = (datetime.datetime.now() - start).total_seconds() * 1000.0
            g_log_time.setdefault(text, []).append(elapsed_ms)
            return result

        return wrapper

    return decorator


# log_time_func
def log_time_func(text, end=False):
    """Manual start/stop timing for g_log_time[text].

    First call appends a start marker (datetime).  A later call converts
    the pending marker into the elapsed milliseconds; unless end=True it
    also starts the next measurement.
    """
    samples = g_log_time.setdefault(text, [])
    now = datetime.datetime.now()
    if samples:
        start = samples[-1]
        if not isinstance(start, float):
            # Convert the pending start marker into a duration in ms.
            samples[-1] = (now - start).total_seconds() * 1000.0
    if not end:
        samples.append(now)
class ServerProcess:
    """Lifecycle manager for a battle AI server subprocess.

    Depending on ``server_driver``, the server package is fetched/extracted
    locally and launched as a child process, or an already running remote
    server (or the built-in common AI) is used without launching anything.
    """

    def __init__(self) -> None:
        self.proc = None  # Popen handle of the launched server, if any
        self.addr, self.port = "", 0  # where the AI server can be reached
        self._log_file = None  # open handle of the redirected server log

    # Get server package and extract to server_path
    def _create_server_path(self, server, server_path, server_driver):
        """Fetch (url) or reuse (local_tar) the package tarball, then untar
        it into server_path.  Returns server_path."""
        # get dst_file
        dst_file = None
        if server_driver == "local_tar":
            dst_file = server
        elif server_driver == "url":
            os.makedirs(server_path, exist_ok=True)
            dst_file = os.path.join(server_path, "server.tgz")
            cmd = ["wget", "-O", dst_file, server]
            LOG.info(cmd)
            subprocess.run(cmd, env=os.environ, check=True)

        # decompress dst_file
        os.makedirs(server_path, exist_ok=True)
        cmd = ["tar", "-C", server_path, "-xf", dst_file]
        LOG.info(cmd)
        subprocess.run(cmd, env=os.environ, check=True)
        return server_path

    # Extract server files for latter start up
    def _extract_server_files(self, server, server_path, server_port, server_driver):
        """Resolve server_driver to a local run directory, or None when no
        local process should be launched (remote server / common AI)."""
        if server_driver in ["local_tar", "url"]:
            server_path = self._create_server_path(server, server_path, server_driver)
        elif server_driver == "server":
            # `server` is the address of an already running server.
            self.addr, self.port = server, server_port
            return None
        elif server_driver == "common_ai":
            # Built-in AI: nothing to launch, nothing to connect to.
            self.addr, self.port = "", 0
            return None
        else:
            server_path = server

        return server_path

    # Start server process
    def start(
        self,
        server,
        server_path,
        server_port,
        server_log_path,
        server_driver,
    ):
        """Launch the AI server subprocess (unless the driver needs none)
        and record its address as 127.0.0.1:server_port."""
        server_path = self._extract_server_files(
            server, server_path, server_port, server_driver
        )
        if not server_path:
            return

        os.makedirs(os.path.dirname(server_log_path), exist_ok=True)
        full_cmd = [
            "python",
            "code/actor/server.py",
            "--server_addr",
            "tcp://0.0.0.0:{}".format(server_port),
        ]
        LOG.info(server_path)
        LOG.info(full_cmd)

        # redirect stdout/stderr to server_log_path; keep the handle so
        # stop() can close it (previously the handle was leaked, because
        # proc.stdout is None when stdout is a plain file, not PIPE)
        self._log_file = open(server_log_path, "w")
        self.proc = subprocess.Popen(
            full_cmd,
            env=os.environ,
            stderr=subprocess.STDOUT,
            stdout=self._log_file,
            preexec_fn=os.setsid,
            bufsize=10240,
            cwd=server_path,
        )
        self.addr, self.port = "127.0.0.1", server_port

    def get_server_addr(self):
        """Return (addr, port) when a server is reachable, else None."""
        if self.addr and self.port:
            return (self.addr, self.port)
        return None

    def _test_connect(self, host, port):
        """Return True when a TCP connection to (host, port) succeeds."""
        with socket.socket(socket.AF_INET) as s:
            try:
                s.connect((host, port))
            except ConnectionRefusedError:
                return False
        return True

    def wait_server_started(self, timeout):
        """Poll once a second until the server accepts connections, the
        child process exits, or `timeout` seconds elapse."""
        if (not self.addr) or (not self.port):
            return

        end_time = time.time() + timeout
        while time.time() < end_time:
            if self._test_connect(self.addr, self.port):
                break
            if self.proc and self.proc.poll() is not None:
                # Child already exited; no point waiting any longer.
                break
            time.sleep(1)

    # Stop server process
    def stop(self):
        """Kill the child process (if any), reap it, and close the log file."""
        if self._log_file:
            self._log_file.close()
            self._log_file = None

        if not self.proc:
            return

        self.proc.kill()
        # Reap the killed child to avoid leaving a zombie process behind.
        self.proc.wait()
        if self.proc.stdout:
            self.proc.stdout.close()
        if self.proc.stderr:
            self.proc.stderr.close()
class AgentBase:
    """Abstract base class for a 1v1 battle agent.

    Subclasses must implement the LSTM-state accessors, process(), reset()
    and close().  set_game_info() is provided here and simply records which
    camp/player this agent controls.
    """

    def __init__(self) -> None:
        self.keep_latest = True  # whether this agent tracks the latest model

        self.player_id = None
        self.hero_camp = None

        self.hero_type = None
        self.is_latest_model = None
        self.agent_type = None

    def set_lstm_info(self, lstm_info):
        # NotImplementedError is the idiomatic signal for an abstract method
        # (and still a subclass of Exception for existing callers).
        raise NotImplementedError("Not implemented")

    def get_lstm_info(self):
        raise NotImplementedError("Not implemented")

    def process(self, state_dict, battle=False):
        raise NotImplementedError("Not implemented")

    def reset(self, agent_type=None, model_path=None):
        raise NotImplementedError("Not implemented")

    def close(self):
        raise NotImplementedError("Not implemented")

    def set_game_info(self, hero_camp, player_id):
        """Record the camp and player id this agent is playing as."""
        self.hero_camp = hero_camp
        self.player_id = player_id
import os
import json

# Path to the bundled per-hero default configuration (a JSON list of dicts,
# each carrying a "hero_id" field).
default_hero_config_file = os.path.join(
    os.path.dirname(__file__), "default_hero_config.json"
)


def get_default_hero_config():
    """Load the default hero configs, keyed by each entry's hero_id."""
    with open(default_hero_config_file) as config_file:
        entries = json.load(config_file)
    return {entry["hero_id"]: entry for entry in entries}
import logging

import hok.hok1v1.lib.interface as interface

from hok.hok1v1.server import AIServer as AIServerBase
from hok.hok1v1.server import ResponceType, default_config_path

LOG = logging.getLogger(__file__)


class AIServer(AIServerBase):
    """1v1 AI server variant that delays responses by one frame.

    process() first sends the response cached by the *previous* call
    (see _send_last), then computes and caches — rather than sends — the
    response for the frame just received (_put).  self.last maps sgame_id
    to that pending (send_type, msg_id, sgame_id) tuple.
    """

    def __init__(self, agent, addr, config_path=default_config_path) -> None:
        super().__init__(agent, addr, config_path)
        # Pending response per game, sent when that game's next frame arrives.
        self.last = {}

    def clear_game(self, sgame_id):
        """Release base-class resources and drop any pending response."""
        super().clear_game(sgame_id)
        self.last.pop(sgame_id, None)

    def process(self):
        """Receive one frame, flush the previously cached response, then
        run feature processing / the agent and cache the new response.

        FeatureProcess return codes handled below: 0 failure, 1 gameover
        info (re-receive), 2 normal feature vector (continue to the agent),
        3/4/5 cached/empty responses handled by the native lib directly.
        """
        parse_state, sgame_id = self.lib_processor.RecvAIFrameState(self.addr)
        # Reply to the previous frame of this game (or an empty response).
        self._send_last(sgame_id)

        if (
            parse_state != interface.PARSE_CONTINUE
            and parse_state != interface.PARSE_NONE_ACTION
        ):
            LOG.warn("recv failed: %s", parse_state)
            return

        req_pb = None
        if parse_state == interface.PARSE_CONTINUE:
            req_pb = self.lib_processor.GetAIFrameState(sgame_id)
            if req_pb is None:
                LOG.warn("GetAIFrameState failed")
                return

        ret = self.lib_processor.FeatureProcess(parse_state, sgame_id)
        # Failed, return no action
        if ret[0] == 0:
            LOG.error("step failed: {}".format(ret[1]))
            return
        if ret[0] == 1:
            LOG.error("Parsing gameover information, receive msg again!")
            return
        elif ret[0] == 2:
            # continue to result_process
            # SEND_CCD_ONE_HERO, get normal feature vector, break
            state = self._state_tuple2np(ret[1:], req_pb.hero_list[0].config_id)[0]
            state["req_pb"] = req_pb
            state["sgame_id"] = sgame_id
        elif ret[0] == 3 or ret[0] == 4 or ret[0] == 5:
            # SEND_CCD_FIVE_HERO
            if ret[0] == 3:
                self._put_empty_rsp(sgame_id)
            elif ret[0] == 4:
                self._put(ResponceType.NONE, -1, sgame_id)
            elif ret[0] == 5:
                # initial random action (cached response id from the lib)
                self._put(ResponceType.CACHED, int(ret[1]), sgame_id)
            return
        else:
            LOG.error("Unexpected return value: {}".format(ret[0]))
            return

        if req_pb.gameover:
            LOG.info("game done: {}, {}".format(sgame_id, req_pb.frame_no))
            # release the resources held for this old sgame_id
            self.clear_game(sgame_id)
        else:
            # Restore this game's LSTM state, predict, then save it back.
            self._restore_lstm_info(self.agent, sgame_id)
            _, d_action, _ = self.agent.process(state)
            self._save_lstm_info(self.agent, sgame_id)
            rp_actions = self._format_actions([d_action])
            if not rp_actions:
                return

            ret_code, resp_id = self.lib_processor.ResultProcess(rp_actions, sgame_id)
            if ret_code != interface.PROCESS_ACTION_SUCCESS:
                LOG.warn("process action failed: {}".format(ret_code))
                return

            # Cache (don't send) the response; delivered on the next frame.
            self._put(ResponceType.CACHED, resp_id, sgame_id)

    def _put_empty_rsp(self, sgame_id):
        # Cache a default/empty response for this game.
        return self._put(ResponceType.DEFAULT, -1, sgame_id)

    def _put(self, send_type, msg_id, sgame_id):
        # Remember the response to deliver on this game's next frame.
        self.last[sgame_id] = (send_type, msg_id, sgame_id)

    def _send_last(self, sgame_id):
        """Send the response cached for sgame_id, falling back to an empty
        response when there is none or sending fails."""
        last = self.last.pop(sgame_id, None)
        if not last:
            # no cached prediction result from the previous frame
            self._send_empty_rsp()
            return

        send_type, msg_id, sgame_id = last
        ret = self._send(send_type, msg_id, sgame_id)
        if not ret:
            LOG.warn("send failed, send empty rsp: {}".format(ret))
            self._send_empty_rsp()
{}".format(ret_code)) 81 | return 82 | 83 | self._put(ResponceType.CACHED, resp_id, sgame_id) 84 | 85 | def _put_empty_rsp(self, sgame_id): 86 | return self._put(ResponceType.DEFAULT, -1, sgame_id) 87 | 88 | def _put(self, send_type, msg_id, sgame_id): 89 | self.last[sgame_id] = (send_type, msg_id, sgame_id) 90 | 91 | def _send_last(self, sgame_id): 92 | last = self.last.pop(sgame_id, None) 93 | if not last: 94 | # 无缓存的上一次预测结果 95 | self._send_empty_rsp() 96 | return 97 | 98 | send_type, msg_id, sgame_id = last 99 | ret = self._send(send_type, msg_id, sgame_id) 100 | if not ret: 101 | LOG.warn("send failed, send empty rsp: {}".format(ret)) 102 | self._send_empty_rsp() 103 | -------------------------------------------------------------------------------- /hok_env/hok/hok1v1/unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/hok/hok1v1/unit_test/__init__.py -------------------------------------------------------------------------------- /hok_env/hok/hok1v1/unit_test/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "log_level": "8" 3 | } -------------------------------------------------------------------------------- /hok_env/hok/hok1v1/unit_test/test_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import numpy as np 5 | from hok.hok1v1 import HoK1v1 6 | 7 | 8 | def _generate_legal_action(env, states, common_ai): 9 | actions = [] 10 | shapes = env.action_space() 11 | 12 | split_array = shapes.copy()[:-1] 13 | for i in range(1, len(split_array)): 14 | split_array[i] = split_array[i - 1] + split_array[i] 15 | 16 | for i in range(2): 17 | if common_ai[i]: 18 | actions.append(tuple([0] * 6)) 19 | continue 20 | legal_action = np.split(states[i]["legal_action"], split_array) 21 | # 
def test_send_action(env, common_ai, eval, camp_config):
    """Drive one full episode: reset the env, step with random legal
    actions until either side reports done, then close the game and dump
    the final state."""
    print("======= test_send_action")
    print("camp_config", camp_config)
    print("common_ai", common_ai)
    print("try to get first state...")
    obs, reward, done, state = env.reset(
        camp_config, use_common_ai=common_ai, eval=eval
    )
    # When camp 0 runs as common AI, peek at the other side's state slot.
    first_idx = 1 if common_ai[0] else 0
    print("first state: ", state[first_idx].keys())
    step_idx = 0
    print("first frame:", env.cur_frame_no)

    while True:
        if step_idx % 100 == 0:
            print("----------------------run step ", step_idx)
        obs, reward, done, state = env.step(
            _generate_legal_action(env, state, common_ai)
        )
        if done[0] or done[1]:
            break
        step_idx += 1
    env.close_game()
    print(state)
__all__ = ["__version__", "get_version", "GAMECORE_VERSION"]

# (major, minor, patch) — keep in sync with the gamecore build below.
version_info = (45, 1, 5)
GAMECORE_VERSION = "v45_1450123"


def get_version():
    """Returns the version as a human-format string."""
    return ".".join(str(part) for part in version_info)


__version__ = get_version()
class Environment:
    """3v3 environment wrapper coordinating the gamecore launcher and the
    per-agent AI servers.

    Frame features come in through step_feature(); model outputs go back
    through step_action(); reset() restarts the whole game round.
    """

    def __init__(
        self,
        aiservers,
        lib_processor,
        game_launcher,
        runtime_id,
        wait_game_max_timeout=30,
        aiserver_ip="127.0.0.1",
    ):
        # One AI server per agent slot; lib_processor is the native feature
        # library, game_launcher controls the gamecore for this runtime_id.
        self.aiservers = aiservers
        self.lib_processor = lib_processor
        self.game_launcher = game_launcher
        self.runtime_id = runtime_id
        # Seconds close_game() waits for a natural game-over before forcing.
        self.wait_game_max_timeout = wait_game_max_timeout
        self.aiserver_ip = aiserver_ip

        # sgame ids of the round in progress; passed to lib_processor.Reset.
        self.cur_sgame_ids = []

    @log_time("feature_process")
    def step_feature(self, agent_id):
        """Receive the next frame for agent_id and run feature extraction."""
        return self.aiservers[agent_id].recv_and_feature_process()

    @log_time("result_process")
    def step_action(self, agent_id, probs, features, frame_state):
        """Turn model outputs into a game response for agent_id."""
        return self.aiservers[agent_id].result_process(probs, features, frame_state)

    def close_game(self, force=False):
        """Stop the current game; unless force, first wait (bounded) for it
        to finish naturally."""
        if not force:
            # wait game over
            self.game_launcher.wait_game(self.runtime_id, self.wait_game_max_timeout)

        # force close
        self.game_launcher.stop_game(self.runtime_id)

    def _get_server(self, use_common_ai):
        """Per agent: None for common-AI slots, else (ip, port) of its AI
        server (port parsed from the server's bind address)."""
        return [
            None
            if use_common_ai[i]
            else (self.aiserver_ip, int(server.addr.split(":")[-1]))
            for i, server in enumerate(self.aiservers)
        ]

    # TODO gym style
    def reset(
        self, use_common_ai, camp_hero_list, eval_mode=False, extra_abs_key_info=None
    ):
        """Restart a game round.

        extra_abs_key_info: see the extra_abs_key_info field of start_game.

        Order matters: reset lib state, stop any old game, start the AI
        servers for non-common-AI agents, start the new game, then consume
        each agent's first frame to capture its sgame_id.
        """
        LOG.debug("reset env")
        # reset infos
        self.lib_processor.Reset(self.cur_sgame_ids)
        self.cur_sgame_ids.clear()

        # eval_mode
        self.lib_processor.SetEvalMode(eval_mode)

        # stop game & start server & start game
        self.game_launcher.stop_game(self.runtime_id)

        for i, is_common_ai in enumerate(use_common_ai):
            if is_common_ai:
                continue
            self.aiservers[i].start()

        self.game_launcher.start_game(
            self.runtime_id,
            self._get_server(use_common_ai),
            camp_hero_list,
            eval_mode=eval_mode,
            extra_abs_key_info=extra_abs_key_info,
        )

        # process first frame
        for i, is_common_ai in enumerate(use_common_ai):
            LOG.info(f"Reset info: agent:{i} is_common_ai:{is_common_ai}")
            if is_common_ai:
                continue
            continue_process, features, frame_state = self.aiservers[
                i
            ].recv_and_feature_process()
            self.cur_sgame_ids.append(frame_state.sgame_id)

            if continue_process:
                # Acknowledge the first frame without taking an action.
                self.aiservers[i].send_empty_rsp()
return default_hero_config 20 | -------------------------------------------------------------------------------- /hok_env/hok/hok3v3/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/hok/hok3v3/lib/__init__.py -------------------------------------------------------------------------------- /hok_env/hok/hok3v3/lib/lib3v3.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/hok/hok3v3/lib/lib3v3.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hok_env/hok/hok3v3/lib/lib3v3.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/hok/hok3v3/lib/lib3v3.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hok_env/hok/hok3v3/lib/lib3v3.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/hok/hok3v3/lib/lib3v3.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /hok_env/hok/hok3v3/lib/lib3v3.cpython-39-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/hok_env/hok/hok3v3/lib/lib3v3.cpython-39-x86_64-linux-gnu.so -------------------------------------------------------------------------------- 
import os

from numpy.random import rand

from hok.hok3v3.lib import lib3v3 as interface
from hok.hok3v3.hero_config import get_default_hero_config, interface_default_config
from hok.hok3v3.server import AIServer
from hok.hok3v3.reward import update_reward_config, RewardConfig
from hok.common.gamecore_client import GamecoreClient
from hok.hok3v3.env import Environment
from hok.common.log import setup_logger
from hok.common.log import logger as LOG


def get_hok3v3(gamecore_server_addr, aiserver_ip, reward_config):
    """Build an Environment wired to the given gamecore server and rewards."""
    LOG.info(f"Init libprocessor: {interface_default_config}")
    LOG.info(f"Init reward: {reward_config}")
    LOG.info(f"Init gamecore environment: {gamecore_server_addr} {aiserver_ip}")

    lib_processor = interface.Interface()
    lib_processor.Init(interface_default_config)

    update_reward_config(lib_processor, reward_config)

    # One AI server per camp, on consecutive ports.
    aiservers = [
        AIServer(f"tcp://0.0.0.0:{35150 + idx}", lib_processor) for idx in range(2)
    ]

    game_launcher = GamecoreClient(
        server_addr=gamecore_server_addr,
        gamecore_req_timeout=30000,
        default_hero_config=get_default_hero_config(),
        max_frame_num=20000,
    )

    return Environment(
        aiservers,
        lib_processor,
        game_launcher,
        runtime_id="test-env",
        aiserver_ip=aiserver_ip,
    )


def random_predict(features, frame_state):
    """Return random action logits shaped like the model output: three
    float32 arrays of shape (1, 162), one per hero."""
    _ = features, frame_state
    return [rand(1, 162).astype("float32") for _ in range(3)]


def run_test():
    """Smoke test: play up to ~200 steps with random actions on one camp."""
    setup_logger()
    gc_server_addr = os.getenv("GAMECORE_SERVER_ADDR", "127.0.0.1:23432")
    # please replace the *AI_SERVER_ADDR* with your ip address.
    ai_server_addr = os.getenv("AI_SERVER_ADDR", "127.0.0.1")
    reward_config = RewardConfig.default_reward_config.copy()

    env = get_hok3v3(gc_server_addr, ai_server_addr, reward_config)

    use_common_ai = [True, False]
    camp_config = {
        "mode": "3v3",
        "heroes": [
            [{"hero_id": 190}, {"hero_id": 173}, {"hero_id": 117}],
            [{"hero_id": 141}, {"hero_id": 111}, {"hero_id": 107}],
        ],
    }
    env.reset(use_common_ai, camp_config, eval_mode=True)

    gameover = False
    step = 0
    while not gameover and step <= 200:
        if step % 100 == 0:
            LOG.info(f"----------------------run step {step}")
        step += 1

        for agent_id, is_common_ai in enumerate(use_common_ai):
            if is_common_ai:
                continue

            continue_process, features, frame_state = env.step_feature(agent_id)
            gameover = frame_state.gameover
            if not continue_process:
                continue

            probs = random_predict(features, frame_state)
            ok, _results = env.step_action(agent_id, probs, features, frame_state)
            if not ok:
                raise Exception("step action failed")

    env.close_game(force=True)


if __name__ == "__main__":
    run_test()
-------------------------------------------------------------------------------- 1 | __all__ = ["__version__", "get_version", "GAMECORE_VERSION"] 2 | 3 | version_info = (45, 1, 5) 4 | GAMECORE_VERSION = "v45_1450123" 5 | 6 | 7 | def get_version(): 8 | """Returns the version as a human-format string.""" 9 | return "%d.%d.%d" % version_info 10 | 11 | 12 | __version__ = get_version() 13 | -------------------------------------------------------------------------------- /hok_env/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | "wheel", 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "hok" 10 | version = "2.0.4" 11 | description = "Honor of Kings: A MOBA game environment for multi-agent reinforcement learning." 12 | requires-python = ">=3.6, <=3.9" 13 | readme = "README.md" 14 | 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | ] 18 | 19 | dependencies = [ 20 | "requests", 21 | "numpy", 22 | "loguru" 23 | ] 24 | 25 | [project.urls] 26 | "Homepage" = "https://github.com/tencent-ailab/hok_env" 27 | "Bug Tracker" = "https://github.com/tencent-ailab/hok_env/issues" 28 | -------------------------------------------------------------------------------- /hok_env/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys, os.path 3 | 4 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "hok")) 5 | from version import __version__ 6 | 7 | # Environment-specific dependencies. 
8 | extras = {} 9 | 10 | setup( 11 | name="hok", 12 | version=__version__, 13 | description="Honor of Kings: A MOBA game environment for multi-agent reinforcement learning.", 14 | packages=find_packages(), 15 | include_package_data=True, 16 | zip_safe=False, 17 | python_requires=">=3.6, <3.10", 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | 'Programming Language :: Python :: 3.7', 21 | ], 22 | ) 23 | -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/rl_framework/common/rl_framework/__init__.py -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/__init__.py: -------------------------------------------------------------------------------- 1 | from .lib_socket.utils import get_host_ip 2 | -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/rl_framework/common/rl_framework/common/algorithms/__init__.py -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/algorithms/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/rl_framework/common/rl_framework/common/algorithms/base/__init__.py -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/algorithms/base/algorithm.py: 
# -*- coding:utf-8 -*-
import struct
import socket
import logging
import time


class TcpSocket:
    """Blocking TCP client that reconnects forever and exchanges
    length-prefixed messages.

    Wire format: a 4-byte network-order total-length header, followed by
    (total_len - 4) bytes of payload.
    """

    def __init__(self, ip, port):
        self.ip = ip
        self.port = int(port)
        self.sock = None
        # NOTE: blocks (retrying forever) until the connection succeeds.
        self._connect(self.ip, self.port)

    def _connect(self, ip, port):
        """Connect to (ip, port), retrying once per second until it succeeds."""
        address = (ip, port)
        logging.info("address:%s" % str(address))
        while True:
            try:
                if self.sock:
                    self.sock.close()
                self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                self.sock.connect(address)
                return True
            except Exception as e:  # pylint: disable=broad-except
                logging.error("connect failed, address:%s, except:%s" % (address, e))
                time.sleep(1)

    def _send_all(self, request):
        """Send all of *request*; return True on success, False on any error."""
        try:
            _ = self.sock.sendall(request)
            return True
        except Exception as e:  # pylint: disable=broad-except
            logging.error("send failed, except:%s" % e)
            return False

    def _recv_all(self, recv_len):
        """Receive exactly *recv_len* bytes.

        Returns (True, data) on success; (False, None) on a socket error or
        peer close; (False, partial_data) if fewer bytes arrived.
        """
        recved_len = 0
        recv_data = b""
        while recved_len < recv_len:
            try:
                data = self.sock.recv(recv_len - recved_len)
            except Exception as e:  # pylint: disable=broad-except
                logging.error("recv failed, except:%s" % e)
                return False, None
            if data == b"":
                # Empty read means the peer closed the connection.
                logging.error("recv failed, data is empty")
                return False, None
            recv_data = recv_data + data
            recved_len += len(data)

        if recved_len != recv_len:
            logging.error("recv failed, recved_len != recv_len")
            return False, recv_data
        else:
            return True, recv_data

    def syn_send_recv(self, request):
        """Send *request* and return the full reply (header + payload),
        reconnecting and retrying on any failure."""
        ret = True
        while True:
            # check status
            if not ret:
                logging.error("conn is error, try to reconnect")
                self._connect(self.ip, self.port)
                time.sleep(1)

            # send request
            ret = self._send_all(request)
            if not ret:
                logging.error("_send_all failed")
                continue

            # recv header
            head_length = 4
            ret, recv_data = self._recv_all(head_length)
            if not ret:
                logging.error("_recv_all data_len failed")
                continue

            # recv proto_data
            total_len = struct.unpack("I", recv_data)[0]
            total_len = socket.ntohl(total_len)
            if total_len - head_length > 0:
                ret, proto_data = self._recv_all(total_len - head_length)
                # BUG FIX: check ret BEFORE appending — on a socket error
                # _recv_all returns (False, None), and the old code crashed
                # with "recv_data += None" instead of reconnecting.
                if not ret:
                    logging.error("_recv_all data failed")
                    continue
                recv_data += proto_data

            return recv_data
# -*- coding: utf-8 -*-
import logging
import zmq


class ZmqSocket:
    """ZMQ REQ client with send/recv timeouts and automatic reconnect.

    NOTE(review): sock_type is accepted but never used — the socket is
    always created as zmq.REQ; confirm whether a server mode was intended.
    """

    def __init__(self, ip_port, sock_type="client"):
        self.ip_port = ip_port
        self.timeout = 1000 * 30  # ms, applied to both send and recv polls
        self.context = zmq.Context()
        self.socket = None
        self.poller_send = zmq.Poller()
        self.poller_recv = zmq.Poller()
        self._connect()

    def _connect(self):
        """(Re)create the REQ socket and register it with both pollers.

        A REQ socket stuck mid send/recv cannot be reused, so on reconnect
        the old socket is discarded (LINGER=0 drops queued messages).
        """
        if self.socket:
            self.socket.setsockopt(zmq.LINGER, 0)
            self.socket.close()
            self.poller_send.unregister(self.socket)
            self.poller_recv.unregister(self.socket)
        self.socket = self.context.socket(zmq.REQ)
        self.socket.connect(self.ip_port)
        self.poller_send.register(self.socket, zmq.POLLOUT)
        self.poller_recv.register(self.socket, zmq.POLLIN)

    def syn_send_recv(self, message):
        """Synchronously send *message* and return the reply, reconnecting
        and retrying whenever either direction times out."""
        while True:
            if self.poller_send.poll(self.timeout):
                self.socket.send(message)
            else:
                logging.error("send timeout, try to reconnect")
                self._connect()
                continue

            if self.poller_recv.poll(self.timeout):
                data = self.socket.recv()
                break
            else:
                logging.error("recv timeout, try to reconnect")
                self._connect()
                continue
        return data

    def syn_recv_send(self, message):
        """Receive a request, then send *message* back; returns the request."""
        msg = self.syn_recv()
        self.syn_send(message)
        return msg

    def syn_recv(self):
        """Block until a message arrives, reconnecting on each timeout."""
        while True:
            socks = self.poller_recv.poll(self.timeout)
            # print(socks, type(socks))
            if socks:
                data = self.socket.recv()
                break
            else:
                logging.error("recv timeout, try to reconnect")
                self._connect()
        return data

    def syn_send(self, message):
        """Block until *message* is sent, reconnecting on each timeout."""
        while True:
            socks = self.poller_send.poll(self.timeout)
            # print(socks, type(socks))
            if socks:
                self.socket.send(message)
                break
            else:
                logging.error("send timeout, try to reconnect")
                self._connect()
# -*- coding:utf-8 -*-

import psutil
import numpy as np


class SysStats:
    """Thin wrapper over psutil for CPU / memory / network statistics."""

    def __init__(self) -> None:
        # Deltas (MB) computed at the previous network_stats() call; None
        # until enough history exists.
        self.network_sent_MB = None
        self.network_recv_MB = None
        # Cumulative totals (MB) observed at the previous call.
        self.last_network_sent = None
        self.last_network_recv = None

    @staticmethod
    def cpu_usage():
        """Return (mean, sum) of per-CPU usage percentages."""
        cpu_usages = psutil.cpu_percent(percpu=True)
        return np.mean(cpu_usages), np.sum(cpu_usages)

    @staticmethod
    def cpu_count():
        """Number of logical CPUs."""
        return psutil.cpu_count()

    @staticmethod
    def total_memory_GB():
        """Total physical memory in GiB."""
        mem = psutil.virtual_memory()
        return float(mem.total) / (1024 ** 3)

    @staticmethod
    def memory_usage_GB():
        """Used physical memory in GiB."""
        mem = psutil.virtual_memory()
        return float(mem.used) / (1024 ** 3)

    def network_stats(self, duration_sec):
        """Return (sent, recv) network rates in MB/s averaged over *duration_sec*.

        BUG FIX: this method was declared @staticmethod while reading and
        writing ``self`` attributes, so every call raised NameError; it is
        now a proper instance method.  It also wrote ``self.network_sent`` /
        ``self.network_recv`` although __init__ declares the ``*_MB``
        attributes — the declared names are used now.

        Returns (0, 0) on the first call, when no previous counters exist.
        """
        network_sent = 0
        network_recv = 0

        # Cumulative totals since boot: bytes sent / received, in MB.
        curr_network_sent = int(psutil.net_io_counters()[0] / (1024 ** 2))
        curr_network_recv = int(psutil.net_io_counters()[1] / (1024 ** 2))

        if self.last_network_sent is not None:
            self.network_sent_MB = curr_network_sent - self.last_network_sent
            network_sent = self.network_sent_MB / duration_sec

        if self.last_network_recv is not None:
            self.network_recv_MB = curr_network_recv - self.last_network_recv
            network_recv = self.network_recv_MB / duration_sec

        self.last_network_sent = curr_network_sent
        self.last_network_recv = curr_network_recv

        return network_sent, network_recv
3 | import argparse 4 | 5 | 6 | def proc_val(value): 7 | return value 8 | 9 | 10 | # aisrv args 11 | def _add_aisrv_args(parser): 12 | parser.add_argument("--job_master_addr", help="job_master_addr") 13 | parser.add_argument("--actor_addrs", type=str, help="actor ip_addrs with port") 14 | 15 | 16 | # actor args 17 | def _add_actor_args(parser): 18 | parser.add_argument("--job_master_addr", help="job_master_addr") 19 | 20 | 21 | # learner args 22 | def _add_learner_args(parser): 23 | parser.add_argument("--job_master_addr", help="job_master_addr") 24 | 25 | 26 | def cmd_args_parse(svr_name): 27 | parser = argparse.ArgumentParser( 28 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, allow_abbrev=False 29 | ) 30 | parser.add_argument("--conf", default="conf/%s.json" % svr_name, help="config file") 31 | 32 | if svr_name == "aisrv": 33 | _add_aisrv_args(parser) 34 | elif svr_name == "actor": 35 | _add_actor_args(parser) 36 | elif svr_name == "learner": 37 | _add_learner_args(parser) 38 | else: 39 | RuntimeError("illegal server name %s" % svr_name) 40 | 41 | args, unknowns = parser.parse_known_args() 42 | for key, value in zip(*[iter(unknowns)] * 2): 43 | key = key.lstrip("-") 44 | value = proc_val(value) 45 | setattr(args, key, value) 46 | 47 | return args 48 | -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/utils/cmd_argparser_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import sys 4 | import unittest 5 | from unittest.mock import patch 6 | from rl_framework.common.utils.cmd_argparser import cmd_args_parse 7 | 8 | 9 | class CmdArgParserTest(unittest.TestCase): 10 | def setUp(self) -> None: 11 | pass 12 | 13 | def test_aisrv_args(self): 14 | testargs = ["proc", "--actor_adress", "0.0.0.0:8000"] 15 | 16 | with patch.object(sys, "argv", testargs): 17 | args = cmd_args_parse("aisrv") 18 | print(args.actor_adress) 
19 | 20 | 21 | if __name__ == "__main__": 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/utils/common_func.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import threading 4 | 5 | 6 | # Singleton 7 | class Singleton(object): 8 | _instance_lock = threading.Lock() 9 | 10 | def __init__(self, cls): 11 | self._cls = cls 12 | self._instance = {} 13 | 14 | def __call__(self, *args, **kwargs): 15 | if self._cls not in self._instance: 16 | with Singleton._instance_lock: 17 | if self._cls not in self._instance: 18 | self._instance[self._cls] = self._cls(*args, **kwargs) 19 | return self._instance[self._cls] 20 | -------------------------------------------------------------------------------- /rl_framework/common/rl_framework/common/utils/config_control.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import os 4 | import rapidjson 5 | import configparser 6 | from rl_framework.common.utils.common_func import Singleton 7 | 8 | 9 | @Singleton 10 | class ConfigControl(object): 11 | def __init__(self) -> None: 12 | self.config_file = None 13 | 14 | def parse_configue(self): 15 | if not self.config_file: 16 | return 17 | 18 | config = configparser.ConfigParser() 19 | config.read(self.config_file) 20 | 21 | # main conf 22 | self.run_mode = config.getint("main", "run_mode") 23 | self.log_dir = config.get("main", "log_dir") 24 | 25 | # aisrv conf 26 | self.max_tcp_count = config.getint("aisrv", "max_tcp_count") 27 | self.ip_address = config.get("aisrv", "ip_address") 28 | self.server_port = config.get("aisrv", "server_port") 29 | 30 | # actor conf 31 | 32 | # learner conf 33 | 34 | def set_configue_file(self, config_file): 35 | self.config_file = config_file 36 | 37 | 38 | CONFIG = ConfigControl() 39 | 
# -*- coding:utf-8 -*-

import tracemalloc
import os
import linecache
import time


class MallocTrace(object):
    """Write tracemalloc snapshots and snapshot diffs to a report file."""

    def __init__(self, out) -> None:
        self.out = open(out, "w")
        self.prev_snapshot = None
        self.curr_snapshot = None

    def __del__(self) -> None:
        self.out.close()

    @staticmethod
    def start(nframe=1):
        """Start tracing allocations, keeping *nframe* frames per traceback."""
        tracemalloc.start(nframe)

    @staticmethod
    def stop():
        tracemalloc.stop()

    def take_snapshot(self):
        """Take a snapshot, keeping the previous one for compare_snapshot()."""
        self.prev_snapshot = self.curr_snapshot
        self.curr_snapshot = tracemalloc.take_snapshot()
        # Exclude allocations made by the tracing machinery itself.
        # NOTE(review): the two pattern strings were visibly stripped in the
        # original ("") — restored to the stdlib-documented filters; confirm.
        self.curr_snapshot = self.curr_snapshot.filter_traces(
            (
                tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
                tracemalloc.Filter(False, "<unknown>"),
                tracemalloc.Filter(False, tracemalloc.__file__),
                tracemalloc.Filter(False, linecache.__file__),
            )
        )

    def display_snapshot(self, key_type="lineno", limit=10):
        """Write the top *limit* allocation stats of the current snapshot.

        BUG FIX: the stats list was sliced to *limit* before computing the
        remainder, so "other" was always empty and "Total allocated size"
        only covered the displayed entries.  The full list is kept now.
        """
        all_stats = self.curr_snapshot.statistics(key_type)
        top_stats = all_stats[:limit]

        self.out.write(
            "##%s: Top %d Stats\n"
            % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), limit)
        )
        for index, stat in enumerate(top_stats, 1):
            frame = stat.traceback[0]
            filename = os.sep.join(frame.filename.split(os.sep))
            self.out.write(
                "#%s: %s:%s: %.1f KiB\n"
                % (index, filename, frame.lineno, stat.size / 1024)
            )
            for frame in stat.traceback:
                line = linecache.getline(frame.filename, frame.lineno).strip()
                if line:
                    self.out.write("    %s\n" % line)

        other = all_stats[limit:]
        if other:
            size = sum(stat.size for stat in other)
            self.out.write("%s other: %.1f KiB\n" % (len(other), size / 1024))
        total = sum(stat.size for stat in all_stats)
        self.out.write("Total allocated size: %.1f KiB\n" % (total / 1024))
        self.out.write("\n")
        self.out.flush()

    def compare_snapshot(self, limit=10):
        """Write the top *limit* allocation diffs between the last two
        snapshots; no-op until two snapshots have been taken.

        Same BUG FIX as display_snapshot: "other" and the total now cover
        the full diff, not just the displayed slice.
        """
        if not self.prev_snapshot or not self.curr_snapshot:
            return

        all_stats = self.curr_snapshot.compare_to(
            self.prev_snapshot, "lineno", cumulative=True
        )
        top_stats = all_stats[:limit]
        self.out.write(
            "##%s: Top %d Stats\n"
            % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), limit)
        )
        for index, stat in enumerate(top_stats, 1):
            frame = stat.traceback[0]
            filename = os.sep.join(frame.filename.split(os.sep))
            self.out.write(
                "#%s: %s:%s: %.1f KiB (+%.1f Kib)\n"
                % (
                    index,
                    filename,
                    frame.lineno,
                    stat.size / 1024,
                    stat.size_diff / 1024,
                )
            )
            for frame in stat.traceback:
                line = linecache.getline(frame.filename, frame.lineno).strip()
                if line:
                    self.out.write("    %s\n" % line)

        other = all_stats[limit:]
        if other:
            size = sum(stat.size for stat in other)
            self.out.write("%s other: %.1f KiB\n" % (len(other), size / 1024))
        total = sum(stat.size for stat in all_stats)
        self.out.write("Total allocated size: %.1f KiB\n" % (total / 1024))
        self.out.write("\n")
        self.out.flush()
# @package rl_framework.learner.algorithms.base
# The module defines some base classes, which includes model and algorithm
from abc import abstractmethod


# Base class describing the contract every training algorithm implements.
class Algorithm(object):
    """Wraps a network model and exposes graph-construction/optimizer hooks.

    Subclasses must implement :meth:`build_graph` and :meth:`get_optimizer`.
    """

    def __init__(self, model):
        # `model` defines the structure of the network; its type is
        # rl_framework.learner.algorithms.base.Model.
        self.model = model

    @abstractmethod
    def build_graph(self, datas, update):
        """Construct the tensorflow training graph.

        datas:  training data for the network (tensorflow ops).
        update: the current number of iterated steps.
        Returns ``(loss, other_info)`` where ``other_info`` carries whatever
        the user wants to print (e.g. accuracy in the sample code).
        """
        raise NotImplementedError("build_graph: not implemented")

    @abstractmethod
    def get_optimizer(self):
        """Return the optimizer as defined in tensorflow."""
        raise NotImplementedError("get optimizer: not implemented")
def _conf_path(base_path, port):
    # Path of the mcd0 conf file for one mem_pool server instance.
    return base_path + "/mem_pool_server_p{}/etc/mem_pool_server_p{}_mcd0.conf".format(
        port, port
    )


def get_mem_pool_key(server_ports, base_path):
    """Collect the ``mem_pool_key`` value from each server port's conf file.

    NOTE(review): a key sitting at column 0 is skipped because of the
    ``find(...) > 0`` test — presumably conf lines are indented; confirm
    against real generated conf files.
    """
    mem_pool_keys = []
    for port in server_ports:
        with open(_conf_path(base_path, port), "r") as fin:
            for raw in fin.readlines():
                raw = raw.strip("\n")
                if raw.find("mem_pool_key") > 0:
                    mem_pool_keys.append(int(raw.split("= ")[1]))
    return mem_pool_keys


def get_mem_pool_param(param, server_ports, base_path):
    """Return the first integer value of *param* found in the conf file of
    the first server port (falls back to port 35200 when the list is empty).
    Returns None when the parameter is absent."""
    port = server_ports[0] if len(server_ports) > 0 else 35200
    filepath = _conf_path(base_path, port)
    print("mempool conf {}".format(filepath))
    with open(filepath, "r") as fin:
        for raw in fin.readlines():
            raw = raw.strip("\n")
            if raw.find(param) > 0:
                return int(raw.split("= ")[1])
# -*- coding:utf-8 -*-
import multiprocessing
import os

import lz4.block

from rl_framework.learner.dataset.network_dataset.common.batch_process import (
    BatchProcess,
)
from rl_framework.learner.dataset.network_dataset.common.sample_manager import MemBuffer
from rl_framework.mem_pool.zmq_mem_pool_server.zmq_mem_pool import ZMQMEMPOOL
from rl_framework.common.logging import logger as LOG


class NetworkDataset(object):
    # Streams training samples out of a ZMQ mem pool into fixed-size batches.
    #
    # Pipeline: ZMQMEMPOOL (network) -> sample worker processes (LZ4
    # decompress + adapter deserialization) -> MemBuffer -> BatchProcess
    # -> get_next_batch().
    def __init__(self, config_manager, adapter, port=35200):
        self.max_sample = config_manager.max_sample
        self.batch_size = config_manager.batch_size
        self.adapter = adapter
        # data_shapes[0][0] is the flattened per-sample float length
        # (see OfflineRlInfoAdapter.get_data_shapes()).
        self.data_shapes = self.adapter.get_data_shapes()
        self.use_fp16 = config_manager.use_fp16
        self.membuffer = MemBuffer(
            config_manager.max_sample, self.data_shapes[0][0], self.use_fp16
        )

        self.batch_process = BatchProcess(
            self.batch_size,
            self.data_shapes[0][0],
            config_manager.batch_process,
            self.use_fp16,
        )

        self.port = port
        self.zmq_mem_pool = ZMQMEMPOOL(self.port)
        self.init_dataset = False

        # Daemon worker processes that pull and deserialize samples forever.
        for i in range(config_manager.sample_process):
            pid = multiprocessing.Process(target=self.enqueue_data, args=(i,))
            pid.daemon = True
            pid.start()

        # Hand the MemBuffer's sampler to the batch builder.
        self.batch_process.process(self.membuffer.get_sample)
        # Index of the batch handed out by the previous get_next_batch() call;
        # -1 means no batch is outstanding yet.
        self.last_batch_index = -1

    def get_next_batch(self):
        # Fetch a ready batch, then recycle the buffer returned last time.
        batch_index, sample_buf = self.batch_process.get_batch_data()
        if self.last_batch_index >= 0:
            self.batch_process.put_free_data(self.last_batch_index)
        self.last_batch_index = batch_index

        return sample_buf

    def enqueue_data(self, process_index):
        # Worker loop: pull compressed sample packets from the ZMQ mem pool,
        # LZ4-decompress, deserialize, and append into the shared MemBuffer.
        LOG.info(
            "sample process port:{} process_index:{} pid:{}".format(
                self.port, process_index, os.getpid()
            )
        )
        while True:
            for sample in self.zmq_mem_pool.pull_samples():
                # NOTE(review): 3 MiB looks like an upper bound on the
                # decompressed packet size — confirm with the producer side.
                decompress_data = lz4.block.decompress(
                    sample, uncompressed_size=3 * 1024 * 1024
                )
                sample_list = self.adapter.deserialization(decompress_data)
                for sample in sample_list:
                    self.membuffer.append(sample)

    def get_recv_speed(self):
        # Receive-rate statistics gathered by the MemBuffer.
        return self.membuffer.get_speed()
# @package rl_framework.learner.dataset.sample_generation
# Base class and default implementation for sample parsing.
from abc import abstractmethod
import numpy as np


class OfflineRlInfoAdapterBase(object):
    """Interface for turning raw received bytes into training samples."""

    def __init__(self):
        pass

    @abstractmethod
    def deserialization(self, receive_data):
        """Sample production interface.

        receive_data: object containing the data needed to generate
        batch_size samples. Returns the samples.
        """
        raise NotImplementedError("deserialization: not implemented")

    # Fixed: was declared without ``self`` (calling it on an instance raised
    # TypeError instead of NotImplementedError) and reused the wrong message.
    @abstractmethod
    def get_data_shapes(self):
        """Sample length acquisition interface. Returns ``[[sample_len]]``."""
        raise NotImplementedError("get_data_shapes: not implemented")


class OfflineRlInfoAdapter(OfflineRlInfoAdapterBase):
    """Default adapter: each sample is one flat float32 buffer."""

    def __init__(self, data_shapes):
        super().__init__()
        self.data_shapes = data_shapes

    def deserialization(self, receive_data):
        return self.deserialization_bytes(receive_data)

    def deserialization_bytes(self, receive_data):
        # Reinterpret the raw bytes as a single flat float32 vector.
        data = []
        data.append(np.frombuffer(receive_data, "f4"))
        return data

    def get_data_shapes(self):
        # Total flattened length: sum over every entry of every shape.
        return [[sum(map(sum, self.data_shapes))]]
import torch


class Datasets(object):
    """Thin wrapper converting numpy batches from *dataset* to torch tensors."""

    def __init__(self, dataset):
        self.dataset = dataset

    def next(self):
        return torch.from_numpy(self.dataset.get_next_batch())

    def get_recv_speed(self):
        return self.dataset.get_recv_speed()


class DataPrefetcher:
    """Overlaps host->device batch copies with compute on a side CUDA stream."""

    def __init__(self, dataset, device, use_fp16) -> None:
        self.dataset = dataset
        self.device = device
        self.use_fp16 = use_fp16
        self.next_data = None
        self.stream = torch.cuda.Stream(device=self.device)
        self.preload()

    def preload(self):
        # Fetch the next numpy batch and launch its device copy on the side
        # stream so the transfer overlaps with the current training step.
        self.next_data = self.dataset.get_next_batch()
        with torch.cuda.stream(self.stream):
            self.next_data = torch.from_numpy(self.next_data).to(
                device=self.device, non_blocking=True
            )
            if self.use_fp16:
                # Fixed: the fp16 branch called torch.from_numpy() on what is
                # already a torch.Tensor, which raises TypeError at runtime.
                # Upcast the device tensor to float32 in place instead.
                self.next_data = self.next_data.to(
                    dtype=torch.float32, non_blocking=True
                )

    def next(self):
        # Wait for the side-stream copy, hand out the batch, prefetch the next.
        torch.cuda.current_stream(self.device).wait_stream(self.stream)
        next_data = self.next_data
        self.preload()
        return next_data

    def get_recv_speed(self):
        return self.dataset.get_recv_speed()
class NodeInfo(object):
    """Rank/topology info for DDP training.

    Resolution order (fixed): explicit constructor arguments win; otherwise
    mpirun (OMPI_*) env vars, then torchrun (LOCAL_RANK) env vars, then a
    single-process default. Previously the branches were independent ``if``
    statements, so explicit arguments were silently overwritten — in
    particular the trailing ``else`` reset everything to rank 0 whenever no
    env vars were set, even when all four arguments were passed.
    """

    def __init__(self, rank=None, rank_size=None, local_rank=None, local_size=None) -> None:
        if (
            rank is not None
            and rank_size is not None
            and local_rank is not None
            and local_size is not None
        ):
            # Caller supplied the full topology explicitly.
            self.rank = rank
            self.rank_size = rank_size
            self.local_rank = local_rank
            self.local_size = local_size
        # mpirun
        elif "OMPI_COMM_WORLD_LOCAL_RANK" in os.environ:
            self.rank = int(os.environ["OMPI_COMM_WORLD_RANK"])
            self.rank_size = int(os.environ["OMPI_COMM_WORLD_SIZE"])
            self.local_rank = int(os.environ["OMPI_COMM_WORLD_LOCAL_RANK"])
            self.local_size = int(os.environ["OMPI_COMM_WORLD_LOCAL_SIZE"])
            master_uri = "tcp://{ip}:{port}".format(
                ip=os.environ["MASTER_ADDR"], port=os.environ["MASTER_PORT"]
            )
            if self.rank_size > 1:
                dist.init_process_group(
                    backend="nccl" if torch.cuda.is_available() else "mpi",
                    init_method=master_uri,
                    rank=self.rank,
                    world_size=self.rank_size,
                )
        # torchrun
        elif "LOCAL_RANK" in os.environ:
            dist.init_process_group(
                backend="nccl" if torch.cuda.is_available() else "gloo"
            )
            self.rank = int(dist.get_rank())
            self.rank_size = int(dist.get_world_size())
            self.local_rank = int(os.environ["LOCAL_RANK"])
            self.local_size = int(os.environ["LOCAL_WORLD_SIZE"])
        else:
            # Single-process fallback.
            self.rank = 0
            self.rank_size = 1
            self.local_rank = 0
            self.local_size = 1

        self.is_chief_rank = self.rank == 0
import torch


class StepContext:
    """Per-step training telemetry: loss, speeds, and inf/NaN health flags."""

    def __init__(self, rank, local_rank, gpu_nums, ip, batch_size):
        self.rank = rank
        self.local_rank = local_rank
        self.ip = ip
        self.batch_size = batch_size
        self.gpu_nums = gpu_nums
        self.is_chief_rank = self.rank == 0
        self.total_loss = None
        self.info_list = None
        self.step = None
        self.sample_recv_speed = None
        self.sample_consume_speed = None
        self.train_has_inf_nan = False
        self.grad_has_inf_nan = False

    def set_forward_info(self, total_loss, info_list):
        # deep copy gpu tensor to cpu in case that method optm.step() changes parameters
        self.total_loss = (
            total_loss.to("cpu", copy=True)
            if isinstance(total_loss, torch.Tensor)
            else total_loss
        )
        self.info_list = [
            (item.to("cpu", copy=True) if isinstance(item, torch.Tensor) else item)
            for item in info_list
        ]

    def check_has_inf_nan(self, total_loss, params):
        """Record whether the loss or any parameter gradient holds inf/NaN."""
        self.train_has_inf_nan = False
        self.grad_has_inf_nan = False
        if torch.isnan(total_loss).any() or torch.isinf(total_loss).any():
            self.train_has_inf_nan = True
        for param in params:
            # Fixed: parameters that did not take part in the backward pass
            # have grad None; torch.isnan(None) raised TypeError here.
            if param.grad is None:
                continue
            if torch.isnan(param.grad).any() or torch.isinf(param.grad).any():
                self.grad_has_inf_nan = True
                break

    def set_other_info(self, step, sample_recv_speed, sample_consume_speed):
        self.step = step
        self.sample_recv_speed = sample_recv_speed
        self.sample_consume_speed = sample_consume_speed

    def decode(self):
        # Flat dict form consumed by the monitoring/reporting side.
        return {
            "step": self.step,
            "batch_size": self.batch_size,
            "gpu_nums": self.gpu_nums,
            "sample_recv_speed": self.sample_recv_speed,
            "sample_consume_speed": self.sample_consume_speed,
            "total_loss": self.total_loss,
            "info_list": self.info_list,
            "train_has_inf_nan": self.train_has_inf_nan,
            "grad_has_inf_nan": self.grad_has_inf_nan,
        }
# Packaging script for the rl-framework learner component.
from setuptools import setup
from setuptools import find_packages

PROJECT_NAME = "rl-framework-learner"
_VERSION = "1.0.0"

# NOTE(review): intentionally empty? Runtime deps appear to be provided by
# the training image rather than pip — confirm.
require_list = []

setup(
    name=PROJECT_NAME,
    version=_VERSION,
    packages=find_packages(),
    description="rl-framework-learner",
    long_description="rl-framework-learner",
    license="Apache 2.0",
    keywords="rl-framework game ai training framework - learner",
    install_requires=require_list,
)
-------------------------------------------------------------------------------- 1 | # @package mem_pool 2 | # Provides a mem pool api class to push samples to mempool and pull sample from mempool 3 | 4 | from rl_framework.common.lib_socket.zmq_socket import ZmqSocket 5 | from rl_framework.common.lib_socket.tcp_socket import TcpSocket 6 | from rl_framework.mem_pool.mem_pool_api.mem_pool_protocol import MemPoolProtocol 7 | 8 | 9 | class MemPoolAPIs(object): 10 | # The constructor. 11 | # @param self The object pointer. 12 | # @param ip mempool server ip 13 | # @param port mempool server port 14 | # @param socket_type mempool server type, 15 | # "zmq": mempool is a python version, use zeromq protocol 16 | # "mcp++": mempool is a mcp++ version, use tcp protocol 17 | def __init__(self, ip, port, socket_type="zmq"): 18 | if socket_type == "zmq": 19 | ip_port = "tcp://%s:%s" % (ip, port) 20 | self._client = ZmqSocket(ip_port, "client") 21 | elif socket_type == "mcp++": 22 | self._client = TcpSocket(ip, port) 23 | else: 24 | raise NotImplementedError 25 | 26 | self.protocol = MemPoolProtocol() 27 | 28 | # Pull sample Interface: randomly pull a sample from mempool 29 | # @param self The object pointer. 30 | # @param strategy sampling strategy type:int. 31 | # @return seq sequence number 32 | # @return sample 33 | def pull_sample(self, strategy): 34 | request = self.protocol.format_get_request(strategy=strategy) 35 | response = self._request(request) 36 | _, seq, _, sample = self.protocol.parse_get_response(response) 37 | return seq, sample 38 | 39 | # Push samples Interface: 40 | # compress each sample by lz4 and send to mempool 41 | # if more than max_sample_num, split to packages, one package include max_sample_num samples 42 | # @param self The object pointer. 
class ZMQMEMPOOL(object):
    """ZMQ-based mem pool server: receives sample packets on *port* in a
    background process and exposes them through :meth:`pull_samples`."""

    def __init__(self, port, max_message=2000):
        self.port = port
        # Bounded queue between the receiver process and consumers.
        self.data_queue = multiprocessing.Queue(max_message)
        self.recv_pid = multiprocessing.Process(
            target=self.recv_data, args=(self.data_queue,)
        )
        self.recv_pid.daemon = True
        self.recv_pid.start()

    def recv_data(self, queue):
        """Receiver loop: a REP socket answering each request with b"success".

        A KMemSetBatchRequest enqueues its samples; a KMemCleanRequest drains
        the queue. Failures (including queue-full drops) are counted and
        reported roughly once every 30 seconds.
        """
        context = zmq.Context()
        recv_socket = context.socket(zmq.REP)
        addr = "tcp://*:{}".format(self.port)
        recv_socket.bind(addr)
        print_start_time = time.time()
        put_error_num = 0
        while True:
            if time.time() - print_start_time > 30:
                print_start_time = time.time()
                if put_error_num != 0:
                    # *2 converts the 30s window count to a per-minute rate.
                    LOG.info("queue put error: {}/min".format(put_error_num * 2))
                put_error_num = 0

            data = recv_socket.recv()
            recv_socket.send(b"success")
            try:
                # Bytes 8..12 of the packet header carry the command type.
                has_deal = 8
                cmd_type = struct.unpack("I", data[has_deal : has_deal + 4])[0]
                if cmd_type == int(CmdType.KMemSetBatchRequest.value):
                    queue.put(self.generate_samples(data), block=False)
                elif cmd_type == int(CmdType.KMemCleanRequest.value):
                    # Fixed: multiprocessing.Queue has no clear(); the old
                    # queue.clear() call raised AttributeError, which the
                    # broad except swallowed and mis-counted as a put error,
                    # so clean requests never cleaned anything.
                    self._drain(queue)
                else:
                    raise NotImplementedError
            except Exception:  # pylint: disable=broad-except
                LOG.error("recv_data error: {}".format(sys.exc_info()[0]))
                put_error_num += 1

    @staticmethod
    def _drain(queue):
        # Best-effort empty of a multiprocessing.Queue.
        from queue import Empty

        try:
            while True:
                queue.get_nowait()
        except Empty:
            pass

    def generate_samples(self, data):
        """Parse one KMemSetBatchRequest packet into a list of raw samples.

        Layout after the 12-byte header: uint32 sample_num, then per sample
        uint32 data_len, float32 priority (parsed but unused), payload bytes.
        """
        sample_list = []
        has_deal = 12
        sample_num = struct.unpack("I", data[has_deal : has_deal + 4])[0]
        has_deal += 4
        for _ in range(sample_num):
            data_len = struct.unpack("I", data[has_deal : has_deal + 4])[0]
            has_deal += 4
            priority = struct.unpack("f", data[has_deal : has_deal + 4])[0]
            has_deal += 4
            sample_list.append(data[has_deal : has_deal + data_len])
            has_deal += data_len
        return sample_list

    def pull_samples(self):
        # Blocks until at least one sample batch is available.
        return self.data_queue.get()
# -*- coding: utf-8 -*-
# Manual smoke test for MemPoolAPIs against a locally running mcp++ mem pool
# server on port 35201 (executed directly at import time; not a pytest suite).

from rl_framework.mem_pool.mem_pool_api.mem_pool_apis import MemPoolAPIs
from rl_framework.mem_pool.mem_pool_api.mem_pool_protocol import SamplingStrategy
from rl_framework.common.lib_socket.utils import get_host_ip


def test_push_samples():
    # Push 9 samples with priorities 1..9, at most 3 samples per packet.
    local_ip = get_host_ip()
    api = MemPoolAPIs(local_ip, 35201, "mcp++")
    array = []
    priorities = []
    for i in range(1, 10):
        array.append(bytes("hello world%s" % (i), encoding="utf8"))
        priorities.append(float(i))
    ret = api.push_samples(array, priorities, 3)
    print("push samples: ret", ret)


def test_pull_sample():
    # Pull 5 samples back using priority-based sampling.
    local_ip = get_host_ip()
    api = MemPoolAPIs(local_ip, 35201, "mcp++")
    for _ in range(5):
        seq, sample = api.pull_sample(SamplingStrategy.PriorityGet.value)
        print("get sample: seq %s sample %s" % (seq, sample))


test_push_samples()
print()
test_pull_sample()
#!/bin/bash
# Unpack a model archive into the batch-processing directory and normalize
# the layout so downstream consumers always find "checkpoint" / "savedmodel".
#
# usage: mv.sh <destPath> <sourcePath>
sourcePath=$2   # original model path (a tar archive file)
destPath=$1     # destination directory for the unpacked model

tar -xf "$sourcePath" -C "$destPath"/
#mv $sourcePath $destPath/model
#cd $destPath && tar -xf model && rm -rf model

## checkpoint* -> checkpoint
# compgen -G tests the glob quietly; the previous `ls ... | wc -l` form
# printed "cannot access" noise to stderr when nothing matched.
if compgen -G "$destPath/checkpoints*" > /dev/null; then
    cd "$destPath" && mv checkpoints* checkpoint
fi

## football-style epoch_*_complete -> checkpoint
if compgen -G "$destPath/epoch_*_complete" > /dev/null; then
    cd "$destPath" && mv epoch_*_complete checkpoint
fi

## saved_model -> savedmodel
if [ -d "$destPath/saved_model" ]; then
    cd "$destPath" && mv saved_model/1/model.savedmodel/ savedmodel && rm -rf saved_model
fi
filter: #针对所有后端调用函数前后的拦截器列表 48 | service: #针对单个后端的配置 49 | - name: trpc.modelpool.main.ModelPool #后端服务的service name 50 | namespace: Development #后端服务的环境 51 | network: tcp #后端服务的网络类型 tcp udp 配置优先 52 | protocol: trpc #应用层协议 trpc http 53 | target: dns://__TARGET_TRPC_ADDRESS_HERE__ #请求服务地址 54 | timeout: 1000 #请求最长处理时间 55 | # - name: trpc.modelpool.main.ModelPoolHTTP #后端服务的service name 56 | # namespace: Development #后端服务的环境 57 | # network: tcp #后端服务的网络类型 tcp udp 配置优先 58 | # protocol: http #应用层协议 trpc http 59 | # target: dns://__TARGET_HTTP_ADDRESS_HERE__ #请求服务地址 60 | # timeout: 1000 #请求最长处理时间 61 | 62 | plugins: #插件配置 63 | log: #日志配置 64 | default: #默认日志的配置,可支持多输出 65 | - writer: console #控制台标准输出 默认 66 | level: debug #标准输出日志的级别 67 | - writer: file #本地文件日志 68 | level: info #本地文件滚动日志的级别 69 | writer_config: 70 | filename: ../log/modelpool.log #本地文件滚动日志存放的路径 71 | max_size: 10 #本地文件滚动日志的大小 单位 MB 72 | max_backups: 10 #最大日志文件数 73 | max_age: 7 #最大日志保留天数 74 | compress: false #日志文件是否压缩 75 | 76 | -------------------------------------------------------------------------------- /rl_framework/model_pool/pkg/model_pool_pkg/config/trpc_go.yaml.gpu: -------------------------------------------------------------------------------- 1 | global: #全局配置 2 | namespace: Development #环境类型,分正式production和非正式development两种类型 3 | env_name: test #环境名称,非正式环境下多环境的名称 4 | 5 | server: #服务端配置 6 | app: modelpool #业务的应用名 7 | server: main #进程服务名 8 | admin: 9 | ip: 127.0.0.1 10 | port: 10015 11 | read_timeout: 3000 12 | write_timeout: 60000 13 | bin_path: /usr/local/trpc/bin/ #二进制可执行文件和框架配置文件所在路径 14 | conf_path: /usr/local/trpc/conf/ #业务配置文件所在路径 15 | data_path: /usr/local/trpc/data/ #业务数据文件所在路径 16 | filter: #针对所有service处理函数前后的拦截器列表 17 | - simpledebuglog 18 | - recovery #拦截框架创建的业务处理协程panic 19 | service: #业务服务提供的service,可以有多个 20 | - name: trpc.modelpool.main.ModelPool #service的路由名称 21 | ip: 0.0.0.0 #服务监听ip地址 可使用占位符 ${ip},ip和nic二选一,优先ip 22 | #nic: eth0 23 | port: 10013 #服务监听端口 可使用占位符 ${port} 24 | network: tcp #网络监听类型 
tcp udp 25 | protocol: trpc #应用层协议 trpc http 26 | timeout: 1000 #请求最长处理时间 单位 毫秒 #业务服务提供的service,可以有多个 27 | - name: trpc.modelpool.main.ModelPoolHTTP #service的路由名称 28 | ip: 0.0.0.0 #服务监听ip地址 可使用占位符 ${ip},ip和nic二选一,优先ip 29 | #nic: eth0 30 | port: 10014 #服务监听端口 可使用占位符 ${port} 31 | network: tcp #网络监听类型 tcp udp 32 | protocol: http #应用层协议 trpc http 33 | timeout: 1000 #请求最长处理时间 单位 毫秒 34 | 35 | modelpool: 36 | role: master 37 | fileSavePath: files 38 | cluster: 39 | ip: __MODELPOOL_IP_HERE__ 40 | name: 41 | maxStorage: 16GB #单位可选 MB GB TB 42 | statisticsBufferSize: 500 43 | 44 | client: #客户端调用的后端配置 45 | timeout: 1000 #针对所有后端的请求最长处理时间 46 | namespace: Development #针对所有后端的环境 47 | filter: #针对所有后端调用函数前后的拦截器列表 48 | service: #针对单个后端的配置 49 | - name: trpc.modelpool.main.ModelPool #后端服务的service name 50 | namespace: Development #后端服务的环境 51 | network: tcp #后端服务的网络类型 tcp udp 配置优先 52 | protocol: trpc #应用层协议 trpc http 53 | target: dns://127.0.0.1 #请求服务地址 54 | timeout: 1000 #请求最长处理时间 55 | # - name: trpc.modelpool.main.ModelPoolHTTP #后端服务的service name 56 | # namespace: Development #后端服务的环境 57 | # network: tcp #后端服务的网络类型 tcp udp 配置优先 58 | # protocol: http #应用层协议 trpc http 59 | # target: dns://__TARGET_HTTP_ADDRESS_HERE__ #请求服务地址 60 | # timeout: 1000 #请求最长处理时间 61 | 62 | plugins: #插件配置 63 | log: #日志配置 64 | default: #默认日志的配置,可支持多输出 65 | - writer: console #控制台标准输出 默认 66 | level: debug #标准输出日志的级别 67 | - writer: file #本地文件日志 68 | level: info #本地文件滚动日志的级别 69 | writer_config: 70 | filename: ../log/modelpool.log #本地文件滚动日志存放的路径 71 | max_size: 10 #本地文件滚动日志的大小 单位 MB 72 | max_backups: 10 #最大日志文件数 73 | max_age: 7 #最大日志保留天数 74 | compress: false #日志文件是否压缩 75 | monitor: #监控日志 76 | - writer: file 77 | level: info 78 | writer_config: 79 | filename: ../log/app.log 80 | max_size: 2 #文件滚动日志的大小 单位 MB 81 | max_backups: 5 #最大日志文件数 82 | max_age: 7 #最大日志保留天数 83 | compress: false 84 | formatter_config: 85 | time_fmt: "2006-01-02T15:04:05.999Z+0700" 86 | 87 | 
#!/bin/bash
# Generate ../config/trpc_go.yaml for a CPU (slave) modelpool node by filling
# the placeholders with the master address, cluster name, local ip and
# container hostname.
#
# usage: set_cpu_config.sh master_ip

if [ $# -lt 1 ]; then
    echo "usage $0 master_ip"
    exit 1   # "exit -1" is not a portable exit status
fi

#MODELPOOL_ADDR=$1
MODELPOOL_ADDR=$1":10013"    # master trpc service listens on port 10013

ip=$(hostname -I | awk '{print $1;}')
TVMEC_DOCKER_ID=$(hostname)
# Fall back to a default cluster name when the environment does not set one.
if [ -z "$CLUSTER_CONTEXT" ]; then
    CLUSTER_CONTEXT='default'
fi

# Rebuild the config from the CPU template. -f ignores a missing old file
# (the previous plain `rm` printed an error on first run); avoiding `cd`
# keeps every path relative to the op/ directory this script runs from.
rm -f ../config/trpc_go.yaml
cp ../config/trpc_go.yaml.cpu ../config/trpc_go.yaml

sed -i "s/__TARGET_TRPC_ADDRESS_HERE__/${MODELPOOL_ADDR}/g" ../config/trpc_go.yaml
sed -i "s/__MODELPOOL_CLUSTER_HERE__/${CLUSTER_CONTEXT}/g" ../config/trpc_go.yaml
sed -i "s/__MODELPOOL_IP_HERE__/${ip}/g" ../config/trpc_go.yaml
sed -i "s/__MODELPOOL_NAME_HERE__/${TVMEC_DOCKER_ID}/g" ../config/trpc_go.yaml
#!/bin/bash
# Stop all modelpool processes and clean up their working data.
# Exits 0 when no modelpool process remains, non-zero otherwise.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$SCRIPT_DIR"

rm -rf ../bin/files
rm -rf ../bin/model

# Bug fix: the old `ps -ef | grep modelpool | awk | xargs kill -9` pipeline
# also picked up its own grep's pid (racy "No such process" kills).
# pkill matches the target command lines directly.
pkill -9 -f "modelpool"

remaining=$(ps -ef | grep "modelpool" | grep -v grep | wc -l)

if [ "$remaining" -eq 0 ]; then
    exit 0
else
    exit 1   # "exit -1" is not a portable exit status
fi
# Packaging script for the rl-framework model-pool component.
from setuptools import find_packages, setup

PROJECT_NAME = "rl-framework-model-pool"
_VERSION = "1.0.0"

# Runtime dependencies of the model-pool APIs.
require_list = ["timeout_decorator"]

setup(
    name=PROJECT_NAME,
    version=_VERSION,
    packages=find_packages(),
    description=PROJECT_NAME,
    long_description=PROJECT_NAME,
    license="Apache 2.0",
    keywords="rl-framework game ai training framework - model_pool",
    install_requires=require_list,
)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/rl_framework/monitor/rl_framework/__init__.py -------------------------------------------------------------------------------- /rl_framework/monitor/rl_framework/monitor/__init__.py: -------------------------------------------------------------------------------- 1 | from .loglib.influxdb_handler import InfluxdbMonitorHandler 2 | 3 | __all__ = ["InfluxdbMonitorHandler"] 4 | -------------------------------------------------------------------------------- /rl_framework/monitor/rl_framework/monitor/loglib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tencent-ailab/hok_env/08518fa447fa86f16ffc471771da27536ebda1dd/rl_framework/monitor/rl_framework/monitor/loglib/__init__.py -------------------------------------------------------------------------------- /rl_framework/monitor/rl_framework/monitor/loglib/loglib.conf: -------------------------------------------------------------------------------- 1 | [influxdb_handler] 2 | queue_size = -1 3 | port = 8086 4 | database = monitordb 5 | -------------------------------------------------------------------------------- /rl_framework/monitor/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = rl-framework-monitor 3 | version = 1.0.0 4 | description = rl-framework monitor 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | 8 | [options] 9 | packages = find: 10 | python_requires = >=3.6 11 | install_requires = 12 | influxdb 13 | include_package_data = True 14 | -------------------------------------------------------------------------------- /rl_framework/predictor/README.md: -------------------------------------------------------------------------------- 1 | 
# -*- coding: utf-8 -*-


class BasePredictor(object):
    """Abstract base class for model predictors.

    Concrete predictors must override :meth:`load_model` and
    :meth:`inference`; the base versions only raise ``NotImplementedError``.
    """

    def __init__(self):
        pass

    def load_model(self, model_name):
        """Load the model identified by *model_name*. Must be overridden."""
        raise NotImplementedError

    def inference(self, input_list, output_list):
        """Run a forward pass over *input_list*/*output_list*. Must be overridden."""
        raise NotImplementedError
class InferInput(InferData):
    """Describes an input tensor of an inference request.

    Parameters
    ----------
    name : str
        The name of the input described by this object.
    dims : list
        The shape of the associated input.
    data_type : str, optional
        The datatype of the associated input.
    data : numpy array, optional
        The data of the associated input.
    """

    def __init__(self, name, dims, data_type=None, data=None):
        # All behavior lives in InferData; this subclass only marks the
        # tensor's role as an inference *input*.
        super(InferInput, self).__init__(name, dims, data_type, data)
# -*- coding: utf-8 -*-
import torch
import os
from rl_framework.common.logging import logger as LOG


class LocalTorchPredictor(object):
    """CPU-only predictor wrapping a torch network for local inference."""

    def __init__(self, net):
        super().__init__()
        # Inference is pinned to the CPU device.
        self.device = torch.device("cpu")
        self.net = net.to(self.device)

    def load_model(self, model_path):
        """Load weights from ``<model_path>/model.pth`` into the wrapped net."""
        model_filename = os.path.join(model_path, "model.pth")
        LOG.info("load model: {}", model_filename)
        checkpoint = torch.load(model_filename, map_location=self.device)
        self.net.load_state_dict(checkpoint["network_state_dict"])

    def inference(self, data_list):
        """Run a no-grad forward pass over the numpy arrays in *data_list*."""
        tensors = [torch.from_numpy(arr).to(torch.float32) for arr in data_list]
        inputs = self.net.format_data(tensors, inference=True)
        self.net.eval()
        with torch.no_grad():
            return self.net(inputs, inference=True)
def cvt_tensor_to_infer_output(output_tensors):
    """
    Convert tensor list to infer output list.

    Parameters
    ----------
    output_tensors : list of tf.Tensor
        A list of output tensors.

    Returns
    ----------
    list of InferOutput
        A list of output tensors.
    """
    # Unknown (None) dimensions are encoded as -1, matching the input helper.
    return [
        InferOutput(
            t.name,
            [-1 if dim is None else dim for dim in t.shape.as_list()],
            t.dtype.as_numpy_dtype,
        )
        for t in output_tensors
    ]
import hashlib
import os
import sys
from model_syn_base import ModelSynBase
from rl_framework.model_pool import ModelPoolAPIs
from rl_framework.common.logging import logger as LOG


class ModelSynModelPool(ModelSynBase):
    """Synchronizes model files to a model-pool cluster via ModelPoolAPIs."""

    def __init__(self, address):
        # *address* is a comma-separated list of model-pool endpoints.
        self.model_pool_apis = ModelPoolAPIs(address.split(","))
        self.model_pool_apis.check_server_set_up()
        self.step = 0  # number of models pushed so far

    def syn_model(self, model_path, model_key=None):
        """Push the model file at *model_path* to the pool.

        Returns True on success, False when the file does not exist.
        When *model_key* is None the file's basename is used as the key.
        """
        model, local_md5 = self._read_model(model_path)
        if model is None:
            return False
        if model_key is None:
            key = model_path.split("/")[-1]
            # key="model_{}".format(self.step)
        else:
            key = model_key
        self.model_pool_apis.push_model(
            model=model,
            hyperparam=None,
            key=key,
            md5sum=local_md5,
            save_file_name=key,
        )
        self.step += 1
        # Bug fix: the message previously had no "{}" placeholder, so the key
        # passed as a brace-style format argument never appeared in the log.
        LOG.info("success push model {}", key)
        return True

    def _read_model(self, model_path):
        """Return (bytes, md5) for *model_path*, or (None, None) if missing.

        md5 computation is intentionally disabled (None is returned) so large
        model files are not hashed on every push.
        """
        if not os.path.exists(model_path):
            return None, None
        with open(model_path, "rb") as fin:
            model = fin.read()
        # local_md5 = hashlib.md5(model).hexdigest()  # disabled on purpose
        return model, None