├── __init__.py
├── experiment_func
│   ├── __init__.py
│   ├── _DGP_TIGER.py
│   ├── _DGP_Ohio.py
│   └── _utility_RL.py
├── experiment_script
│   ├── __init__.py
│   ├── Ohio_simu_values.txt
│   ├── Tiger_simu.py
│   ├── Ohio_simu_seq_lags.py
│   ├── Ohio_simu_values.py
│   └── Ohio_simu_testing.py
├── diag.png
├── test_func
│   ├── __init__.py
│   ├── _uti_basic.py
│   ├── _utility.py
│   ├── _QRF.py
│   └── _core_test_fun.py
├── LICENSE
├── TestMDP.yml
└── README.md

--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/experiment_func/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/experiment_script/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_values.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/diag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/diag.png
--------------------------------------------------------------------------------
/test_func/__init__.py:
--------------------------------------------------------------------------------
from ._utility import *
from ._uti_basic import *
from ._core_test_fun import *
from ._QRF import *
# from ._utility_RL import *

# from ._DGP_Ohio import *
# from ._DGP_TIGER import *
# from ._Funcs_Real_Ohio import *

__all__ = ["_core_test_fun", "_QRF", "_uti_basic", "_utility"]

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 RunzheStat

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/TestMDP.yml:
--------------------------------------------------------------------------------
name: TestMDP
channels:
  - conda-forge
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
  - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
  - defaults
dependencies:
  - _r-mutex=1.0.0=anacondar_1
  - bwidget=1.9.11=1
  - bzip2=1.0.8=h1de35cc_0
  - ca-certificates=2020.1.1=0
  - cairo=1.14.12=hc4e6be7_4
  - cctools=927.0.2=h5ba7a2e_4
  - certifi=2020.4.5.1=py36_0
  - clang=10.0.0=default_hf57f61e_0
  - clang_osx-64=10.0.0=h05bbb7f_1
  - clangxx=10.0.0=default_hf57f61e_0
  - clangxx_osx-64=10.0.0=h05bbb7f_1
  - compiler-rt=10.0.0=h47ead80_0
  - compiler-rt_osx-64=10.0.0=hbcc88fd_0
  - curl=7.67.0=ha441bb4_0
  - dill=0.3.1.1=py36_0
  - fontconfig=2.13.0=h5d5b041_1
  - freetype=2.9.1=hb4e5f40_0
  - fribidi=1.0.5=h1de35cc_0
  - gettext=0.19.8.1=h15daf44_3
  - gfortran_osx-64=4.8.5=h22b1bf0_8
  - glib=2.63.1=hd977a24_0
  - graphite2=1.3.13=h2098e52_0
  - gsl=2.4=h1de35cc_4
  - harfbuzz=1.8.8=hb8d4a28_0
  - icu=58.2=h0a44026_3
  - intel-openmp=2020.1=216
  - joblib=0.15.1=py_0
  - jpeg=9b=he5867d9_2
  - krb5=1.16.4=hddcf347_0
  - ld64=450.3=h3c32e8a_4
  - libblas=3.8.0=11_openblas
  - libcblas=3.8.0=11_openblas
  - libcurl=7.67.0=h051b688_0
  - libcxx=10.0.0=1
  - libedit=3.1.20181209=hb402a30_0
  - libffi=3.2.1=h0a44026_6
  - libgfortran=3.0.1=h93005f0_2
  - libiconv=1.16=h1de35cc_0
  - liblapack=3.8.0=11_openblas
  - libllvm10=10.0.0=h21ff451_0
  - libopenblas=0.3.6=hdc02c5d_2
  - libpng=1.6.37=ha441bb4_0
  - libssh2=1.9.0=ha12b0ac_1
  - libtiff=4.1.0=hcb84e12_1
  - libxml2=2.9.9=hf6e021a_1
  - llvm-openmp=10.0.0=h28b9765_0
  - llvm-tools=10.0.0=h21ff451_0
  - lz4-c=1.9.2=h0a44026_0
  - make=4.2.1=h3efe00b_1
  - mkl=2020.1=216
  - mkl_random=1.1.1=py36h0130604_0
  - multiprocess=0.70.9=py36h37b9a7d_1
  - ncurses=6.2=h0a44026_1
  - numpy=1.18.5=py36hdc5ca10_0
  - openssl=1.1.1g=h1de35cc_0
  - pandas=1.0.4=py36hcc1bba6_0
  - pango=1.42.4=h7e27002_1
  - patsy=0.5.1=py36_0
  - pcre=8.43=h0a44026_0
  - pip=20.0.2=py36_3
  - pixman=0.38.0=h1de35cc_0
  - python=3.6.8=haf84260_0
  - python-dateutil=2.8.1=py_0
  - python_abi=3.6=1_cp36m
  - pytz=2020.1=py_0
  - r-base=3.6.1=hcb44179_1
  - r-sys=3.2=r36h46e59ec_0
  - readline=7.0=h1de35cc_5
  - scikit-learn=0.23.1=py36hef903b7_0
  - scipy=1.2.1=py36hbd7caa9_1
  - setuptools=47.1.1=py36_0
  - six=1.15.0=py_0
  - sqlite=3.31.1=h5c1f38d_1
  - statsmodels=0.11.1=py36h37b9a7d_1
  - tapi=1000.10.8=ha1b3eb9_0
  - threadpoolctl=2.1.0=pyh5ca1d4c_0
  - tk=8.6.8=ha441bb4_0
  - tktable=2.10=h1de35cc_0
  - wheel=0.34.2=py36_0
  - xz=5.2.5=h1de35cc_0
  - zlib=1.2.11=h1de35cc_3
  - zstd=1.4.4=h1990bb4_3
prefix: /Users/mac/opt/anaconda3/envs/TestMDP

--------------------------------------------------------------------------------
/test_func/_uti_basic.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#############################################################################
import time
now = time.time
import smtplib, ssl

from multiprocessing import Pool
import multiprocessing

n_cores = multiprocessing.cpu_count()
#############################################################################
dash = "--------------------------------------"
DASH = "\n" + "--------------------------------------" + "\n"
Dash = "\n" + dash
dasH = dash + "\n"
#############################################################################
#%% utility funs

def fun(f, q_in, q_out):
    # worker loop: pull (index, item) pairs until the (None, None) sentinel arrives
    while True:
        i, x = q_in.get()
        if i is None:
            break
        q_out.put((i, f(x)))

def parmap(f, X, nprocs = multiprocessing.cpu_count() - 2):
    # parallel map that preserves the input order of X
    q_in = multiprocessing.Queue(1)
    q_out = multiprocessing.Queue()

    proc = [multiprocessing.Process(target = fun, args = (f, q_in, q_out))
            for _ in range(nprocs)]
    for p in proc:
        p.daemon = True
        p.start()

    sent = [q_in.put((i, x)) for i, x in enumerate(X)]
    [q_in.put((None, None)) for _ in range(nprocs)]
    res = [q_out.get() for _ in range(len(sent))]

    [p.join() for p in proc]

    return [x for i, x in sorted(res)]

def send_email(message = None, email_address = "13300180059@fudan.edu.cn",
               title = "Your results are ready!",
               receiver_email = "Same"):
    port = 465  # For SSL
    # Create a secure SSL context
    context = ssl.create_default_context()
    sender_email = email_address
    if receiver_email == "Same":
        receiver_email = email_address
    email_content = message if message is not None else ""

    # subject line, a blank line, then the body
    message = "Subject: " + title + "\n\n" + email_content

    with smtplib.SMTP_SSL("mail.fudan.edu.cn", port, context = context) as server:
        server.login(email_address, "********")  # password redacted; supply your own
        server.sendmail(sender_email, receiver_email, message)

#############################################################################
def rep_seeds(fun, rep_times):
    """
    non-parallel version of pool.map
    """
    return list(map(fun, range(rep_times)))

def rep_seeds_print(fun, rep_times, init_seed):
    r = []
    start = now()
    for seed in range(rep_times):
        r.append(fun(seed + init_seed))
        if seed % 25 == 0:
            print(round((seed + 1) / rep_times * 100, 2), "% DONE",
                  round((now() - start) / 60, 2), "mins")
    return r
#############################################################################

def round_list(thelist, dec):
    """
    extend np.round to list
    """
    return [round(a, dec) for a in thelist]

def print_time_cost(seed, total_rep, time):
    # parenthesization fixed: the percentage is (seed + 1) / total_rep
    print(round((seed + 1) / total_rep * 100, 3), "% DONE, takes",
          round(time / 60, 3), " mins \n")

def is_disc(v, n):
    # a variable is treated as discrete if it takes at most n distinct values
    return len(set(v)) <= n
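A minimal, hypothetical usage sketch of `parmap` (not part of the package): it behaves like the built-in `map(f, X)`, but evaluates `f` in `nprocs` worker processes and returns results in the original order of `X`. It assumes `/test_func` is on `sys.path`.

```python
# Hypothetical usage sketch of parmap; assumes /test_func is on sys.path.
from _uti_basic import parmap

def slow_square(x):
    return x * x

if __name__ == "__main__":   # guard required where worker processes are spawned
    print(parmap(slow_square, range(8), nprocs = 2))
    # -> [0, 1, 4, 9, 16, 25, 36, 49]
```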
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making

This repository contains the Python implementation for the paper "Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making" (ICML 2020).

## Summary of the paper

The Markov assumption (MA) is fundamental to the empirical validity of reinforcement learning. In this paper, we propose a novel Forward-Backward Learning procedure to test MA in sequential decision making. The proposed test does not assume any parametric form on the joint distribution of the observed data and plays an important role in identifying the optimal policy in high-order Markov decision processes and partially observable MDPs. We apply our test to both synthetic datasets and a real data example from mobile health studies to illustrate its usefulness.

<img src="diag.png" alt="drawing" width="700"/>

## Requirements
Change your working directory to this main folder, run `conda env create --file TestMDP.yml` to create the Conda environment, and then run `conda activate TestMDP` to activate the environment.

## File Overview
1. `/test_func`: main functions for the proposed test
    1. `_core_test_fun.py`: main functions for the proposed test, including Algorithms 1 and 2 in the paper and their components.
    2. `_QRF.py`: the random forest regressor used in our experiments.
    3. `_uti_basic.py` and `_utility.py`: helper functions.
2. `/experiment_script`: scripts for reproducing results; see the next section.
3. `/experiment_func`: supporting functions for the experiments presented in the paper:
    1. `_DGP_Ohio.py`: simulate data and evaluate policies for the HMDP synthetic data section.
    2. `_DGP_TIGER.py`: simulate data for the POMDP synthetic data section.
    3. `_utility_RL.py`: RL algorithms used in the experiments, including FQI, FQE and related functions.

## How to reproduce results in the paper
Simply run the corresponding scripts:

1. Figure 2: `Ohio_simu_testing.py`
2. Figure 3: `Ohio_simu_values.py` and `Ohio_simu_seq_lags.py`
3. Figure 4: `Tiger_simu.py`

## How to test the Markov property for your own data
1. Run `from _core_test_fun import *` to import the required functions.
2. Algorithm 1: decide whether or not your data satisfies the J-th order Markov property.
    1. Make sure your data, the observed trajectories, is a list of [X, A], one entry per trajectory. Here, X is a T x dim_state_variable array of observed states, and A is a T x dim_action_variable array of observed actions.
    2. Run `test(data = data, J = J)`; the output is the p-value. More optional parameters can be found in the file.
3. Algorithm 2: decide whether the system is an MDP (and its order), or whether it is most likely a POMDP.
    1. Make sure your data and parameters satisfy the requirements for `test()`.
    2. Specify the significance level alpha and the order upper bound K.
    3. Run `selectOrder(data = data, K = K, alpha = alpha)`. More optional parameters can be found in the file (see the sketch below).
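As a minimal, self-contained sketch of the workflow above (the trajectories here are randomly generated placeholders, so the test has nothing interesting to reject; only the documented arguments of `test` and `selectOrder` are used):

```python
# A minimal sketch, assuming /test_func is on sys.path and the TestMDP
# environment is active; the data below are synthetic placeholders.
import sys
sys.path.insert(0, "test_func")
from _core_test_fun import *
import numpy as np

N, T, dim_state, dim_action = 20, 100, 3, 1
data = []
for i in range(N):
    X = np.random.randn(T, dim_state)                 # states: T x dim_state_variable
    A = np.random.binomial(1, 0.5, (T, dim_action))   # actions: T x dim_action_variable
    data.append([X, A])

p_value = test(data = data, J = 1)                    # Algorithm 1: test the 1st-order MA
order = selectOrder(data = data, K = 5, alpha = 0.01) # Algorithm 2: select the order
```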
## Citation

Please cite our paper
[Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making (ICML 2020)](http://proceedings.mlr.press/v119/shi20c/shi20c.pdf)

```
@inproceedings{shi2020does,
  title={Does the Markov decision process fit the data: testing for the Markov property in sequential decision making},
  author={Shi, Chengchun and Wan, Runzhe and Song, Rui and Lu, Wenbin and Leng, Ling},
  booktitle={International Conference on Machine Learning},
  pages={8807--8817},
  year={2020},
  organization={PMLR}
}
```


## Contributing

All contributions welcome! All content in this repository is licensed under the MIT license.

--------------------------------------------------------------------------------
/experiment_script/Tiger_simu.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os, sys
package_path = os.path.dirname(os.path.abspath(os.getcwd()))

sys.path.insert(0, package_path + "/test_func")
from _core_test_fun import *

sys.path.insert(0, package_path + "/experiment_func")
from _DGP_TIGER import *

os.environ["OMP_NUM_THREADS"] = "1"
#####################################
# To reduce computational cost, in our experiments we use the "CV_once" option:
# we only do cross-validation in the 1st replication and reuse the chosen
# parameters in the remaining replications. With small-scale experiments,
# the difference from standard cross-validation is negligible and does not
# affect our findings.
#####################################
def one_time(seed = 1, J = 1,
             N = 100, T = 20, T_def = 0,
             B = 100, Q = 10,
             behav_def = 0, obs_def = "alt",
             paras = [100, 3, 20], weighted = True, include_reward = False,
             method = "QRF"):
    """
    include_reward: whether to include the reward in our test
    T_def:
        0: length = T with always listen
        1: truncation
    T: the final length
    """
    ### generate data
    fixed_state_comp = (obs_def == "null")
    MDPs = simu_tiger(N = N, T = T, seed = seed,
                      behav_def = behav_def, obs_def = obs_def,
                      T_def = T_def, include_reward = include_reward, fixed_state_comp = fixed_state_comp)
    T += 1  # due to the DGP
    ### Preprocess
    if fixed_state_comp:
        MDPs, fixed_state_comp = MDPs
    else:
        fixed_state_comp = None
    if T_def == 1:
        MDPs = truncateMDP(MDPs, T)
    if not include_reward:
        MDPs = [a[:2] for a in MDPs]
    N = len(MDPs)
    ### Calculate
    if paras == "CV_once":
        return lam_est(data = MDPs, J = J, B = B, Q = Q, paras = paras, include_reward = include_reward,
                       fixed_state_comp = fixed_state_comp, method = method)
    return test(data = MDPs, J = J, B = B, Q = Q, paras = paras,  # print_time = print_time,
                include_reward = include_reward, fixed_state_comp = fixed_state_comp, method = method)


def one_setting_one_J(rep_times = 10, J = 1,
                      N = 100, T = 20, T_def = 0,
                      B = 100, Q = 10,
                      behav_def = 0, obs_def = "alt",
                      include_reward = False, mute = True,
                      paras = "CV_once", init_seed = 0, parallel = True, method = "QRF"):
    if paras == "CV_once":
        paras = one_time(seed = 0, J = J,
                         N = N, T = T, B = B, Q = Q,
                         behav_def = behav_def, obs_def = obs_def,
                         paras = "CV_once",
                         T_def = T_def, include_reward = include_reward, method = method)
        print("CV paras:", paras)

    def one_test(seed):
        return one_time(seed = seed, J = J,
                        N = N, T = T, B = B, Q = Q,
                        behav_def = behav_def, obs_def = obs_def,
                        T_def = T_def, include_reward = include_reward,
                        paras = paras, method = method)
    p_values = parmap(one_test, range(init_seed, init_seed + rep_times), parallel)
    if not mute:
        print("rejection rates are:", rej_rate_quick(p_values))
    return p_values
print("Import DONE!")

print("n_cores = ", n_cores)


for obs_def in ["null", "alt"]:
    for N in [50, 100, 200]:
        for J in range(1, 11):
            p_values = one_setting_one_J(rep_times = 500, J = J,
                                         N = N, T = 20, T_def = 0,
                                         B = 100, Q = 10,
                                         behav_def = 0, obs_def = obs_def,
                                         include_reward = False, mute = False,
                                         paras = "CV_once", init_seed = 0, parallel = n_cores, method = "QRF")
            rej_rate_quick(p_values)
--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_seq_lags.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os, sys
package_path = os.path.dirname(os.path.abspath(os.getcwd()))

sys.path.insert(0, package_path + "/test_func")
from _core_test_fun import *

sys.path.insert(0, package_path + "/experiment_func")
from _DGP_Ohio import *
from _utility_RL import *

os.environ["OMP_NUM_THREADS"] = "1"

#####################################
# To reduce computational cost, in our experiments we use the "CV_once" option:
# we only do cross-validation in the 1st replication and reuse the chosen
# parameters in the remaining replications. With small-scale experiments,
# the difference from standard cross-validation is negligible and does not
# affect our findings.
#####################################

def one_time_seq(seed = 1, J_upper = 10, alpha_range = [0.02, 0.01, 0.005],
                 N = 10, T = 7 * 8 * 24, B = 100, Q = 10, sd_G = 3,
                 para_ranges = None, n_trees = 100,
                 ):
    ## generate data
    data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
    data = burn_in(data, first_T = 10)
    T -= 10
    # for value evaluation we use the original transitions;
    # do not use normalized data (it will not be dominated as in testing)
    value_data = data
    testing_data = [a[:2] for a in normalize(data)]
    time = now()
    p_values = []
    for J in range(1, J_upper + 1):
        p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = para_ranges[J - 1],
                       n_trees = n_trees, print_time = False, method = "QRF")
        p_values.append(p_value)
        if p_value > alpha_range[0]:
            break
    lags = []
    for alpha in alpha_range:
        for i in range(J_upper):
            if p_values[i] > alpha:
                lags.append(i + 1)
                break
            if i == J_upper - 1:
                lags.append(J_upper)

    if seed % 50 == 0:
        print("** testing time:", now() - time, " for seed = ", seed, "**"); time = now()
    return [lags, p_values]

def one_setting_seq(rep_times = 500, N = 10, T = 24 * 56, B = 100, Q = 10, sd_G = 3,
                    n_trees = 100, alpha_range = [0.02, 0.01, 0.005],
                    init_seed = 0,
                    file = None, J_low = 1, J_upper = 10,
                    parallel = 10):
    # CV_paras for each J
    para_ranges = []
    data = simu_Ohio(T, N, seed = 0, sd_G = sd_G)
    data = burn_in(data, first_T = 10)
    T -= 10
    testing_data = [a[:2] for a in normalize(data)]
    for J in range(1, J_upper + 1):
        paras = lam_est(data = testing_data, J = J, B = B, Q = Q, paras = "CV_once", n_trees = n_trees, method = "QRF")
        para_ranges.append(paras)
    def one_time(seed):
        r = one_time_seq(seed = seed, J_upper = J_upper, alpha_range = alpha_range,
                         N = N, T = T, B = B, Q = Q, sd_G = sd_G,
                         para_ranges = para_ranges, n_trees = n_trees)
        if seed % 50 == 0:
            print(seed, "Done!\n")
        return r

    r = parmap(one_time, range(init_seed, init_seed + rep_times), parallel)
    # different alphas
    lagss, p_valuess = [a[0] for a in r], [a[1] for a in r]
    print(lagss, DASH, DASH, p_valuess, DASH)
    lags_each_alpha = []
    for i in range(len(alpha_range)):
        lags_each_alpha.append([a[i] for a in lagss])
    r = [lags_each_alpha, p_valuess]
    if file is not None:
        print(DASH + str([N, sd_G]), file = file)
        for i in range(len(r)):  # fixed: r has only two components (lags and p-values)
            print(str(r[i]) + dasH, file = file)
    return r
print("import DONE!", "num of cores:", n_cores, DASH)

#####################################
path = "Ohio_seq.txt"
file = open(path, 'w')
rr = []
times = 500
sd_G = 3
for N in [10, 15, 20]:
    print([N, sd_G], ": \n")
    r = one_setting_seq(rep_times = times, N = N, T = 24 * 7 * 8, sd_G = 3,
                        n_trees = 100, B = 100, Q = 10, alpha_range = [0.01, 0.005],
                        init_seed = 0,
                        file = file, J_low = 1, J_upper = 10,
                        parallel = n_cores)
    rr.append(r)
file.close()


with open("Ohio_seq.list", 'wb') as file:
    pickle.dump(rr, file)
file.close()
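The lag-selection rule implemented in `one_time_seq` above can be read off in isolation; here is a minimal sketch (the helper name `select_lag` is ours, not part of the repo): test J = 1, 2, ... in order and report the first lag whose p-value exceeds alpha, falling back to the upper bound if every order is rejected.

```python
# Hypothetical helper restating the stopping rule used in one_time_seq.
def select_lag(p_values, alpha, J_upper):
    for i, p in enumerate(p_values):
        if p > alpha:
            return i + 1   # first non-rejected order
    return J_upper         # all orders up to the bound were rejected

# e.g. select_lag([0.001, 0.002, 0.30], alpha = 0.01, J_upper = 10) returns 3
```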
-------------------------------------------------------------------------------- /experiment_script/.ipynb_checkpoints/Ohio_simu_seq_lags-checkpoint.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os, sys 4 | package_path = os.path.dirname(os.path.abspath(os.getcwd())) 5 | 6 | sys.path.insert(0, package_path + "/test_func") 7 | from _core_test_fun import * 8 | 9 | sys.path.insert(0, package_path + "/experiment_func") 10 | from _DGP_Ohio import * 11 | from _utility_RL import * 12 | 13 | os.environ["OMP_NUM_THREADS"] = "1" 14 | 15 | ##################################### 16 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 17 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 18 | # the difference with standard cross-validation is negligible and will not affect our findings. 19 | ##################################### 20 | 21 | def one_time_seq(seed = 1, J_upper = 10, alpha_range = [0.02, 0.01, 0.005], 22 | N = 10, T = 7 * 8 * 24, B = 100, Q = 10, sd_G = 3, 23 | para_ranges = None, n_trees = 100, 24 | ): 25 | ## generate data 26 | data = simu_Ohio(T, N, seed = seed, sd_G = sd_G) 27 | data = burn_in(data, first_T = 10) 28 | T -= 10 29 | # for value evaluation [we will use the original transition], 30 | # do not use normalized data[will not be dominated like testing] 31 | value_data = data 32 | testing_data = [a[:2] for a in normalize(data)] 33 | time = now() 34 | p_values = [] 35 | for J in range(1, J_upper + 1): 36 | p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = para_ranges[J - 1], 37 | n_trees = n_trees, print_time = False, method = "QRF") 38 | p_values.append(p_value) 39 | if p_value > alpha_range[0]: 40 | break 41 | lags = [] 42 | for alpha in alpha_range: 43 | for i in range(J_upper): 44 | if p_values[i] > alpha: 45 | lags.append(i + 1) 46 | break 47 | if i == J_upper - 1: 48 | lags.append(J_upper) 49 | 50 | if seed % 50 == 0: 51 | print("** testing time:", now() - time, " for seed = ", seed,"**"); time = now() 52 | return [lags, p_values] 53 | 54 | def one_setting_seq(rep_times = 500, N = 10, T = 24 * 56, B = 100, Q = 10, sd_G = 3, 55 | n_trees = 100, alpha_range = [0.02, 0.01, 0.005], 56 | init_seed = 0, 57 | file = None, J_low = 1, J_upper = 10, 58 | parallel = 10): 59 | # CV_paras for each J 60 | para_ranges = [] 61 | data = simu_Ohio(T, N, seed = 0, sd_G = sd_G) 62 | data = burn_in(data, first_T = 10) 63 | T -= 10 64 | testing_data = [a[:2] for a in normalize(data)] 65 | for J in range(1, J_upper + 1): 66 | paras = lam_est(data = testing_data, J = J, B = B, Q = Q, paras = "CV_once", n_trees = n_trees, method = "QRF") 67 | para_ranges.append(paras) 68 | def one_time(seed): 69 | r = one_time_seq(seed = seed, J_upper = J_upper, alpha_range = alpha_range, 70 | N = N, T = T, B = B, Q = Q, sd_G = sd_G, 71 | para_ranges = para_ranges, n_trees = n_trees) 72 | if seed % 50 == 0: 73 | print(seed, "Done!\n") 74 | return r 75 | 76 | r = parmap(one_time, range(init_seed, init_seed + rep_times), parallel) 77 | # different alphas 78 | lagss, p_valuess = [a[0] for a in r], [a[1] for a in r] 79 | print(lagss, DASH, DASH, p_valuess, DASH) 80 | lags_each_alpha = [] 81 | for i in range(len(alpha_range)): 82 | lags_each_alpha.append([a[i] for a in lagss]) 83 | r = [lags_each_alpha, p_valuess] 84 | if file is not None: 85 | print(DASH + str([N, sd_G]), file = file) 86 | 
for i in range(4): 87 | print(str(r[i]) + dasH, file = file) 88 | return r 89 | print("import DONE!", "num of cores:", n_cores, DASH) 90 | 91 | ##################################### 92 | path = "Ohio_seq.txt" 93 | file = open(path, 'w') 94 | rr = [] 95 | times = 500 96 | sd_G = 3 97 | for N in [10, 15, 20]: 98 | print([N, sd_G],": \n") 99 | r = one_setting_seq(rep_times = times, N = N, T = 24 * 7 * 8, sd_G = 3, 100 | n_trees = 100, B = 100, Q = 10, alpha_range = [0.01, 0.005], 101 | init_seed = 0, 102 | file = file, J_low = 1, J_upper = 10, 103 | parallel = n_cores) 104 | rr.append(r) 105 | file.close() 106 | 107 | 108 | with open("Ohio_seq.list", 'wb') as file: 109 | pickle.dump(rr, file) 110 | file.close() 111 | -------------------------------------------------------------------------------- /test_func/.ipynb_checkpoints/_DGP_TIGER-checkpoint.py: -------------------------------------------------------------------------------- 1 | #%% packages 2 | #%% 3 | from ._QRF import * 4 | from ._uti_basic import * 5 | from ._utility import * 6 | ############################################################################# 7 | ############################################################################# 8 | 9 | def list2Matrix(List): 10 | # return a n * 1 matrix 11 | return np.array(np.expand_dims(np.array(List),1)) 12 | 13 | #%% 14 | 15 | def TIGER_dynamics(state, action): 16 | p_correct = 0.7 # larger -> more POMDP 17 | # obs -> action -> obs, reward 18 | if action == 0: # listen 19 | p = rbin(1, p_correct) 20 | obs = p * state + (1-p) * (0-state) 21 | reward = -1 22 | else: # action = -1 or 1 23 | if action == state: 24 | reward = -100 25 | else: # no tiger door 26 | reward = 10 27 | obs = 3 # end status 28 | return reward, obs 29 | 30 | 31 | def TIGER_choose_action(obs, behav_def = 0): 32 | """ 33 | behav_def: 34 | 0. always listen 35 | 1. random 36 | 2. adaptive 37 | """ 38 | p_listen = 0.9 # for random policy 39 | T_must_obs = 10 # for adaptive plicy 40 | 41 | if behav_def == 0: 42 | return 0 # always listen 43 | elif behav_def == 1: 44 | if rbin(1, p_listen): 45 | return 0 46 | elif rbin(1, 0.5): 47 | return 1 48 | else: 49 | return -1 50 | elif behav_def == 2: 51 | """ based on obs, Chengchun's approach 52 | 1. if n <= T_must_obs: obs 53 | 2. else: n > T_must_obs 时 p_listen = (1- max(p_left,p_right)) * 2, o.w. open the door accourding to the prob. 54 | """ 55 | if obs[1] <= T_must_obs: 56 | return 0 57 | else: 58 | p_l = obs[0] 59 | p_listen = (1- max(p_l,1 - p_l)) * 2 60 | if rbin(1, p_listen): 61 | return 0 62 | elif rbin(1, p_l): 63 | return -1 64 | else: 65 | return 1 66 | 67 | def simu_tiger(N = 1, T = 20, seed = 1, behav_def = 0, obs_def = "alt", T_def = 0, include_reward = True, fixed_state_comp = False): 68 | """ 69 | T: spycify the game here 70 | A: "listen"/ "open_l" / "open_r" ---- 0 / -1 / +1 71 | State: "l" / "r" : -1 / +1 72 | Obervation: hear "l" / "r" 73 | Reward: -1, 10, - 100 74 | Returns: a list (len = N) of [$O_{T*dim_O},A_{T*1}$] or [O,A,R] 75 | 76 | behav_def: 77 | 0. always listen 78 | 1. random 79 | 2. 
adaptive 80 | obs_def: 81 | "alt": [1,-1] 82 | 1: [p] 83 | 2: [p,n] 84 | T_def: 85 | 0: length = T with always listen 86 | 1: truncation 87 | """ 88 | # gamma = .9 89 | 90 | MDPs = [] 91 | rseed(seed); npseed(seed) 92 | init_state = rbin(1, .5, N) * 2 - 1 93 | true_states = [] 94 | 95 | if T_def == 1: 96 | def stop(obs,t): 97 | return obs != 3 98 | else: 99 | def stop(obs,t): 100 | return t < T 101 | 102 | for i in range(N): 103 | ## Initialization 104 | state = init_state[i] 105 | obs, obs_hist = 0, [0] 106 | A = [] 107 | R = [0] # for alignment purpose 108 | O, O_1 = [[0.5, 0]], [0.5] 109 | t, left_cnt = 0, 0 110 | 111 | while(stop(obs,t)): # not in the Terminal state 112 | ## choose actiom, receive reward and state trainsition [observations] 113 | action = TIGER_choose_action(obs = O[-1], behav_def = behav_def) # obs = [p,n], old version 114 | reward, obs = TIGER_dynamics(state,action) 115 | 116 | ## record 117 | left_cnt += (obs == -1) 118 | t += 1 119 | # for obs_def_0 120 | obs_hist.append(obs) 121 | # for obs_def_1 122 | O_1.append(left_cnt/t) 123 | # for action choosing and obs_def_2 124 | if obs == 3: 125 | O.append([left_cnt/(t-1),t]) 126 | else: 127 | O.append([left_cnt/t,t]) 128 | A.append(action) 129 | R.append(reward) 130 | A.append(3) 131 | 132 | if obs_def == "alt": 133 | O = list2Matrix(obs_hist) 134 | elif obs_def == "null": 135 | # O = list2Matrix(obs_hist) 136 | if fixed_state_comp: 137 | O = list2Matrix(obs_hist) 138 | true_states.append(state) 139 | else: 140 | O = np.array([[a,state] for a in obs_hist]) 141 | # print(O.shape) 142 | elif obs_def == 1: 143 | O = list2Matrix(O_1) 144 | elif obs_def == 2: 145 | O = np.array(O) 146 | if include_reward: 147 | MDP = [O, list2Matrix(A), list2Matrix(R)] 148 | else: 149 | MDP = [O, list2Matrix(A)] 150 | MDPs.append(MDP) 151 | if fixed_state_comp: 152 | return [MDPs,true_states] 153 | return MDPs 154 | 155 | -------------------------------------------------------------------------------- /experiment_func/_DGP_TIGER.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | #%% packages 5 | #%% 6 | import os, sys 7 | package_path = os.path.dirname(os.path.abspath(os.getcwd())) 8 | 9 | sys.path.insert(0, package_path + "/test_func") 10 | from _core_test_fun import * 11 | ############################################################################# 12 | ############################################################################# 13 | 14 | def list2Matrix(List): 15 | # return a n * 1 matrix 16 | return np.array(np.expand_dims(np.array(List),1)) 17 | 18 | #%% 19 | 20 | def TIGER_dynamics(state, action): 21 | p_correct = 0.7 # larger -> more POMDP 22 | # obs -> action -> obs, reward 23 | if action == 0: # listen 24 | p = rbin(1, p_correct) 25 | obs = p * state + (1-p) * (0-state) 26 | reward = -1 27 | else: # action = -1 or 1 28 | if action == state: 29 | reward = -100 30 | else: # no tiger door 31 | reward = 10 32 | obs = 3 # end status 33 | return reward, obs 34 | 35 | 36 | def TIGER_choose_action(obs, behav_def = 0): 37 | """ 38 | behav_def: 39 | 0. always listen 40 | 1. random 41 | 2. 
adaptive
 42 |     """
 43 |     p_listen = 0.9 # for random policy
 44 |     T_must_obs = 10 # for adaptive policy
 45 | 
 46 |     if behav_def == 0:
 47 |         return 0 # always listen
 48 |     elif behav_def == 1:
 49 |         if rbin(1, p_listen):
 50 |             return 0
 51 |         elif rbin(1, 0.5):
 52 |             return 1
 53 |         else:
 54 |             return -1
 55 |     elif behav_def == 2:
 56 |         """ based on obs, Chengchun's approach
 57 |         1. if n <= T_must_obs: listen
 58 |         2. else (n > T_must_obs): set p_listen = (1 - max(p_left, p_right)) * 2; otherwise open a door according to the estimated probability.
 59 |         """
 60 |         if obs[1] <= T_must_obs:
 61 |             return 0
 62 |         else:
 63 |             p_l = obs[0]
 64 |             p_listen = (1- max(p_l,1 - p_l)) * 2
 65 |             if rbin(1, p_listen):
 66 |                 return 0
 67 |             elif rbin(1, p_l):
 68 |                 return -1
 69 |             else:
 70 |                 return 1
 71 | 
 72 | def simu_tiger(N = 1, T = 20, seed = 1, behav_def = 0, obs_def = "alt", T_def = 0, include_reward = True, fixed_state_comp = False):
 73 |     """
 74 |     T: specify the game length here
 75 |     A: "listen"/ "open_l" / "open_r" ---- 0 / -1 / +1
 76 |     State: "l" / "r" : -1 / +1
 77 |     Observation: hear "l" / "r"
 78 |     Reward: -1, 10, -100
 79 |     Returns: a list (len = N) of [$O_{T*dim_O},A_{T*1}$] or [O,A,R]
 80 | 
 81 |     behav_def:
 82 |         0. always listen
 83 |         1. random
 84 |         2. adaptive
 85 |     obs_def:
 86 |         "alt": [1,-1]
 87 |         1: [p]
 88 |         2: [p,n]
 89 |     T_def:
 90 |         0: length = T with always listen
 91 |         1: truncation
 92 |     """
 93 |     # gamma = .9
 94 | 
 95 |     MDPs = []
 96 |     rseed(seed); npseed(seed)
 97 |     init_state = rbin(1, .5, N) * 2 - 1
 98 |     true_states = []
 99 | 
100 |     if T_def == 1:
101 |         def stop(obs,t):
102 |             return obs != 3
103 |     else:
104 |         def stop(obs,t):
105 |             return t < T
106 | 
107 |     for i in range(N):
108 |         ## Initialization
109 |         state = init_state[i]
110 |         obs, obs_hist = 0, [0]
111 |         A = []
112 |         R = [0] # for alignment purpose
113 |         O, O_1 = [[0.5, 0]], [0.5]
114 |         t, left_cnt = 0, 0
115 | 
116 |         while(stop(obs,t)): # not in the Terminal state
117 |             ## choose action, receive reward and state transition [observations]
118 |             action = TIGER_choose_action(obs = O[-1], behav_def = behav_def) # obs = [p,n], old version
119 |             reward, obs = TIGER_dynamics(state,action)
120 | 
121 |             ## record
122 |             left_cnt += (obs == -1)
123 |             t += 1
124 |             # for obs_def_0
125 |             obs_hist.append(obs)
126 |             # for obs_def_1
127 |             O_1.append(left_cnt/t)
128 |             # for action choosing and obs_def_2
129 |             if obs == 3:
130 |                 O.append([left_cnt/(t-1),t])
131 |             else:
132 |                 O.append([left_cnt/t,t])
133 |             A.append(action)
134 |             R.append(reward)
135 |         A.append(3)
136 | 
137 |         if obs_def == "alt":
138 |             O = list2Matrix(obs_hist)
139 |         elif obs_def == "null":
140 |             # O = list2Matrix(obs_hist)
141 |             if fixed_state_comp:
142 |                 O = list2Matrix(obs_hist)
143 |                 true_states.append(state)
144 |             else:
145 |                 O = np.array([[a,state] for a in obs_hist])
146 |                 # print(O.shape)
147 |         elif obs_def == 1:
148 |             O = list2Matrix(O_1)
149 |         elif obs_def == 2:
150 |             O = np.array(O)
151 |         if include_reward:
152 |             MDP = [O, list2Matrix(A), list2Matrix(R)]
153 |         else:
154 |             MDP = [O, list2Matrix(A)]
155 |         MDPs.append(MDP)
156 |     if fixed_state_comp:
157 |         return [MDPs,true_states]
158 |     return MDPs
159 | 
--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_Funcs_Real_Ohio-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #############################################################################
  2 | #%%
  3 | from ._utility import *
  4 | from ._QRF import *
  5 | from ._core_test_fun import *
  6 | from ._DGP_Ohio import *
  7 | from ._uti_basic import *
  8 | from ._utility_RL import *
  9 | os.environ["OMP_NUM_THREADS"] = "1"
 10 | n_cores = multiprocessing.cpu_count()
 11 | #############################################################################
 12 | #%% Data
 13 | import sys, os
 14 | path = os.getcwd() + "/code_data/Data_Ohio.csv"
 15 | data = pd.read_csv(path, header = 0)
 16 | data0 = np.array(data)
 17 | data0 = data0[:,1:] # no row index
 18 | #############################################################################
 19 | 
 20 | def generate_initial_states(N_init = 100, J_upper = 5, seed = 0):
 21 |     """generate initial states for comparison of values in the Ohio real data.
22 | """ 23 | init_obs, init_A = simu_Ohio(T = J_upper, N = N_init, 24 | seed = seed, sd_G = 3, matrix_output = True, is_real = True) 25 | init_A = init_A.reshape(1, J_upper, N_init) 26 | initial_states = np.concatenate([init_obs, init_A], 0) 27 | initial_states = initial_states.reshape((4 * J_upper, N_init), order = "F") 28 | initial_states = initial_states[:(J_upper * 4 - 1), :] 29 | return initial_states.T 30 | 31 | 32 | def process_data_Nsplit(index, T): 33 | """ 34 | data: a list(len-N) of [T*3 states, T * 1 actions T * 1 rewards] 35 | T: length = 1100 36 | """ 37 | data, J_data = [], [] 38 | for i in index: 39 | temp = data0[T * i : T * (i+1)].copy() 40 | states = temp[:, :3] 41 | actions = temp[:, 3].reshape(-1, 1) 42 | rewards = np.roll(apply_v(Glucose2Reward, states[:, 0]), shift = -1).reshape(-1, 1) 43 | J_data.append([states, actions]) 44 | data.append([states, actions, rewards]) 45 | return data.copy(), normalize(J_data.copy()) 46 | 47 | 48 | ############################################################################# 49 | ############################################################################# 50 | #%% 51 | 52 | def real_ohio_Nsplit(J_upper = 10, gamma = 0.9, gamma_eval = 0.9, alpha = 0.02, RF_paras = "CV", n_trees = 100, 53 | N_init = 100, seed = 1, n_set = 20, 54 | parallel = False, T_unify = 1100, threshold = 1e-4): 55 | a = now() 56 | init_states = generate_initial_states(N_init = N_init, J_upper = J_upper, seed = seed) # N * (J_upper * 4 - 1) 57 | 58 | arr = [i for i in range(6)] 59 | rseed(seed); npseed(seed) 60 | all_possible_train_set = permutation(list(combinations(arr, 3)) ) 61 | def one_time(i): 62 | time = now() 63 | train_set = all_possible_train_set[i] 64 | eval_set = set([i for i in range(6)]) - set(train_set) 65 | train_data, J_data = process_data_Nsplit(train_set, T = T_unify) 66 | eval_data, _ = process_data_Nsplit(eval_set, T = T_unify) 67 | 68 | ### Given a J, get the optimal policy and evaluate its performance 69 | eval_PatternSets = MDP2Trans(MDPs = eval_data, J = J_upper, action_in_states = True) 70 | values = [] 71 | for J in range(1, J_upper + 1): 72 | ### Learn the optimal policies 73 | Learning_PatternSets = MDP2Trans(MDPs = train_data, J = J, action_in_states = True) 74 | Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma, 75 | RF_paras = RF_paras, n_trees = n_trees, threshold = threshold) 76 | ### Evaluate the policy: learned Q and observed trajectories 77 | V_func = FQE(PatternSets = eval_PatternSets, Q_func = Q_func, J = J, 78 | gamma = gamma_eval, RF_paras = RF_paras, n_trees = n_trees, 79 | threshold = threshold) 80 | 81 | ### Evaluate using init states 82 | values_integration = V_func(init_states) 83 | value = np.round(np.mean(values_integration), 4) 84 | values.append(value) 85 | ### Store results 86 | print("The ", i + 1, "round ends with Time cost:", np.round(now() - time,2), "\n") 87 | 88 | return values 89 | 90 | r_values = parmap(one_time, range(n_set)) 91 | r_values = np.array(r_values) 92 | 93 | print("mean:", np.mean(r_values, 0), "\n", "std:", np.std(r_values, 0)) 94 | print("time cost: ", now() - a) 95 | return r_values 96 | 97 | ############################################################################# 98 | #%% Decide the order with all data 99 | 100 | def decide_J(data, J_range, paras = "CV", n_trees = 100, T = 1100): 101 | data_J = [] 102 | for i in range(6): 103 | temp = data[T * i : T * (i + 1)] 104 | temp = [temp[:, :3], temp[:, 3].reshape(-1, 1)] 105 | data_J.append(temp) 106 | data_J = normalize(data_J) 
107 | r = selectOrder(data_J, B = 200, Q = 10, L = 3, alpha = 0.1, K = 10, paras="CV", n_trees = n_trees) 108 | return r 109 | 110 | # def decide_J(data, J_range, paras = "CV", n_trees = 100, T = 1100): 111 | # data_J = [] 112 | # for i in range(6): 113 | # temp = data[T * i : T * (i + 1)] 114 | # temp = [temp[:, :3], temp[:, 3].reshape(-1, 1)] 115 | # data_J.append(temp) 116 | # data_J = normalize(data_J) 117 | # def test_one_J(J): 118 | # return test(data_J, J = J, B = 200, Q = 10, paras = paras, n_trees = n_trees) 119 | # r = parmap(test_one_J, J_range, n_cores) 120 | # print(r) 121 | # return r 122 | 123 | -------------------------------------------------------------------------------- /experiment_script/Ohio_simu_values.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os, sys 4 | package_path = os.path.dirname(os.path.abspath(os.getcwd())) 5 | 6 | sys.path.insert(0, package_path + "/test_func") 7 | from _core_test_fun import * 8 | 9 | sys.path.insert(0, package_path + "/experiment_func") 10 | from _DGP_Ohio import * 11 | from _utility_RL import * 12 | 13 | os.environ["OMP_NUM_THREADS"] = "1" 14 | ##################################### 15 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 16 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 17 | # the difference with standard cross-validation is negligible and will not affect our findings. 18 | ##################################### 19 | def one_time_value_only(seed = 1, J = 1, J_upper = 10, 20 | N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3, 21 | gamma_NFQ = 0.9, 22 | T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4, 23 | paras = "CV_once", n_trees = 100, 24 | first_T = 10, true_lag = 4): 25 | ## generate data 26 | a = now() 27 | data = simu_Ohio(T, N, seed = seed, sd_G = sd_G) 28 | data = burn_in(data,first_T) 29 | T -= first_T 30 | value_data = data 31 | testing_data = [a[:2] for a in normalize(data)] 32 | ## this one time is used to get paras 33 | if paras == "CV_once": 34 | return lam_est(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees) 35 | time = now() 36 | Learning_PatternSets = MDP2Trans(MDPs = value_data, J = J, action_in_states = True) 37 | Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma_NFQ, 38 | RF_paras = paras, n_trees = n_trees, threshold = thre_eval) 39 | if seed % 100 == 0: 40 | print("** Learning [for value] time cost:", np.round(now() - time, 3) , "**"); time = now() 41 | J_values = eval_Ohio_policy(Q_func = Q_func, J_Q = J, J_upper = J_upper, 42 | T = T_eval, gamma = gamma_eval, N = N_eval, 43 | sd_G = sd_G, seed = 0)#, true_lag = 4) 44 | return np.mean(J_values) 45 | 46 | 47 | def one_setting_one_J_value_only(rep_times = 500, J = 1, J_upper = 10, 48 | N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3, 49 | paras = "CV_once", n_trees = 100, 50 | gamma_NFQ = 0.9, 51 | T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4, 52 | parallel = False, path = None): 53 | a = now() 54 | if paras == "CV_once": 55 | paras = one_time_value_only(seed = 0, J = J, J_upper = J_upper, 56 | N = N, T = T, B = B, Q = Q, 57 | sd_G = sd_G, gamma_NFQ = gamma_NFQ, 58 | T_eval = T_eval, N_eval = N_eval, 59 | gamma_eval = gamma_eval, thre_eval = thre_eval, 60 | paras = "CV_once", n_trees = n_trees) 61 | def one_time(seed): 62 | return one_time_value_only(seed = seed, J 
= J, J_upper = J_upper,
 63 |                                    N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 64 |                                    gamma_NFQ = gamma_NFQ,
 65 |                                    T_eval = T_eval, N_eval = N_eval,
 66 |                                    gamma_eval = gamma_eval, thre_eval = thre_eval,
 67 |                                    paras = paras, n_trees = n_trees)
 68 | 
 69 |     values = parmap(one_time, range(rep_times), parallel)
 70 | 
 71 |     print("total time cost for one J:", np.round(now() - a, 3), Dash)
 72 | 
 73 |     u_val = np.round(np.mean(values),4)
 74 |     sd_val = np.round(np.std(values),4)
 75 | 
 76 |     return values, u_val, sd_val
 77 | 
 78 | 
 79 | 
 80 | def one_setting_value_only(rep_times = 500,
 81 |                            N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 82 |                            paras = "CV_once", n_trees = 100,
 83 |                            gamma_NFQ = 0.9,
 84 |                            T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 85 |                            parallel = False, file = None):
 86 |     rr = []
 87 |     value_details = []
 88 |     for J in range(1, 11):
 89 |         r = one_setting_one_J_value_only(rep_times = rep_times, J = J, J_upper = 10,
 90 |                                          N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 91 |                                          paras = paras, n_trees = n_trees,
 92 |                                          gamma_NFQ = gamma_NFQ,
 93 |                                          T_eval = T_eval, N_eval = N_eval,
 94 |                                          gamma_eval = gamma_eval, thre_eval = thre_eval,
 95 |                                          parallel = parallel)
 96 |         rr.append([r[1], r[2]])
 97 |         value_details.append(r[0])
 98 |         print("the current results for J = ", J, ":\n", rr, DASH)
 99 |     print_content = "N = " + str(N) + ", sd = " + str(sd_G) + ": " + str(rr)
100 |     print(print_content, file = file)
101 |     return rr, value_details
102 | 
103 | print("import DONE!", "num of cores:", n_cores, DASH)
104 | 
105 | 
106 | #%% Time Cost
107 | 
108 | 
109 | path = "Ohio_simu_values.txt" # 01/28: rerun and reproduced
110 | file = open(path, 'w')
111 | reps = 500
112 | gamma = 0.9
113 | T_eval = 60
114 | sd_G = 3
115 | value_details = []
116 | mean_values = []
117 | 
118 | for N in [10, 15, 20]:
119 |     print(DASH, "[N, sd_G] = ", [N, sd_G], DASH)
120 |     r, value_detail = one_setting_value_only(rep_times = reps,
121 |                                              N = N, T = 8 * 7 * 24, B = 100, Q = 10, sd_G = sd_G,
122 |                                              paras = "CV_once", n_trees = 100,
123 |                                              gamma_NFQ = gamma,
124 |                                              T_eval = 60, N_eval = 100,
125 |                                              gamma_eval = gamma, thre_eval = 1e-4,
126 |                                              parallel = n_cores, file = file)
127 |     print(DASH, "[N, sd_G] = ", [N, sd_G], "r:", r, DASH)
128 |     mean_values.append(r)
129 |     value_details.append(value_detail)
130 | file.close()
131 | 
132 | res = [mean_values, value_details]
133 | with open("Ohio_simu_value.list", 'wb') as file:
134 |     pickle.dump(res, file)
135 | file.close()
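For later analysis, the pickled results above can be loaded back as follows (a minimal sketch; it assumes the script above has been run so that "Ohio_simu_value.list" exists in the working directory):

    import pickle

    with open("Ohio_simu_value.list", "rb") as f:
        mean_values, value_details = pickle.load(f)
    # mean_values[i][j] = [mean, std] of the discounted value for the i-th N in
    # [10, 15, 20] and lag J = j + 1; value_details holds the per-replication values.
    print(len(mean_values), len(value_details))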
--------------------------------------------------------------------------------
/test_func/_utility.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #############################################################################
  4 | #############################################################################
  5 | #%% packages
  6 | import numpy as np
  7 | import scipy as sp
  8 | from scipy.linalg import sqrtm
  9 | import pandas as pd
 10 | from numpy import absolute as np_abs
 11 | from random import seed as rseed
 12 | from numpy.random import seed as npseed
 13 | from numpy.random import normal as rnorm
 14 | from numpy.random import uniform as runi
 15 | from numpy.random import binomial as rbin
 16 | from numpy.random import shuffle,randn, permutation # randn(d1,d2) is d1*d2 i.i.d N(0,1)
 17 | from numpy import array as arr
 18 | from numpy import sqrt, cos, sin, exp, dot, diag, quantile, zeros, roll, multiply, stack, concatenate
 19 | from numpy import concatenate as v_add
 20 | from numpy.linalg import norm
 21 | from numpy import apply_along_axis as apply
 22 | from sklearn.preprocessing import StandardScaler
 23 | import pickle
 24 | from sklearn.ensemble import RandomForestRegressor as RF
 25 | from sklearn.model_selection import GridSearchCV
 26 | from itertools import combinations
 27 | import operator
 28 | import time
 29 | now = time.time
 30 | from sklearn.ensemble import RandomForestRegressor as RandomForest
 31 | from sklearn.model_selection import KFold
 32 | from statsmodels.stats import proportion as prop
 33 | import os
 34 | 
 35 | #############################################################################
 36 | #############################################################################
 37 | 
 38 | #%% utility funs
 39 | 
 40 | def CI_prop(n,p):
 41 |     """
 42 |     Input: In n reps, observed proportion p
 43 |     Output: the 95% CI of this p
 44 |     """
 45 |     r = prop.proportion_confint(n * p, n, alpha = 0.05, method='binom_test')
 46 |     return np.round([r[0], r[1]],4)
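For example, with 500 replications and an observed rejection proportion of 0.05, CI_prop returns the rounded 95% binomial confidence bounds:

    # 500 reps, observed rejection rate 5%;
    # the interval should cover the nominal level 0.05,
    # roughly (0.033, 0.073) for these inputs.
    lower, upper = CI_prop(500, 0.05)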
 47 | 
 48 | def normalize_unit_sd(array):
 49 |     def temp(v):
 50 |         return v / np.std(v)
 51 |     return np.array(apply(temp, 0, array))
 52 | 
 53 | def apply_v(f,v):
 54 |     return np.array([f(a) for a in v])
 55 | 
 56 | def burn_in(data,first_T):
 57 |     if len(data[0]) == 2:
 58 |         return [[patient[0][first_T:,:], patient[1][first_T:,:]] for patient in data]
 59 |     else:
 60 |         return [[patient[0][first_T:,:], patient[1][first_T:,:], patient[2][first_T:,:]] for patient in data]
 61 | 
 62 | flatten = lambda l: [item for sublist in l for item in sublist]
 63 | 
 64 | def is_null(true_lag,J):
 65 |     if J >= true_lag:
 66 |         return "(H0)"
 67 |     else:
 68 |         return "(H1)"
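The data structure used throughout this module: a dataset is a length-N list of per-trajectory [states, actions] (or [states, actions, rewards]) arrays with shapes T*dx and T*1. A toy sketch of how burn_in (above) and normalize (defined below) are chained, with hypothetical shapes chosen purely for illustration:

    import numpy as np

    N, T, dx = 5, 50, 3
    toy = [[np.random.randn(T, dx),                            # states, T * dx
            np.random.binomial(1, 0.5, (T, 1)).astype(float)]  # actions, T * 1
           for _ in range(N)]
    toy = burn_in(toy, first_T = 10)   # drop the first 10 (non-stationary) steps
    toy = normalize(toy)               # scale each state coordinate to unit sd
    assert toy[0][0].shape == (T - 10, dx)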
return "(H1)" 69 | 70 | def list2Matrix(List): 71 | # return a n * 1 matrix 72 | return np.array(np.expand_dims(np.array(List),1)) 73 | 74 | def round_list(thelist,dec): 75 | """ 76 | extend np.round to list 77 | """ 78 | return [round(a,dec) for a in thelist] 79 | 80 | 81 | def normalize(data, centralized = False): 82 | """ 83 | normalize the simulated data 84 | data: len-n of [T*dx,T*da] 85 | Returns: data 86 | """ 87 | state, action = [a[0].copy() for a in data], [a[1].copy() for a in data] 88 | n = len(data) 89 | dx = state[0].shape[1] 90 | 91 | ### States 92 | for i in range(dx): 93 | s = np.array([a[:,i] for a in state]) 94 | mean, sd = np.mean(s), np.std(s) 95 | for j in range(n): 96 | if centralized: 97 | state[j][:,i] -= mean 98 | if sd != 0: 99 | state[j][:,i] = state[j][:,i] / sd 100 | 101 | ### Action: 102 | a = np.array(action) 103 | mean, sd = np.mean(a), np.std(a) 104 | # sd = 1 105 | # action = [ a / sd for a in action] 106 | 107 | ### Reward 108 | if len(data[0]) == 3: 109 | reward = [a[2] for a in data] 110 | a = np.array(reward) 111 | mean, sd = np.mean(a), np.std(a) 112 | if sd == 0: 113 | sd = 1 114 | if centralized: 115 | reward = [ (a - mean) / sd for a in reward] 116 | else: 117 | reward = [ a / sd for a in reward] 118 | return [[state[i],action[i],reward[i]] for i in range(n)] 119 | else: 120 | return [[state[i],action[i]] for i in range(n)] 121 | 122 | #%% utility funs 123 | 124 | 125 | def p_value(test_stat,sim_test_stats): 126 | """ 127 | one testing result (p-value), Bootstrap-based. 128 | 129 | Default: the larger, the significant 130 | Return: p-value 131 | """ 132 | return round(1 - sum(np.abs(test_stat) > np.abs(sim_test_stats)) / len(sim_test_stats),4) 133 | 134 | def rej_rate(p_values, alphas): 135 | rep_times = len(p_values) 136 | p_values = np.array(p_values) 137 | RRs = [] 138 | for alpha in alphas: 139 | RR = sum(p_values < alpha) / rep_times 140 | RRs.append(RR) 141 | print("Under alpha", alpha, "the rejection rate is:", RR) 142 | return RRs 143 | 144 | def rej_rate_quite(p_values,alphas,file = None): 145 | rep_times = len(p_values) 146 | p_values = np.array(p_values) 147 | 148 | RRs = [] 149 | for alpha in alphas: 150 | RR = sum(p_values < alpha) / rep_times 151 | RRs.append(RR) 152 | return RRs 153 | 154 | 155 | def rej_rate_quick(p): 156 | r = [] 157 | T = len(p) 158 | p = np.array(p) 159 | for i in [0.01,0.05,0.1]: 160 | r.append(np.sum( p < i) / T) 161 | return r 162 | 163 | def rej_rate_seq(results): 164 | """ 165 | Imput: a list (len = times) of [1,0] 166 | Output: [0.2,0.7] 167 | """ 168 | results = np.array(results) 169 | times = results.shape[0] 170 | return np.sum(results,0) / times 171 | 172 | 173 | 174 | def seq_rej_rate_mul_J(ps,alphas): 175 | """ 176 | ps: len-J_upper list of np.array(times * 2) 177 | Output: if always rej, then rej 178 | """ 179 | rej = [] 180 | for alpha in alphas: 181 | aa = [np.array(p) < alpha for p in ps] 182 | bb = np.sum(np.array(aa), 0) == len(ps) 183 | rate = np.round(np.mean(bb, 0),3) 184 | rej.append(rate) 185 | return rej 186 | 187 | 188 | #%% 189 | def truncateMDP(MDPs,T): 190 | data = [] 191 | l = len(MDPs[0]) 192 | for MDP in MDPs: 193 | if (MDP[0].shape[0]) >= T: 194 | data.append([MDP[i][:T] for i in range(l)]) 195 | return data 196 | 197 | 198 | def p_sd(T): 199 | r = [] 200 | for p_true in [0.01,0.05,0.1]: 201 | r.append(np.round(np.sqrt(p_true * (1 - p_true) / T),4)) 202 | return r 203 | 204 | def latex_ohio_one_T_sd_G_mul_j(a, file): 205 | for J in range(len(a)): 206 | print("J = ", J + 1, end = " 
" , file = file) 207 | aa = a[J] 208 | for alpha in range(3): 209 | print(aa[alpha][0],"& ", end = "", file = file) # max 210 | print("\n", file = file) 211 | 212 | def print_progress(i, N): 213 | if (i * 100 // N == 0): 214 | print("#", end = "", flush = True) -------------------------------------------------------------------------------- /test_func/.ipynb_checkpoints/_utility-checkpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | ############################################################################# 4 | ############################################################################# 5 | #%% packages 6 | import numpy as np 7 | import scipy as sp 8 | from scipy.linalg import sqrtm 9 | import pandas as pd 10 | from numpy import absolute as np_abs 11 | from random import seed as rseed 12 | from numpy.random import seed as npseed 13 | from numpy.random import normal as rnorm 14 | from numpy.random import uniform as runi 15 | from numpy.random import binomial as rbin 16 | from numpy.random import shuffle,randn, permutation # randn(d1,d2) is d1*d2 i.i.d N(0,1) 17 | from numpy import array as arr 18 | from numpy import sqrt, cos, sin, exp, dot, diag, quantile, zeros, roll, multiply, stack, concatenate 19 | from numpy import concatenate as v_add 20 | from numpy.linalg import norm 21 | from numpy import apply_along_axis as apply 22 | from sklearn.preprocessing import StandardScaler 23 | import pickle 24 | from sklearn.ensemble import RandomForestRegressor as RF 25 | from sklearn.model_selection import GridSearchCV 26 | from itertools import combinations 27 | import operator 28 | import time 29 | now = time.time 30 | from sklearn.ensemble import RandomForestRegressor as RandomForest 31 | from sklearn.model_selection import KFold 32 | from statsmodels.stats import proportion as prop 33 | import os 34 | 35 | ############################################################################# 36 | ############################################################################# 37 | 38 | #%% utility funs 39 | 40 | def CI_prop(n,p): 41 | """ 42 | Input: In n reps, observed proportion p 43 | Output: the 95% CI of this p 44 | """ 45 | r = prop.proportion_confint(n * p, n, alpha = 0.05, method='binom_test') 46 | return np.round([r[0], r[1]],4) 47 | 48 | def normalize_unit_sd(array): 49 | def temp(v): 50 | return v / np.std(v) 51 | return np.array(apply(temp, 0, array)) 52 | 53 | def apply_v(f,v): 54 | return np.array([f(a) for a in v]) 55 | 56 | def burn_in(data,first_T): 57 | if len(data[0]) == 2: 58 | return [[patient[0][first_T:,:], patient[1][first_T:,:]] for patient in data] 59 | else: 60 | return [[patient[0][first_T:,:], patient[1][first_T:,:], patient[2][first_T:,:]] for patient in data] 61 | 62 | flatten = lambda l: [item for sublist in l for item in sublist] 63 | 64 | def is_null(true_lag,J): 65 | if J >= true_lag: 66 | return "(H0)" 67 | else: 68 | return "(H1)" 69 | 70 | def list2Matrix(List): 71 | # return a n * 1 matrix 72 | return np.array(np.expand_dims(np.array(List),1)) 73 | 74 | def round_list(thelist,dec): 75 | """ 76 | extend np.round to list 77 | """ 78 | return [round(a,dec) for a in thelist] 79 | 80 | 81 | def normalize(data, centralized = False): 82 | """ 83 | normalize the simulated data 84 | data: len-n of [T*dx,T*da] 85 | Returns: data 86 | """ 87 | state, action = [a[0].copy() for a in data], [a[1].copy() for a in data] 88 | n = len(data) 89 | dx = state[0].shape[1] 90 | 91 | ### 
--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_DGP_Ohio-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #%% packages
  2 | 
  3 | 
################################################################################################ 4 | from ._utility import * 5 | from ._utility_RL import * 6 | ################################################ OHIO ########################################## 7 | ################################################################################################ 8 | # the following parameters will not change with the LM fitting 9 | const = 39.03 10 | init_u_G = 162 11 | init_sd_G = 60 12 | p_D, u_D, sd_D = 0.17, 44.4, 35.5 13 | p_E, u_E, sd_E = 0.05, 4.9, 1.04 14 | p_A = [0.805, 0.084, 0.072, 0.029, 0.010] # new discritization 15 | range_a = [0, 1, 2, 3, 4] 16 | 17 | ########################################## 18 | # left to right: t-4, .. , t-1 19 | coefficients = [-0.008 , 0.106 , -0.481 , 1.171 , # glucose 20 | 0.008 , -0.004 , 0.08 , 0.23 , # diet 21 | 0.009 , -1.542 , 3.097 , -3.489 , # exercise 22 | -0.30402253, -2.02343638, -0.3310525 , -0.43941028] # action 23 | 24 | def Glucose2Reward(gl, definition = 1): 25 | # Q: too sensitive? 26 | low_gl = 80 27 | high_gl = 140 28 | return np.select([gl>=high_gl, gl<=low_gl, low_gl actions [N * 1] -> 1 * N 137 | actions[t, :] = A_t 138 | 139 | # collect rewards 140 | Values = est_values(obs, gamma = gamma, init_T = J_upper) 141 | return Values 142 | 143 | def est_values(obs, gamma = 0.9, init_T = 10): 144 | """ Tool to calculate culmulative rewards from observation (glucose histroy) 145 | Input: the observed trajectories (possibly based on the optimal policy) 146 | 3 * T * N 147 | Output: the collected culmulative rewards 148 | 149 | init_T: when the glucose becomes stable 150 | """ 151 | Values = [] 152 | N = obs.shape[2] 153 | T = obs.shape[1] 154 | for i in range(N): 155 | rewards = np.roll(apply_v(Glucose2Reward, obs[0, init_T:, i]), shift = -1).reshape(-1, 1) 156 | est_Value = np.round(cum_r(rewards, gamma), 3) 157 | Values.append(est_Value[0]) 158 | return Values 159 | 160 | 161 | def init_MDPs_real(T, N, sd_G, seed = 0): # version of new -> bad simu results 162 | """ 163 | Randomly initialize 164 | 1. G_t [0,..., T_true_lag]; 165 | 2. errors for G_t 166 | 3. when to take how many diets/exercises [matters?] 
167 | Outputs: 168 | init G_t and its future erroes; all D_t and E_t 169 | """ 170 | rseed(seed); npseed(seed) 171 | true_lag = 4 172 | obs = np.zeros((3, T, N)) # [Gi, D, Ex] 173 | e_D = abs(rnorm(u_D, sd_D, T * N)) 174 | e_E = abs(rnorm(u_E, sd_E, T * N)) 175 | e_G = rnorm(0, sd_G, T * N).reshape((T, N)) 176 | 177 | obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N) 178 | obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N)) 179 | obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N)) 180 | 181 | return obs, e_G -------------------------------------------------------------------------------- /experiment_func/_DGP_Ohio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | #%% packages 5 | 6 | ################################################################################################ 7 | import os, sys 8 | package_path = os.path.dirname(os.path.abspath(os.getcwd())) 9 | 10 | sys.path.insert(0, package_path + "/test_func") 11 | from _core_test_fun import * 12 | from _utility_RL import * 13 | 14 | ################################################ OHIO ########################################## 15 | ################################################################################################ 16 | # the following parameters will not change with the LM fitting 17 | const = 39.03 18 | init_u_G = 162 19 | init_sd_G = 60 20 | p_D, u_D, sd_D = 0.17, 44.4, 35.5 21 | p_E, u_E, sd_E = 0.05, 4.9, 1.04 22 | p_A = [0.805, 0.084, 0.072, 0.029, 0.010] # new discritization 23 | range_a = [0, 1, 2, 3, 4] 24 | 25 | ########################################## 26 | # left to right: t-4, .. , t-1 27 | coefficients = [-0.008 , 0.106 , -0.481 , 1.171 , # glucose 28 | 0.008 , -0.004 , 0.08 , 0.23 , # diet 29 | 0.009 , -1.542 , 3.097 , -3.489 , # exercise 30 | -0.30402253, -2.02343638, -0.3310525 , -0.43941028] # action 31 | 32 | def Glucose2Reward(gl, definition = 1): 33 | # Q: too sensitive? 34 | low_gl = 80 35 | high_gl = 140 36 | return np.select([gl>=high_gl, gl<=low_gl, low_gl actions [N * 1] -> 1 * N 145 | actions[t, :] = A_t 146 | 147 | # collect rewards 148 | Values = est_values(obs, gamma = gamma, init_T = J_upper) 149 | return Values 150 | 151 | def est_values(obs, gamma = 0.9, init_T = 10): 152 | """ Tool to calculate culmulative rewards from observation (glucose histroy) 153 | Input: the observed trajectories (possibly based on the optimal policy) 154 | 3 * T * N 155 | Output: the collected culmulative rewards 156 | 157 | init_T: when the glucose becomes stable 158 | """ 159 | Values = [] 160 | N = obs.shape[2] 161 | T = obs.shape[1] 162 | for i in range(N): 163 | rewards = np.roll(apply_v(Glucose2Reward, obs[0, init_T:, i]), shift = -1).reshape(-1, 1) 164 | est_Value = np.round(cum_r(rewards, gamma), 3) 165 | Values.append(est_Value[0]) 166 | return Values 167 | 168 | 169 | def init_MDPs_real(T, N, sd_G, seed = 0): # version of new -> bad simu results 170 | """ 171 | Randomly initialize 172 | 1. G_t [0,..., T_true_lag]; 173 | 2. errors for G_t 174 | 3. when to take how many diets/exercises [matters?] 
175 |     Outputs:
176 |         init G_t and its future errors; all D_t and E_t
177 |     """
178 |     rseed(seed); npseed(seed)
179 |     true_lag = 4
180 |     obs = np.zeros((3, T, N)) # [Gi, D, Ex]
181 |     e_D = abs(rnorm(u_D, sd_D, T * N))
182 |     e_E = abs(rnorm(u_E, sd_E, T * N))
183 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
184 | 
185 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
186 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
187 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
188 | 
189 |     return obs, e_G
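The coefficients vector at the top of this file encodes a lag-4 linear transition for glucose: the next value is const plus a weighted sum of the last four glucose, diet, exercise, and action values, plus Gaussian noise e_G. A sketch of a single transition step under that reading (the actual simu_Ohio internals are elided above; the helper name here is illustrative only):

    import numpy as np

    def glucose_next(G_hist, D_hist, E_hist, A_hist, e_t):
        # each *_hist holds the lag-4 window [t-4, t-3, t-2, t-1]
        x = np.concatenate([G_hist, D_hist, E_hist, A_hist])   # length 16
        return const + np.dot(coefficients, x) + e_t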
--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_testing.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | 
 12 | os.environ["OMP_NUM_THREADS"] = "1"
 13 | #####################################
 14 | # To reduce computational cost, our experiments use the "CV_once" option: cross-validation is run only in the 1st replication,
 15 | # and the chosen parameters are reused in the remaining replications. For small-scale experiments,
 16 | # the difference from standard cross-validation is negligible and does not affect our findings.
 17 | #####################################
 18 | 
 19 | def one_time(seed = 1, J = 1, J_upper = 10,
 20 |              N = 30, T = 2 * 24, B = 200, Q = 10, sd_G = 5,
 21 |              gamma_NFQ = 0.95,
 22 |              T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 23 |              paras = "CV", n_trees = 200,
 24 |              first_T = 10,
 25 |              do_eval = True):
 26 |     ## generate data
 27 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 28 |     data = burn_in(data,first_T)
 29 |     T -= first_T
 30 |     # for value evaluation we use the original transition scale,
 31 |     # so do not use normalized data [unlike testing, learning is not scale-dominated]
 32 |     value_data = data
 33 |     testing_data = [a[:2] for a in normalize(data)]
 34 |     ## this one call is used only to get paras
 35 |     if paras == "CV_once":
 36 |         return lam_est(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees)
 37 |     time = now()
 38 |     p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees, print_time = False, method = "QRF")
 39 |     if seed % 100 == 0:
 40 |         print("** testing time:", now() - time, " for seed = ", seed,"**"); time = now()
 41 | 
 42 |     if do_eval: # for the current J, get data, learn a Q-function, and evaluate via simulations
 43 |         Learning_PatternSets = MDP2Trans(MDPs = value_data, J = J, action_in_states = True)
 44 |         Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma_NFQ,
 45 |                      RF_paras = paras, n_trees = n_trees, threshold = thre_eval)
 46 |         J_values = eval_Ohio_policy(Q_func = Q_func, J_Q = J, J_upper = J_upper,
 47 |                                     T = T_eval, gamma = gamma_eval, N = N_eval,
 48 |                                     sd_G = sd_G, seed = 0)
 49 |         return [p_value, np.mean(J_values)]
 50 |     else:
 51 |         return p_value
 52 | 
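one_time doubles as the tuner: when paras == "CV_once" it returns the cross-validated parameters from lam_est instead of a test result, and the caller below fixes those parameters for every remaining replication. The pattern, distilled (a sketch with hypothetical helper names, not the packaged API):

    def run_all(rep_times):
        # CV_once pattern, assuming tune() cross-validates on seed 0
        # and run_one() reuses the chosen parameters
        paras = tune(seed = 0)   # CV in the 1st replication only
        return [run_one(seed, paras) for seed in range(rep_times)]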
 53 | 
 54 | def one_setting_one_J(rep_times = 2, J = 1, J_upper = 5,
 55 |                       N = 20, T = 2 * 24, B = 2, Q = 10, sd_G = 5,
 56 |                       paras = "CV_once", n_trees = 20,
 57 |                       init_seed = 0, do_eval = False, parallel = False, email = False):
 58 |     a = now()
 59 |     if paras == "CV_once":
 60 |         paras = one_time(seed = 0, J = J, J_upper = J_upper,
 61 |                          N = N, T = T, B = B, Q = Q,
 62 |                          sd_G = sd_G,
 63 |                          paras = "CV_once", n_trees = n_trees,
 64 |                          do_eval = do_eval)
 65 |         print("CV paras:",paras)
 66 | 
 67 |     def one_test(seed):
 68 |         return one_time(seed = seed, J = J, J_upper = J_upper,
 69 |                         N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 70 |                         paras = paras, n_trees = n_trees,
 71 |                         do_eval = do_eval)
 72 |     if parallel:
 73 |         if rep_times == 500 and do_eval:
 74 |             r = []
 75 |             for i in range(5): # run in 5 chunks of 100 reps (keeps worker connections short)
 76 |                 r_i = parmap(one_test, range(init_seed + i * 100, init_seed + (i + 1) * 100), parallel)
 77 |                 print("the first", (i + 1) * 100, "reps in 500 reps Done: \n",
 78 |                       rej_rate([a[0] for a in r_i], [.1,.05,.01]),
 79 |                       "\n with time cost: \n", now() - a)
 80 |                 r += r_i
 81 |         else:
 82 |             r = parmap(one_test, range(init_seed, init_seed + rep_times), parallel)
 83 |     else:
 84 |         r = rep_seeds_print(one_test,rep_times,init_seed)
 85 |     print("total testing time cost for one J:", np.round(now() - a,3),Dash)
 86 |     if do_eval:
 87 |         p_values = [a[0] for a in r]
 88 |         rej_rates = rej_rate(p_values, [.1,.05,.01])
 89 |         values = [a[1] for a in r]
 90 |         if email:
 91 |             send_email("J = " + str(J) + " with testing results: \n" + str(rej_rates) + \
 92 |                        "\n and values: \n" + str([np.mean(values), np.std(values)]))
 93 |         return rej_rates, np.round(np.mean(values),4), np.round(np.std(values),4)
 94 |     else:
 95 |         rej_rates = rej_rate(r, [.1,.05,.01])
 96 |         return rej_rates, None, None  # keep a 3-tuple so the caller's unpacking also works when do_eval = False
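Given the per-J p-values that these testing routines produce, the sequential procedure in Ohio_simu_seq_lags selects the smallest lag J whose null is not rejected. A distilled version of that selection rule (a sketch, not the packaged API):

    def select_lag(p_values, alpha):
        # p_values[j] is the p-value for testing lag J = j + 1;
        # return the first non-rejected J, or the largest J tried
        for j, p in enumerate(p_values):
            if p > alpha:
                return j + 1
        return len(p_values)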
 97 | 
 98 | def one_setting_mul_J(rep_times = 50, N = 30, T = 24 * 2, B = 200, Q = 10, sd_G = 5,
 99 |                       paras = "CV_once", n_trees = 200,
100 |                       init_seed = 0,
101 |                       file = None, email = False, J_low = 1, J_upper = 5,
102 |                       do_eval = True, parallel = False, print_every_J = False):
103 |     J_rej_rates, J_values = [], []
104 |     true_lag = 4
105 |     ## Prepare log
106 |     setting = [N, T, sd_G]
107 |     email_contents = ""
108 |     email_setting = [rep_times, N, T, B, sd_G]
109 |     email_str = "rep_times, N, T, B, sd_G"
110 |     print(dash, "Setting running [N,T,sd_G]:", setting, dash, "\n")
111 |     if file is not None:
112 |         print(dash, "Setting running [N,T,sd_G]:", setting, dash, "\n", file = file)
113 |     ## Testing and value results for each J, with one true_lag
114 |     for J in range(J_low, J_upper + 1):
115 |         a = now()
116 |         rej_rates, mean_value, std_value = one_setting_one_J(rep_times = rep_times, J = J, J_upper = J_upper,
117 |                                                              N = N, T = T, B = B, Q = Q, sd_G = sd_G,
118 |                                                              paras = paras, n_trees = n_trees,
119 |                                                              init_seed = init_seed,
120 |                                                              do_eval = do_eval, parallel = parallel)
121 | 
122 |         #### Store results
123 |         J_rej_rates.append(rej_rates)
124 |         J_values.append([mean_value, std_value]) # sd_over_500(mean_over_10)
125 | 
126 |         #### Prepare log
127 |         print_res = ' '.join(["\n", "Above: when true_lag = ",str(true_lag),
128 |                               "and we do J = ", str(J), "testing",str(is_null(true_lag = true_lag, J = J)),
129 |                               "[supremum-based, integration-based]", "\n The average and std of values: \n",
130 |                               str([mean_value, std_value])])
131 |         print(print_res)
132 |         if file is not None:
133 |             print(print_res, file = file)
134 | 
135 |         print_time = ' '.join(["Time cost:", str(np.round( (now() - a)/60,2)), "mins","\n",DASH])
136 |         print(print_time)
137 |         if file is not None:
138 |             print(print_time, file = file)
139 | 
140 |         log = "12_16, lag4 OLS AWS" + ", init_seed - " + str(init_seed) + "\n" + dash + "\n"
141 |         email_this_J = email_str + "\n" + str(email_setting)+ '; J=' + str(J) + ' DONE!\n' \
142 |                        +'alpha = [.1,.05,.01]' + ', [supremum-based, integration-based] \n' + str(rej_rates) + "\n" \
143 |                        + str([mean_value, std_value]) + "\n"
144 |         email_contents += dash + "\n" + email_this_J
145 |         if print_every_J:
146 |             print(J_rej_rates, DASH, J_values)
147 |     ## Final printing out
148 |     if email:
149 |         send_email(log + email_contents)
150 |     if file is not None: # print latex
151 |         latex_ohio_one_T_sd_G_mul_j(J_rej_rates,file)
152 | 
153 |     return J_rej_rates, J_values
154 | 
155 | print("Import DONE!")
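The driver below parallelizes over seeds through parmap (defined in _uti_basic, which is not shown in this excerpt). A minimal stand-in with the same call signature, for readers running the scripts piecemeal; this is an assumption about its semantics (map a function over an iterable with a worker pool), and unlike the repository's version it requires the mapped function to be picklable:

    import multiprocessing

    def parmap(f, X, nprocs = multiprocessing.cpu_count()):
        # minimal sketch: apply f to each element of X using nprocs workers
        with multiprocessing.Pool(processes = nprocs) as pool:
            return pool.map(f, list(X))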
/experiment_func/_utility_RL.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | ##########################################################################
4 | import os, sys
5 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
6 | 
7 | sys.path.insert(0, package_path + "/test_func")
8 | from _core_test_fun import *
9 | ##########################################################################
10 | param_grid = {'max_depth': [2, 6, 10], 'min_samples_leaf': [5, 10, 20]}
11 | n_jobs = multiprocessing.cpu_count()
12 | ##########################################################################
13 | def change_rate(y_old, y_new):
14 |     return norm(y_old - y_new)**2 / norm(y_old)**2
15 | 
16 | def flatten(l):
17 |     # list of sublists -> flat list
18 |     return [item for sublist in l for item in sublist]
19 | 
20 | def cum_r(rewards, gamma):
21 |     """ rewards -> cumulative discounted reward
22 |     """
23 |     return sum(
24 |         map(operator.mul, [gamma ** j for j in range(len(rewards))], rewards))
25 | cum_rewards = cum_r
26 | ##########################################################################
27 | 
28 | #%% Prepare training data for the Fitted-Q: based on (lag-J) state transitions and observed rewards
29 | 
30 | def ObsAct2State(obs, actions, t, J, multiple_N = False):
31 |     """ Based on our discussion on 12/03: form the lag-J state from the history of observations and actions.
32 |     For the RL part (the testing part handles its own lags).
33 |     To choose A_t, we need to define S_t, which (with lag J) is the following (e.g., with lag 1, S_{t+1} only depends on X_t and A_t):
34 |         O_{t-J+1}, A_{t-J+1}, ..., O_t
35 |     """
36 |     if not multiple_N:
37 |         if J == 1:
38 |             s = obs[t, :].ravel(order='C')
39 |         else:
40 |             s = np.hstack([obs[(t - J + 1): t, :], actions[(t - J + 1):t]]).ravel(order='C')
41 |             s = np.append(s, obs[t, :].ravel())
42 |         return s
43 |     else: # obs: 3 * T * N
44 |         N = obs.shape[2]
45 |         dim_obs = 3
46 |         if J == 1:
47 |             s = obs[:, t, :]
48 |         else: # target: (4 * J_Q - 1) * N
49 |             s = np.vstack(([
50 |                 obs[:, (t - J + 1):t, :], 
51 |                 actions[(t - J + 1):t, :].reshape((1, J - 1, N))])) # extend the dim of the action block
52 |             s = s.reshape(((dim_obs + 1) * (J - 1), N), order = 'F')
53 |             obs_0 = obs[:, t, :] # 3 * N
54 |             s = np.vstack([s, obs_0])
55 |         return s # dim * N
56 | 
57 | 
58 | 
59 | def MDP2Trans(MDPs, J, action_in_states = False, combined = True):
60 |     """
61 |     Input: a length-N list of trajectories [state matrix [T * 3], actions, rewards]
62 |     Output: a list of (s, a, s', u) transition tuples (combined together)
63 |     """
64 |     def MDP2Trans_one_traj(i):
65 |         obs, actions, utilities = MDPs[i]
66 |         T = obs.shape[0]
67 |         result = []
68 |         for t in range(J - 1, T - 1):
69 |             s = ObsAct2State(obs, actions, t, J)
70 |             ss = ObsAct2State(obs, actions, t + 1, J)
71 | 
72 |             a = actions[t]
73 |             u = utilities[t]
74 |             result.append([s, a, ss, u])
75 |         return result
76 |     r = rep_seeds(MDP2Trans_one_traj, len(MDPs) - 1)
77 |     if combined:
78 |         return flatten(r) # put every patient together, rather than into a matrix
79 |     else:
80 |         return r
81 | 
82 | ##########################################################################
83 | """ Fitted Q
84 | 1. fit (x,a) -> q(x,a)
85 | 2. update q(x,a) = r + gamma * max_{a'} q(x',a') # (x', r) is observed
86 | """
87 | ##########################################################################
88 | # %% Main functions for Fitted-Q
89 | def NFQ(PatternSets, gamma, RF_paras = [3,20], n_trees = 200, threshold = 1e-5, initialize = "mine"):
90 |     """ Learn the optimal Q function from batch data (RF + fitted-Q)
91 |     Input: a list of (s,a,s',u)
92 |     Output: Q function
93 |     """
94 |     rseed(0); npseed(0)
95 |     ### Preparing training data
96 |     s, a, ss, r = [np.array([a[i] for a in PatternSets]) for i in range(4)]
97 |     a = np.array([a[1] for a in PatternSets]).reshape((-1, 1))
98 |     range_a = np.unique(a)
99 |     x_train = np.hstack((s, a))
100 | 
101 |     ### Initialization
102 |     init_y = r * (1 / (1 - gamma)) # geometric-series bound as the initial values
103 |     is_CV = False
104 |     if RF_paras == "CV":
105 |         rseed(0); npseed(0)
106 |         is_CV = True
107 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
108 |         gd = GridSearchCV(estimator = rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose = 0)
109 |         gd.fit(x_train, init_y.ravel())
110 |         RF_paras = gd.best_params_
111 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
112 | 
113 |     rseed(0); npseed(0)
114 |     max_depth, min_samples_leaf = RF_paras
115 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, 
116 |            min_samples_leaf = min_samples_leaf, n_jobs = n_jobs, 
117 |            verbose = 0)
118 |     Q.fit(x_train, init_y.ravel())
119 | 
120 |     ### Iterations
121 |     y_old = init_y.copy()
122 |     # update the estimated Q
123 |     rep, epsilon = 0, 100
124 |     while(epsilon > threshold and rep < 100): # 200 before
125 |         rseed(0); npseed(0)
126 |         y_train = UpdatedValues(ss, range_a, r, Q, gamma)
127 |         epsilon = change_rate(y_old = y_old, y_new = y_train)
128 |         Q.fit(x_train, y_train.ravel())
129 |         y_old = y_train.copy()
130 |         rep += 1
131 |     return Q
132 | 
133 | 
134 | def UpdatedValues(ss, range_a, r, Q, gamma):
135 |     """ Update the estimated optimal v(s,a) with the fitted Q function
136 |     Input:
137 |         ss: the next states s'
138 |         range_a: the available actions (0/1 in this example)
139 |         r: observed rewards
140 |         Q: the current Q function, for values at the next states
141 |     Output: v, where v = r + gamma * max_a' Q(s',a')
142 |     """
143 |     v_as = []
144 |     N = ss.shape[0]
145 |     for a in range_a:
146 |         ss_a = np.hstack((ss, np.ones((N, 1)) * a))
147 |         v_a = Q.predict(ss_a)
148 |         v_as.append(v_a.reshape(N, 1))
149 |     v_max = np.amax(np.hstack(v_as), 1)
150 |     Q_new = r.reshape(N, 1) + gamma * v_max.reshape(N, 1)
151 |     return Q_new
152 | 
153 | 
154 | def Estpolicy(Q_func, range_a):
155 |     """ Q function -> policy
156 |     Input:
157 |         a Q function and the range of available actions
158 |     Output:
159 |         the greedy (discrete-action) policy [given a batch of states, output actions]
160 |     """
161 |     def policy(s, debug = 0):
162 |         """
163 |         Input: s [N * dx]
164 |         Output: actions [N * 1]
165 |         """
166 |         rseed(0); npseed(0)
167 |         N = s.shape[0]
168 |         v_as = []
169 |         for a in range_a:
170 |             s_a = np.hstack([s, np.repeat(a, N).reshape(-1, 1)])
171 |             v_a = Q_func.predict(s_a)
172 |             v_as.append(v_a.reshape(-1, 1))
173 |         v_as = np.round(np.hstack(v_as), 4)
174 |         actions = np.array([range_a[i] for i in np.argmax(v_as, 1)]).reshape(-1, 1)
175 |         if debug == 1:
176 |             print(v_as - v_as[:, 1].reshape(-1, 1), DASH, actions)
177 |         return actions
178 | 
179 |     return policy
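# Hedged usage sketch (illustration only, not part of the original file): given
# a fitted Q function and the action range, `Estpolicy` yields the greedy policy
# mapping a batch of states to argmax actions:
#     policy = Estpolicy(Q_func, range_a = np.array([0., 1.]))
#     actions = policy(states)   # states: N * dx  ->  actions: N * 1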
180 | 
181 | ##########################################################################
182 | ##########################################################################
183 | def UpdatedValues_eval(ss, policy, J, r, Q, gamma):
184 |     """ The one-step-forward update used in evaluation (FQE)
185 |     """
186 |     dx = ss.shape[1]
187 |     sss = ss[:, (dx - (4 * J - 1)):dx]
188 |     As = policy(sss)
189 |     sa = np.hstack([ss, As])
190 |     return gamma * Q.predict(sa).reshape(-1, 1) + r.reshape(-1, 1)
191 | 
192 | def FQE(PatternSets, Q_func, J, gamma = 0.9, RF_paras = [3, 20], n_trees = 200, 
193 |         threshold = 1e-4):
194 |     """
195 |     Fitted-Q Evaluation for off-policy evaluation (OPE) on REAL DATA
196 | 
197 |     1. fit an RF q: (x,a) -> value
198 |     2. update the value function of the policy:
199 |            q_policy(x, a) = gamma * q(x', policy(x'[, (dx - J): dx])) + r
200 | 
201 |     3. return V(x) = q_policy(x, policy(x[, (dx - J): dx]))
202 | 
203 |     Input:
204 |         PatternSets: a list of (s, a, s', u) [already transformed]
205 | 
206 |     Output: V function
207 | 
208 |     """
209 |     rseed(0); npseed(0)
210 | 
211 |     # Preparing training data
212 |     s_bef, a_bef, ss_bef, r_bef = [np.array([a[i] for a in PatternSets]) for i in range(4)]
213 |     a_bef = a_bef.reshape(-1, 1)
214 |     range_a = np.unique(a_bef)
215 | 
216 |     policy = Estpolicy(Q_func, range_a)
217 |     time = now()
218 | 
219 |     dx = s_bef.shape[1]
220 |     s1 = s_bef[:, (dx - (4 * J - 1)):dx].copy()
221 |     As = policy(s1)
222 |     selected = (As == a_bef)
223 | 
224 |     s2, a2, ss2, r2 = [], [], [], []
225 |     for i in range(s_bef.shape[0]):
226 |         if selected[i, 0]: # keep only the transitions where the behavior action agrees with the policy
227 |             s2.append(s_bef[i,])
228 |             a2.append(a_bef[i,])
229 |             ss2.append(ss_bef[i,])
230 |             r2.append(r_bef[i,])
231 |     s, a, ss, r = np.vstack(s2).copy(), np.vstack(a2).copy(), np.vstack(ss2).copy(), np.vstack(r2).copy()
232 | 
233 | 
234 |     ### Initialization
235 |     x_train = np.hstack((s, a))
236 |     init_y = r * (1 / (1 - gamma))
237 |     if RF_paras == "CV":
238 |         rseed(0); npseed(0)
239 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
240 |         gd = GridSearchCV(estimator = rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose = 0)
241 |         gd.fit(x_train, init_y.ravel())
242 |         RF_paras = gd.best_params_
243 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
244 | 
245 |     max_depth, min_samples_leaf = RF_paras
246 |     rseed(0); npseed(0)
247 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, 
248 |            min_samples_leaf = min_samples_leaf, n_jobs = n_jobs, verbose = 0)
249 |     Q.fit(x_train, init_y.ravel())
250 | 
251 |     y_old = init_y.copy()
252 |     # evaluate the policy
253 |     rep, epsilon = 0, 100
254 |     while(epsilon > threshold and rep < 100):
255 |         rseed(0); npseed(0)
256 |         y_train = UpdatedValues_eval(ss, policy, J, r, Q, gamma) # too slow [?]
257 |         y_train = np.round(y_train, 6)
258 |         epsilon = change_rate(y_old = y_old, y_new = y_train)
259 |         Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, 
260 |                min_samples_leaf = min_samples_leaf, n_jobs = n_jobs, verbose = 0)
261 |         Q.fit(x_train, y_train.ravel()) # regression function: (s,a) -> v
262 | 
263 |         y_old = y_train.copy()
264 |         rep += 1
265 | 
266 |     def V_func(s):
267 |         dx = s.shape[1]
268 |         a = policy(s[:, (dx - (4 * J - 1)):dx]).reshape(-1, 1)
269 |         return Q.predict(np.hstack([s, a]))
270 | 
271 |     return V_func
272 | 
273 | 
274 | 
--------------------------------------------------------------------------------
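For orientation, a minimal usage sketch of how the pieces above compose (names as defined in _utility_RL.py; the trajectory list MDPs is an assumed input in the format documented in MDP2Trans):

PatternSets = MDP2Trans(MDPs, J = 2)       # (s, a, s', u) tuples built from lag-2 states
Q_func = NFQ(PatternSets, gamma = 0.9)     # fitted-Q iteration: estimate the optimal Q function
V_func = FQE(PatternSets, Q_func, J = 2)   # fitted-Q evaluation of the induced greedy policy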
/test_func/_QRF.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | This file implements the random forest-based method used in the paper "Does MDP Fit the Data?" to estimate conditional characteristic functions.
5 | The majority of the functions in this file were adapted from the source code of the paper "Quantile Regression Forest" on GitHub.
6 | Date: 10/12/2019.
7 | URL: https://github.com/scikit-garden/scikit-garden/tree/master/skgarden
8 | """
9 | ##########################################################################
10 | from _uti_basic import *
11 | ##########################################################################
12 | import warnings
13 | warnings.filterwarnings('ignore')
14 | from numpy.random import seed as rseed
15 | from numpy.random import randn # randn(d1, d2) is a d1 * d2 array of i.i.d. N(0,1) draws
16 | import numpy as np
17 | from numpy import ma
18 | from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
19 | from sklearn.ensemble.forest import ForestRegressor
20 | from sklearn.utils import check_array, check_random_state, check_X_y
21 | from sklearn.tree.tree import BaseDecisionTree
22 | from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
23 | import time
24 | now = time.time
25 | ##########################################################################
26 | 
27 | def weighted_est(y, uv, cos_sin, weights = None):
28 |     """
29 |     Weighted estimate of the cos/sin characteristic values.
30 | 
31 |     weights: array-like, shape = (n_samples,)
32 |         weights[i] is the weight given to y[i]; if weights[i] is zero,
33 |         y[i] is simply ignored in the computation.
34 | 
35 |     Parameters
36 |     ----------
37 |     uv: assumed to be of shape B * d_y
38 | 
39 |     Returns
40 |     -------
41 |     B * 1, for a given T
42 |     """
43 |     if weights is None:
44 |         return np.mean(cos_sin(y.dot(uv)), axis = 0)
45 |     return weights.T.dot(cos_sin(y.dot(uv)))
46 | 
47 | def generate_sample_indices(random_state, n_samples):
48 |     """
49 |     [Copied from the skgarden source]
50 |     Generates the bootstrap indices for each tree fit.
51 | 
52 |     Parameters
53 |     ----------
54 |     random_state: int, RandomState instance or None
55 |         If int, random_state is the seed used by the random number generator.
56 |         If RandomState instance, random_state is the random number generator.
57 |         If None, the random number generator is the RandomState instance used
58 |         by np.random.
59 | 
60 |     n_samples: int
61 |         Number of samples to generate from each tree.
62 | 
63 |     Returns
64 |     -------
65 |     sample_indices: array-like, shape = (n_samples,), dtype = np.int32
66 |         Sample indices.
67 |     """
68 |     random_instance = check_random_state(random_state)
69 |     sample_indices = random_instance.randint(0, n_samples, n_samples)
70 |     return sample_indices
71 | ##########################################################################
72 | # class hierarchy: RandomForestQuantileRegressor <- BaseForestQuantileRegressor; DecisionTreeQuantileRegressor <- BaseTreeQuantileRegressor
73 | 
74 | class BaseForestQuantileRegressor(ForestRegressor):
75 |     def fit(self, X, y):
76 |         """
77 |         Build a forest from the training set (X, y).
78 | 
79 |         Parameters
80 |         ----------
81 |         X : array-like or sparse matrix, shape = [n_samples, n_features]
82 |             The training input samples. Internally, it will be converted to
83 |             ``dtype=np.float32`` and if a sparse matrix is provided
84 |             to a sparse ``csc_matrix``.
85 | 
86 |         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
87 |             The target values.
88 | 
89 |         Returns
90 |         -------
91 |         self : object
92 |             Returns self.
93 |         """
94 |         # apply method requires X to be of dtype np.float32
95 |         X, y = check_X_y(
96 |             X, y, accept_sparse="csc", dtype=np.float32, multi_output=1)
97 |         super(BaseForestQuantileRegressor, self).fit(X, y)
98 | 
99 |         self.y_train_ = y
100 |         self.y_train_leaves_ = -np.ones((self.n_estimators, len(y)), dtype=np.int32)
101 |         self.y_weights_ = np.zeros_like((self.y_train_leaves_), dtype=np.float32)
102 | 
103 |         for i, est in enumerate(self.estimators_):
104 |             bootstrap_indices = generate_sample_indices(est.random_state, len(y))
105 |             est_weights = np.bincount(bootstrap_indices, minlength=len(y))
106 |             y_train_leaves = est.y_train_leaves_
107 |             for curr_leaf in np.unique(y_train_leaves):
108 |                 y_ind = y_train_leaves == curr_leaf
109 |                 self.y_weights_[i, y_ind] = (
110 |                     est_weights[y_ind] / np.sum(est_weights[y_ind]))
111 | 
112 |             self.y_train_leaves_[i, bootstrap_indices] = y_train_leaves[bootstrap_indices]
113 |         return self
114 | 
115 |     def predict(self, X, uv = None):
116 |         """
117 |         Predict the conditional characteristic values, for either the forward or the backward model.
118 | 
119 |         Parameters
120 |         ----------
121 |         X : array-like or sparse matrix of shape = [n_samples, n_features]
122 |         uv : [B, dim_y]; can be either u or v. If None, the conditional mean
123 |             E(Y|X) is returned instead (used for CV and debugging).
124 | 
125 |         Returns
126 |         -------
127 |         char_est : arrays of shape = [n_samples, B]
128 |         """
129 |         # apply method requires X to be of dtype np.float32
130 |         X = check_array(X, dtype=np.float32, accept_sparse="csc") # around N * T rows
131 |         X_leaves = self.apply(X) # (n_test = N * T, n_tree)
132 |         weights = np.zeros((X.shape[0], len(self.y_train_))) # n_test * n_train
133 |         begin = now()
134 |         a = now()
135 |         mask_time = 0
136 |         sum_time = 0
137 | 
138 |         for i, x_leaf in enumerate(X_leaves): # over the test points
139 |             mask = (self.y_train_leaves_ != np.expand_dims(x_leaf, 1))
140 |             x_weights = ma.masked_array(self.y_weights_, mask) # n_tree * n_train, for each test point
141 |             b = now()
142 |             mask_time += b - a
143 |             weights[i, :] = x_weights.sum(axis = 0)
144 |             a = now()
145 |             sum_time += a - b
146 |         # print("prediction iteration:", now() - begin, " with mask:", mask_time, "sum:", sum_time)
147 |         if uv is None: # conditional mean E(X_t | X_{t-1}); used for CV and debugging
148 |             return weights.dot(self.y_train_) / np.sum(weights, axis=1)[:, None]
149 |         else:
150 |             char_est_cos = weights.dot(np.cos(self.y_train_.dot(uv.T))) / np.sum(weights, axis=1)[:, None]
151 |             char_est_sin = weights.dot(np.sin(self.y_train_.dot(uv.T))) / np.sum(weights, axis=1)[:, None]
152 |             return char_est_cos, char_est_sin
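# Note (added sketch, assuming the weights computed in fit/predict above): the
# prediction approximates the conditional characteristic function
#     E[exp(i * uv^T Y) | X = x]  ~=  sum_j w_j(x) * exp(i * uv^T y_j),
# where w_j(x) aggregates, over the trees, the bootstrap weights of the training
# points sharing x's leaf; char_est_cos / char_est_sin are its real and
# imaginary parts. Equivalently, in numpy:
#     char_est = (weights / weights.sum(1, keepdims = True)).dot(np.exp(1j * y_train.dot(uv.T)))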
153 | 
154 | class RandomForestQuantileRegressor(BaseForestQuantileRegressor):
155 |     """
156 |     The concrete forest built on BaseForestQuantileRegressor: it wires the
157 |     standard random-forest hyperparameters to the quantile decision trees.
158 | 
159 |     The sub-sample size is always the same as the original
160 |     input sample size but the samples are drawn with replacement if
161 |     `bootstrap=True` (default).
162 |     """
163 |     def __init__(self,
164 |                  n_estimators=10,
165 |                  criterion='mse',
166 |                  max_depth=None,
167 |                  min_samples_split=2,
168 |                  min_samples_leaf=1,
169 |                  min_weight_fraction_leaf=0.0,
170 |                  max_features='auto',
171 |                  max_leaf_nodes=None,
172 |                  bootstrap=True,
173 |                  oob_score=False,
174 |                  n_jobs=1,
175 |                  random_state=None,
176 |                  verbose=0,
177 |                  warm_start=False):
178 |         super(RandomForestQuantileRegressor, self).__init__(
179 |             base_estimator=DecisionTreeQuantileRegressor(),
180 |             n_estimators=n_estimators,
181 |             estimator_params=("criterion", "max_depth", "min_samples_split",
182 |                               "min_samples_leaf", "min_weight_fraction_leaf",
183 |                               "max_features", "max_leaf_nodes",
184 |                               "random_state"),
185 |             bootstrap=bootstrap,
186 |             oob_score=oob_score,
187 |             n_jobs=n_jobs,
188 |             random_state=random_state,
189 |             verbose=verbose,
190 |             warm_start=warm_start)
191 | 
192 |         self.criterion = criterion
193 |         self.max_depth = max_depth
194 |         self.min_samples_split = min_samples_split
195 |         self.min_samples_leaf = min_samples_leaf
196 |         self.min_weight_fraction_leaf = min_weight_fraction_leaf
197 |         self.max_features = max_features
198 |         self.max_leaf_nodes = max_leaf_nodes
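# Hedged usage sketch (illustration only; X_train, y_train, X_test and the
# frequency draws uv are assumed inputs): the forest is used like a standard
# sklearn regressor and then queried for conditional characteristic values:
#     qrf = RandomForestQuantileRegressor(n_estimators = 200, max_depth = 6, min_samples_leaf = 10)
#     qrf.fit(X_train, y_train)
#     char_cos, char_sin = qrf.predict(X_test, uv)   # each of shape n_test * B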
199 | 
200 | class BaseTreeQuantileRegressor(BaseDecisionTree):
201 |     def fit(self, X, y, sample_weight=None, check_input=True,
202 |             X_idx_sorted=None):
203 |         """
204 |         Child of BaseDecisionTree (sklearn), which uses a single decision tree
205 |         to estimate the same conditional characteristic values.
206 | 
207 |         Parameters
208 |         ----------
209 |         X : array-like or sparse matrix, shape = [n_samples, n_features]
210 |             The training input samples. Internally, it will be converted to
211 |             ``dtype=np.float32`` and if a sparse matrix is provided
212 |             to a sparse ``csc_matrix``.
213 | 
214 |         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
215 |             The target values.
216 | 
217 |         sample_weight : array-like, shape = [n_samples] or None
218 |             Sample weights. If None, then samples are equally weighted. Splits
219 |             that would create child nodes with net zero or negative weight are
220 |             ignored while searching for a split in each node. Splits are also
221 |             ignored if they would result in any single class carrying a
222 |             negative weight in either child node.
223 | 
224 |         check_input : boolean, (default=True)
225 |             Allow to bypass several input checking.
226 |             Don't use this parameter unless you know what you do.
227 | 
228 |         Returns
229 |         -------
230 |         self : object
231 |             Returns self.
232 |         """
233 |         # y passed from a forest is 2-D. This is to silence the
234 |         # annoying data-conversion warnings.
235 |         y = np.asarray(y)
236 |         if np.ndim(y) == 2 and y.shape[1] == 1:
237 |             y = np.ravel(y)
238 | 
239 |         # apply method requires X to be of dtype np.float32
240 |         X, y = check_X_y(
241 |             X, y, accept_sparse="csc", dtype=np.float32, multi_output=1)
242 |         super(BaseTreeQuantileRegressor, self).fit(
243 |             X, y, sample_weight=sample_weight, check_input=check_input,
244 |             X_idx_sorted=X_idx_sorted)
245 |         self.y_train_ = y
246 | 
247 |         # Stores the leaf nodes that the samples lie in.
248 |         self.y_train_leaves_ = self.tree_.apply(X)
249 |         return self
250 | 
251 |     def predict(self, X, u = None, check_input=False):
252 |         """
253 |         Predict the conditional characteristic (cos/sin) values for X.
254 | 
255 |         Parameters
256 |         ----------
257 |         X : array-like or sparse matrix of shape = [n_samples, n_features]
258 |             The input samples. Internally, it will be converted to
259 |             ``dtype=np.float32`` and if a sparse matrix is provided
260 |             to a sparse ``csr_matrix``.
261 | 
262 |         u : array of shape [B, d_y], optional
263 |             The random frequencies. If None, the conditional mean E(Y | X)
264 |             is returned instead.
265 | 
266 |         check_input : boolean, (default=False)
267 |             Allow to bypass several input checking.
268 |             Don't use this parameter unless you know what you do.
269 | 
270 |         Returns
271 |         -------
272 |         r_cos, r_sin : arrays of shape = [n_samples, B]
273 |             (or E(Y | X) of shape [n_samples] when u is None)
274 |         """
275 |         # apply method requires X to be of dtype np.float32
276 |         X = check_array(X, dtype=np.float32, accept_sparse="csc")
277 |         if u is None:
278 |             return super(BaseTreeQuantileRegressor, self).predict(X, check_input=check_input)
279 | 
280 |         B = u.shape[0]
281 |         r_cos, r_sin = np.zeros((X.shape[0], B)), np.zeros((X.shape[0], B))
282 |         X_leaves = self.apply(X)
283 |         unique_leaves = np.unique(X_leaves)
284 | 
285 |         for leaf in unique_leaves:
286 |             # for the test points in this leaf, use the training points in the
287 |             # same leaf to compute the characteristic values.
288 |             y = self.y_train_[self.y_train_leaves_ == leaf]
289 |             r_cos[X_leaves == leaf, :] = np.mean(np.cos(y.dot(u.T)), axis = 0)
290 |             r_sin[X_leaves == leaf, :] = np.mean(np.sin(y.dot(u.T)), axis = 0)
291 |         return r_cos, r_sin
292 | 
293 | class DecisionTreeQuantileRegressor(DecisionTreeRegressor, BaseTreeQuantileRegressor):
294 |     """
295 |     Combines BaseTreeQuantileRegressor and DecisionTreeRegressor, and provides __init__.
296 | 
297 |     A decision tree regressor that provides conditional characteristic estimates.
298 |     """
299 |     def __init__(self,
300 |                  criterion="mse",
301 |                  splitter="best",
302 |                  max_depth=None,
303 |                  min_samples_split=2,
304 |                  min_samples_leaf=1,
305 |                  min_weight_fraction_leaf=0.,
306 |                  max_features=None,
307 |                  random_state=None,
308 |                  max_leaf_nodes=None):
309 |         super(DecisionTreeQuantileRegressor, self).__init__(
310 |             criterion=criterion,
311 |             splitter=splitter,
312 |             max_depth=max_depth,
313 |             min_samples_split=min_samples_split,
314 |             min_samples_leaf=min_samples_leaf,
315 |             min_weight_fraction_leaf=min_weight_fraction_leaf,
316 |             max_features=max_features,
317 |             max_leaf_nodes=max_leaf_nodes,
318 |             random_state=random_state)
319 | 
--------------------------------------------------------------------------------
/test_func/_core_test_fun.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Main functions for the test proposed in the paper "Does MDP Fit the Data?". Refer to Algorithms 1 and 2 therein.
5 | """
6 | 
7 | ##########################################################################
8 | #%%
9 | from _QRF import *
10 | from _uti_basic import *
11 | from _utility import *
12 | ##########################################################################
13 | # %%
14 | n_jobs = multiprocessing.cpu_count()
15 | param_grid = {'max_depth': [2, 4, 6, 8], 'min_samples_leaf': [5, 10, 20]}
16 | 
17 | ##########################################################################
18 | #%% Algorithm 1
19 | def test(data, J = 1, 
20 |          B = 200, Q = 10, L = 3, 
21 |          paras = "CV", n_trees = 200, 
22 |          print_time = False, 
23 |          include_reward = False, fixed_state_comp = None, 
24 |          method = "QRF"):
25 |     """
26 |     The main test function
27 | 
28 |     Parameters
29 |     ----------
30 |     data: the observed trajectories; a length-N list of [X, A], where X and A are T * dim arrays.
31 |     J: the null hypothesis is that the MDP has lag J. Denoted as k in the paper.
--------------------------------------------------------------------------------
/test_func/_core_test_fun.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Main functions for the test proposed in the paper "Does MDP Fit the Data?". Refer to Algorithms 1 and 2 therein.
"""

##########################################################################
#%%
from _QRF import *
from _uti_basic import *
from _utility import *
##########################################################################
# %%
n_jobs = multiprocessing.cpu_count()
param_grid = {'max_depth': [2, 4, 6, 8], 'min_samples_leaf': [5, 10, 20]}

##########################################################################
#%% Algorithm 1
def test(data, J = 1,
         B = 200, Q = 10, L = 3,
         paras="CV", n_trees = 200,
         print_time = False,
         include_reward = False, fixed_state_comp = None,
         method = "QRF"):
    """
    The main test function.

    Parameters
    ----------
    data: the observed trajectories. A len-N list of [X, A], where X and A are T * dim arrays.
    J: the null hypothesis is that the MDP is of lag J. Denoted as k in the paper.
    B, Q: required hyperparameters. The definition of Q differs slightly from the paper: Q_here = Q_paper + 2.
    paras: the parameters [max_depth, min_samples_leaf] used in the random forests.
    n_trees: the number of trees used in the random forests.
    print_time: whether or not to print the time cost of each part.
    include_reward: whether or not to include R_t as part of X_t in the test.
    fixed_state_comp: resolves the duplicate-S problem in the TIGER example.
    method: the estimator used for the conditional characteristic function.

    Returns
    -------
    p-values
    """
    N = len(data)
    data = normalize(data.copy())
    T = data[0][0].shape[0]
    a = now()
    lam = lam_est(data = data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees,
                  include_reward = include_reward, L = L,
                  fixed_state_comp = fixed_state_comp, method = method)
    Sigma_q_s = Sigma_q(lam)  # a list (len = Q - 1) of 2B * 2B matrices
    if print_time:
        print("RF:", now() - a)
    a = now()
    S = S_hat(lam = lam, dims = [N, T], J = J)  # construct the test statistic
    pValues = bootstrap_p_value(Sigma_q_s, rep_times = int(1e3), test_stat = S)  # resampling-based critical values
    if print_time:
        print("Bootstrap:", now() - a)
    return pValues

#%% Algorithm 2
def selectOrder(data, B = 100, Q = 10, L = 3, alpha = 0.01, K = 10, paras="CV", n_trees = 200,
                print_time = False,
                include_reward = False, fixed_state_comp = None,
                method = "QRF"):
    """
    Sequentially test the null hypotheses J = 1, ..., K (Algorithm 2):
    stop at the first lag that is not rejected at level alpha; if every
    lag up to K is rejected, conclude that the system is a POMDP.
    """
    p_values = []
    for k in range(1, K + 1):
        p_value = test(data, J = k,
                       B = B, Q = Q, L = L,
                       paras = paras, n_trees = n_trees,
                       print_time = print_time,
                       include_reward = include_reward, fixed_state_comp = fixed_state_comp,
                       method = method)
        p_values.append(p_value)
        if p_value > alpha:
            print("Conclude the system is of order:", k)
            return p_values
    print("Conclude the system is a POMDP")
    return p_values
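
##########################################################################
# Editor's note: a hedged usage sketch, commented out so the module's
# behaviour is unchanged. Trajectory shapes follow the docstring of
# `test`; the synthetic noise data below are purely illustrative.
#
#   N, T, dx, da = 20, 100, 3, 1
#   data = [[randn(T, dx), randn(T, da)] for _ in range(N)]
#   p = test(data, J = 1, B = 100, Q = 10, paras = [3, 20])   # one p-value
#   p_values = selectOrder(data, K = 5, alpha = 0.01, paras = [3, 20])
##########################################################################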


##########################################################################
#%% Getting data. Helper functions.
def get_pairs(data, is_forward, J = 1, as_array = 1, include_reward = 0, fixed_state_comp = None):
    """
    Get the training pairs: forward, [(X, A)_{t : t+J-1} -> X_{t+J}];
    backward, [(X, A)_{t+1 : t+J} -> (X, A)_t]. For training only (the
    pairs do not keep track of which patient they come from).

    is_forward: indicator for the forward pairs
    as_array: by default, stack into a predictor array and a response array
    """
    def get_pairs_one_traj(i, is_forward, J):
        """
        Collect the training pairs of one patient.
        patient = [X, A], with X = T * d_x and A = T * d_a.
        """
        patient = data[i]
        if include_reward:
            X, A, R = patient
        else:
            X, A = patient
        T = X.shape[0]
        r = []
        dx = X.shape[1]
        XA = np.hstack([X, A])
        if include_reward:
            XR = np.hstack([X, R])

        for t in range(T - J):
            if is_forward:
                if include_reward:
                    # note: the reshape assumes d_a = 1
                    pair = [
                        XA[t:t + J, :].reshape(1, J * (dx + 1)), XR[t + J, :]]
                else:
                    pair = [
                        XA[t:t + J, :].reshape(1, J * (dx + 1)), X[t + J, :]]
            else:
                pair = [XA[(t + 1):(t + J + 1), :].reshape(1,
                        J * (dx + 1)), XA[t, :]]
            if fixed_state_comp is not None:
                true_state = fixed_state_comp[i]
                # prepend the true state to both predictor and response.
                pair = [
                    np.append(true_state, pair[0]),
                    np.append(true_state, pair[1])]
            r.append(pair)
        return r

    # get the pairs for each patient and put them together
    r = flatten([get_pairs_one_traj(i, is_forward, J)
                 for i in range(len(data))])
    if as_array:
        r = [np.vstack([a[0] for a in r]), np.vstack([a[1] for a in r])]
    return r


def get_test_data(test_data, J=1, fixed_state_comp=None):
    """
    Get the testing predictors.
    """
    def patient_2_predictors(i, J=1):
        """
        XA: T * (d_x + d_a)
        Return: T * ((d_x + d_a) * J)
        """
        patient = test_data[i]
        XA = np.hstack([patient[0], patient[1]])
        T = XA.shape[0]
        r = XA.copy()
        for j in range(1, J):
            r = np.hstack([r, roll(XA, -j, 0)])
        if fixed_state_comp is not None:
            true_state = np.repeat(fixed_state_comp[i], T).reshape(T, 1)
            r = np.hstack((true_state, r))
        return r

    return np.vstack([patient_2_predictors(i, J)
                      for i in range(len(test_data))])

##########################################################################
# Functions for estimating the CCF and constructing the conditional
# covariances. Steps 2 - 3 of Algorithm 1.

# %% Conditional covariance lam construction
def lam_est(data, J, B, Q, L = 3,
            paras = [3, 20], n_trees = 200, include_reward = 0, fixed_state_comp = None, method = "QRF"):
    """
    Construct the pointwise conditional covariances lam (for both the test
    statistic and the critical values) by combining the two parts
    (estimated and observed).

    Returns
    -------
    lam: (Q-1)-len list of four lam matrices (n * T-q * B)
    """

    dx, da = data[0][0].shape[1], data[0][1].shape[1]
    if fixed_state_comp is not None:
        dx += 1

    # generate the random frequencies uv
    rseed(0); npseed(0)
    if include_reward:
        uv = [randn(B, dx + 1), randn(B, dx + da)]
    else:
        uv = [randn(B, dx), randn(B, dx + da)]

    # estimate the characteristic values (cross-fitting): phi_R, psi_R,
    # phi_I, psi_I
    estimated = cond_char_vaule_est(data = data, uv = uv,
                                    paras = paras, n_trees = n_trees, L = L,
                                    J = J,
                                    include_reward = include_reward, fixed_state_comp = fixed_state_comp,
                                    method = method)
    if paras == "CV_once":
        CV_paras = estimated
        return CV_paras
    else:
        estimated_cond_char = estimated
        # cos and sin in batch. (n*T*dx) * (dx*B) = n * T * B:
        # c_X, s_X, c_XA, s_XA
        observed_cond_char = obs_char(data = data, uv = uv,
                                      include_reward = include_reward, fixed_state_comp = fixed_state_comp)
        # combine the above two parts to get the cond. cov. estimates.
        lam = lam_formula(estimated_cond_char, observed_cond_char, J, Q)
        return lam
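
##########################################################################
# Editor's note (hedged): the random frequencies uv drawn in lam_est turn
# the conditional-independence null into finitely many moment conditions.
# For one frequency vector u and targets y (an n * d array), the
# Monte-Carlo CCF estimate used throughout this file is, in effect:
#
#   t = y.dot(u)                                        # projections u'y_i
#   ccf = np.mean(np.cos(t)) + 1j * np.mean(np.sin(t))  # ~ E[exp(i u'Y)]
##########################################################################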


def cond_char_vaule_est(data, uv,
                        paras = "CV_once", n_trees = 200, L = 3,
                        J = 1, include_reward = 0, fixed_state_comp = None, method = "QRF"):
    """
    Cross-fitting-type prediction of the conditional characteristic "values".

    Returns
    -------
    phi_R, psi_R, phi_I, psi_I values as [n * T * B] tensors.
    """
    T = data[0][0].shape[0]
    n = N = len(data)
    B = uv[0].shape[0]
    char_values = [np.zeros([n, T, B]) for i in range(4)]
    K = L  # number of cross-fitting folds
    kf = KFold(n_splits=K)
    kf.get_n_splits(zeros(n))

    # just to get the CV-based paras
    if paras == "CV_once":
        for train_index, test_index in kf.split(data):
            # only done one time, to choose the paras by CV
            if fixed_state_comp:
                true_state_train = [fixed_state_comp[i] for i in train_index]
            else:
                true_state_train = None
            train_data, test_data = [data[i] for i in train_index], [data[i] for i in test_index]
            CV_paras = char_fun_est(train_data = train_data,
                                    paras = "CV_once", n_trees = n_trees, uv = uv, J = J,
                                    include_reward=include_reward, fixed_state_comp=true_state_train)
            return CV_paras

    # estimate the characteristic values by cross-fitting
    for train_index, test_index in kf.split(data):
        if fixed_state_comp:
            true_state_train = [fixed_state_comp[i] for i in train_index]
            true_state_test = [fixed_state_comp[i] for i in test_index]
        else:
            true_state_train, true_state_test = None, None
        train_data, test_data = [data[i] for i in train_index], [data[i] for i in test_index]
        test_pred = get_test_data(test_data = test_data, J = J, fixed_state_comp = true_state_test)

        if method == "QRF":
            char_funs = char_fun_est(train_data=train_data, paras=paras, n_trees = n_trees,
                                     uv=uv, J=J, include_reward=include_reward,
                                     fixed_state_comp=true_state_train)  # the two fitted estimators

            for i in range(2):  # forward / backward
                r = char_funs[i].predict(test_pred, uv[i])  # returns: char_est_cos, char_est_sin
                char_values[0 + i][test_index] = r[0].reshape((len(test_index), T, B))
                char_values[2 + i][test_index] = r[1].reshape((len(test_index), T, B))
        elif method == "RF":
            char_funs = char_fun_est_RF(train_data = train_data,
                                        paras = paras, n_trees = n_trees, uv = uv, J = J,
                                        include_reward = include_reward, fixed_state_comp = true_state_train)
            for i in range(2):
                r = char_funs[i]
                char_values[0 + i][test_index] = r[0].predict(test_pred).reshape((len(test_index), T, B))
                char_values[2 + i][test_index] = r[1].predict(test_pred).reshape((len(test_index), T, B))
    return char_values


def char_fun_est(
        train_data,
        paras=[3, 20], n_trees = 200, uv = 0, J = 1, include_reward = 0, fixed_state_comp = None):
    """
    For each cross-fitting task, fit the forward / backward QRF estimators.

    paras == "CV_once": only run the grid search and return the chosen paras
    paras == "CV": choose paras by CV, then refit on the whole training fold

    Returns
    -------
    a list of the two fitted estimators (forward and backward); or, if
    paras == "CV_once", the CV-selected [max_depth, min_samples_leaf].
    """

    char_funs = []

    X1, y1 = get_pairs(train_data, is_forward = 1, J = J,
                       include_reward = include_reward, fixed_state_comp = fixed_state_comp)
    X2, y2 = get_pairs(train_data, is_forward = 0, J = J,
                       include_reward = include_reward, fixed_state_comp = fixed_state_comp)

    X, y = [X1, X2], [y1, y2]

    if paras in ["CV", "CV_once"]:
        for i in range(2):
            rfqr = RandomForestQuantileRegressor(random_state=0, n_estimators = n_trees)
            gd = GridSearchCV(estimator = rfqr, param_grid = param_grid,
                              cv = 5, n_jobs = n_jobs, verbose=0)
            gd.fit(X[i], y[i])
            best_paras = gd.best_params_

            if paras == "CV_once":  # only return the forward paras
                return [best_paras['max_depth'], best_paras['min_samples_leaf']]

            elif paras == "CV":
                print("best_paras:", best_paras)
                # refit with the optimal paras on the whole training fold
                rfqr1 = RandomForestQuantileRegressor(
                    random_state=0,
                    n_estimators = n_trees,
                    max_depth=best_paras['max_depth'],
                    min_samples_leaf=best_paras['min_samples_leaf'],
                    n_jobs = n_jobs)
                char_funs.append(rfqr1.fit(X[i], y[i]))

    else:  # pre-specified paras
        max_depth, min_samples_leaf = paras
        for i in range(2):
            char_funs.append(
                RandomForestQuantileRegressor(
                    random_state=0, n_estimators = n_trees,
                    max_depth = max_depth, min_samples_leaf = min_samples_leaf,
                    n_jobs = n_jobs).fit(X[i], y[i]))

    return char_funs


def obs_char(data, uv, include_reward, fixed_state_comp=None):
    """
    Batchwise calculation of the observed cos / sin terms used to define lam:
    (n*T*dx) * (dx*B) = n * T * B.
    """
    T = data[0][0].shape[0]
    X_mat = np.array([a[0] for a in data])
    N = X_mat.shape[0]
    if fixed_state_comp:
        true_state = np.repeat(fixed_state_comp, T).reshape(N, T, 1)
        X_mat = np.concatenate([true_state, X_mat], 2)
    A_mat = np.array([a[1] for a in data])
    XA_mat = np.concatenate([X_mat, A_mat], 2)
    if include_reward:
        R_mat = np.array([a[2] for a in data])
        XR_mat = np.concatenate([X_mat, R_mat], 2)
        S = [XR_mat, XA_mat]
    else:
        S = [X_mat, XA_mat]
    r = []
    for i in range(2):
        temp = S[i].dot(uv[i].T)
        r += [cos(temp), sin(temp)]
    return r


def lam_formula(char_values, c_s_values, J, Q):
    """
    Implement the four lam formulas (pointwise conditional covariances).

    Inputs:
        char_values: predicted values; at time t they use [t, ..., t + J - 1]
            to predict [t - 1] and [t + J]. A len-4 list of (n * T * B) arrays.
        c_s_values: observed values; entry t is just time t.
    Outputs:
        lam: (Q-1)-len list with every entry a list of four (n * T-q * B)
            matrices of lam values.
    """
    phi_R, psi_R, phi_I, psi_I = char_values
    c_X, s_X, c_XA, s_XA = c_s_values

    # forward: entry t is the residual at time t
    left_cos_R = c_X - roll(phi_R, J, 1)
    left_sin_I = s_X - roll(phi_I, J, 1)
    # backward: entry t is the residual at time t
    right_cos_R = c_XA - roll(psi_R, -1, 1)
    right_sin_I = s_XA - roll(psi_I, -1, 1)

    lam = []

    for q in range(2, Q + 1):
        shift = q + J - 1
        startT = q + J - 1
        lam_RR = multiply(left_cos_R, roll(right_cos_R, shift, 1))[:, startT:, :]
        lam_II = multiply(left_sin_I, roll(right_sin_I, shift, 1))[:, startT:, :]
        lam_IR = multiply(left_sin_I, roll(right_cos_R, shift, 1))[:, startT:, :]
        lam_RI = multiply(left_cos_R, roll(right_sin_I, shift, 1))[:, startT:, :]
        lam.append([lam_RR, lam_II, lam_IR, lam_RI])
    return lam
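
##########################################################################
# Editor's note (hedged): the roll(..., axis=1) calls above align the
# residuals in time. A toy example of the convention, with T = 5, J = 1:
#
#   x = np.arange(5)     # phi-type predictions, indexed by prediction row
#   np.roll(x, 1)        # -> array([4, 0, 1, 2, 3])
#
# so after roll(phi_R, J, 1), entry t holds the forecast of time t, and
# c_X - roll(phi_R, J, 1) is the forward residual at time t. The first
# q + J - 1 entries wrap around and are dropped by the [:, startT:, :]
# slices above.
##########################################################################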


##########################################################################
# %% The final test statistic and p-values. These only rely on the
# estimated conditional covariances [estimated cond. char. vs. observed
# cond. char.] and have nothing to do with the characteristic-function
# estimation part.
##########################################################################

#%% part 2 of Step 3 of Algorithm 1
def S_hat(lam, dims, J = 1):
    """
    Construct the test statistic S from the conditional covariances:
    1. construct the (Q-1) * B Gammas (sample lag-q covariance functions)
       from lam;
    2. aggregate them (Step 3) to get S_hat.

    Inputs:
        lam: (Q-1)-len list of four lam matrices (n * T-q * B)

    Outputs:
        S: the scalar test statistic
    """
    Gamma = [np.array([np.mean(a[i], (0, 1)) for a in lam]) for i in range(4)]
    Gamma_R = Gamma[0] - Gamma[1]  # Gamma_RR - Gamma_II
    Gamma_I = Gamma[2] + Gamma[3]  # Gamma_IR + Gamma_RI

    N, T = dims
    Q = Gamma_R.shape[0] + 1
    r = []

    for q in range(2, Q + 1):
        c = sqrt(N * (T + 1 - q - J))
        r.append(c * max(max(Gamma_R[q - 2, :]), max(Gamma_I[q - 2, :])))
    return max(r)

#%% Step 4 of Algorithm 1
def Sigma_q(Q_four_lams):
    """
    Sample covariance matrices, in preparation for the resampling step.

    Parameters:
        Q_four_lams: (Q-1)-len list of four lam matrices (n * T-q * B)
    """
    sigma_q_s = []
    for four_lams in Q_four_lams:  # for each q
        lam_RR, lam_II, lam_IR, lam_RI = four_lams  # (n * T-q * B) matrices

        lam = concatenate([lam_RR - lam_II, lam_RI + lam_IR],
                          2)  # into (n * T-q * 2B)
        N, T_q, BB = lam.shape
        sigma_q = np.zeros((BB, BB))
        for i in range(N):
            # aggregate across T with the .dot()
            sigma_q += lam[i].T.dot(lam[i])
        sigma_q_s.append(sigma_q / (N * T_q))
    return sigma_q_s


#%% Step 5 of Algorithm 1
def bootstrap_p_value(Q_Sigma_q, rep_times, test_stat=0):
    """
    Gaussian-multiplier resampling to get the critical values / p-values.
    """
    BB = Q_Sigma_q[0].shape[0]
    Q = len(Q_Sigma_q) + 1
    Sigma_q_squares = [sqrtm(a) for a in Q_Sigma_q]

    def one_time(seed):
        rseed(seed); npseed(seed)
        Z = randn(BB, Q - 1)
        r = []
        for q in range(Q - 1):
            z = Z[:, q]
            r.append(max(Sigma_q_squares[q].dot(z)))
        return max(r)
    # generate the bootstrapped test statistics
    r = rep_seeds(one_time, rep_times)
    p = p_value(test_stat, r)
    return p
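
##########################################################################
# Editor's note (hedged): the resampling above draws Z ~ N(0, I_{2B}) for
# each lag q and mimics the null distribution of S with
# max_q max(Sigma_q^{1/2} Z). The helper p_value (imported from
# _uti_basic, not shown here) is presumably the empirical tail frequency
# of the observed statistic, i.e. roughly:
#
#   p = np.mean(np.array(boot_stats) >= test_stat)
##########################################################################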

##########################################################################
############ ARCHIVE #####################################################
##########################################################################

def char_fun_est_RF(train_data, paras=[3, 20], n_trees = 200, uv = 0, J = 1,
                    include_reward = 0, fixed_state_comp = None):
    """
    Conditional characteristic function estimation with the alternative
    estimator (multi-output random forests).
    """
    char_funs = []
    X1, y1 = get_pairs(train_data, is_forward = 1, J = J,
                       include_reward = include_reward, fixed_state_comp = fixed_state_comp)
    X2, y2 = get_pairs(train_data, is_forward = 0, J = J,
                       include_reward = include_reward, fixed_state_comp = fixed_state_comp)
    XX, yy = [X1, X2], [y1, y2]

    max_depth, min_samples_leaf = paras
    for i in range(2):
        X, y = XX[i], yy[i]
        # regress the observed cos / sin transforms on the predictors
        y_cos, y_sin = cos(y.dot(uv[i].T)), sin(y.dot(uv[i].T))
        regr_cos = RandomForest(random_state = 0, n_estimators = n_trees,
                                max_depth = max_depth, min_samples_leaf = min_samples_leaf,
                                n_jobs = n_jobs)
        regr_sin = RandomForest(random_state = 0, n_estimators = n_trees,
                                max_depth = max_depth, min_samples_leaf = min_samples_leaf,
                                n_jobs = n_jobs)
        regr_cos.fit(X, y_cos)
        regr_sin.fit(X, y_sin)
        char_funs.append([regr_cos, regr_sin])
    return char_funs
--------------------------------------------------------------------------------