├── __init__.py
├── experiment_func
    ├── __init__.py
    ├── __pycache__
    │   ├── _DGP_Ohio.cpython-36.pyc
    │   ├── _DGP_Ohio.cpython-37.pyc
    │   ├── _DGP_TIGER.cpython-36.pyc
    │   └── _utility_RL.cpython-36.pyc
    ├── _DGP_TIGER.py
    ├── .ipynb_checkpoints
    │   ├── _DGP_TIGER-checkpoint.py
    │   ├── _DGP_Ohio-checkpoint.py
    │   └── _utility_RL-checkpoint.py
    ├── _DGP_Ohio.py
    └── _utility_RL.py
├── experiment_script
    ├── __init__.py
    ├── Ohio_simu_values.txt
    ├── Tiger_simu.py
    ├── .ipynb_checkpoints
    │   ├── Tiger_simu-checkpoint.py
    │   ├── Ohio_simu_seq_lags-checkpoint.py
    │   ├── Ohio_simu_values-checkpoint.py
    │   └── Ohio_simu_testing-checkpoint.py
    ├── Ohio_simu_seq_lags.py
    ├── Ohio_simu_values.py
    └── Ohio_simu_testing.py
├── .ipynb_checkpoints
    ├── __init__-checkpoint.py
    ├── LICENSE-checkpoint
    ├── README-checkpoint.md
    └── Tiger_simu-checkpoint.py
├── diag.png
├── __pycache__
    ├── _QRF.cpython-36.pyc
    ├── _DGP_Ohio.cpython-36.pyc
    ├── __init__.cpython-37.pyc
    ├── _uti_basic.cpython-36.pyc
    ├── _utility.cpython-36.pyc
    ├── _utility_RL.cpython-36.pyc
    ├── _core_test_fun.cpython-36.pyc
    └── _Funcs_Real_Ohio.cpython-36.pyc
├── test_func
    ├── __pycache__
    │   ├── _QRF.cpython-34.pyc
    │   ├── _QRF.cpython-36.pyc
    │   ├── _QRF.cpython-37.pyc
    │   ├── _DGP_Ohio.cpython-36.pyc
    │   ├── _DGP_Ohio.cpython-37.pyc
    │   ├── _DGP_TIGER.cpython-36.pyc
    │   ├── _DGP_TIGER.cpython-37.pyc
    │   ├── __init__.cpython-36.pyc
    │   ├── __init__.cpython-37.pyc
    │   ├── _uti_basic.cpython-34.pyc
    │   ├── _uti_basic.cpython-36.pyc
    │   ├── _uti_basic.cpython-37.pyc
    │   ├── _utility.cpython-36.pyc
    │   ├── _utility.cpython-37.pyc
    │   ├── _utility_RL.cpython-36.pyc
    │   ├── _utility_RL.cpython-37.pyc
    │   ├── _core_test_fun.cpython-34.pyc
    │   ├── _core_test_fun.cpython-36.pyc
    │   ├── _core_test_fun.cpython-37.pyc
    │   ├── core_test_fun.cpython-36.pyc
    │   └── _Funcs_Real_Ohio.cpython-36.pyc
    ├── __init__.py
    ├── .ipynb_checkpoints
    │   ├── __init__-checkpoint.py
    │   ├── _uti_basic-checkpoint.py
    │   ├── _DGP_TIGER-checkpoint.py
    │   ├── _Funcs_Real_Ohio-checkpoint.py
    │   ├── _utility-checkpoint.py
    │   ├── _DGP_Ohio-checkpoint.py
    │   ├── _utility_RL-checkpoint.py
    │   └── _QRF-checkpoint.py
    ├── _uti_basic.py
    ├── _utility.py
    ├── _QRF.py
    └── _core_test_fun.py
├── LICENSE
├── TestMDP.yml
└── README.md


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/experiment_func/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/experiment_script/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_values.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/__init__-checkpoint.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/diag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/diag.png


--------------------------------------------------------------------------------
/__pycache__/_QRF.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_QRF.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/_DGP_Ohio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_DGP_Ohio.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/_uti_basic.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_uti_basic.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/_utility.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_utility.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/_utility_RL.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_utility_RL.cpython-36.pyc


--------------------------------------------------------------------------------
/__pycache__/_core_test_fun.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_core_test_fun.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_QRF.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_QRF.cpython-34.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_QRF.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_QRF.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_QRF.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_QRF.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/_Funcs_Real_Ohio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/__pycache__/_Funcs_Real_Ohio.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_DGP_Ohio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_DGP_Ohio.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_DGP_Ohio.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_DGP_Ohio.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_DGP_TIGER.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_DGP_TIGER.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_DGP_TIGER.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_DGP_TIGER.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_uti_basic.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_uti_basic.cpython-34.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_uti_basic.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_uti_basic.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_uti_basic.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_uti_basic.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_utility.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_utility.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_utility.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_utility.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_utility_RL.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_utility_RL.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_utility_RL.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_utility_RL.cpython-37.pyc


--------------------------------------------------------------------------------
/experiment_func/__pycache__/_DGP_Ohio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/experiment_func/__pycache__/_DGP_Ohio.cpython-36.pyc


--------------------------------------------------------------------------------
/experiment_func/__pycache__/_DGP_Ohio.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/experiment_func/__pycache__/_DGP_Ohio.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_core_test_fun.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_core_test_fun.cpython-34.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_core_test_fun.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_core_test_fun.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_core_test_fun.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_core_test_fun.cpython-37.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/core_test_fun.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/core_test_fun.cpython-36.pyc


--------------------------------------------------------------------------------
/experiment_func/__pycache__/_DGP_TIGER.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/experiment_func/__pycache__/_DGP_TIGER.cpython-36.pyc


--------------------------------------------------------------------------------
/experiment_func/__pycache__/_utility_RL.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/experiment_func/__pycache__/_utility_RL.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__pycache__/_Funcs_Real_Ohio.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RunzheStat/TestMDP/HEAD/test_func/__pycache__/_Funcs_Real_Ohio.cpython-36.pyc


--------------------------------------------------------------------------------
/test_func/__init__.py:
--------------------------------------------------------------------------------
 1 | from ._utility import  *
 2 | from ._uti_basic import  *
 3 | from ._core_test_fun import *
 4 | from ._QRF import  *
 5 | # from ._utility_RL import  *
 6 | 
 7 | # from ._DGP_Ohio  import  *  
 8 | # from ._DGP_TIGER import  *
 9 | # # from ._Funcs_Real_Ohio import  *
10 | 
11 | __all__ = ["_core_test_fun", "_QRF", "_uti_basic", "_utility"]
12 | 


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/__init__-checkpoint.py:
--------------------------------------------------------------------------------
 1 | from ._utility import  *
 2 | from ._uti_basic import  *
 3 | from ._core_test_fun import *
 4 | from ._QRF import  *
 5 | # from ._utility_RL import  *
 6 | 
 7 | # from ._DGP_Ohio  import  *  
 8 | # from ._DGP_TIGER import  *
 9 | # # from ._Funcs_Real_Ohio import  *
10 | 
11 | __all__ = ["_core_test_fun", "_QRF", "_uti_basic", "_utility"]
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 RunzheStat
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/LICENSE-checkpoint:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 RunzheStat
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/TestMDP.yml:
--------------------------------------------------------------------------------
 1 | name: TestMDP
 2 | channels:
 3 |   - conda-forge
 4 |   - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
 5 |   - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
 6 |   - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
 7 |   - defaults
 8 | dependencies:
 9 |   - _r-mutex=1.0.0=anacondar_1
10 |   - bwidget=1.9.11=1
11 |   - bzip2=1.0.8=h1de35cc_0
12 |   - ca-certificates=2020.1.1=0
13 |   - cairo=1.14.12=hc4e6be7_4
14 |   - cctools=927.0.2=h5ba7a2e_4
15 |   - certifi=2020.4.5.1=py36_0
16 |   - clang=10.0.0=default_hf57f61e_0
17 |   - clang_osx-64=10.0.0=h05bbb7f_1
18 |   - clangxx=10.0.0=default_hf57f61e_0
19 |   - clangxx_osx-64=10.0.0=h05bbb7f_1
20 |   - compiler-rt=10.0.0=h47ead80_0
21 |   - compiler-rt_osx-64=10.0.0=hbcc88fd_0
22 |   - curl=7.67.0=ha441bb4_0
23 |   - dill=0.3.1.1=py36_0
24 |   - fontconfig=2.13.0=h5d5b041_1
25 |   - freetype=2.9.1=hb4e5f40_0
26 |   - fribidi=1.0.5=h1de35cc_0
27 |   - gettext=0.19.8.1=h15daf44_3
28 |   - gfortran_osx-64=4.8.5=h22b1bf0_8
29 |   - glib=2.63.1=hd977a24_0
30 |   - graphite2=1.3.13=h2098e52_0
31 |   - gsl=2.4=h1de35cc_4
32 |   - harfbuzz=1.8.8=hb8d4a28_0
33 |   - icu=58.2=h0a44026_3
34 |   - intel-openmp=2020.1=216
35 |   - joblib=0.15.1=py_0
36 |   - jpeg=9b=he5867d9_2
37 |   - krb5=1.16.4=hddcf347_0
38 |   - ld64=450.3=h3c32e8a_4
39 |   - libblas=3.8.0=11_openblas
40 |   - libcblas=3.8.0=11_openblas
41 |   - libcurl=7.67.0=h051b688_0
42 |   - libcxx=10.0.0=1
43 |   - libedit=3.1.20181209=hb402a30_0
44 |   - libffi=3.2.1=h0a44026_6
45 |   - libgfortran=3.0.1=h93005f0_2
46 |   - libiconv=1.16=h1de35cc_0
47 |   - liblapack=3.8.0=11_openblas
48 |   - libllvm10=10.0.0=h21ff451_0
49 |   - libopenblas=0.3.6=hdc02c5d_2
50 |   - libpng=1.6.37=ha441bb4_0
51 |   - libssh2=1.9.0=ha12b0ac_1
52 |   - libtiff=4.1.0=hcb84e12_1
53 |   - libxml2=2.9.9=hf6e021a_1
54 |   - llvm-openmp=10.0.0=h28b9765_0
55 |   - llvm-tools=10.0.0=h21ff451_0
56 |   - lz4-c=1.9.2=h0a44026_0
57 |   - make=4.2.1=h3efe00b_1
58 |   - mkl=2020.1=216
59 |   - mkl_random=1.1.1=py36h0130604_0
60 |   - multiprocess=0.70.9=py36h37b9a7d_1
61 |   - ncurses=6.2=h0a44026_1
62 |   - numpy=1.18.5=py36hdc5ca10_0
63 |   - openssl=1.1.1g=h1de35cc_0
64 |   - pandas=1.0.4=py36hcc1bba6_0
65 |   - pango=1.42.4=h7e27002_1
66 |   - patsy=0.5.1=py36_0
67 |   - pcre=8.43=h0a44026_0
68 |   - pip=20.0.2=py36_3
69 |   - pixman=0.38.0=h1de35cc_0
70 |   - python=3.6.8=haf84260_0
71 |   - python-dateutil=2.8.1=py_0
72 |   - python_abi=3.6=1_cp36m
73 |   - pytz=2020.1=py_0
74 |   - r-base=3.6.1=hcb44179_1
75 |   - r-sys=3.2=r36h46e59ec_0
76 |   - readline=7.0=h1de35cc_5
77 |   - scikit-learn=0.23.1=py36hef903b7_0
78 |   - scipy=1.2.1=py36hbd7caa9_1
79 |   - setuptools=47.1.1=py36_0
80 |   - six=1.15.0=py_0
81 |   - sqlite=3.31.1=h5c1f38d_1
82 |   - statsmodels=0.11.1=py36h37b9a7d_1
83 |   - tapi=1000.10.8=ha1b3eb9_0
84 |   - threadpoolctl=2.1.0=pyh5ca1d4c_0
85 |   - tk=8.6.8=ha441bb4_0
86 |   - tktable=2.10=h1de35cc_0
87 |   - wheel=0.34.2=py36_0
88 |   - xz=5.2.5=h1de35cc_0
89 |   - zlib=1.2.11=h1de35cc_3
90 |   - zstd=1.4.4=h1990bb4_3
91 | prefix: /Users/mac/opt/anaconda3/envs/TestMDP
92 | 
93 | 


--------------------------------------------------------------------------------
/test_func/_uti_basic.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | #############################################################################
 4 | import time
 5 | now = time.time
 6 | import smtplib, ssl
 7 | from multiprocessing import Pool
 8 | import multiprocessing
 9 | 
10 | n_cores = multiprocessing.cpu_count()
11 | #############################################################################
12 | dash = "--------------------------------------"
13 | DASH = "\n" + "--------------------------------------" + "\n"
14 | Dash = "\n" + dash
15 | dasH = dash + "\n"
16 | #############################################################################
17 | #%% utility funs
18 | 
19 | def fun(f, q_in, q_out):
20 |     while True:
21 |         i, x = q_in.get()
22 |         if i is None:
23 |             break
24 |         q_out.put((i, f(x)))
25 |         
26 | def parmap(f, X, nprocs=multiprocessing.cpu_count()-2):
27 |     q_in = multiprocessing.Queue(1)
28 |     q_out = multiprocessing.Queue()
29 | 
30 |     proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
31 |             for _ in range(nprocs)]
32 |     for p in proc:
33 |         p.daemon = True
34 |         p.start()
35 | 
36 |     sent = [q_in.put((i, x)) for i, x in enumerate(X)]
37 |     [q_in.put((None, None)) for _ in range(nprocs)]
38 |     res = [q_out.get() for _ in range(len(sent))]
39 | 
40 |     [p.join() for p in proc]
41 | 
42 |     return [x for i, x in sorted(res)]
43 | 
44 | def send_email(message = None, email_address = "13300180059@fudan.edu.cn", title = "Your results are ready!",
45 |               receiver_email = "Same"): # py.notify.me@gmail.com
46 |     port = 465  # For SSL
47 |     # Create a secure SSL context
48 |     context = ssl.create_default_context()
49 |     sender_email = email_address # "py.notify.me@gmail.com"
50 |     if receiver_email == "Same":
51 |         receiver_email = email_address
52 |     email_content = message
53 |     
54 |     a = """
55 | 
56 |     """
57 |     
58 |     message = """\
59 |     Subject: """ + title + a
60 |     message += email_content
61 |     
62 |     with smtplib.SMTP_SSL("mail.fudan.edu.cn", port, context=context) as server: # "smtp.gmail.com"
63 |         server.login(email_address,"w19950722")  #("py.notify.me@gmail.com", "w19950722")
64 |         server.sendmail(sender_email, receiver_email, message)
65 | 
66 | #############################################################################
67 | def rep_seeds(fun,rep_times):
68 |     """
69 |     non-parallel-version of pool.map
70 |     """
71 |     return list(map(fun, range(rep_times)))
72 | 
73 | def rep_seeds_print(fun,rep_times,init_seed):
74 |     r = []
75 |     start = now()
76 |     for seed in range(rep_times):
77 |         r.append(fun(seed + init_seed))
78 |         if seed % 25 == 0:
79 |             print(round((seed+1)/rep_times*100,2),"% DONE", round((now() - start)/60,2), "mins" )
80 |     return r
81 | #############################################################################     
82 | 
83 | def round_list(thelist,dec):
84 |     """
85 |     extend np.round to list
86 |     """
87 |     return [round(a,dec) for a in thelist]
88 | 
89 | def print_time_cost(seed,total_rep,time):
90 |     print(round((seed+1/total_rep)*100,3),"% DONE, takes", round((time)/60,3)," mins \n")
91 |     
92 | def is_disc(v, n):
93 |     return len(set(v)) <= n
94 | 


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_uti_basic-checkpoint.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | #############################################################################
 4 | import time
 5 | now = time.time
 6 | import smtplib, ssl
 7 | from multiprocessing import Pool
 8 | import multiprocessing
 9 | 
10 | n_cores = multiprocessing.cpu_count()
11 | #############################################################################
12 | dash = "--------------------------------------"
13 | DASH = "\n" + "--------------------------------------" + "\n"
14 | Dash = "\n" + dash
15 | dasH = dash + "\n"
16 | #############################################################################
17 | #%% utility funs
18 | 
19 | def fun(f, q_in, q_out):
20 |     while True:
21 |         i, x = q_in.get()
22 |         if i is None:
23 |             break
24 |         q_out.put((i, f(x)))
25 |         
26 | def parmap(f, X, nprocs=multiprocessing.cpu_count()-2):
27 |     q_in = multiprocessing.Queue(1)
28 |     q_out = multiprocessing.Queue()
29 | 
30 |     proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
31 |             for _ in range(nprocs)]
32 |     for p in proc:
33 |         p.daemon = True
34 |         p.start()
35 | 
36 |     sent = [q_in.put((i, x)) for i, x in enumerate(X)]
37 |     [q_in.put((None, None)) for _ in range(nprocs)]
38 |     res = [q_out.get() for _ in range(len(sent))]
39 | 
40 |     [p.join() for p in proc]
41 | 
42 |     return [x for i, x in sorted(res)]
43 | 
44 | def send_email(message = None, email_address = "13300180059@fudan.edu.cn", title = "Your results are ready!",
45 |               receiver_email = "Same"): # py.notify.me@gmail.com
46 |     port = 465  # For SSL
47 |     # Create a secure SSL context
48 |     context = ssl.create_default_context()
49 |     sender_email = email_address # "py.notify.me@gmail.com"
50 |     if receiver_email == "Same":
51 |         receiver_email = email_address
52 |     email_content = message
53 |     
54 |     a = """
55 | 
56 |     """
57 |     
58 |     message = """\
59 |     Subject: """ + title + a
60 |     message += email_content
61 |     
62 |     with smtplib.SMTP_SSL("mail.fudan.edu.cn", port, context=context) as server: # "smtp.gmail.com"
63 |         server.login(email_address,"w19950722")  #("py.notify.me@gmail.com", "w19950722")
64 |         server.sendmail(sender_email, receiver_email, message)
65 | 
66 | #############################################################################
67 | def rep_seeds(fun,rep_times):
68 |     """
69 |     non-parallel-version of pool.map
70 |     """
71 |     return list(map(fun, range(rep_times)))
72 | 
73 | def rep_seeds_print(fun,rep_times,init_seed):
74 |     r = []
75 |     start = now()
76 |     for seed in range(rep_times):
77 |         r.append(fun(seed + init_seed))
78 |         if seed % 25 == 0:
79 |             print(round((seed+1)/rep_times*100,2),"% DONE", round((now() - start)/60,2), "mins" )
80 |     return r
81 | #############################################################################     
82 | 
83 | def round_list(thelist,dec):
84 |     """
85 |     extend np.round to list
86 |     """
87 |     return [round(a,dec) for a in thelist]
88 | 
89 | def print_time_cost(seed,total_rep,time):
90 |     print(round((seed+1/total_rep)*100,3),"% DONE, takes", round((time)/60,3)," mins \n")
91 |     
92 | def is_disc(v, n):
93 |     return len(set(v)) <= n
94 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/README-checkpoint.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making
 3 | 
 4 | This repository contains the implementation for the paper "Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making" (ICML 2020) in Python.
 5 | 
 6 | ## Summary of the paper
 7 | 
 8 | The Markov assumption (MA) is fundamental to the empirical validity of reinforcement learning. In this paper, we propose a novel Forward-Backward Learning procedure to test MA in sequential decision making. The proposed test does not assume any parametric form on the joint distribution of the observed data and plays an important role for identifying the optimal policy in high-order Markov decision processes and partially observable MDPs. We apply our test to both synthetic datasets and a real data example from mobile health studies to illustrate its usefulness.
 9 | 
10 | <img align="center" src="diag.png" alt="drawing" width="600">
11 | 
12 | 
13 | 
14 | ## Requirements
15 | Change your working directory to this main folder, run `conda env create --file TestMDP.yml` to create the Conda environment, and then run `conda activate TestMDP` to activate the environment.
16 | 
17 | ## File Overview
18 | 2. `/test_func`: main functions for the proposed test
19 |     1. `_core_test_fun.py`: main functions for the proposed test, including Algorithm 1 and 2 in the paper, and their componnets.
20 |     5. `_QRF.py`: the random forests regressor used in our experiments.
21 |     6. `_uti_basic.py` and `_utility.py`: helper functions
22 | 1. `/experiment_script`: scripts for reproducing results. See next section. 
23 | 2. `/experiment_func`: supporting functions for the experiments presented in the paper
24 |         2. `_DGP_Ohio.py`: simulate data and evaluate policies for the HMDP synthetic data section.
25 |         3. `_DGP_TIGER.py`: simulate data for the POMDP synthetic data section.
26 |         7. `_utility_RL.py`: RL algorithms used in the experiments, including FQI, FQE and related functions.
27 | 
28 | ## How to reproduce results in the paper
29 | Simply run the corresponding scripts:
30 | 
31 | 1. Figure 2: `Ohio_simu_testing.py`
32 | 2. Figure 3: `Ohio_simu_values.py` and `Ohio_simu_seq_lags.py`
33 | 3. Figure 4: `Tiger_simu.py`
34 | 
35 | ## How to test the Markov property for your own data
36 | 1. run `from _core_test_fun import *` to import required functions
37 | 2. Algorithm 1: decide whether or not your data satisfies J-th order Markov property
38 |     1. make sure your data, the observed trajectories, is a list of [X, A], each for one trajectory. Here, X is a T by dim_state_variable array for observed states, and A is a T by dim_action_variable array for observed actions. 
39 |     2. run `test(data = data, J = J)`, and the output is the p-value. More optional parameters can be found in the file. 
40 | 3. Algorithm 2: decide whether the system is an MDP (and its order) or the system is most likely to be a POMDP 
41 |     1. make sure your data and parameters satisfy the requirement for  `test()`. 
42 |     2. specify the significance level alpha and order upper bound K. 
43 |     2. run `selectOrder(data = data, K = K, alpha = alpha)`. More optional parameters can be found in the file. 
44 | 
45 | 
46 | 
47 | ## Citation
48 | 
49 | Please cite our paper
50 | [Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making (ICML 2020)](https://arxiv.org/abs/2002.01751)
51 | 
52 | ``` 
53 | @article{Shi2020DoesTM,
54 |   title={Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making},
55 |   author={Chengchun Shi and Runzhe Wan and Rui Song and Wenbin Lu and Ling Leng},
56 |   journal={ArXiv},
57 |   year={2020},
58 |   volume={abs/2002.01751}
59 | }
60 | ``` 
61 | 
62 | 
63 | ## Contributing
64 | 
65 | All contributions welcome! All content in this repository is licensed under the MIT license.
66 | 
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making
 3 | 
 4 | This repository contains the implementation for the paper "Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making" (ICML 2020) in Python.
 5 | 
 6 | ## Summary of the paper
 7 | 
 8 | The Markov assumption (MA) is fundamental to the empirical validity of reinforcement learning. In this paper, we propose a novel Forward-Backward Learning procedure to test MA in sequential decision making. The proposed test does not assume any parametric form on the joint distribution of the observed data and plays an important role for identifying the optimal policy in high-order Markov decision processes and partially observable MDPs. We apply our test to both synthetic datasets and a real data example from mobile health studies to illustrate its usefulness.
 9 | 
10 | <img align="center" src="diag.png" alt="drawing" width="600">
11 | 
12 | 
13 | 
14 | ## Requirements
15 | Change your working directory to this main folder, run `conda env create --file TestMDP.yml` to create the Conda environment, and then run `conda activate TestMDP` to activate the environment.
16 | 
17 | ## File Overview
18 | 2. `/test_func`: main functions for the proposed test
19 |     1. `_core_test_fun.py`: main functions for the proposed test, including Algorithm 1 and 2 in the paper, and their componnets.
20 |     5. `_QRF.py`: the random forests regressor used in our experiments.
21 |     6. `_uti_basic.py` and `_utility.py`: helper functions
22 | 1. `/experiment_script`: scripts for reproducing results. See next section. 
23 | 2. `/experiment_func`: supporting functions for the experiments presented in the paper:
24 |     2. `_DGP_Ohio.py`: simulate data and evaluate policies for the HMDP synthetic data section.
25 |     3. `_DGP_TIGER.py`: simulate data for the POMDP synthetic data section.
26 |     7. `_utility_RL.py`: RL algorithms used in the experiments, including FQI, FQE and related functions.
27 | 
28 | ## How to reproduce results in the paper
29 | Simply run the corresponding scripts:
30 | 
31 | 1. Figure 2: `Ohio_simu_testing.py`
32 | 2. Figure 3: `Ohio_simu_values.py` and `Ohio_simu_seq_lags.py`
33 | 3. Figure 4: `Tiger_simu.py`
34 | 
35 | ## How to test the Markov property for your own data
36 | 1. run `from _core_test_fun import *` to import required functions
37 | 2. Algorithm 1: decide whether or not your data satisfies J-th order Markov property
38 |     1. make sure your data, the observed trajectories, is a list of [X, A], each for one trajectory. Here, X is a T by dim_state_variable array for observed states, and A is a T by dim_action_variable array for observed actions. 
39 |     2. run `test(data = data, J = J)`, and the output is the p-value. More optional parameters can be found in the file. 
40 | 3. Algorithm 2: decide whether the system is an MDP (and its order) or the system is most likely to be a POMDP 
41 |     1. make sure your data and parameters satisfy the requirement for  `test()`. 
42 |     2. specify the significance level alpha and order upper bound K. 
43 |     2. run `selectOrder(data = data, K = K, alpha = alpha)`. More optional parameters can be found in the file. 
44 | 
45 | 
46 | 
47 | ## Citation
48 | 
49 | Please cite our paper
50 | [Does the Markov Decision Process Fit the Data: Testing for the Markov Property in Sequential Decision Making (ICML 2020)](http://proceedings.mlr.press/v119/shi20c/shi20c.pdf)
51 | 
52 | ``` 
53 | @inproceedings{shi2020does,
54 |   title={Does the Markov decision process fit the data: testing for the Markov property in sequential decision making},
55 |   author={Shi, Chengchun and Wan, Runzhe and Song, Rui and Lu, Wenbin and Leng, Ling},
56 |   booktitle={International Conference on Machine Learning},
57 |   pages={8807--8817},
58 |   year={2020},
59 |   organization={PMLR}
60 | }
61 | ``` 
62 | 
63 | 
64 | ## Contributing
65 | 
66 | All contributions welcome! All content in this repository is licensed under the MIT license.
67 | 
68 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/Tiger_simu-checkpoint.py:
--------------------------------------------------------------------------------
 1 | from code import *
 2 | import os
 3 | os.environ["OMP_NUM_THREADS"] = "1"
 4 | #####################################
 5 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 6 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 7 | # the difference with standard cross-validation is negligible and will not affect our findings.
 8 | #####################################
 9 | def one_time(seed = 1, J = 1, 
10 |                      N = 100, T = 20, T_def = 0, 
11 |                      B = 100, Q = 10, 
12 |                      behav_def = 0, obs_def = "alt", 
13 |                      paras = [100, 3,20], weighted = True, include_reward = False,
14 |                      method = "QRF"):
15 |     """
16 |     include_reward: if include reward to our test
17 |     T_def:
18 |         0: length = T with always listen
19 |         1: truncation
20 |     T: the final length
21 |     """
22 |     ### generate data
23 |     fixed_state_comp = (obs_def == "null")
24 |     MDPs = simu_tiger(N = N, T = T, seed = seed,
25 |                        behav_def = behav_def, obs_def = obs_def,
26 |                        T_def = T_def, include_reward = include_reward, fixed_state_comp = fixed_state_comp)
27 |     T += 1 # due to the DGP
28 |     ### Preprocess
29 |     if fixed_state_comp:
30 |         MDPs, fixed_state_comp = MDPs
31 |     else:
32 |         fixed_state_comp = None
33 |     if T_def == 1:
34 |         MDPs = truncateMDP(MDPs,T)
35 |     if not include_reward:
36 |         MDPs = [a[:2] for a in MDPs]
37 |     N = len(MDPs)
38 |     ### Calculate
39 |     if paras == "CV_once":
40 |         return lam_est(data = MDPs, J = J, B = B, Q = Q, paras = paras, include_reward = include_reward,
41 |                   fixed_state_comp = fixed_state_comp, method = method)
42 |     return test(data = MDPs, J = J, B = B, Q = Q, paras = paras, #print_time = print_time,
43 |                 include_reward = include_reward, fixed_state_comp = fixed_state_comp, method = method)
44 | 
45 | 
46 | def one_setting_one_J(rep_times = 10, J = 1, 
47 |                       N = 100, T = 20, T_def = 0,
48 |                       B = 100, Q = 10,
49 |                       behav_def = 0, obs_def = "alt",
50 |                       include_reward = False, mute = True,
51 |                       paras = "CV_once", init_seed = 0, parallel = True, method = "QRF"):
52 |     if paras == "CV_once":
53 |         paras = one_time(seed = 0, J = J, 
54 |                          N = N, T = T, B = B, Q = Q,
55 |                         behav_def = behav_def, obs_def = obs_def,
56 |                         paras = "CV_once",
57 |                         T_def = T_def, include_reward = include_reward, method = method)
58 |         print("CV paras:", paras)
59 |     
60 |     def one_test(seed):
61 |         return one_time(seed = seed, J = J, 
62 |                      N = N, T = T, B = B, Q = Q,
63 |                     behav_def = behav_def, obs_def = obs_def,
64 |                     T_def = T_def, include_reward = include_reward,
65 |                      paras = paras, method = method)
66 |     p_values = parmap(one_test,range(init_seed, init_seed + rep_times), parallel)
67 |     if not mute:
68 |         print("rejection rates are:", rej_rate_quick(p_values))
69 |     return p_values
70 | print("Import DONE!")
71 | 
72 | print("n_cores = ", n_cores)
73 | 
74 | 
75 | for obs_def in ["null", "alt"]:
76 |     for N in [50, 100, 200]:
77 |         for J in range(1, 11):
78 |             p_values = one_setting_one_J(rep_times = 500, J = J, 
79 |                               N = N, T = 20, T_def = 0,
80 |                               B = 100, Q = 10,
81 |                               behav_def = 0, obs_def = obs_def,
82 |                               include_reward = False, mute = False,
83 |                               paras = "CV_once", init_seed = 0, parallel = n_cores, method = "QRF")
84 |             rej_rate_quick(p_values)


--------------------------------------------------------------------------------
/experiment_script/Tiger_simu.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os, sys
 4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
 5 | 
 6 | sys.path.insert(0, package_path + "/test_func")
 7 | from _core_test_fun import *
 8 | 
 9 | sys.path.insert(0, package_path + "/experiment_func")
10 | from _DGP_TIGER import *
11 | 
12 | os.environ["OMP_NUM_THREADS"] = "1"
13 | #####################################
14 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
15 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
16 | # the difference with standard cross-validation is negligible and will not affect our findings.
17 | #####################################
18 | def one_time(seed = 1, J = 1, 
19 |                      N = 100, T = 20, T_def = 0, 
20 |                      B = 100, Q = 10, 
21 |                      behav_def = 0, obs_def = "alt", 
22 |                      paras = [100, 3,20], weighted = True, include_reward = False,
23 |                      method = "QRF"):
24 |     """
25 |     include_reward: if include reward to our test
26 |     T_def:
27 |         0: length = T with always listen
28 |         1: truncation
29 |     T: the final length
30 |     """
31 |     ### generate data
32 |     fixed_state_comp = (obs_def == "null")
33 |     MDPs = simu_tiger(N = N, T = T, seed = seed,
34 |                        behav_def = behav_def, obs_def = obs_def,
35 |                        T_def = T_def, include_reward = include_reward, fixed_state_comp = fixed_state_comp)
36 |     T += 1 # due to the DGP
37 |     ### Preprocess
38 |     if fixed_state_comp:
39 |         MDPs, fixed_state_comp = MDPs
40 |     else:
41 |         fixed_state_comp = None
42 |     if T_def == 1:
43 |         MDPs = truncateMDP(MDPs,T)
44 |     if not include_reward:
45 |         MDPs = [a[:2] for a in MDPs]
46 |     N = len(MDPs)
47 |     ### Calculate
48 |     if paras == "CV_once":
49 |         return lam_est(data = MDPs, J = J, B = B, Q = Q, paras = paras, include_reward = include_reward,
50 |                   fixed_state_comp = fixed_state_comp, method = method)
51 |     return test(data = MDPs, J = J, B = B, Q = Q, paras = paras, #print_time = print_time,
52 |                 include_reward = include_reward, fixed_state_comp = fixed_state_comp, method = method)
53 | 
54 | 
55 | def one_setting_one_J(rep_times = 10, J = 1, 
56 |                       N = 100, T = 20, T_def = 0,
57 |                       B = 100, Q = 10,
58 |                       behav_def = 0, obs_def = "alt",
59 |                       include_reward = False, mute = True,
60 |                       paras = "CV_once", init_seed = 0, parallel = True, method = "QRF"):
61 |     if paras == "CV_once":
62 |         paras = one_time(seed = 0, J = J, 
63 |                          N = N, T = T, B = B, Q = Q,
64 |                         behav_def = behav_def, obs_def = obs_def,
65 |                         paras = "CV_once",
66 |                         T_def = T_def, include_reward = include_reward, method = method)
67 |         print("CV paras:", paras)
68 |     
69 |     def one_test(seed):
70 |         return one_time(seed = seed, J = J, 
71 |                      N = N, T = T, B = B, Q = Q,
72 |                     behav_def = behav_def, obs_def = obs_def,
73 |                     T_def = T_def, include_reward = include_reward,
74 |                      paras = paras, method = method)
75 |     p_values = parmap(one_test,range(init_seed, init_seed + rep_times), parallel)
76 |     if not mute:
77 |         print("rejection rates are:", rej_rate_quick(p_values))
78 |     return p_values
79 | print("Import DONE!")
80 | 
81 | print("n_cores = ", n_cores)
82 | 
83 | 
84 | for obs_def in ["null", "alt"]:
85 |     for N in [50, 100, 200]:
86 |         for J in range(1, 11):
87 |             p_values = one_setting_one_J(rep_times = 500, J = J, 
88 |                               N = N, T = 20, T_def = 0,
89 |                               B = 100, Q = 10,
90 |                               behav_def = 0, obs_def = obs_def,
91 |                               include_reward = False, mute = False,
92 |                               paras = "CV_once", init_seed = 0, parallel = n_cores, method = "QRF")
93 |             rej_rate_quick(p_values)


--------------------------------------------------------------------------------
/experiment_script/.ipynb_checkpoints/Tiger_simu-checkpoint.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import os, sys
 4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
 5 | 
 6 | sys.path.insert(0, package_path + "/test_func")
 7 | from _core_test_fun import *
 8 | 
 9 | sys.path.insert(0, package_path + "/experiment_func")
10 | from _DGP_TIGER import *
11 | 
12 | os.environ["OMP_NUM_THREADS"] = "1"
13 | #####################################
14 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
15 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
16 | # the difference with standard cross-validation is negligible and will not affect our findings.
17 | #####################################
18 | def one_time(seed = 1, J = 1, 
19 |                      N = 100, T = 20, T_def = 0, 
20 |                      B = 100, Q = 10, 
21 |                      behav_def = 0, obs_def = "alt", 
22 |                      paras = [100, 3,20], weighted = True, include_reward = False,
23 |                      method = "QRF"):
24 |     """
25 |     include_reward: if include reward to our test
26 |     T_def:
27 |         0: length = T with always listen
28 |         1: truncation
29 |     T: the final length
30 |     """
31 |     ### generate data
32 |     fixed_state_comp = (obs_def == "null")
33 |     MDPs = simu_tiger(N = N, T = T, seed = seed,
34 |                        behav_def = behav_def, obs_def = obs_def,
35 |                        T_def = T_def, include_reward = include_reward, fixed_state_comp = fixed_state_comp)
36 |     T += 1 # due to the DGP
37 |     ### Preprocess
38 |     if fixed_state_comp:
39 |         MDPs, fixed_state_comp = MDPs
40 |     else:
41 |         fixed_state_comp = None
42 |     if T_def == 1:
43 |         MDPs = truncateMDP(MDPs,T)
44 |     if not include_reward:
45 |         MDPs = [a[:2] for a in MDPs]
46 |     N = len(MDPs)
47 |     ### Calculate
48 |     if paras == "CV_once":
49 |         return lam_est(data = MDPs, J = J, B = B, Q = Q, paras = paras, include_reward = include_reward,
50 |                   fixed_state_comp = fixed_state_comp, method = method)
51 |     return test(data = MDPs, J = J, B = B, Q = Q, paras = paras, #print_time = print_time,
52 |                 include_reward = include_reward, fixed_state_comp = fixed_state_comp, method = method)
53 | 
54 | 
55 | def one_setting_one_J(rep_times = 10, J = 1, 
56 |                       N = 100, T = 20, T_def = 0,
57 |                       B = 100, Q = 10,
58 |                       behav_def = 0, obs_def = "alt",
59 |                       include_reward = False, mute = True,
60 |                       paras = "CV_once", init_seed = 0, parallel = True, method = "QRF"):
61 |     if paras == "CV_once":
62 |         paras = one_time(seed = 0, J = J, 
63 |                          N = N, T = T, B = B, Q = Q,
64 |                         behav_def = behav_def, obs_def = obs_def,
65 |                         paras = "CV_once",
66 |                         T_def = T_def, include_reward = include_reward, method = method)
67 |         print("CV paras:", paras)
68 |     
69 |     def one_test(seed):
70 |         return one_time(seed = seed, J = J, 
71 |                      N = N, T = T, B = B, Q = Q,
72 |                     behav_def = behav_def, obs_def = obs_def,
73 |                     T_def = T_def, include_reward = include_reward,
74 |                      paras = paras, method = method)
75 |     p_values = parmap(one_test,range(init_seed, init_seed + rep_times), parallel)
76 |     if not mute:
77 |         print("rejection rates are:", rej_rate_quick(p_values))
78 |     return p_values
79 | print("Import DONE!")
80 | 
81 | print("n_cores = ", n_cores)
82 | 
83 | 
84 | for obs_def in ["null", "alt"]:
85 |     for N in [50, 100, 200]:
86 |         for J in range(1, 11):
87 |             p_values = one_setting_one_J(rep_times = 500, J = J, 
88 |                               N = N, T = 20, T_def = 0,
89 |                               B = 100, Q = 10,
90 |                               behav_def = 0, obs_def = obs_def,
91 |                               include_reward = False, mute = False,
92 |                               paras = "CV_once", init_seed = 0, parallel = n_cores, method = "QRF")
93 |             rej_rate_quick(p_values)


--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_seq_lags.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | from _utility_RL import *
 12 | 
 13 | os.environ["OMP_NUM_THREADS"] = "1"
 14 | 
 15 | #####################################
 16 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 17 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 18 | # the difference with standard cross-validation is negligible and will not affect our findings.
 19 | #####################################
 20 | 
 21 | def one_time_seq(seed = 1, J_upper = 10, alpha_range = [0.02, 0.01, 0.005], 
 22 |                      N = 10, T = 7 * 8 * 24, B = 100, Q = 10, sd_G = 3,
 23 |                      para_ranges = None, n_trees = 100, 
 24 |                      ):
 25 |     ## generate data
 26 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 27 |     data = burn_in(data, first_T = 10)
 28 |     T -= 10
 29 |     # for value evaluation [we will use the original transition], 
 30 |     # do not use normalized data[will not be dominated like testing]
 31 |     value_data = data
 32 |     testing_data = [a[:2] for a in normalize(data)]    
 33 |     time = now()
 34 |     p_values = []
 35 |     for J in range(1, J_upper + 1):
 36 |         p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = para_ranges[J - 1], 
 37 |                        n_trees = n_trees, print_time = False, method = "QRF")
 38 |         p_values.append(p_value)
 39 |         if p_value > alpha_range[0]: 
 40 |             break
 41 |     lags = []
 42 |     for alpha in alpha_range:
 43 |         for i in range(J_upper):
 44 |             if p_values[i] > alpha:
 45 |                 lags.append(i + 1)
 46 |                 break
 47 |         if i == J_upper - 1: 
 48 |             lags.append(J_upper)
 49 |             
 50 |     if seed % 50 == 0:
 51 |         print("** testing time:", now() - time, " for seed = ", seed,"**"); time = now()
 52 |     return [lags, p_values]
 53 | 
 54 | def one_setting_seq(rep_times = 500, N = 10, T = 24 * 56, B = 100, Q = 10, sd_G = 3, 
 55 |                       n_trees = 100, alpha_range = [0.02, 0.01, 0.005], 
 56 |                       init_seed = 0,
 57 |                       file = None, J_low = 1, J_upper = 10, 
 58 |                       parallel = 10):
 59 |     # CV_paras for each J
 60 |     para_ranges = []
 61 |     data = simu_Ohio(T, N, seed = 0, sd_G = sd_G)
 62 |     data = burn_in(data, first_T = 10)
 63 |     T -= 10
 64 |     testing_data = [a[:2] for a in normalize(data)]
 65 |     for J in range(1, J_upper + 1):
 66 |         paras = lam_est(data = testing_data, J = J, B = B, Q = Q, paras = "CV_once", n_trees = n_trees, method = "QRF")
 67 |         para_ranges.append(paras)
 68 |     def one_time(seed):
 69 |         r = one_time_seq(seed = seed, J_upper = J_upper, alpha_range = alpha_range, 
 70 |                      N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 71 |                      para_ranges = para_ranges, n_trees = n_trees)
 72 |         if seed % 50 == 0:
 73 |             print(seed, "Done!\n")
 74 |         return r
 75 |     
 76 |     r = parmap(one_time, range(init_seed, init_seed + rep_times), parallel)
 77 |     # different alphas
 78 |     lagss, p_valuess = [a[0] for a in r], [a[1] for a in r]
 79 |     print(lagss, DASH, DASH, p_valuess, DASH)
 80 |     lags_each_alpha = []
 81 |     for i in range(len(alpha_range)):
 82 |         lags_each_alpha.append([a[i] for a in lagss])
 83 |     r = [lags_each_alpha, p_valuess]
 84 |     if file is not None:
 85 |         print(DASH + str([N, sd_G]), file = file)
 86 |         for i in range(4):
 87 |             print(str(r[i]) + dasH, file = file)
 88 |     return r
 89 | print("import DONE!", "num of cores:", n_cores, DASH)
 90 | 
 91 | #####################################
 92 | path = "Ohio_seq.txt"
 93 | file = open(path, 'w')
 94 | rr = []
 95 | times = 500
 96 | sd_G = 3
 97 | for N in [10, 15, 20]:
 98 |     print([N, sd_G],": \n")
 99 |     r = one_setting_seq(rep_times = times, N = N, T = 24 * 7 * 8, sd_G = 3, 
100 |                   n_trees = 100, B = 100, Q = 10, alpha_range = [0.01, 0.005], 
101 |                   init_seed = 0, 
102 |                   file = file, J_low = 1, J_upper = 10, 
103 |                   parallel = n_cores)
104 |     rr.append(r)
105 | file.close()
106 | 
107 | 
108 | with open("Ohio_seq.list", 'wb') as file:
109 |     pickle.dump(rr, file)
110 | file.close()
111 | 


--------------------------------------------------------------------------------
/experiment_script/.ipynb_checkpoints/Ohio_simu_seq_lags-checkpoint.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | from _utility_RL import *
 12 | 
 13 | os.environ["OMP_NUM_THREADS"] = "1"
 14 | 
 15 | #####################################
 16 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 17 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 18 | # the difference with standard cross-validation is negligible and will not affect our findings.
 19 | #####################################
 20 | 
 21 | def one_time_seq(seed = 1, J_upper = 10, alpha_range = [0.02, 0.01, 0.005], 
 22 |                      N = 10, T = 7 * 8 * 24, B = 100, Q = 10, sd_G = 3,
 23 |                      para_ranges = None, n_trees = 100, 
 24 |                      ):
 25 |     ## generate data
 26 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 27 |     data = burn_in(data, first_T = 10)
 28 |     T -= 10
 29 |     # for value evaluation [we will use the original transition], 
 30 |     # do not use normalized data[will not be dominated like testing]
 31 |     value_data = data
 32 |     testing_data = [a[:2] for a in normalize(data)]    
 33 |     time = now()
 34 |     p_values = []
 35 |     for J in range(1, J_upper + 1):
 36 |         p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = para_ranges[J - 1], 
 37 |                        n_trees = n_trees, print_time = False, method = "QRF")
 38 |         p_values.append(p_value)
 39 |         if p_value > alpha_range[0]: 
 40 |             break
 41 |     lags = []
 42 |     for alpha in alpha_range:
 43 |         for i in range(J_upper):
 44 |             if p_values[i] > alpha:
 45 |                 lags.append(i + 1)
 46 |                 break
 47 |         if i == J_upper - 1: 
 48 |             lags.append(J_upper)
 49 |             
 50 |     if seed % 50 == 0:
 51 |         print("** testing time:", now() - time, " for seed = ", seed,"**"); time = now()
 52 |     return [lags, p_values]
 53 | 
 54 | def one_setting_seq(rep_times = 500, N = 10, T = 24 * 56, B = 100, Q = 10, sd_G = 3, 
 55 |                       n_trees = 100, alpha_range = [0.02, 0.01, 0.005], 
 56 |                       init_seed = 0,
 57 |                       file = None, J_low = 1, J_upper = 10, 
 58 |                       parallel = 10):
 59 |     # CV_paras for each J
 60 |     para_ranges = []
 61 |     data = simu_Ohio(T, N, seed = 0, sd_G = sd_G)
 62 |     data = burn_in(data, first_T = 10)
 63 |     T -= 10
 64 |     testing_data = [a[:2] for a in normalize(data)]
 65 |     for J in range(1, J_upper + 1):
 66 |         paras = lam_est(data = testing_data, J = J, B = B, Q = Q, paras = "CV_once", n_trees = n_trees, method = "QRF")
 67 |         para_ranges.append(paras)
 68 |     def one_time(seed):
 69 |         r = one_time_seq(seed = seed, J_upper = J_upper, alpha_range = alpha_range, 
 70 |                      N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 71 |                      para_ranges = para_ranges, n_trees = n_trees)
 72 |         if seed % 50 == 0:
 73 |             print(seed, "Done!\n")
 74 |         return r
 75 |     
 76 |     r = parmap(one_time, range(init_seed, init_seed + rep_times), parallel)
 77 |     # different alphas
 78 |     lagss, p_valuess = [a[0] for a in r], [a[1] for a in r]
 79 |     print(lagss, DASH, DASH, p_valuess, DASH)
 80 |     lags_each_alpha = []
 81 |     for i in range(len(alpha_range)):
 82 |         lags_each_alpha.append([a[i] for a in lagss])
 83 |     r = [lags_each_alpha, p_valuess]
 84 |     if file is not None:
 85 |         print(DASH + str([N, sd_G]), file = file)
 86 |         for i in range(4):
 87 |             print(str(r[i]) + dasH, file = file)
 88 |     return r
 89 | print("import DONE!", "num of cores:", n_cores, DASH)
 90 | 
 91 | #####################################
 92 | path = "Ohio_seq.txt"
 93 | file = open(path, 'w')
 94 | rr = []
 95 | times = 500
 96 | sd_G = 3
 97 | for N in [10, 15, 20]:
 98 |     print([N, sd_G],": \n")
 99 |     r = one_setting_seq(rep_times = times, N = N, T = 24 * 7 * 8, sd_G = 3, 
100 |                   n_trees = 100, B = 100, Q = 10, alpha_range = [0.01, 0.005], 
101 |                   init_seed = 0, 
102 |                   file = file, J_low = 1, J_upper = 10, 
103 |                   parallel = n_cores)
104 |     rr.append(r)
105 | file.close()
106 | 
107 | 
108 | with open("Ohio_seq.list", 'wb') as file:
109 |     pickle.dump(rr, file)
110 | file.close()
111 | 


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_DGP_TIGER-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #%% packages
  2 | #%% 
  3 | from ._QRF import *
  4 | from ._uti_basic import *
  5 | from ._utility import *
  6 | #############################################################################
  7 | #############################################################################
  8 | 
  9 | def list2Matrix(List):
 10 |     # return a n * 1 matrix
 11 |     return np.array(np.expand_dims(np.array(List),1))
 12 | 
 13 | #%%
 14 | 
 15 | def TIGER_dynamics(state, action):
 16 |     p_correct = 0.7 # larger -> more POMDP
 17 |     # obs -> action -> obs, reward
 18 |     if action == 0: # listen
 19 |         p = rbin(1, p_correct)
 20 |         obs = p * state +  (1-p) * (0-state)
 21 |         reward = -1
 22 |     else: # action = -1 or 1
 23 |         if action == state:
 24 |             reward = -100
 25 |         else: # no tiger door
 26 |             reward = 10
 27 |         obs = 3 # end status
 28 |     return reward, obs
 29 | 
 30 | 
 31 | def TIGER_choose_action(obs, behav_def = 0):
 32 |     """
 33 |     behav_def:
 34 |         0. always listen
 35 |         1. random
 36 |         2. adaptive
 37 |     """
 38 |     p_listen = 0.9 # for random policy
 39 |     T_must_obs = 10 # for adaptive plicy
 40 |     
 41 |     if behav_def == 0:
 42 |         return 0 # always listen
 43 |     elif behav_def == 1:
 44 |         if rbin(1, p_listen):
 45 |             return 0
 46 |         elif rbin(1, 0.5):
 47 |             return 1
 48 |         else:
 49 |             return -1
 50 |     elif behav_def == 2:
 51 |         """ based on obs, Chengchun's approach
 52 |         1. if n <= T_must_obs: obs
 53 |         2. else: n > T_must_obs 时 p_listen = (1- max(p_left,p_right)) * 2, o.w. open the door accourding to the prob.
 54 |         """
 55 |         if obs[1] <= T_must_obs:
 56 |             return 0
 57 |         else:
 58 |             p_l = obs[0]
 59 |             p_listen = (1- max(p_l,1 - p_l)) * 2
 60 |             if rbin(1, p_listen):
 61 |                 return 0
 62 |             elif rbin(1, p_l):
 63 |                 return -1
 64 |             else:
 65 |                 return 1
 66 |         
 67 | def simu_tiger(N = 1, T = 20, seed = 1, behav_def = 0, obs_def = "alt", T_def = 0, include_reward = True, fixed_state_comp = False):
 68 |     """
 69 |     T: spycify the game here
 70 |     A: "listen"/ "open_l" / "open_r"  ---- 0 / -1 / +1
 71 |     State:  "l" / "r" : -1 / +1
 72 |     Obervation: hear "l" / "r"
 73 |     Reward: -1, 10, - 100
 74 |     Returns: a list (len = N) of [$O_{T*dim_O},A_{T*1}$] or [O,A,R]
 75 |     
 76 |     behav_def:
 77 |         0. always listen
 78 |         1. random
 79 |         2. adaptive
 80 |     obs_def:
 81 |         "alt": [1,-1]
 82 |         1: [p]
 83 |         2: [p,n]
 84 |     T_def:
 85 |         0: length = T with always listen
 86 |         1: truncation
 87 |     """   
 88 |     # gamma = .9 
 89 |     
 90 |     MDPs = []
 91 |     rseed(seed); npseed(seed)
 92 |     init_state = rbin(1, .5, N) * 2 - 1
 93 |     true_states = []
 94 |     
 95 |     if T_def == 1:
 96 |         def stop(obs,t):
 97 |             return obs != 3
 98 |     else:
 99 |         def stop(obs,t):
100 |             return t < T
101 |     
102 |     for i in range(N):
103 |         ## Initialization
104 |         state = init_state[i]
105 |         obs, obs_hist = 0, [0]
106 |         A = []
107 |         R = [0] # for alignment purpose
108 |         O, O_1 = [[0.5, 0]], [0.5]  
109 |         t, left_cnt = 0, 0
110 |         
111 |         while(stop(obs,t)): # not in the Terminal state
112 |             ## choose actiom, receive reward and state trainsition [observations]
113 |             action = TIGER_choose_action(obs = O[-1], behav_def = behav_def) # obs = [p,n], old version
114 |             reward, obs = TIGER_dynamics(state,action)
115 |             
116 |             ## record
117 |             left_cnt += (obs == -1)
118 |             t += 1
119 |             # for obs_def_0
120 |             obs_hist.append(obs)
121 |             # for obs_def_1
122 |             O_1.append(left_cnt/t)
123 |             # for action choosing and obs_def_2
124 |             if obs == 3:
125 |                 O.append([left_cnt/(t-1),t])
126 |             else:
127 |                 O.append([left_cnt/t,t])  
128 |             A.append(action)
129 |             R.append(reward)
130 |         A.append(3)
131 |         
132 |         if obs_def == "alt":
133 |             O =  list2Matrix(obs_hist)
134 |         elif obs_def == "null":
135 | #             O =  list2Matrix(obs_hist)        
136 |             if fixed_state_comp:
137 |                 O = list2Matrix(obs_hist)
138 |                 true_states.append(state)
139 |             else:
140 |                 O = np.array([[a,state] for a in obs_hist])
141 | #             print(O.shape)
142 |         elif obs_def == 1:
143 |             O = list2Matrix(O_1)
144 |         elif obs_def == 2:
145 |             O = np.array(O)
146 |         if include_reward:
147 |             MDP = [O, list2Matrix(A), list2Matrix(R)]
148 |         else:
149 |             MDP = [O, list2Matrix(A)]
150 |         MDPs.append(MDP)
151 |     if fixed_state_comp:
152 |         return [MDPs,true_states]
153 |     return MDPs
154 | 
155 | 


--------------------------------------------------------------------------------
/experiment_func/_DGP_TIGER.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #%% packages
  5 | #%% 
  6 | import os, sys
  7 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  8 | 
  9 | sys.path.insert(0, package_path + "/test_func")
 10 | from _core_test_fun import *
 11 | #############################################################################
 12 | #############################################################################
 13 | 
 14 | def list2Matrix(List):
 15 |     # return a n * 1 matrix
 16 |     return np.array(np.expand_dims(np.array(List),1))
 17 | 
 18 | #%%
 19 | 
 20 | def TIGER_dynamics(state, action):
 21 |     p_correct = 0.7 # larger -> more POMDP
 22 |     # obs -> action -> obs, reward
 23 |     if action == 0: # listen
 24 |         p = rbin(1, p_correct)
 25 |         obs = p * state +  (1-p) * (0-state)
 26 |         reward = -1
 27 |     else: # action = -1 or 1
 28 |         if action == state:
 29 |             reward = -100
 30 |         else: # no tiger door
 31 |             reward = 10
 32 |         obs = 3 # end status
 33 |     return reward, obs
 34 | 
 35 | 
 36 | def TIGER_choose_action(obs, behav_def = 0):
 37 |     """
 38 |     behav_def:
 39 |         0. always listen
 40 |         1. random
 41 |         2. adaptive
 42 |     """
 43 |     p_listen = 0.9 # for random policy
 44 |     T_must_obs = 10 # for adaptive plicy
 45 |     
 46 |     if behav_def == 0:
 47 |         return 0 # always listen
 48 |     elif behav_def == 1:
 49 |         if rbin(1, p_listen):
 50 |             return 0
 51 |         elif rbin(1, 0.5):
 52 |             return 1
 53 |         else:
 54 |             return -1
 55 |     elif behav_def == 2:
 56 |         """ based on obs, Chengchun's approach
 57 |         1. if n <= T_must_obs: obs
 58 |         2. else: n > T_must_obs 时 p_listen = (1- max(p_left,p_right)) * 2, o.w. open the door accourding to the prob.
 59 |         """
 60 |         if obs[1] <= T_must_obs:
 61 |             return 0
 62 |         else:
 63 |             p_l = obs[0]
 64 |             p_listen = (1- max(p_l,1 - p_l)) * 2
 65 |             if rbin(1, p_listen):
 66 |                 return 0
 67 |             elif rbin(1, p_l):
 68 |                 return -1
 69 |             else:
 70 |                 return 1
 71 |         
 72 | def simu_tiger(N = 1, T = 20, seed = 1, behav_def = 0, obs_def = "alt", T_def = 0, include_reward = True, fixed_state_comp = False):
 73 |     """
 74 |     T: spycify the game here
 75 |     A: "listen"/ "open_l" / "open_r"  ---- 0 / -1 / +1
 76 |     State:  "l" / "r" : -1 / +1
 77 |     Obervation: hear "l" / "r"
 78 |     Reward: -1, 10, - 100
 79 |     Returns: a list (len = N) of [$O_{T*dim_O},A_{T*1}$] or [O,A,R]
 80 |     
 81 |     behav_def:
 82 |         0. always listen
 83 |         1. random
 84 |         2. adaptive
 85 |     obs_def:
 86 |         "alt": [1,-1]
 87 |         1: [p]
 88 |         2: [p,n]
 89 |     T_def:
 90 |         0: length = T with always listen
 91 |         1: truncation
 92 |     """   
 93 |     # gamma = .9 
 94 |     
 95 |     MDPs = []
 96 |     rseed(seed); npseed(seed)
 97 |     init_state = rbin(1, .5, N) * 2 - 1
 98 |     true_states = []
 99 |     
100 |     if T_def == 1:
101 |         def stop(obs,t):
102 |             return obs != 3
103 |     else:
104 |         def stop(obs,t):
105 |             return t < T
106 |     
107 |     for i in range(N):
108 |         ## Initialization
109 |         state = init_state[i]
110 |         obs, obs_hist = 0, [0]
111 |         A = []
112 |         R = [0] # for alignment purpose
113 |         O, O_1 = [[0.5, 0]], [0.5]  
114 |         t, left_cnt = 0, 0
115 |         
116 |         while(stop(obs,t)): # not in the Terminal state
117 |             ## choose actiom, receive reward and state trainsition [observations]
118 |             action = TIGER_choose_action(obs = O[-1], behav_def = behav_def) # obs = [p,n], old version
119 |             reward, obs = TIGER_dynamics(state,action)
120 |             
121 |             ## record
122 |             left_cnt += (obs == -1)
123 |             t += 1
124 |             # for obs_def_0
125 |             obs_hist.append(obs)
126 |             # for obs_def_1
127 |             O_1.append(left_cnt/t)
128 |             # for action choosing and obs_def_2
129 |             if obs == 3:
130 |                 O.append([left_cnt/(t-1),t])
131 |             else:
132 |                 O.append([left_cnt/t,t])  
133 |             A.append(action)
134 |             R.append(reward)
135 |         A.append(3)
136 |         
137 |         if obs_def == "alt":
138 |             O =  list2Matrix(obs_hist)
139 |         elif obs_def == "null":
140 | #             O =  list2Matrix(obs_hist)        
141 |             if fixed_state_comp:
142 |                 O = list2Matrix(obs_hist)
143 |                 true_states.append(state)
144 |             else:
145 |                 O = np.array([[a,state] for a in obs_hist])
146 | #             print(O.shape)
147 |         elif obs_def == 1:
148 |             O = list2Matrix(O_1)
149 |         elif obs_def == 2:
150 |             O = np.array(O)
151 |         if include_reward:
152 |             MDP = [O, list2Matrix(A), list2Matrix(R)]
153 |         else:
154 |             MDP = [O, list2Matrix(A)]
155 |         MDPs.append(MDP)
156 |     if fixed_state_comp:
157 |         return [MDPs,true_states]
158 |     return MDPs
159 | 
160 | 


--------------------------------------------------------------------------------
/experiment_func/.ipynb_checkpoints/_DGP_TIGER-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #%% packages
  5 | #%% 
  6 | import os, sys
  7 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  8 | 
  9 | sys.path.insert(0, package_path + "/test_func")
 10 | from _core_test_fun import *
 11 | #############################################################################
 12 | #############################################################################
 13 | 
 14 | def list2Matrix(List):
 15 |     # return a n * 1 matrix
 16 |     return np.array(np.expand_dims(np.array(List),1))
 17 | 
 18 | #%%
 19 | 
 20 | def TIGER_dynamics(state, action):
 21 |     p_correct = 0.7 # larger -> more POMDP
 22 |     # obs -> action -> obs, reward
 23 |     if action == 0: # listen
 24 |         p = rbin(1, p_correct)
 25 |         obs = p * state +  (1-p) * (0-state)
 26 |         reward = -1
 27 |     else: # action = -1 or 1
 28 |         if action == state:
 29 |             reward = -100
 30 |         else: # no tiger door
 31 |             reward = 10
 32 |         obs = 3 # end status
 33 |     return reward, obs
 34 | 
 35 | 
 36 | def TIGER_choose_action(obs, behav_def = 0):
 37 |     """
 38 |     behav_def:
 39 |         0. always listen
 40 |         1. random
 41 |         2. adaptive
 42 |     """
 43 |     p_listen = 0.9 # for random policy
 44 |     T_must_obs = 10 # for adaptive plicy
 45 |     
 46 |     if behav_def == 0:
 47 |         return 0 # always listen
 48 |     elif behav_def == 1:
 49 |         if rbin(1, p_listen):
 50 |             return 0
 51 |         elif rbin(1, 0.5):
 52 |             return 1
 53 |         else:
 54 |             return -1
 55 |     elif behav_def == 2:
 56 |         """ based on obs, Chengchun's approach
 57 |         1. if n <= T_must_obs: obs
 58 |         2. else: n > T_must_obs 时 p_listen = (1- max(p_left,p_right)) * 2, o.w. open the door accourding to the prob.
 59 |         """
 60 |         if obs[1] <= T_must_obs:
 61 |             return 0
 62 |         else:
 63 |             p_l = obs[0]
 64 |             p_listen = (1- max(p_l,1 - p_l)) * 2
 65 |             if rbin(1, p_listen):
 66 |                 return 0
 67 |             elif rbin(1, p_l):
 68 |                 return -1
 69 |             else:
 70 |                 return 1
 71 |         
 72 | def simu_tiger(N = 1, T = 20, seed = 1, behav_def = 0, obs_def = "alt", T_def = 0, include_reward = True, fixed_state_comp = False):
 73 |     """
 74 |     T: spycify the game here
 75 |     A: "listen"/ "open_l" / "open_r"  ---- 0 / -1 / +1
 76 |     State:  "l" / "r" : -1 / +1
 77 |     Obervation: hear "l" / "r"
 78 |     Reward: -1, 10, - 100
 79 |     Returns: a list (len = N) of [$O_{T*dim_O},A_{T*1}$] or [O,A,R]
 80 |     
 81 |     behav_def:
 82 |         0. always listen
 83 |         1. random
 84 |         2. adaptive
 85 |     obs_def:
 86 |         "alt": [1,-1]
 87 |         1: [p]
 88 |         2: [p,n]
 89 |     T_def:
 90 |         0: length = T with always listen
 91 |         1: truncation
 92 |     """   
 93 |     # gamma = .9 
 94 |     
 95 |     MDPs = []
 96 |     rseed(seed); npseed(seed)
 97 |     init_state = rbin(1, .5, N) * 2 - 1
 98 |     true_states = []
 99 |     
100 |     if T_def == 1:
101 |         def stop(obs,t):
102 |             return obs != 3
103 |     else:
104 |         def stop(obs,t):
105 |             return t < T
106 |     
107 |     for i in range(N):
108 |         ## Initialization
109 |         state = init_state[i]
110 |         obs, obs_hist = 0, [0]
111 |         A = []
112 |         R = [0] # for alignment purpose
113 |         O, O_1 = [[0.5, 0]], [0.5]  
114 |         t, left_cnt = 0, 0
115 |         
116 |         while(stop(obs,t)): # not in the Terminal state
117 |             ## choose actiom, receive reward and state trainsition [observations]
118 |             action = TIGER_choose_action(obs = O[-1], behav_def = behav_def) # obs = [p,n], old version
119 |             reward, obs = TIGER_dynamics(state,action)
120 |             
121 |             ## record
122 |             left_cnt += (obs == -1)
123 |             t += 1
124 |             # for obs_def_0
125 |             obs_hist.append(obs)
126 |             # for obs_def_1
127 |             O_1.append(left_cnt/t)
128 |             # for action choosing and obs_def_2
129 |             if obs == 3:
130 |                 O.append([left_cnt/(t-1),t])
131 |             else:
132 |                 O.append([left_cnt/t,t])  
133 |             A.append(action)
134 |             R.append(reward)
135 |         A.append(3)
136 |         
137 |         if obs_def == "alt":
138 |             O =  list2Matrix(obs_hist)
139 |         elif obs_def == "null":
140 | #             O =  list2Matrix(obs_hist)        
141 |             if fixed_state_comp:
142 |                 O = list2Matrix(obs_hist)
143 |                 true_states.append(state)
144 |             else:
145 |                 O = np.array([[a,state] for a in obs_hist])
146 | #             print(O.shape)
147 |         elif obs_def == 1:
148 |             O = list2Matrix(O_1)
149 |         elif obs_def == 2:
150 |             O = np.array(O)
151 |         if include_reward:
152 |             MDP = [O, list2Matrix(A), list2Matrix(R)]
153 |         else:
154 |             MDP = [O, list2Matrix(A)]
155 |         MDPs.append(MDP)
156 |     if fixed_state_comp:
157 |         return [MDPs,true_states]
158 |     return MDPs
159 | 
160 | 


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_Funcs_Real_Ohio-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #############################################################################
  2 | #%%
  3 | from ._utility import * 
  4 | from ._QRF import * 
  5 | from ._core_test_fun import *
  6 | from ._DGP_Ohio import *
  7 | from ._uti_basic import * 
  8 | from ._utility_RL import * 
  9 | os.environ["OMP_NUM_THREADS"] = "1"
 10 | n_cores = multiprocessing.cpu_count()
 11 | #############################################################################
 12 | #%% Data
 13 | import sys, os
 14 | path = os.getcwd() + "/code_data/Data_Ohio.csv"
 15 | data = pd.read_csv(path, header = 0)
 16 | data0 = np.array(data)
 17 | data0 = data0[:,1:] # no row index
 18 | #############################################################################
 19 | 
 20 | def generate_initial_states(N_init = 100, J_upper = 5, seed = 0):
 21 |     """generate initial states for comparison of values in the Ohio real data.
 22 |     """
 23 |     init_obs, init_A = simu_Ohio(T = J_upper, N = N_init, 
 24 |                                  seed = seed, sd_G = 3, matrix_output = True, is_real = True)
 25 |     init_A = init_A.reshape(1, J_upper, N_init)
 26 |     initial_states = np.concatenate([init_obs, init_A], 0)
 27 |     initial_states = initial_states.reshape((4 * J_upper, N_init), order = "F")
 28 |     initial_states = initial_states[:(J_upper * 4 - 1), :] 
 29 |     return initial_states.T
 30 |     
 31 |     
 32 | def process_data_Nsplit(index, T):
 33 |     """
 34 |     data: a list(len-N) of [T*3 states, T * 1 actions T * 1 rewards]
 35 |     T: length = 1100
 36 |     """
 37 |     data, J_data = [], []
 38 |     for i in index:
 39 |         temp = data0[T * i : T * (i+1)].copy()
 40 |         states = temp[:, :3]
 41 |         actions = temp[:, 3].reshape(-1, 1)
 42 |         rewards = np.roll(apply_v(Glucose2Reward, states[:, 0]), shift = -1).reshape(-1, 1) 
 43 |         J_data.append([states, actions])
 44 |         data.append([states, actions, rewards])
 45 |     return data.copy(), normalize(J_data.copy())
 46 | 
 47 | 
 48 | #############################################################################
 49 | #############################################################################
 50 | #%%
 51 |  
 52 | def real_ohio_Nsplit(J_upper = 10, gamma = 0.9, gamma_eval = 0.9, alpha = 0.02, RF_paras = "CV", n_trees = 100, 
 53 |                      N_init = 100, seed = 1, n_set = 20,
 54 |                      parallel = False, T_unify = 1100, threshold = 1e-4):
 55 |     a = now()
 56 |     init_states = generate_initial_states(N_init = N_init, J_upper = J_upper, seed = seed) # N * (J_upper * 4 - 1)
 57 |         
 58 |     arr = [i for i in range(6)]
 59 |     rseed(seed); npseed(seed)
 60 |     all_possible_train_set = permutation(list(combinations(arr, 3)) )
 61 |     def one_time(i):
 62 |         time = now()
 63 |         train_set = all_possible_train_set[i]
 64 |         eval_set = set([i for i in range(6)]) - set(train_set)   
 65 |         train_data, J_data = process_data_Nsplit(train_set, T = T_unify)
 66 |         eval_data, _ = process_data_Nsplit(eval_set, T = T_unify) 
 67 |         
 68 |         ### Given a J, get the optimal policy and evaluate its performance
 69 |         eval_PatternSets = MDP2Trans(MDPs = eval_data, J = J_upper, action_in_states = True)
 70 |         values = []        
 71 |         for J in range(1, J_upper + 1):
 72 |             ### Learn the optimal policies
 73 |             Learning_PatternSets = MDP2Trans(MDPs = train_data, J = J, action_in_states = True)
 74 |             Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma,
 75 |                              RF_paras = RF_paras, n_trees = n_trees, threshold = threshold)
 76 |             ### Evaluate the policy: learned Q and observed trajectories
 77 |             V_func = FQE(PatternSets = eval_PatternSets, Q_func = Q_func, J = J,
 78 |                                      gamma = gamma_eval, RF_paras = RF_paras, n_trees = n_trees,
 79 |                                           threshold = threshold)
 80 |             
 81 |             ### Evaluate using init states
 82 |             values_integration = V_func(init_states)
 83 |             value = np.round(np.mean(values_integration), 4)
 84 |             values.append(value)
 85 |         ### Store results
 86 |         print("The ", i + 1, "round ends with Time cost:", np.round(now() - time,2), "\n")
 87 |         
 88 |         return values
 89 |     
 90 |     r_values = parmap(one_time, range(n_set))
 91 |     r_values = np.array(r_values)
 92 | 
 93 |     print("mean:", np.mean(r_values, 0), "\n", "std:", np.std(r_values, 0))
 94 |     print("time cost: ", now() - a)
 95 |     return r_values
 96 | 
 97 | #############################################################################
 98 | #%% Decide the order with all data
 99 | 
100 | def decide_J(data, J_range, paras = "CV", n_trees = 100, T = 1100):
101 |     data_J = []
102 |     for i in range(6):
103 |         temp = data[T * i : T * (i + 1)]
104 |         temp = [temp[:, :3], temp[:, 3].reshape(-1, 1)]
105 |         data_J.append(temp)
106 |     data_J = normalize(data_J)
107 |     r = selectOrder(data_J, B = 200, Q = 10, L = 3, alpha = 0.1, K = 10, paras="CV", n_trees = n_trees)
108 |     return r
109 | 
110 | # def decide_J(data, J_range, paras = "CV", n_trees = 100, T = 1100):
111 | #     data_J = []
112 | #     for i in range(6):
113 | #         temp = data[T * i : T * (i + 1)]
114 | #         temp = [temp[:, :3], temp[:, 3].reshape(-1, 1)]
115 | #         data_J.append(temp)
116 | #     data_J = normalize(data_J)
117 | #     def test_one_J(J):
118 | #         return test(data_J, J = J, B = 200, Q = 10, paras = paras, n_trees = n_trees)
119 | #     r = parmap(test_one_J, J_range, n_cores)
120 | #     print(r)
121 | #     return r
122 | 
123 | 


--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_values.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | from _utility_RL import *
 12 | 
 13 | os.environ["OMP_NUM_THREADS"] = "1"
 14 | #####################################
 15 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 16 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 17 | # the difference with standard cross-validation is negligible and will not affect our findings.
 18 | #####################################
 19 | def one_time_value_only(seed = 1, J = 1, J_upper = 10,
 20 |                      N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 21 |                      gamma_NFQ = 0.9, 
 22 |                      T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 23 |                      paras = "CV_once", n_trees = 100,
 24 |                      first_T = 10, true_lag = 4):
 25 |     ## generate data
 26 |     a = now()
 27 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 28 |     data = burn_in(data,first_T)
 29 |     T -= first_T
 30 |     value_data = data
 31 |     testing_data = [a[:2] for a in normalize(data)]
 32 |     ## this one time is used to get paras
 33 |     if paras == "CV_once": 
 34 |         return lam_est(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees)
 35 |     time = now()
 36 |     Learning_PatternSets = MDP2Trans(MDPs = value_data, J = J, action_in_states = True)
 37 |     Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma_NFQ,
 38 |                  RF_paras = paras, n_trees = n_trees, threshold = thre_eval)
 39 |     if seed % 100 == 0:
 40 |         print("** Learning [for value] time cost:", np.round(now() - time, 3) , "**"); time = now()
 41 |     J_values = eval_Ohio_policy(Q_func = Q_func, J_Q = J, J_upper = J_upper,
 42 |                                T = T_eval, gamma = gamma_eval, N = N_eval, 
 43 |                                sd_G = sd_G, seed = 0)#, true_lag = 4)
 44 |     return np.mean(J_values)
 45 | 
 46 | 
 47 | def one_setting_one_J_value_only(rep_times = 500, J = 1, J_upper = 10,
 48 |                       N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 49 |                       paras = "CV_once", n_trees = 100, 
 50 |                       gamma_NFQ = 0.9, 
 51 |                       T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 52 |                       parallel = False, path = None):
 53 |     a = now()
 54 |     if paras == "CV_once":
 55 |         paras = one_time_value_only(seed = 0, J = J, J_upper = J_upper,
 56 |                          N = N, T = T, B = B, Q = Q,
 57 |                          sd_G = sd_G, gamma_NFQ = gamma_NFQ,
 58 |                          T_eval = T_eval, N_eval = N_eval, 
 59 |                       gamma_eval = gamma_eval, thre_eval = thre_eval,
 60 |                          paras = "CV_once", n_trees = n_trees)   
 61 |     def one_time(seed):
 62 |         return one_time_value_only(seed = seed, J = J, J_upper = J_upper,
 63 |                      N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 64 |                      gamma_NFQ = gamma_NFQ, 
 65 |                      T_eval = T_eval, N_eval = N_eval, 
 66 |                       gamma_eval = gamma_eval, thre_eval = thre_eval,
 67 |                      paras = paras, n_trees = n_trees)            
 68 | 
 69 |     values = parmap(one_time, range(rep_times), parallel)
 70 | 
 71 |     print("total time cost for one J:", np.round(now() - a, 3), Dash)
 72 |     
 73 |     u_val = np.round(np.mean(values),4)
 74 |     sd_val = np.round(np.std(values),4)
 75 |     
 76 |     return values, u_val, sd_val
 77 | 
 78 | 
 79 | 
 80 | def one_setting_value_only(rep_times = 500,
 81 |                       N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 82 |                       paras = "CV_once", n_trees = 100, 
 83 |                       gamma_NFQ = 0.9, 
 84 |                       T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 85 |                       parallel = False, file = None):
 86 |     rr = []
 87 |     value_details = []
 88 |     for J in range(1, 11):
 89 |         r = one_setting_one_J_value_only(rep_times = rep_times, J = J, J_upper = 10,
 90 |                       N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 91 |                       paras = paras, n_trees = n_trees, 
 92 |                       gamma_NFQ = gamma_NFQ, 
 93 |                       T_eval = T_eval, N_eval = N_eval, 
 94 |                       gamma_eval = gamma_eval, thre_eval = thre_eval,
 95 |                       parallel = parallel)
 96 |         rr.append([r[1], r[2]])
 97 |         value_details.append(r[0])
 98 |         print("the currect results for J = ", J, ":\n", rr, DASH)
 99 |     print_content = "N = " + str(N) + "sd = " + str(sd_G) + ":" + str(rr)
100 |     print(print_content, file = file)
101 |     return rr, value_details
102 | 
103 | print("import DONE!", "num of cores:", n_cores, DASH)
104 | 
105 | 
106 | #%% Time Cost
107 | 
108 | 
109 | path = "Ohio_simu_values.txt" # 0128 reruned and reproduced
110 | file = open(path, 'w')
111 | reps = 500
112 | gamma = 0.9
113 | T_eval = 60
114 | sd_G = 3
115 | value_details = []
116 | mean_values = []
117 | 
118 | for N in [10, 15, 20]: 
119 |     print(DASH, "[N, sd_G] = ", [N, sd_G], DASH)
120 |     r, value_detail = one_setting_value_only(rep_times = reps,
121 |                       N = N, T = 8 * 7 * 24, B = 100, Q = 10, sd_G = sd_G,
122 |                       paras = "CV_once", n_trees = 100, 
123 |                       gamma_NFQ = gamma,
124 |                       T_eval = 60, N_eval = 100, 
125 |                       gamma_eval = gamma, thre_eval = 1e-4,
126 |                       parallel = n_cores, file = file)
127 |     print(DASH, "[N, sd_G] = ", [N, sd_G], "r:", r, DASH)
128 |     mean_values.append(r)
129 |     value_details.append(value_detail)
130 | file.close()
131 | 
132 | res = [mean_values, value_details]
133 | with open("Ohio_simu_value.list", 'wb') as file:
134 |     pickle.dump(res, file)
135 | file.close()


--------------------------------------------------------------------------------
/experiment_script/.ipynb_checkpoints/Ohio_simu_values-checkpoint.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | from _utility_RL import *
 12 | 
 13 | os.environ["OMP_NUM_THREADS"] = "1"
 14 | #####################################
 15 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 16 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 17 | # the difference with standard cross-validation is negligible and will not affect our findings.
 18 | #####################################
 19 | def one_time_value_only(seed = 1, J = 1, J_upper = 10,
 20 |                      N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 21 |                      gamma_NFQ = 0.9, 
 22 |                      T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 23 |                      paras = "CV_once", n_trees = 100,
 24 |                      first_T = 10, true_lag = 4):
 25 |     ## generate data
 26 |     a = now()
 27 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 28 |     data = burn_in(data,first_T)
 29 |     T -= first_T
 30 |     value_data = data
 31 |     testing_data = [a[:2] for a in normalize(data)]
 32 |     ## this one time is used to get paras
 33 |     if paras == "CV_once": 
 34 |         return lam_est(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees)
 35 |     time = now()
 36 |     Learning_PatternSets = MDP2Trans(MDPs = value_data, J = J, action_in_states = True)
 37 |     Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma_NFQ,
 38 |                  RF_paras = paras, n_trees = n_trees, threshold = thre_eval)
 39 |     if seed % 100 == 0:
 40 |         print("** Learning [for value] time cost:", np.round(now() - time, 3) , "**"); time = now()
 41 |     J_values = eval_Ohio_policy(Q_func = Q_func, J_Q = J, J_upper = J_upper,
 42 |                                T = T_eval, gamma = gamma_eval, N = N_eval, 
 43 |                                sd_G = sd_G, seed = 0)#, true_lag = 4)
 44 |     return np.mean(J_values)
 45 | 
 46 | 
 47 | def one_setting_one_J_value_only(rep_times = 500, J = 1, J_upper = 10,
 48 |                       N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 49 |                       paras = "CV_once", n_trees = 100, 
 50 |                       gamma_NFQ = 0.9, 
 51 |                       T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 52 |                       parallel = False, path = None):
 53 |     a = now()
 54 |     if paras == "CV_once":
 55 |         paras = one_time_value_only(seed = 0, J = J, J_upper = J_upper,
 56 |                          N = N, T = T, B = B, Q = Q,
 57 |                          sd_G = sd_G, gamma_NFQ = gamma_NFQ,
 58 |                          T_eval = T_eval, N_eval = N_eval, 
 59 |                       gamma_eval = gamma_eval, thre_eval = thre_eval,
 60 |                          paras = "CV_once", n_trees = n_trees)   
 61 |     def one_time(seed):
 62 |         return one_time_value_only(seed = seed, J = J, J_upper = J_upper,
 63 |                      N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 64 |                      gamma_NFQ = gamma_NFQ, 
 65 |                      T_eval = T_eval, N_eval = N_eval, 
 66 |                       gamma_eval = gamma_eval, thre_eval = thre_eval,
 67 |                      paras = paras, n_trees = n_trees)            
 68 | 
 69 |     values = parmap(one_time, range(rep_times), parallel)
 70 | 
 71 |     print("total time cost for one J:", np.round(now() - a, 3), Dash)
 72 |     
 73 |     u_val = np.round(np.mean(values),4)
 74 |     sd_val = np.round(np.std(values),4)
 75 |     
 76 |     return values, u_val, sd_val
 77 | 
 78 | 
 79 | 
 80 | def one_setting_value_only(rep_times = 500,
 81 |                       N = 10, T = 56 * 24, B = 100, Q = 10, sd_G = 3,
 82 |                       paras = "CV_once", n_trees = 100, 
 83 |                       gamma_NFQ = 0.9, 
 84 |                       T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 85 |                       parallel = False, file = None):
 86 |     rr = []
 87 |     value_details = []
 88 |     for J in range(1, 11):
 89 |         r = one_setting_one_J_value_only(rep_times = rep_times, J = J, J_upper = 10,
 90 |                       N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 91 |                       paras = paras, n_trees = n_trees, 
 92 |                       gamma_NFQ = gamma_NFQ, 
 93 |                       T_eval = T_eval, N_eval = N_eval, 
 94 |                       gamma_eval = gamma_eval, thre_eval = thre_eval,
 95 |                       parallel = parallel)
 96 |         rr.append([r[1], r[2]])
 97 |         value_details.append(r[0])
 98 |         print("the currect results for J = ", J, ":\n", rr, DASH)
 99 |     print_content = "N = " + str(N) + "sd = " + str(sd_G) + ":" + str(rr)
100 |     print(print_content, file = file)
101 |     return rr, value_details
102 | 
103 | print("import DONE!", "num of cores:", n_cores, DASH)
104 | 
105 | 
106 | #%% Time Cost
107 | 
108 | 
109 | path = "Ohio_simu_values.txt" # 0128 reruned and reproduced
110 | file = open(path, 'w')
111 | reps = 500
112 | gamma = 0.9
113 | T_eval = 60
114 | sd_G = 3
115 | value_details = []
116 | mean_values = []
117 | 
118 | for N in [10, 15, 20]: 
119 |     print(DASH, "[N, sd_G] = ", [N, sd_G], DASH)
120 |     r, value_detail = one_setting_value_only(rep_times = reps,
121 |                       N = N, T = 8 * 7 * 24, B = 100, Q = 10, sd_G = sd_G,
122 |                       paras = "CV_once", n_trees = 100, 
123 |                       gamma_NFQ = gamma,
124 |                       T_eval = 60, N_eval = 100, 
125 |                       gamma_eval = gamma, thre_eval = 1e-4,
126 |                       parallel = n_cores, file = file)
127 |     print(DASH, "[N, sd_G] = ", [N, sd_G], "r:", r, DASH)
128 |     mean_values.append(r)
129 |     value_details.append(value_detail)
130 | file.close()
131 | 
132 | res = [mean_values, value_details]
133 | with open("Ohio_simu_value.list", 'wb') as file:
134 |     pickle.dump(res, file)
135 | file.close()


--------------------------------------------------------------------------------
/test_func/_utility.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #############################################################################
  4 | #############################################################################
  5 | #%% packages
  6 | import numpy as np
  7 | import scipy as sp
  8 | from scipy.linalg import sqrtm
  9 | import pandas as pd
 10 | from numpy import absolute as np_abs
 11 | from random import seed as rseed
 12 | from numpy.random import seed as npseed
 13 | from numpy.random import normal as rnorm
 14 | from numpy.random import uniform as runi
 15 | from numpy.random import binomial as rbin
 16 | from numpy.random import shuffle,randn, permutation # randn(d1,d2) is d1*d2 i.i.d N(0,1)
 17 | from numpy import array as arr
 18 | from numpy import sqrt, cos, sin, exp, dot, diag, quantile, zeros, roll, multiply, stack, concatenate
 19 | from numpy import concatenate as v_add
 20 | from numpy.linalg import norm
 21 | from numpy import apply_along_axis as apply
 22 | from sklearn.preprocessing import StandardScaler
 23 | import pickle
 24 | from sklearn.ensemble import RandomForestRegressor as RF
 25 | from sklearn.model_selection import GridSearchCV
 26 | from itertools import combinations 
 27 | import operator
 28 | import time
 29 | now = time.time
 30 | from sklearn.ensemble import RandomForestRegressor as RandomForest
 31 | from sklearn.model_selection import KFold
 32 | from statsmodels.stats import proportion as prop
 33 | import os
 34 | 
 35 | #############################################################################
 36 | #############################################################################
 37 | 
 38 | #%% utility funs
 39 | 
 40 | def CI_prop(n,p):
 41 |     """ 
 42 |     Input: In n reps, observed proportion p
 43 |     Output: the 95% CI  of this p
 44 |     """
 45 |     r = prop.proportion_confint(n * p, n, alpha = 0.05, method='binom_test')
 46 |     return np.round([r[0], r[1]],4)
 47 | 
 48 | def normalize_unit_sd(array):
 49 |     def temp(v):
 50 |         return v / np.std(v)
 51 |     return np.array(apply(temp, 0, array))
 52 | 
 53 | def apply_v(f,v):
 54 |     return np.array([f(a) for a in v])
 55 | 
 56 | def burn_in(data,first_T):
 57 |     if len(data[0]) == 2:
 58 |         return [[patient[0][first_T:,:], patient[1][first_T:,:]] for patient in data]
 59 |     else:
 60 |         return [[patient[0][first_T:,:], patient[1][first_T:,:], patient[2][first_T:,:]] for patient in data]
 61 | 
 62 | flatten = lambda l: [item for sublist in l for item in sublist]
 63 | 
 64 | def is_null(true_lag,J):
 65 |     if J >= true_lag:
 66 |         return "(H0)"
 67 |     else:
 68 |         return "(H1)"
 69 | 
 70 | def list2Matrix(List):
 71 |     # return a n * 1 matrix
 72 |     return np.array(np.expand_dims(np.array(List),1))
 73 | 
 74 | def round_list(thelist,dec):
 75 |     """
 76 |     extend np.round to list
 77 |     """
 78 |     return [round(a,dec) for a in thelist]
 79 | 
 80 | 
 81 | def normalize(data, centralized = False):
 82 |     """
 83 |     normalize the simulated data
 84 |     data: len-n of [T*dx,T*da]
 85 |     Returns: data
 86 |     """
 87 |     state, action = [a[0].copy() for a in data], [a[1].copy() for a in data]
 88 |     n = len(data)
 89 |     dx = state[0].shape[1]
 90 |     
 91 |     ### States
 92 |     for i in range(dx):
 93 |         s = np.array([a[:,i] for a in state])
 94 |         mean, sd = np.mean(s), np.std(s)
 95 |         for j in range(n):
 96 |             if centralized:
 97 |                 state[j][:,i] -= mean
 98 |             if sd != 0:
 99 |                 state[j][:,i] = state[j][:,i] / sd
100 |                 
101 |     ### Action: 
102 |     a = np.array(action)
103 |     mean, sd = np.mean(a), np.std(a)
104 | #     sd = 1
105 | #     action = [ a / sd for a in action]
106 | 
107 |     ### Reward
108 |     if len(data[0]) == 3:
109 |         reward = [a[2] for a in data]
110 |         a = np.array(reward)
111 |         mean, sd = np.mean(a), np.std(a)
112 |         if sd == 0:
113 |             sd = 1
114 |         if centralized:
115 |             reward = [ (a - mean) / sd for a in reward]
116 |         else:
117 |             reward = [ a / sd for a in reward]
118 |         return [[state[i],action[i],reward[i]] for i in range(n)]
119 |     else:
120 |         return [[state[i],action[i]] for i in range(n)]
121 | 
122 | #%% utility funs
123 | 
124 | 
125 | def p_value(test_stat,sim_test_stats):
126 |     """
127 |     one testing result (p-value), Bootstrap-based.
128 |     
129 |     Default: the larger, the significant
130 |     Return: p-value
131 |     """
132 |     return round(1 - sum(np.abs(test_stat) > np.abs(sim_test_stats)) / len(sim_test_stats),4)
133 | 
134 | def rej_rate(p_values, alphas):
135 |     rep_times = len(p_values)
136 |     p_values = np.array(p_values)
137 |     RRs = []
138 |     for alpha in alphas:
139 |         RR = sum(p_values < alpha) / rep_times
140 |         RRs.append(RR)
141 |         print("Under alpha", alpha, "the rejection rate is:", RR)
142 |     return RRs
143 | 
144 | def rej_rate_quite(p_values,alphas,file = None):
145 |     rep_times = len(p_values)
146 |     p_values = np.array(p_values)
147 |     
148 |     RRs = []
149 |     for alpha in alphas:
150 |         RR = sum(p_values < alpha) / rep_times
151 |         RRs.append(RR)
152 |     return RRs
153 | 
154 | 
155 | def rej_rate_quick(p):
156 |     r = []
157 |     T = len(p)
158 |     p = np.array(p)
159 |     for i in [0.01,0.05,0.1]:
160 |         r.append(np.sum( p < i) / T)
161 |     return r
162 | 
163 | def rej_rate_seq(results):
164 |     """
165 |     Imput: a list (len = times) of [1,0]
166 |     Output: [0.2,0.7]
167 |     """
168 |     results = np.array(results)
169 |     times = results.shape[0]
170 |     return np.sum(results,0) / times
171 | 
172 | 
173 | 
174 | def seq_rej_rate_mul_J(ps,alphas):
175 |     """
176 |     ps: len-J_upper list of np.array(times * 2)
177 |     Output: if always rej, then rej
178 |     """
179 |     rej = []
180 |     for alpha in alphas:
181 |         aa = [np.array(p) < alpha for p in ps]
182 |         bb = np.sum(np.array(aa), 0) == len(ps)
183 |         rate = np.round(np.mean(bb, 0),3)
184 |         rej.append(rate)
185 |     return rej
186 | 
187 | 
188 | #%%
189 | def truncateMDP(MDPs,T):
190 |     data = []
191 |     l = len(MDPs[0])
192 |     for MDP in MDPs:
193 |         if (MDP[0].shape[0]) >= T:
194 |             data.append([MDP[i][:T] for i in range(l)])
195 |     return data
196 | 
197 | 
198 | def p_sd(T):
199 |     r = []
200 |     for p_true in [0.01,0.05,0.1]:
201 |         r.append(np.round(np.sqrt(p_true * (1 - p_true) / T),4))
202 |     return r
203 | 
204 | def latex_ohio_one_T_sd_G_mul_j(a, file):
205 |     for J in range(len(a)):
206 |         print("J = ", J + 1, end = "    " , file = file)
207 |         aa = a[J]
208 |         for alpha in range(3):
209 |             print(aa[alpha][0],"& ", end = "", file = file) # max
210 |         print("\n", file = file)
211 |         
212 | def print_progress(i, N):
213 |     if (i * 100 // N == 0):
214 |         print("#", end = "", flush = True)


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_utility-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | #############################################################################
  4 | #############################################################################
  5 | #%% packages
  6 | import numpy as np
  7 | import scipy as sp
  8 | from scipy.linalg import sqrtm
  9 | import pandas as pd
 10 | from numpy import absolute as np_abs
 11 | from random import seed as rseed
 12 | from numpy.random import seed as npseed
 13 | from numpy.random import normal as rnorm
 14 | from numpy.random import uniform as runi
 15 | from numpy.random import binomial as rbin
 16 | from numpy.random import shuffle,randn, permutation # randn(d1,d2) is d1*d2 i.i.d N(0,1)
 17 | from numpy import array as arr
 18 | from numpy import sqrt, cos, sin, exp, dot, diag, quantile, zeros, roll, multiply, stack, concatenate
 19 | from numpy import concatenate as v_add
 20 | from numpy.linalg import norm
 21 | from numpy import apply_along_axis as apply
 22 | from sklearn.preprocessing import StandardScaler
 23 | import pickle
 24 | from sklearn.ensemble import RandomForestRegressor as RF
 25 | from sklearn.model_selection import GridSearchCV
 26 | from itertools import combinations 
 27 | import operator
 28 | import time
 29 | now = time.time
 30 | from sklearn.ensemble import RandomForestRegressor as RandomForest
 31 | from sklearn.model_selection import KFold
 32 | from statsmodels.stats import proportion as prop
 33 | import os
 34 | 
 35 | #############################################################################
 36 | #############################################################################
 37 | 
 38 | #%% utility funs
 39 | 
 40 | def CI_prop(n,p):
 41 |     """ 
 42 |     Input: In n reps, observed proportion p
 43 |     Output: the 95% CI  of this p
 44 |     """
 45 |     r = prop.proportion_confint(n * p, n, alpha = 0.05, method='binom_test')
 46 |     return np.round([r[0], r[1]],4)
 47 | 
 48 | def normalize_unit_sd(array):
 49 |     def temp(v):
 50 |         return v / np.std(v)
 51 |     return np.array(apply(temp, 0, array))
 52 | 
 53 | def apply_v(f,v):
 54 |     return np.array([f(a) for a in v])
 55 | 
 56 | def burn_in(data,first_T):
 57 |     if len(data[0]) == 2:
 58 |         return [[patient[0][first_T:,:], patient[1][first_T:,:]] for patient in data]
 59 |     else:
 60 |         return [[patient[0][first_T:,:], patient[1][first_T:,:], patient[2][first_T:,:]] for patient in data]
 61 | 
 62 | flatten = lambda l: [item for sublist in l for item in sublist]
 63 | 
 64 | def is_null(true_lag,J):
 65 |     if J >= true_lag:
 66 |         return "(H0)"
 67 |     else:
 68 |         return "(H1)"
 69 | 
 70 | def list2Matrix(List):
 71 |     # return a n * 1 matrix
 72 |     return np.array(np.expand_dims(np.array(List),1))
 73 | 
 74 | def round_list(thelist,dec):
 75 |     """
 76 |     extend np.round to list
 77 |     """
 78 |     return [round(a,dec) for a in thelist]
 79 | 
 80 | 
 81 | def normalize(data, centralized = False):
 82 |     """
 83 |     normalize the simulated data
 84 |     data: len-n of [T*dx,T*da]
 85 |     Returns: data
 86 |     """
 87 |     state, action = [a[0].copy() for a in data], [a[1].copy() for a in data]
 88 |     n = len(data)
 89 |     dx = state[0].shape[1]
 90 |     
 91 |     ### States
 92 |     for i in range(dx):
 93 |         s = np.array([a[:,i] for a in state])
 94 |         mean, sd = np.mean(s), np.std(s)
 95 |         for j in range(n):
 96 |             if centralized:
 97 |                 state[j][:,i] -= mean
 98 |             if sd != 0:
 99 |                 state[j][:,i] = state[j][:,i] / sd
100 |                 
101 |     ### Action: 
102 |     a = np.array(action)
103 |     mean, sd = np.mean(a), np.std(a)
104 | #     sd = 1
105 | #     action = [ a / sd for a in action]
106 | 
107 |     ### Reward
108 |     if len(data[0]) == 3:
109 |         reward = [a[2] for a in data]
110 |         a = np.array(reward)
111 |         mean, sd = np.mean(a), np.std(a)
112 |         if sd == 0:
113 |             sd = 1
114 |         if centralized:
115 |             reward = [ (a - mean) / sd for a in reward]
116 |         else:
117 |             reward = [ a / sd for a in reward]
118 |         return [[state[i],action[i],reward[i]] for i in range(n)]
119 |     else:
120 |         return [[state[i],action[i]] for i in range(n)]
121 | 
122 | #%% utility funs
123 | 
124 | 
125 | def p_value(test_stat,sim_test_stats):
126 |     """
127 |     one testing result (p-value), Bootstrap-based.
128 |     
129 |     Default: the larger, the significant
130 |     Return: p-value
131 |     """
132 |     return round(1 - sum(np.abs(test_stat) > np.abs(sim_test_stats)) / len(sim_test_stats),4)
133 | 
134 | def rej_rate(p_values, alphas):
135 |     rep_times = len(p_values)
136 |     p_values = np.array(p_values)
137 |     RRs = []
138 |     for alpha in alphas:
139 |         RR = sum(p_values < alpha) / rep_times
140 |         RRs.append(RR)
141 |         print("Under alpha", alpha, "the rejection rate is:", RR)
142 |     return RRs
143 | 
144 | def rej_rate_quite(p_values,alphas,file = None):
145 |     rep_times = len(p_values)
146 |     p_values = np.array(p_values)
147 |     
148 |     RRs = []
149 |     for alpha in alphas:
150 |         RR = sum(p_values < alpha) / rep_times
151 |         RRs.append(RR)
152 |     return RRs
153 | 
154 | 
155 | def rej_rate_quick(p):
156 |     r = []
157 |     T = len(p)
158 |     p = np.array(p)
159 |     for i in [0.01,0.05,0.1]:
160 |         r.append(np.sum( p < i) / T)
161 |     return r
162 | 
163 | def rej_rate_seq(results):
164 |     """
165 |     Imput: a list (len = times) of [1,0]
166 |     Output: [0.2,0.7]
167 |     """
168 |     results = np.array(results)
169 |     times = results.shape[0]
170 |     return np.sum(results,0) / times
171 | 
172 | 
173 | 
174 | def seq_rej_rate_mul_J(ps,alphas):
175 |     """
176 |     ps: len-J_upper list of np.array(times * 2)
177 |     Output: if always rej, then rej
178 |     """
179 |     rej = []
180 |     for alpha in alphas:
181 |         aa = [np.array(p) < alpha for p in ps]
182 |         bb = np.sum(np.array(aa), 0) == len(ps)
183 |         rate = np.round(np.mean(bb, 0),3)
184 |         rej.append(rate)
185 |     return rej
186 | 
187 | 
188 | #%%
189 | def truncateMDP(MDPs,T):
190 |     data = []
191 |     l = len(MDPs[0])
192 |     for MDP in MDPs:
193 |         if (MDP[0].shape[0]) >= T:
194 |             data.append([MDP[i][:T] for i in range(l)])
195 |     return data
196 | 
197 | 
198 | def p_sd(T):
199 |     r = []
200 |     for p_true in [0.01,0.05,0.1]:
201 |         r.append(np.round(np.sqrt(p_true * (1 - p_true) / T),4))
202 |     return r
203 | 
204 | def latex_ohio_one_T_sd_G_mul_j(a, file):
205 |     for J in range(len(a)):
206 |         print("J = ", J + 1, end = "    " , file = file)
207 |         aa = a[J]
208 |         for alpha in range(3):
209 |             print(aa[alpha][0],"& ", end = "", file = file) # max
210 |         print("\n", file = file)
211 |         
212 | def print_progress(i, N):
213 |     if (i * 100 // N == 0):
214 |         print("#", end = "", flush = True)


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_DGP_Ohio-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #%% packages
  2 | 
  3 | ################################################################################################
  4 | from ._utility import *
  5 | from ._utility_RL import *
  6 | ################################################ OHIO ##########################################
  7 | ################################################################################################
  8 | # the following parameters will not change with the LM fitting
  9 | const = 39.03
 10 | init_u_G = 162  
 11 | init_sd_G = 60
 12 | p_D, u_D, sd_D = 0.17, 44.4, 35.5
 13 | p_E, u_E, sd_E = 0.05, 4.9, 1.04
 14 | p_A = [0.805, 0.084, 0.072, 0.029, 0.010] # new discritization
 15 | range_a = [0, 1, 2, 3, 4]
 16 | 
 17 | ##########################################
 18 | # left to right: t-4, .. , t-1
 19 | coefficients = [-0.008     ,  0.106     , -0.481     ,  1.171     ,  # glucose
 20 |           0.008     ,  -0.004     ,  0.08      ,  0.23      ,  # diet
 21 |           0.009     , -1.542     , 3.097     , -3.489     ,  # exercise
 22 |           -0.30402253, -2.02343638, -0.3310525 , -0.43941028] # action
 23 | 
 24 | def Glucose2Reward(gl, definition = 1):
 25 |     # Q: too sensitive?
 26 |     low_gl = 80
 27 |     high_gl = 140
 28 |     return np.select([gl>=high_gl, gl<=low_gl, low_gl<gl<high_gl], [-(gl-high_gl)**1.35/30, -(low_gl-gl)**2/30, 0])
 29 | 
 30 | ################################################################################################
 31 | ################################################################################################
 32 | 
 33 | def init_MDPs(T, N, sd_G = 3, seed = 0): 
 34 |     """
 35 |     Randomly initialize 
 36 |         1. G_t [0,..., T_true_lag];
 37 |         2. errors for G_t
 38 |         3. when to take how many diets/exercises [matters?]
 39 |     Outputs:
 40 |         init G_t and its future erroes; all D_t and E_t
 41 |     """
 42 |     rseed(seed); npseed(seed)
 43 |     true_lag = 4
 44 |     obs = np.zeros((3, T, N))  # [Gi, D, Ex]
 45 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
 46 |     
 47 |     e_D = abs(rnorm(u_D, sd_D, T * N))
 48 |     e_E = abs(rnorm(u_E, sd_E, T * N))
 49 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
 50 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
 51 |     
 52 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
 53 |     
 54 |     return obs, e_G
 55 | 
 56 | 
 57 | 
 58 | def useful_obs(obs, actions, t):
 59 |     true_lag = 4
 60 |     r = np.vstack([
 61 |         obs[0, (t - true_lag):t, :], obs[1, (t - true_lag):t, :],
 62 |         obs[2, (t - true_lag):t, :], actions[(t - true_lag):t, :]])
 63 |     return r
 64 | 
 65 | def next_obs(tran_mat, useful_last_obs, e_G, t):
 66 |     return np.array(const).reshape((1, 1)) + tran_mat.dot(useful_last_obs) + np.array([e_G[t, :]])
 67 | 
 68 | 
 69 | ################################################################################################
 70 | ################################################################################################
 71 | 
 72 | def simu_Ohio(T = 5, N = 2, seed = 1, sd_G = 5.5, matrix_output = False, is_real = False):
 73 |     """ Simulate N patient trajectories with length T, calibrated from the Ohio dataset.
 74 |     """
 75 |     tran_mat = np.expand_dims(arr(coefficients), 0)
 76 | 
 77 |     # Initialization
 78 |     if is_real:
 79 |         obs, e_G = init_MDPs_real(T = T, N = N, sd_G = sd_G, seed = seed)
 80 |     else:
 81 |         obs, e_G = init_MDPs(T = T, N = N, sd_G = sd_G, seed = seed)
 82 |     rseed(seed); npseed(seed)
 83 |     actions = np.random.choice(range(len(p_A)), size=T * N, p=p_A).reshape((T, N))
 84 |     # Transition
 85 |     for t in range(4, T):
 86 |         useful_last_obs = useful_obs(obs = obs, actions = actions, t = t)
 87 |         obs[0, t, :] = next_obs(tran_mat = tran_mat, useful_last_obs = useful_last_obs, e_G = e_G, t = t)
 88 |     
 89 |     if matrix_output: # for eval_Ohio_policy below
 90 |         return obs, actions
 91 |     # Collection
 92 |     MDPs = []
 93 |     for i in range(N):
 94 |         MDPs.append([obs[:, :, i],
 95 |                      actions[:, i]])
 96 |     s_a = [[a[0].T, np.array(a[1]).reshape(T, 1), ] for a in MDPs]
 97 |     MDPs = [[a[0], a[1], np.roll(apply_v(Glucose2Reward, a[0][:, 0].reshape(-1, 1)), shift = -1).reshape(-1, 1)] 
 98 |             for a in s_a]
 99 |     return  MDPs # a list of [obs = T * 3, actions, rewards]
100 | 
101 | ################################################################################################
102 | # Funcs for Simu Ohio Values
103 | ################################################################################################
104 | 
105 | def eval_Ohio_policy(Q_func, J_Q, T, N, J_upper, sd_G = 0, gamma = 0.9, debug = 0, seed = 0):
106 |     """ Evaluate the value of a policy in simulation.
107 |     
108 |     Randomly the first four time points，
109 |     and then follow the simulation model until T = 10, 
110 |     and then begin to use policy and collect rewards:
111 |     
112 |     1. choosing actions following Q, 
113 |     2. trans following the environment
114 |     3. collecting rewards.
115 |     """
116 |     policy = Estpolicy(Q_func, range_a)
117 |     tran_mat = np.expand_dims(arr(coefficients), 0)
118 |     
119 |     ### Initialize the first 10
120 |     true_lag = 4
121 |     init_obs, init_A = simu_Ohio(T = 10, N = N, seed = 0, sd_G = sd_G, matrix_output = True)
122 |     obs, e_G = init_MDPs(T = T, N = N, sd_G = sd_G, seed = seed)
123 |     obs[:, :10, :] = init_obs
124 |     actions = np.zeros((T, N)) # store previous actions
125 |     actions[:10, :] = init_A
126 |     
127 |     rseed(seed); npseed(seed)
128 |     dim_obs = obs.shape[0]
129 |     for t in range(J_upper, T): 
130 |         # next observations: based on ...,t-1, to decide t.
131 |         useful_last_obs = useful_obs(obs = obs, actions = actions, t = t)
132 |         obs[0, t, :] = next_obs(tran_mat = tran_mat, useful_last_obs = useful_last_obs, e_G = e_G, t = t)
133 |         
134 |         # choose actions based on status. obs = [3, T, N]      
135 |         s = ObsAct2State(obs, actions, t, J_Q, multiple_N = True) # dim * N
136 |         A_t = policy(s.T).T # s [N * dx] -> actions [N * 1] -> 1 * N
137 |         actions[t, :] = A_t
138 |                 
139 |     # collect rewards
140 |     Values = est_values(obs, gamma = gamma, init_T = J_upper)
141 |     return Values
142 | 
143 | def est_values(obs, gamma = 0.9, init_T = 10):
144 |     """ Tool to calculate culmulative rewards from observation (glucose histroy)
145 |     Input: the observed trajectories (possibly based on the optimal policy)
146 |     3 * T * N
147 |     Output: the collected culmulative rewards
148 |     
149 |     init_T: when the glucose becomes stable
150 |     """
151 |     Values = []
152 |     N = obs.shape[2]
153 |     T = obs.shape[1]
154 |     for i in range(N):
155 |         rewards = np.roll(apply_v(Glucose2Reward, obs[0, init_T:, i]), shift = -1).reshape(-1, 1)
156 |         est_Value = np.round(cum_r(rewards, gamma), 3)
157 |         Values.append(est_Value[0])
158 |     return Values
159 | 
160 | 
161 | def init_MDPs_real(T, N, sd_G, seed = 0): # version of new -> bad simu results
162 |     """
163 |     Randomly initialize 
164 |         1. G_t [0,..., T_true_lag];
165 |         2. errors for G_t
166 |         3. when to take how many diets/exercises [matters?]
167 |     Outputs:
168 |         init G_t and its future erroes; all D_t and E_t
169 |     """
170 |     rseed(seed); npseed(seed)
171 |     true_lag = 4
172 |     obs = np.zeros((3, T, N))  # [Gi, D, Ex]
173 |     e_D = abs(rnorm(u_D, sd_D, T * N))
174 |     e_E = abs(rnorm(u_E, sd_E, T * N))
175 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
176 |     
177 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
178 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
179 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
180 |     
181 |     return obs, e_G


--------------------------------------------------------------------------------
/experiment_func/_DGP_Ohio.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #%% packages
  5 | 
  6 | ################################################################################################
  7 | import os, sys
  8 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  9 | 
 10 | sys.path.insert(0, package_path + "/test_func")
 11 | from _core_test_fun import *
 12 | from _utility_RL import *
 13 | 
 14 | ################################################ OHIO ##########################################
 15 | ################################################################################################
 16 | # the following parameters will not change with the LM fitting
 17 | const = 39.03
 18 | init_u_G = 162  
 19 | init_sd_G = 60
 20 | p_D, u_D, sd_D = 0.17, 44.4, 35.5
 21 | p_E, u_E, sd_E = 0.05, 4.9, 1.04
 22 | p_A = [0.805, 0.084, 0.072, 0.029, 0.010] # new discritization
 23 | range_a = [0, 1, 2, 3, 4]
 24 | 
 25 | ##########################################
 26 | # left to right: t-4, .. , t-1
 27 | coefficients = [-0.008     ,  0.106     , -0.481     ,  1.171     ,  # glucose
 28 |           0.008     ,  -0.004     ,  0.08      ,  0.23      ,  # diet
 29 |           0.009     , -1.542     , 3.097     , -3.489     ,  # exercise
 30 |           -0.30402253, -2.02343638, -0.3310525 , -0.43941028] # action
 31 | 
 32 | def Glucose2Reward(gl, definition = 1):
 33 |     # Q: too sensitive?
 34 |     low_gl = 80
 35 |     high_gl = 140
 36 |     return np.select([gl>=high_gl, gl<=low_gl, low_gl<gl<high_gl], [-(gl-high_gl)**1.35/30, -(low_gl-gl)**2/30, 0])
 37 | 
 38 | ################################################################################################
 39 | ################################################################################################
 40 | 
 41 | def init_MDPs(T, N, sd_G = 3, seed = 0): 
 42 |     """
 43 |     Randomly initialize 
 44 |         1. G_t [0,..., T_true_lag];
 45 |         2. errors for G_t
 46 |         3. when to take how many diets/exercises [matters?]
 47 |     Outputs:
 48 |         init G_t and its future erroes; all D_t and E_t
 49 |     """
 50 |     rseed(seed); npseed(seed)
 51 |     true_lag = 4
 52 |     obs = np.zeros((3, T, N))  # [Gi, D, Ex]
 53 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
 54 |     
 55 |     e_D = abs(rnorm(u_D, sd_D, T * N))
 56 |     e_E = abs(rnorm(u_E, sd_E, T * N))
 57 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
 58 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
 59 |     
 60 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
 61 |     
 62 |     return obs, e_G
 63 | 
 64 | 
 65 | 
 66 | def useful_obs(obs, actions, t):
 67 |     true_lag = 4
 68 |     r = np.vstack([
 69 |         obs[0, (t - true_lag):t, :], obs[1, (t - true_lag):t, :],
 70 |         obs[2, (t - true_lag):t, :], actions[(t - true_lag):t, :]])
 71 |     return r
 72 | 
 73 | def next_obs(tran_mat, useful_last_obs, e_G, t):
 74 |     return np.array(const).reshape((1, 1)) + tran_mat.dot(useful_last_obs) + np.array([e_G[t, :]])
 75 | 
 76 | 
 77 | ################################################################################################
 78 | ################################################################################################
 79 | 
 80 | def simu_Ohio(T = 5, N = 2, seed = 1, sd_G = 5.5, matrix_output = False, is_real = False):
 81 |     """ Simulate N patient trajectories with length T, calibrated from the Ohio dataset.
 82 |     """
 83 |     tran_mat = np.expand_dims(arr(coefficients), 0)
 84 | 
 85 |     # Initialization
 86 |     if is_real:
 87 |         obs, e_G = init_MDPs_real(T = T, N = N, sd_G = sd_G, seed = seed)
 88 |     else:
 89 |         obs, e_G = init_MDPs(T = T, N = N, sd_G = sd_G, seed = seed)
 90 |     rseed(seed); npseed(seed)
 91 |     actions = np.random.choice(range(len(p_A)), size=T * N, p=p_A).reshape((T, N))
 92 |     # Transition
 93 |     for t in range(4, T):
 94 |         useful_last_obs = useful_obs(obs = obs, actions = actions, t = t)
 95 |         obs[0, t, :] = next_obs(tran_mat = tran_mat, useful_last_obs = useful_last_obs, e_G = e_G, t = t)
 96 |     
 97 |     if matrix_output: # for eval_Ohio_policy below
 98 |         return obs, actions
 99 |     # Collection
100 |     MDPs = []
101 |     for i in range(N):
102 |         MDPs.append([obs[:, :, i],
103 |                      actions[:, i]])
104 |     s_a = [[a[0].T, np.array(a[1]).reshape(T, 1), ] for a in MDPs]
105 |     MDPs = [[a[0], a[1], np.roll(apply_v(Glucose2Reward, a[0][:, 0].reshape(-1, 1)), shift = -1).reshape(-1, 1)] 
106 |             for a in s_a]
107 |     return  MDPs # a list of [obs = T * 3, actions, rewards]
108 | 
109 | ################################################################################################
110 | # Funcs for Simu Ohio Values
111 | ################################################################################################
112 | 
113 | def eval_Ohio_policy(Q_func, J_Q, T, N, J_upper, sd_G = 0, gamma = 0.9, debug = 0, seed = 0):
114 |     """ Evaluate the value of a policy in simulation.
115 |     
116 |     Randomly the first four time points，
117 |     and then follow the simulation model until T = 10, 
118 |     and then begin to use policy and collect rewards:
119 |     
120 |     1. choosing actions following Q, 
121 |     2. trans following the environment
122 |     3. collecting rewards.
123 |     """
124 |     policy = Estpolicy(Q_func, range_a)
125 |     tran_mat = np.expand_dims(arr(coefficients), 0)
126 |     
127 |     ### Initialize the first 10
128 |     true_lag = 4
129 |     init_obs, init_A = simu_Ohio(T = 10, N = N, seed = 0, sd_G = sd_G, matrix_output = True)
130 |     obs, e_G = init_MDPs(T = T, N = N, sd_G = sd_G, seed = seed)
131 |     obs[:, :10, :] = init_obs
132 |     actions = np.zeros((T, N)) # store previous actions
133 |     actions[:10, :] = init_A
134 |     
135 |     rseed(seed); npseed(seed)
136 |     dim_obs = obs.shape[0]
137 |     for t in range(J_upper, T): 
138 |         # next observations: based on ...,t-1, to decide t.
139 |         useful_last_obs = useful_obs(obs = obs, actions = actions, t = t)
140 |         obs[0, t, :] = next_obs(tran_mat = tran_mat, useful_last_obs = useful_last_obs, e_G = e_G, t = t)
141 |         
142 |         # choose actions based on status. obs = [3, T, N]      
143 |         s = ObsAct2State(obs, actions, t, J_Q, multiple_N = True) # dim * N
144 |         A_t = policy(s.T).T # s [N * dx] -> actions [N * 1] -> 1 * N
145 |         actions[t, :] = A_t
146 |                 
147 |     # collect rewards
148 |     Values = est_values(obs, gamma = gamma, init_T = J_upper)
149 |     return Values
150 | 
151 | def est_values(obs, gamma = 0.9, init_T = 10):
152 |     """ Tool to calculate culmulative rewards from observation (glucose histroy)
153 |     Input: the observed trajectories (possibly based on the optimal policy)
154 |     3 * T * N
155 |     Output: the collected culmulative rewards
156 |     
157 |     init_T: when the glucose becomes stable
158 |     """
159 |     Values = []
160 |     N = obs.shape[2]
161 |     T = obs.shape[1]
162 |     for i in range(N):
163 |         rewards = np.roll(apply_v(Glucose2Reward, obs[0, init_T:, i]), shift = -1).reshape(-1, 1)
164 |         est_Value = np.round(cum_r(rewards, gamma), 3)
165 |         Values.append(est_Value[0])
166 |     return Values
167 | 
168 | 
169 | def init_MDPs_real(T, N, sd_G, seed = 0): # version of new -> bad simu results
170 |     """
171 |     Randomly initialize 
172 |         1. G_t [0,..., T_true_lag];
173 |         2. errors for G_t
174 |         3. when to take how many diets/exercises [matters?]
175 |     Outputs:
176 |         init G_t and its future erroes; all D_t and E_t
177 |     """
178 |     rseed(seed); npseed(seed)
179 |     true_lag = 4
180 |     obs = np.zeros((3, T, N))  # [Gi, D, Ex]
181 |     e_D = abs(rnorm(u_D, sd_D, T * N))
182 |     e_E = abs(rnorm(u_E, sd_E, T * N))
183 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
184 |     
185 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
186 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
187 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
188 |     
189 |     return obs, e_G


--------------------------------------------------------------------------------
/experiment_func/.ipynb_checkpoints/_DGP_Ohio-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | #%% packages
  5 | 
  6 | ################################################################################################
  7 | import os, sys
  8 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  9 | 
 10 | sys.path.insert(0, package_path + "/test_func")
 11 | from _core_test_fun import *
 12 | from _utility_RL import *
 13 | 
 14 | ################################################ OHIO ##########################################
 15 | ################################################################################################
 16 | # the following parameters will not change with the LM fitting
 17 | const = 39.03
 18 | init_u_G = 162  
 19 | init_sd_G = 60
 20 | p_D, u_D, sd_D = 0.17, 44.4, 35.5
 21 | p_E, u_E, sd_E = 0.05, 4.9, 1.04
 22 | p_A = [0.805, 0.084, 0.072, 0.029, 0.010] # new discritization
 23 | range_a = [0, 1, 2, 3, 4]
 24 | 
 25 | ##########################################
 26 | # left to right: t-4, .. , t-1
 27 | coefficients = [-0.008     ,  0.106     , -0.481     ,  1.171     ,  # glucose
 28 |           0.008     ,  -0.004     ,  0.08      ,  0.23      ,  # diet
 29 |           0.009     , -1.542     , 3.097     , -3.489     ,  # exercise
 30 |           -0.30402253, -2.02343638, -0.3310525 , -0.43941028] # action
 31 | 
 32 | def Glucose2Reward(gl, definition = 1):
 33 |     # Q: too sensitive?
 34 |     low_gl = 80
 35 |     high_gl = 140
 36 |     return np.select([gl>=high_gl, gl<=low_gl, low_gl<gl<high_gl], [-(gl-high_gl)**1.35/30, -(low_gl-gl)**2/30, 0])
 37 | 
 38 | ################################################################################################
 39 | ################################################################################################
 40 | 
 41 | def init_MDPs(T, N, sd_G = 3, seed = 0): 
 42 |     """
 43 |     Randomly initialize 
 44 |         1. G_t [0,..., T_true_lag];
 45 |         2. errors for G_t
 46 |         3. when to take how many diets/exercises [matters?]
 47 |     Outputs:
 48 |         init G_t and its future erroes; all D_t and E_t
 49 |     """
 50 |     rseed(seed); npseed(seed)
 51 |     true_lag = 4
 52 |     obs = np.zeros((3, T, N))  # [Gi, D, Ex]
 53 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
 54 |     
 55 |     e_D = abs(rnorm(u_D, sd_D, T * N))
 56 |     e_E = abs(rnorm(u_E, sd_E, T * N))
 57 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
 58 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
 59 |     
 60 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
 61 |     
 62 |     return obs, e_G
 63 | 
 64 | 
 65 | 
 66 | def useful_obs(obs, actions, t):
 67 |     true_lag = 4
 68 |     r = np.vstack([
 69 |         obs[0, (t - true_lag):t, :], obs[1, (t - true_lag):t, :],
 70 |         obs[2, (t - true_lag):t, :], actions[(t - true_lag):t, :]])
 71 |     return r
 72 | 
 73 | def next_obs(tran_mat, useful_last_obs, e_G, t):
 74 |     return np.array(const).reshape((1, 1)) + tran_mat.dot(useful_last_obs) + np.array([e_G[t, :]])
 75 | 
 76 | 
 77 | ################################################################################################
 78 | ################################################################################################
 79 | 
 80 | def simu_Ohio(T = 5, N = 2, seed = 1, sd_G = 5.5, matrix_output = False, is_real = False):
 81 |     """ Simulate N patient trajectories with length T, calibrated from the Ohio dataset.
 82 |     """
 83 |     tran_mat = np.expand_dims(arr(coefficients), 0)
 84 | 
 85 |     # Initialization
 86 |     if is_real:
 87 |         obs, e_G = init_MDPs_real(T = T, N = N, sd_G = sd_G, seed = seed)
 88 |     else:
 89 |         obs, e_G = init_MDPs(T = T, N = N, sd_G = sd_G, seed = seed)
 90 |     rseed(seed); npseed(seed)
 91 |     actions = np.random.choice(range(len(p_A)), size=T * N, p=p_A).reshape((T, N))
 92 |     # Transition
 93 |     for t in range(4, T):
 94 |         useful_last_obs = useful_obs(obs = obs, actions = actions, t = t)
 95 |         obs[0, t, :] = next_obs(tran_mat = tran_mat, useful_last_obs = useful_last_obs, e_G = e_G, t = t)
 96 |     
 97 |     if matrix_output: # for eval_Ohio_policy below
 98 |         return obs, actions
 99 |     # Collection
100 |     MDPs = []
101 |     for i in range(N):
102 |         MDPs.append([obs[:, :, i],
103 |                      actions[:, i]])
104 |     s_a = [[a[0].T, np.array(a[1]).reshape(T, 1), ] for a in MDPs]
105 |     MDPs = [[a[0], a[1], np.roll(apply_v(Glucose2Reward, a[0][:, 0].reshape(-1, 1)), shift = -1).reshape(-1, 1)] 
106 |             for a in s_a]
107 |     return  MDPs # a list of [obs = T * 3, actions, rewards]
108 | 
109 | ################################################################################################
110 | # Funcs for Simu Ohio Values
111 | ################################################################################################
112 | 
113 | def eval_Ohio_policy(Q_func, J_Q, T, N, J_upper, sd_G = 0, gamma = 0.9, debug = 0, seed = 0):
114 |     """ Evaluate the value of a policy in simulation.
115 |     
116 |     Randomly the first four time points，
117 |     and then follow the simulation model until T = 10, 
118 |     and then begin to use policy and collect rewards:
119 |     
120 |     1. choosing actions following Q, 
121 |     2. trans following the environment
122 |     3. collecting rewards.
123 |     """
124 |     policy = Estpolicy(Q_func, range_a)
125 |     tran_mat = np.expand_dims(arr(coefficients), 0)
126 |     
127 |     ### Initialize the first 10
128 |     true_lag = 4
129 |     init_obs, init_A = simu_Ohio(T = 10, N = N, seed = 0, sd_G = sd_G, matrix_output = True)
130 |     obs, e_G = init_MDPs(T = T, N = N, sd_G = sd_G, seed = seed)
131 |     obs[:, :10, :] = init_obs
132 |     actions = np.zeros((T, N)) # store previous actions
133 |     actions[:10, :] = init_A
134 |     
135 |     rseed(seed); npseed(seed)
136 |     dim_obs = obs.shape[0]
137 |     for t in range(J_upper, T): 
138 |         # next observations: based on ...,t-1, to decide t.
139 |         useful_last_obs = useful_obs(obs = obs, actions = actions, t = t)
140 |         obs[0, t, :] = next_obs(tran_mat = tran_mat, useful_last_obs = useful_last_obs, e_G = e_G, t = t)
141 |         
142 |         # choose actions based on status. obs = [3, T, N]      
143 |         s = ObsAct2State(obs, actions, t, J_Q, multiple_N = True) # dim * N
144 |         A_t = policy(s.T).T # s [N * dx] -> actions [N * 1] -> 1 * N
145 |         actions[t, :] = A_t
146 |                 
147 |     # collect rewards
148 |     Values = est_values(obs, gamma = gamma, init_T = J_upper)
149 |     return Values
150 | 
151 | def est_values(obs, gamma = 0.9, init_T = 10):
152 |     """ Tool to calculate culmulative rewards from observation (glucose histroy)
153 |     Input: the observed trajectories (possibly based on the optimal policy)
154 |     3 * T * N
155 |     Output: the collected culmulative rewards
156 |     
157 |     init_T: when the glucose becomes stable
158 |     """
159 |     Values = []
160 |     N = obs.shape[2]
161 |     T = obs.shape[1]
162 |     for i in range(N):
163 |         rewards = np.roll(apply_v(Glucose2Reward, obs[0, init_T:, i]), shift = -1).reshape(-1, 1)
164 |         est_Value = np.round(cum_r(rewards, gamma), 3)
165 |         Values.append(est_Value[0])
166 |     return Values
167 | 
168 | 
169 | def init_MDPs_real(T, N, sd_G, seed = 0): # version of new -> bad simu results
170 |     """
171 |     Randomly initialize 
172 |         1. G_t [0,..., T_true_lag];
173 |         2. errors for G_t
174 |         3. when to take how many diets/exercises [matters?]
175 |     Outputs:
176 |         init G_t and its future erroes; all D_t and E_t
177 |     """
178 |     rseed(seed); npseed(seed)
179 |     true_lag = 4
180 |     obs = np.zeros((3, T, N))  # [Gi, D, Ex]
181 |     e_D = abs(rnorm(u_D, sd_D, T * N))
182 |     e_E = abs(rnorm(u_E, sd_E, T * N))
183 |     e_G = rnorm(0, sd_G, T * N).reshape((T, N))
184 |     
185 |     obs[0, :true_lag, :] = rnorm(init_u_G, init_sd_G, true_lag * N).reshape(true_lag, N)
186 |     obs[1, :, :] = (rbin(1, p_D, T * N) * e_D).reshape((T, N))
187 |     obs[2, :, :] = (rbin(1, p_E, T * N) * e_E).reshape((T, N))
188 |     
189 |     return obs, e_G


--------------------------------------------------------------------------------
/experiment_script/Ohio_simu_testing.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | 
 12 | os.environ["OMP_NUM_THREADS"] = "1"
 13 | #####################################
 14 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 15 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 16 | # the difference with standard cross-validation is negligible and will not affect our findings.
 17 | #####################################
 18 | 
 19 | def one_time(seed = 1, J = 1, J_upper = 10,
 20 |                      N = 30, T = 2 * 24, B = 200, Q = 10, sd_G = 5,
 21 |                      gamma_NFQ = 0.95, 
 22 |                      T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 23 |                      paras = "CV", n_trees = 200,
 24 |                      first_T = 10, 
 25 |                      do_eval = True):
 26 |     ## generate data
 27 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 28 |     data = burn_in(data,first_T)
 29 |     T -= first_T
 30 |     # for value evaluation [we will use the original transition], 
 31 |     # do not use normalized data[will not be dominated like testing]
 32 |     value_data = data
 33 |     testing_data = [a[:2] for a in normalize(data)]
 34 |     ## this one time is used to get paras
 35 |     if paras == "CV_once": 
 36 |         return lam_est(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees)
 37 |     time = now()
 38 |     p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees, print_time = False, method = "QRF")
 39 |     if seed % 100 == 0:
 40 |         print("** testing time:", now() - time, " for seed = ", seed,"**"); time = now()
 41 |     
 42 |     if do_eval: # for the currect J, get data, learn a function, and evaluate via simulations
 43 |         Learning_PatternSets = MDP2Trans(MDPs = value_data, J = J, action_in_states = True)
 44 |         Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma_NFQ,
 45 |                      RF_paras = paras, n_trees = n_trees, threshold = thre_eval)
 46 |         J_values = eval_Ohio_policy(Q_func = Q_func, J_Q = J, J_upper = J_upper,
 47 |                                    T = T_eval, gamma = gamma_eval, N = N_eval, 
 48 |                                    sd_G = sd_G, seed = 0)
 49 |         return [p_value, np.mean(J_values)] 
 50 |     else:
 51 |         return p_value
 52 | 
 53 | 
 54 | def one_setting_one_J(rep_times = 2, J = 1, J_upper = 5,
 55 |                       N = 20, T = 2 * 24, B = 2, Q = 10, sd_G = 5,
 56 |                       paras = "CV_once", n_trees = 20, 
 57 |                       init_seed = 0, do_eval = False, parallel = False, email = False):
 58 |     a = now()
 59 |     if paras == "CV_once":
 60 |         paras = one_time(seed = 0, J = J, J_upper = J_upper,
 61 |                          N = N, T = T, B = B, Q = Q,
 62 |                          sd_G = sd_G, 
 63 |                          paras = "CV_once", n_trees = n_trees,
 64 |                          do_eval = do_eval)   
 65 |         print("CV paras:",paras)
 66 |     
 67 |     def one_test(seed):
 68 |         return one_time(seed = seed, J = J, J_upper = J_upper,
 69 |                      N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 70 |                      paras = paras, n_trees = n_trees, 
 71 |                      do_eval = do_eval)            
 72 |     if parallel:
 73 |         if rep_times == 500 and do_eval:
 74 |             r = []
 75 |             for i in range(5): # connection
 76 |                 r_i = parmap(one_test, range(init_seed + i * 100, init_seed + (i + 1) * 100), parallel)
 77 |                 print("the first", (i + 1) * 100, "reps in 500 reps Done: \n",
 78 |                       rej_rate([a[0] for a in r_i], [.1,.05,.01]), 
 79 |                      "\n with time cost: \n", now() - a)
 80 |                 r += r_i
 81 |         else:
 82 |             r = parmap(one_test, range(init_seed, init_seed + rep_times), parallel)
 83 |     else:
 84 |         r = rep_seeds_print(one_test,rep_times,init_seed)
 85 |     print("total testing time cost for one J:", np.round(now() - a,3),Dash)
 86 |     if do_eval:
 87 |         p_values = [a[0] for a in r]
 88 |         rej_rates = rej_rate(p_values, [.1,.05,.01])
 89 |         values = [a[1] for a in r]
 90 |         if email:
 91 |             send_email("J = " + str(J) + "with testing results: \n" + str(rej_rates) + \
 92 |                       "\n and values: \n" + str([np.mean(values), np.std(values)]))
 93 |         return rej_rates, np.round(np.mean(values),4), np.round(np.std(values),4)
 94 |     else:
 95 |         rej_rates = rej_rate(r, [.1,.05,.01])
 96 |         return rej_rates
 97 | 
 98 | def one_setting_mul_J(rep_times = 50, N = 30, T = 24 * 2, B = 200, Q = 10, sd_G = 5, 
 99 |                       paras = "CV_once", n_trees = 200, 
100 |                       init_seed = 0,
101 |                       file = None, email = False, J_low = 1, J_upper = 5, 
102 |                       do_eval = True, parallel = False, print_every_J = False):
103 |     J_rej_rates, J_values = [], []
104 |     true_lag = 4
105 |     ## Prepare log
106 |     setting = [N, T, sd_G]
107 |     email_contents = ""
108 |     email_setting = [rep_times, N, T, B, sd_G]
109 |     email_str = "rep_times, N, T, B, sd_G"
110 |     print(dash, "Setting running [N,T,sd_G]:", setting, dash, "\n")
111 |     if file is not None:
112 |         print(dash, "Setting running [N,T,sd_G]:", setting, dash, "\n", file = file)
113 |     ## Testing and value results for each J, with one true_lag
114 |     for J in range(J_low, J_upper + 1):
115 |         a = now()
116 |         rej_rates, mean_value, std_value = one_setting_one_J(rep_times = rep_times, J = J, J_upper = J_upper,
117 |                                                  N = N, T = T, B = B,Q=Q, sd_G = sd_G, 
118 |                                                  paras = paras, n_trees = n_trees, 
119 |                                                  init_seed = init_seed, 
120 |                                                  do_eval = do_eval, parallel = parallel)
121 | 
122 |         #### Store results
123 |         J_rej_rates.append(rej_rates)
124 |         J_values.append([mean_value, std_value]) # sd_over_500(mean_over_10)
125 |         
126 |         #### Prepare log
127 |         print_res = ' '.join(["\n", "Above: when true_lag = ",str(true_lag),
128 |               "and we do J = ", str(J), "testing",str(is_null(true_lag = true_lag, J = J)), 
129 |               "[supremum-based, integration-based]", "\n The average and std of values: \n",
130 |                               str([mean_value, std_value])])
131 |         print(print_res)
132 |         if file is not None:
133 |             print(print_res, file = file)
134 |         
135 |         print_time = ' '.join(["Time cost:", str(np.round( (now() - a)/60,2)), "mins","\n",DASH])
136 |         print(print_time)
137 |         if file is not None:
138 |             print(print_time, file = file)
139 |         
140 |         log = "12_16, lag4 OLS AWS" + ", init_seed - " + str(init_seed) + "\n" + dash + "\n"
141 |         email_this_J = email_str + "\n" + str(email_setting)+ '; J=' + str(J) + ' DONE!\n' \
142 |             +'alpha = [.1,.05,.01]' + ', [supremum-based, integration-based] \n' + str(rej_rates) + "\n" \
143 |             + str([mean_value, std_value]) + "\n"
144 |         email_contents += dash + "\n" + email_this_J
145 |         if print_every_J:
146 |             print(J_rej_rates, DASH, J_values)
147 |     ## Final printing out
148 |     if email:
149 |         send_email(log + email_contents)
150 |     if  file is not None: # print latex
151 |         latex_ohio_one_T_sd_G_mul_j(J_rej_rates,file)
152 |         
153 |     return J_rej_rates, J_values
154 | 
155 | print("Import DONE!")
156 | 
157 | 
158 | rr = []
159 | for N in [10, 15, 20]:
160 |     r = one_setting_mul_J(rep_times = 500, N = N, T = 7 * 8 * 24, sd_G = 3,
161 |                   paras = "CV_once", n_trees = 100, 
162 |                   B = 100, init_seed = 0, 
163 |                   J_low = 1, J_upper = 10, 
164 |                   do_eval = False, parallel = n_cores, print_every_J = True)
165 |     print(r)
166 |     rr.append(r)
167 |     print(rr)
168 | 


--------------------------------------------------------------------------------
/experiment_script/.ipynb_checkpoints/Ohio_simu_testing-checkpoint.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import os, sys
  4 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  5 | 
  6 | sys.path.insert(0, package_path + "/test_func")
  7 | from _core_test_fun import *
  8 | 
  9 | sys.path.insert(0, package_path + "/experiment_func")
 10 | from _DGP_Ohio import *
 11 | 
 12 | os.environ["OMP_NUM_THREADS"] = "1"
 13 | #####################################
 14 | # To reduce computational cost, in our experiment, we use the “CV_once” option, which means we only do cross-validation in the 1st replication, 
 15 | # and use the chosen parameters in the remaining replications. With small-scale experiments, 
 16 | # the difference with standard cross-validation is negligible and will not affect our findings.
 17 | #####################################
 18 | 
 19 | def one_time(seed = 1, J = 1, J_upper = 10,
 20 |                      N = 30, T = 2 * 24, B = 200, Q = 10, sd_G = 5,
 21 |                      gamma_NFQ = 0.95, 
 22 |                      T_eval = 60, N_eval = 100, gamma_eval = 0.9, thre_eval = 1e-4,
 23 |                      paras = "CV", n_trees = 200,
 24 |                      first_T = 10, 
 25 |                      do_eval = True):
 26 |     ## generate data
 27 |     data = simu_Ohio(T, N, seed = seed, sd_G = sd_G)
 28 |     data = burn_in(data,first_T)
 29 |     T -= first_T
 30 |     # for value evaluation [we will use the original transition], 
 31 |     # do not use normalized data[will not be dominated like testing]
 32 |     value_data = data
 33 |     testing_data = [a[:2] for a in normalize(data)]
 34 |     ## this one time is used to get paras
 35 |     if paras == "CV_once": 
 36 |         return lam_est(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees)
 37 |     time = now()
 38 |     p_value = test(data = testing_data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees, print_time = False, method = "QRF")
 39 |     if seed % 100 == 0:
 40 |         print("** testing time:", now() - time, " for seed = ", seed,"**"); time = now()
 41 |     
 42 |     if do_eval: # for the currect J, get data, learn a function, and evaluate via simulations
 43 |         Learning_PatternSets = MDP2Trans(MDPs = value_data, J = J, action_in_states = True)
 44 |         Q_func = NFQ(PatternSets = Learning_PatternSets, gamma = gamma_NFQ,
 45 |                      RF_paras = paras, n_trees = n_trees, threshold = thre_eval)
 46 |         J_values = eval_Ohio_policy(Q_func = Q_func, J_Q = J, J_upper = J_upper,
 47 |                                    T = T_eval, gamma = gamma_eval, N = N_eval, 
 48 |                                    sd_G = sd_G, seed = 0)
 49 |         return [p_value, np.mean(J_values)] 
 50 |     else:
 51 |         return p_value
 52 | 
 53 | 
 54 | def one_setting_one_J(rep_times = 2, J = 1, J_upper = 5,
 55 |                       N = 20, T = 2 * 24, B = 2, Q = 10, sd_G = 5,
 56 |                       paras = "CV_once", n_trees = 20, 
 57 |                       init_seed = 0, do_eval = False, parallel = False, email = False):
 58 |     a = now()
 59 |     if paras == "CV_once":
 60 |         paras = one_time(seed = 0, J = J, J_upper = J_upper,
 61 |                          N = N, T = T, B = B, Q = Q,
 62 |                          sd_G = sd_G, 
 63 |                          paras = "CV_once", n_trees = n_trees,
 64 |                          do_eval = do_eval)   
 65 |         print("CV paras:",paras)
 66 |     
 67 |     def one_test(seed):
 68 |         return one_time(seed = seed, J = J, J_upper = J_upper,
 69 |                      N = N, T = T, B = B, Q = Q, sd_G = sd_G,
 70 |                      paras = paras, n_trees = n_trees, 
 71 |                      do_eval = do_eval)            
 72 |     if parallel:
 73 |         if rep_times == 500 and do_eval:
 74 |             r = []
 75 |             for i in range(5): # connection
 76 |                 r_i = parmap(one_test, range(init_seed + i * 100, init_seed + (i + 1) * 100), parallel)
 77 |                 print("the first", (i + 1) * 100, "reps in 500 reps Done: \n",
 78 |                       rej_rate([a[0] for a in r_i], [.1,.05,.01]), 
 79 |                      "\n with time cost: \n", now() - a)
 80 |                 r += r_i
 81 |         else:
 82 |             r = parmap(one_test, range(init_seed, init_seed + rep_times), parallel)
 83 |     else:
 84 |         r = rep_seeds_print(one_test,rep_times,init_seed)
 85 |     print("total testing time cost for one J:", np.round(now() - a,3),Dash)
 86 |     if do_eval:
 87 |         p_values = [a[0] for a in r]
 88 |         rej_rates = rej_rate(p_values, [.1,.05,.01])
 89 |         values = [a[1] for a in r]
 90 |         if email:
 91 |             send_email("J = " + str(J) + "with testing results: \n" + str(rej_rates) + \
 92 |                       "\n and values: \n" + str([np.mean(values), np.std(values)]))
 93 |         return rej_rates, np.round(np.mean(values),4), np.round(np.std(values),4)
 94 |     else:
 95 |         rej_rates = rej_rate(r, [.1,.05,.01])
 96 |         return rej_rates
 97 | 
 98 | def one_setting_mul_J(rep_times = 50, N = 30, T = 24 * 2, B = 200, Q = 10, sd_G = 5, 
 99 |                       paras = "CV_once", n_trees = 200, 
100 |                       init_seed = 0,
101 |                       file = None, email = False, J_low = 1, J_upper = 5, 
102 |                       do_eval = True, parallel = False, print_every_J = False):
103 |     J_rej_rates, J_values = [], []
104 |     true_lag = 4
105 |     ## Prepare log
106 |     setting = [N, T, sd_G]
107 |     email_contents = ""
108 |     email_setting = [rep_times, N, T, B, sd_G]
109 |     email_str = "rep_times, N, T, B, sd_G"
110 |     print(dash, "Setting running [N,T,sd_G]:", setting, dash, "\n")
111 |     if file is not None:
112 |         print(dash, "Setting running [N,T,sd_G]:", setting, dash, "\n", file = file)
113 |     ## Testing and value results for each J, with one true_lag
114 |     for J in range(J_low, J_upper + 1):
115 |         a = now()
116 |         rej_rates, mean_value, std_value = one_setting_one_J(rep_times = rep_times, J = J, J_upper = J_upper,
117 |                                                  N = N, T = T, B = B,Q=Q, sd_G = sd_G, 
118 |                                                  paras = paras, n_trees = n_trees, 
119 |                                                  init_seed = init_seed, 
120 |                                                  do_eval = do_eval, parallel = parallel)
121 | 
122 |         #### Store results
123 |         J_rej_rates.append(rej_rates)
124 |         J_values.append([mean_value, std_value]) # sd_over_500(mean_over_10)
125 |         
126 |         #### Prepare log
127 |         print_res = ' '.join(["\n", "Above: when true_lag = ",str(true_lag),
128 |               "and we do J = ", str(J), "testing",str(is_null(true_lag = true_lag, J = J)), 
129 |               "[supremum-based, integration-based]", "\n The average and std of values: \n",
130 |                               str([mean_value, std_value])])
131 |         print(print_res)
132 |         if file is not None:
133 |             print(print_res, file = file)
134 |         
135 |         print_time = ' '.join(["Time cost:", str(np.round( (now() - a)/60,2)), "mins","\n",DASH])
136 |         print(print_time)
137 |         if file is not None:
138 |             print(print_time, file = file)
139 |         
140 |         log = "12_16, lag4 OLS AWS" + ", init_seed - " + str(init_seed) + "\n" + dash + "\n"
141 |         email_this_J = email_str + "\n" + str(email_setting)+ '; J=' + str(J) + ' DONE!\n' \
142 |             +'alpha = [.1,.05,.01]' + ', [supremum-based, integration-based] \n' + str(rej_rates) + "\n" \
143 |             + str([mean_value, std_value]) + "\n"
144 |         email_contents += dash + "\n" + email_this_J
145 |         if print_every_J:
146 |             print(J_rej_rates, DASH, J_values)
147 |     ## Final printing out
148 |     if email:
149 |         send_email(log + email_contents)
150 |     if  file is not None: # print latex
151 |         latex_ohio_one_T_sd_G_mul_j(J_rej_rates,file)
152 |         
153 |     return J_rej_rates, J_values
154 | 
155 | print("Import DONE!")
156 | 
157 | 
158 | rr = []
159 | for N in [10, 15, 20]:
160 |     r = one_setting_mul_J(rep_times = 500, N = N, T = 7 * 8 * 24, sd_G = 3,
161 |                   paras = "CV_once", n_trees = 100, 
162 |                   B = 100, init_seed = 0, 
163 |                   J_low = 1, J_upper = 10, 
164 |                   do_eval = False, parallel = n_cores, print_every_J = True)
165 |     print(r)
166 |     rr.append(r)
167 |     print(rr)
168 | 


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_utility_RL-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | """
  5 | ##########################################################################
  6 | from ._utility import *
  7 | from ._uti_basic import *
  8 | ##########################################################################
  9 | param_grid = {'max_depth': [2, 6, 10], 'min_samples_leaf': [5, 10, 20]}
 10 | n_jobs = multiprocessing.cpu_count() 
 11 | ##########################################################################
 12 | def change_rate(y_old, y_new):
 13 |     return norm(y_old - y_new)**2 / norm(y_old)**2
 14 | 
 15 | def flatten(l): 
 16 |     # list of sublist -> list
 17 |     return [item for sublist in l for item in sublist]
 18 | 
 19 | def cum_r(rewards, gamma):
 20 |     """ rewards -> culmulative reward
 21 |     """
 22 |     return sum(
 23 |         map(operator.mul, [gamma ** j for j in range(len(rewards))], rewards))
 24 | cum_rewards = cum_r
 25 | ##########################################################################
 26 | 
 27 | #%% Prepare training data for the Fitted-Q: based on state (mul-J) transition and observed rewards
 28 | 
 29 | def ObsAct2State(obs, actions, t, J, multiple_N = False):
 30 |     """ Based on our discussion on 12/03, to form a lag-J states from history obs and A
 31 |     For RL purpose. The testing part is clear.
 32 |     To make A_t, we need to define S_t, which is (with lag-J) (e.g., when lag-1, S_t+1 only depneds on X_t and A_t): 
 33 |     O_(t-J + 1), A_(t - J+1), ..., O_t
 34 |     """
 35 |     if not multiple_N:
 36 |         if J == 1:
 37 |             s = obs[t, :].ravel(order='C')
 38 |         else:
 39 |             s = np.hstack([obs[(t - J + 1): t, :], actions[(t - J + 1):t]]).ravel(order='C')
 40 |             s = np.append(s, obs[t, :].ravel())
 41 |         return s
 42 |     else: # obs: 3 * T * N
 43 |         N = obs.shape[2]
 44 |         dim_obs = 3
 45 |         if J == 1:
 46 |             s = obs[:, t, :]
 47 |         else: # target: (4 * J_Q - 1) * N
 48 |             s = np.vstack(([
 49 |                 obs[:, (t - J + 1 ):t, :],
 50 |                 actions[(t - J + 1):t, :].reshape((1, J - 1, N))])) # extend_dim for first one
 51 |             s = s.reshape(((dim_obs + 1) * (J - 1), N), order = 'F')
 52 |             obs_0 = obs[:, t, :] # 3 * N
 53 |             s = np.vstack([s, obs_0])
 54 |         return s # dim * N
 55 |             
 56 | 
 57 | 
 58 | def MDP2Trans(MDPs, J, action_in_states = False, combined = True):
 59 |     """
 60 |     Input: a list (len-N) of trajectory [state matrix [T * 3], actions, rewards] - I need to modify evaluate.ipynb
 61 |     Output: a list of (s,a,s',u) (combined together)
 62 |     """
 63 |     def MDP2Trans_one_traj(i):
 64 |         obs, actions, utilities = MDPs[i]
 65 |         T = obs.shape[0]
 66 |         result = []
 67 |         for t in range(J - 1, T - 1):
 68 |             s = ObsAct2State(obs, actions, t, J)
 69 |             ss = ObsAct2State(obs, actions, t + 1, J)
 70 |             
 71 |             a = actions[t]
 72 |             u = utilities[t]
 73 |             result.append([s, a, ss, u])
 74 |         return result
 75 |     r = rep_seeds(MDP2Trans_one_traj, len(MDPs) - 1)
 76 |     if combined:
 77 |         return flatten(r) # put every patient together; not into a metrix
 78 |     else:
 79 |         return r
 80 | 
 81 | ##########################################################################
 82 | """ Fitted Q
 83 | 1. fit (x,a) -> q(x,a)
 84 | 2. update q(a,x) = max_{a'}(q(a',x')) + gamma * r  # (x',r) is observed
 85 | """
 86 | ##########################################################################
 87 | # %% Main functions for Fitted-Q
 88 | def NFQ(PatternSets, gamma, RF_paras = [3,20], n_trees = 200, threshold = 1e-5, initialize = "mine"):
 89 |     """ Learn optimal Q function from batch data (RF + fitted-Q)
 90 |     Input: a list of (s,a,s',u)
 91 |     Output: Q function
 92 |     """
 93 |     rseed(0); npseed(0)
 94 |     ### Preparing training data
 95 |     s, a, ss, r = [np.array([a[i] for a in PatternSets]) for i in range(4)]
 96 |     a = np.array([a[1] for a in PatternSets]).reshape((-1, 1))
 97 |     range_a = np.unique(a)
 98 |     x_train = np.hstack((s, a))
 99 |     
100 |     ### Initialization
101 |     init_y = r * (1 / (1 - gamma)) # based on the series result
102 |     is_CV = False
103 |     if RF_paras == "CV": 
104 |         rseed(0); npseed(0)
105 |         is_CV = True
106 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
107 |         gd = GridSearchCV(estimator = rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose=0)
108 |         gd.fit(x_train, init_y.ravel())
109 |         RF_paras = gd.best_params_
110 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
111 |     
112 |     rseed(0); npseed(0)
113 |     max_depth, min_samples_leaf = RF_paras
114 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
115 |            min_samples_leaf, n_jobs = n_jobs, 
116 |            verbose = 0) 
117 |     Q.fit(x_train, init_y.ravel())
118 |     
119 |     ### Iterations
120 |     y_old = init_y.copy()
121 |     # update the estimated Q
122 |     rep, epsilon = 0, 100
123 |     while(epsilon > threshold and rep < 100): # 200 before
124 |         rseed(0); npseed(0)
125 |         y_train = UpdatedValues(ss, range_a, r, Q, gamma)
126 |         epsilon = change_rate( y_old = y_old, y_new = y_train)
127 |         Q.fit(x_train, y_train.ravel()) 
128 |         y_old = y_train.copy()
129 |         rep += 1
130 |     return Q
131 | 
132 | 
133 | def UpdatedValues(ss, range_a, r, Q, gamma):
134 |     """ Update the estimated optimal v(s,a) with the fitted Q function
135 |     Input: 
136 |         PatternSets = a list of (s,a,s',r), Q
137 |         ss0, ss1: (s', 0), (s', 1) --- just for lasy
138 |         r: observed rewards
139 |         Q: for values at next states
140 |     Output: ((s,a),v), where v = r + gamma * max_a' Q(s',a'); 0/1 action in this example.
141 |     """
142 |     v_as = []
143 |     N = ss.shape[0]
144 |     for a in range_a:
145 |         ss_a = np.hstack((ss, np.ones((N, 1)) * a ))
146 |         v_a = Q.predict(ss_a)
147 |         v_as.append(v_a.reshape(N, 1))
148 |     v_max = np.amax(np.hstack(v_as), 1)
149 |     Q_new = r.reshape(N, 1) + gamma * v_max.reshape(N, 1)
150 |     return Q_new
151 | 
152 | 
153 | def Estpolicy(Q_func, range_a):
154 |     """ Q function to Policy
155 |     Input:
156 |         Q-function and the range of available actions
157 |     Output:
158 |         The optimal action policy  (discrete) at this state [given a state, output an action]
159 |     """
160 |     def policy(s, debug = 0): 
161 |         """
162 |         Input: s [N * dx]
163 |         Output: actions [N * 1]
164 |         """   
165 |         rseed(0); npseed(0)
166 |         N  = s.shape[0]
167 |         v_as = []
168 |         for a in range_a:
169 |             s_a = np.hstack([s,np.repeat(a, N).reshape(-1,1)])
170 |             v_a = Q_func.predict(s_a)
171 |             v_as.append(v_a.reshape(-1, 1))
172 |         v_as = np.round(np.hstack(v_as), 4)
173 |         actions = np.array([range_a[i] for i in np.argmax(v_as, 1)]).reshape(-1, 1)
174 |         if debug == 1:
175 |             print(v_as - v_as[:,1].reshape(-1,1), DASH, actions)
176 |         return actions
177 | 
178 |     return policy
179 | 
180 | ##########################################################################
181 | ##########################################################################
182 | def UpdatedValues_eval(ss, policy, J, r, Q, gamma):
183 |     """ Version of 1-step forward in Evaluations
184 |     """
185 |     dx = ss.shape[1]
186 |     sss = ss[:,(dx - (4 * J - 1)):dx]
187 |     As = policy(sss)
188 |     sa = np.hstack([ss,As])
189 |     return gamma * Q.predict(sa).reshape(-1,1) + r.reshape(-1,1)
190 | 
191 | def FQE(PatternSets, Q_func, J, gamma = 0.9, RF_paras = [3, 20], n_trees = 200, 
192 |                          threshold = 1e-4):
193 |     """ 
194 |     Fitted-Q Evaluation for off-policy evaluation (OPE) in REAL DATA
195 |         
196 |         1. fit RF q: (x,a) -> value
197 |         2. update the value function of policy:
198 |             q_policy(x, a) = gamma * q(x', policy(x'[, (dx - J): dx])) + r
199 |             
200 |     3. q_policy(x, x[, (dx - J): dx])
201 |     
202 |     Input: 
203 |         PatternSets: a list of (s, a, s', u) [have been transformed]
204 |         
205 |     Output: V function
206 | 
207 |     """
208 |     rseed(0); npseed(0)
209 |     
210 |     # Preparing training data
211 |     s_bef, a_bef, ss_bef, r_bef = [np.array([a[i] for a in PatternSets]) for i in range(4)]
212 |     a_bef = a_bef.reshape(-1, 1)
213 |     range_a = np.unique(a_bef)
214 |     
215 |     policy = Estpolicy(Q_func, range_a)
216 |     time = now()
217 |     
218 |     dx = s_bef.shape[1]    
219 |     s1 = s_bef[:,(dx - (4 * J - 1)):dx].copy()
220 |     As = policy(s1)
221 |     selected = (As == a_bef)
222 | 
223 |     s2, a2, ss2, r2 = [], [], [], []
224 |     for i in range(s_bef.shape[0]):
225 |         if selected[i, 0]:
226 |             s2.append(s_bef[i,])
227 |             a2.append(a_bef[i,])
228 |             ss2.append(ss_bef[i,])
229 |             r2.append(r_bef[i,])
230 |     s, a, ss, r = np.vstack(s2).copy(), np.vstack(a2).copy(), np.vstack(ss2).copy(), np.vstack(r2).copy()
231 |     
232 |     
233 |     ### Initialization
234 |     x_train = np.hstack((s, a))
235 |     init_y = r * (1 / (1 - gamma))
236 |     if RF_paras == "CV":
237 |         rseed(0); npseed(0)
238 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
239 |         gd = GridSearchCV(estimator=rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose=0)
240 |         gd.fit(x_train, init_y.ravel())
241 |         RF_paras = gd.best_params_
242 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
243 |             
244 |     max_depth, min_samples_leaf = RF_paras
245 |     rseed(0); npseed(0)
246 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
247 |            min_samples_leaf, n_jobs = n_jobs, verbose = 0) 
248 |     Q.fit(x_train, init_y.ravel())
249 | 
250 |     y_old = init_y.copy()
251 |     # evaluate the policy policy
252 |     rep, epsilon = 0, 100
253 |     while(epsilon > threshold and rep < 100):
254 |         rseed(0); npseed(0)
255 |         y_train = UpdatedValues_eval(ss, policy, J, r, Q, gamma) # too slow [?]
256 |         y_train = np.round(y_train, 6)
257 |         epsilon = change_rate( y_old = y_old, y_new = y_train)
258 |         Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
259 |            min_samples_leaf, n_jobs = n_jobs, verbose = 0)
260 |         Q.fit(x_train, y_train.ravel())  # regression function: (s,a) -> v
261 | 
262 |         y_old = y_train.copy()
263 |         rep += 1
264 |             
265 |     def V_func(s):
266 |         dx = s.shape[1]
267 |         a = policy(s[:,(dx - (4 * J - 1)):dx]).reshape(-1,1)
268 |         return Q.predict(np.hstack([s,a]))
269 |     
270 |     return V_func
271 | 
272 | 
273 | 
274 | 


--------------------------------------------------------------------------------
/experiment_func/_utility_RL.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | ##########################################################################
  4 | import os, sys
  5 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  6 | 
  7 | sys.path.insert(0, package_path + "/test_func")
  8 | from _core_test_fun import *
  9 | ##########################################################################
 10 | param_grid = {'max_depth': [2, 6, 10], 'min_samples_leaf': [5, 10, 20]}
 11 | n_jobs = multiprocessing.cpu_count() 
 12 | ##########################################################################
 13 | def change_rate(y_old, y_new):
 14 |     return norm(y_old - y_new)**2 / norm(y_old)**2
 15 | 
 16 | def flatten(l): 
 17 |     # list of sublist -> list
 18 |     return [item for sublist in l for item in sublist]
 19 | 
 20 | def cum_r(rewards, gamma):
 21 |     """ rewards -> culmulative reward
 22 |     """
 23 |     return sum(
 24 |         map(operator.mul, [gamma ** j for j in range(len(rewards))], rewards))
 25 | cum_rewards = cum_r
 26 | ##########################################################################
 27 | 
 28 | #%% Prepare training data for the Fitted-Q: based on state (mul-J) transition and observed rewards
 29 | 
 30 | def ObsAct2State(obs, actions, t, J, multiple_N = False):
 31 |     """ Based on our discussion on 12/03, to form a lag-J states from history obs and A
 32 |     For RL purpose. The testing part is clear.
 33 |     To make A_t, we need to define S_t, which is (with lag-J) (e.g., when lag-1, S_t+1 only depneds on X_t and A_t): 
 34 |     O_(t-J + 1), A_(t - J+1), ..., O_t
 35 |     """
 36 |     if not multiple_N:
 37 |         if J == 1:
 38 |             s = obs[t, :].ravel(order='C')
 39 |         else:
 40 |             s = np.hstack([obs[(t - J + 1): t, :], actions[(t - J + 1):t]]).ravel(order='C')
 41 |             s = np.append(s, obs[t, :].ravel())
 42 |         return s
 43 |     else: # obs: 3 * T * N
 44 |         N = obs.shape[2]
 45 |         dim_obs = 3
 46 |         if J == 1:
 47 |             s = obs[:, t, :]
 48 |         else: # target: (4 * J_Q - 1) * N
 49 |             s = np.vstack(([
 50 |                 obs[:, (t - J + 1 ):t, :],
 51 |                 actions[(t - J + 1):t, :].reshape((1, J - 1, N))])) # extend_dim for first one
 52 |             s = s.reshape(((dim_obs + 1) * (J - 1), N), order = 'F')
 53 |             obs_0 = obs[:, t, :] # 3 * N
 54 |             s = np.vstack([s, obs_0])
 55 |         return s # dim * N
 56 |             
 57 | 
 58 | 
 59 | def MDP2Trans(MDPs, J, action_in_states = False, combined = True):
 60 |     """
 61 |     Input: a list (len-N) of trajectory [state matrix [T * 3], actions, rewards] - I need to modify evaluate.ipynb
 62 |     Output: a list of (s,a,s',u) (combined together)
 63 |     """
 64 |     def MDP2Trans_one_traj(i):
 65 |         obs, actions, utilities = MDPs[i]
 66 |         T = obs.shape[0]
 67 |         result = []
 68 |         for t in range(J - 1, T - 1):
 69 |             s = ObsAct2State(obs, actions, t, J)
 70 |             ss = ObsAct2State(obs, actions, t + 1, J)
 71 |             
 72 |             a = actions[t]
 73 |             u = utilities[t]
 74 |             result.append([s, a, ss, u])
 75 |         return result
 76 |     r = rep_seeds(MDP2Trans_one_traj, len(MDPs) - 1)
 77 |     if combined:
 78 |         return flatten(r) # put every patient together; not into a metrix
 79 |     else:
 80 |         return r
 81 | 
 82 | ##########################################################################
 83 | """ Fitted Q
 84 | 1. fit (x,a) -> q(x,a)
 85 | 2. update q(a,x) = max_{a'}(q(a',x')) + gamma * r  # (x',r) is observed
 86 | """
 87 | ##########################################################################
 88 | # %% Main functions for Fitted-Q
 89 | def NFQ(PatternSets, gamma, RF_paras = [3,20], n_trees = 200, threshold = 1e-5, initialize = "mine"):
 90 |     """ Learn optimal Q function from batch data (RF + fitted-Q)
 91 |     Input: a list of (s,a,s',u)
 92 |     Output: Q function
 93 |     """
 94 |     rseed(0); npseed(0)
 95 |     ### Preparing training data
 96 |     s, a, ss, r = [np.array([a[i] for a in PatternSets]) for i in range(4)]
 97 |     a = np.array([a[1] for a in PatternSets]).reshape((-1, 1))
 98 |     range_a = np.unique(a)
 99 |     x_train = np.hstack((s, a))
100 |     
101 |     ### Initialization
102 |     init_y = r * (1 / (1 - gamma)) # based on the series result
103 |     is_CV = False
104 |     if RF_paras == "CV": 
105 |         rseed(0); npseed(0)
106 |         is_CV = True
107 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
108 |         gd = GridSearchCV(estimator = rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose=0)
109 |         gd.fit(x_train, init_y.ravel())
110 |         RF_paras = gd.best_params_
111 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
112 |     
113 |     rseed(0); npseed(0)
114 |     max_depth, min_samples_leaf = RF_paras
115 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
116 |            min_samples_leaf, n_jobs = n_jobs, 
117 |            verbose = 0) 
118 |     Q.fit(x_train, init_y.ravel())
119 |     
120 |     ### Iterations
121 |     y_old = init_y.copy()
122 |     # update the estimated Q
123 |     rep, epsilon = 0, 100
124 |     while(epsilon > threshold and rep < 100): # 200 before
125 |         rseed(0); npseed(0)
126 |         y_train = UpdatedValues(ss, range_a, r, Q, gamma)
127 |         epsilon = change_rate( y_old = y_old, y_new = y_train)
128 |         Q.fit(x_train, y_train.ravel()) 
129 |         y_old = y_train.copy()
130 |         rep += 1
131 |     return Q
132 | 
133 | 
134 | def UpdatedValues(ss, range_a, r, Q, gamma):
135 |     """ Update the estimated optimal v(s,a) with the fitted Q function
136 |     Input: 
137 |         PatternSets = a list of (s,a,s',r), Q
138 |         ss0, ss1: (s', 0), (s', 1) --- just for lasy
139 |         r: observed rewards
140 |         Q: for values at next states
141 |     Output: ((s,a),v), where v = r + gamma * max_a' Q(s',a'); 0/1 action in this example.
142 |     """
143 |     v_as = []
144 |     N = ss.shape[0]
145 |     for a in range_a:
146 |         ss_a = np.hstack((ss, np.ones((N, 1)) * a ))
147 |         v_a = Q.predict(ss_a)
148 |         v_as.append(v_a.reshape(N, 1))
149 |     v_max = np.amax(np.hstack(v_as), 1)
150 |     Q_new = r.reshape(N, 1) + gamma * v_max.reshape(N, 1)
151 |     return Q_new
152 | 
153 | 
154 | def Estpolicy(Q_func, range_a):
155 |     """ Q function to Policy
156 |     Input:
157 |         Q-function and the range of available actions
158 |     Output:
159 |         The optimal action policy  (discrete) at this state [given a state, output an action]
160 |     """
161 |     def policy(s, debug = 0): 
162 |         """
163 |         Input: s [N * dx]
164 |         Output: actions [N * 1]
165 |         """   
166 |         rseed(0); npseed(0)
167 |         N  = s.shape[0]
168 |         v_as = []
169 |         for a in range_a:
170 |             s_a = np.hstack([s,np.repeat(a, N).reshape(-1,1)])
171 |             v_a = Q_func.predict(s_a)
172 |             v_as.append(v_a.reshape(-1, 1))
173 |         v_as = np.round(np.hstack(v_as), 4)
174 |         actions = np.array([range_a[i] for i in np.argmax(v_as, 1)]).reshape(-1, 1)
175 |         if debug == 1:
176 |             print(v_as - v_as[:,1].reshape(-1,1), DASH, actions)
177 |         return actions
178 | 
179 |     return policy
180 | 
181 | ##########################################################################
182 | ##########################################################################
183 | def UpdatedValues_eval(ss, policy, J, r, Q, gamma):
184 |     """ Version of 1-step forward in Evaluations
185 |     """
186 |     dx = ss.shape[1]
187 |     sss = ss[:,(dx - (4 * J - 1)):dx]
188 |     As = policy(sss)
189 |     sa = np.hstack([ss,As])
190 |     return gamma * Q.predict(sa).reshape(-1,1) + r.reshape(-1,1)
191 | 
192 | def FQE(PatternSets, Q_func, J, gamma = 0.9, RF_paras = [3, 20], n_trees = 200, 
193 |                          threshold = 1e-4):
194 |     """ 
195 |     Fitted-Q Evaluation for off-policy evaluation (OPE) in REAL DATA
196 |         
197 |         1. fit RF q: (x,a) -> value
198 |         2. update the value function of policy:
199 |             q_policy(x, a) = gamma * q(x', policy(x'[, (dx - J): dx])) + r
200 |             
201 |     3. q_policy(x, x[, (dx - J): dx])
202 |     
203 |     Input: 
204 |         PatternSets: a list of (s, a, s', u) [have been transformed]
205 |         
206 |     Output: V function
207 | 
208 |     """
209 |     rseed(0); npseed(0)
210 |     
211 |     # Preparing training data
212 |     s_bef, a_bef, ss_bef, r_bef = [np.array([a[i] for a in PatternSets]) for i in range(4)]
213 |     a_bef = a_bef.reshape(-1, 1)
214 |     range_a = np.unique(a_bef)
215 |     
216 |     policy = Estpolicy(Q_func, range_a)
217 |     time = now()
218 |     
219 |     dx = s_bef.shape[1]    
220 |     s1 = s_bef[:,(dx - (4 * J - 1)):dx].copy()
221 |     As = policy(s1)
222 |     selected = (As == a_bef)
223 | 
224 |     s2, a2, ss2, r2 = [], [], [], []
225 |     for i in range(s_bef.shape[0]):
226 |         if selected[i, 0]:
227 |             s2.append(s_bef[i,])
228 |             a2.append(a_bef[i,])
229 |             ss2.append(ss_bef[i,])
230 |             r2.append(r_bef[i,])
231 |     s, a, ss, r = np.vstack(s2).copy(), np.vstack(a2).copy(), np.vstack(ss2).copy(), np.vstack(r2).copy()
232 |     
233 |     
234 |     ### Initialization
235 |     x_train = np.hstack((s, a))
236 |     init_y = r * (1 / (1 - gamma))
237 |     if RF_paras == "CV":
238 |         rseed(0); npseed(0)
239 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
240 |         gd = GridSearchCV(estimator=rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose=0)
241 |         gd.fit(x_train, init_y.ravel())
242 |         RF_paras = gd.best_params_
243 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
244 |             
245 |     max_depth, min_samples_leaf = RF_paras
246 |     rseed(0); npseed(0)
247 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
248 |            min_samples_leaf, n_jobs = n_jobs, verbose = 0) 
249 |     Q.fit(x_train, init_y.ravel())
250 | 
251 |     y_old = init_y.copy()
252 |     # evaluate the policy policy
253 |     rep, epsilon = 0, 100
254 |     while(epsilon > threshold and rep < 100):
255 |         rseed(0); npseed(0)
256 |         y_train = UpdatedValues_eval(ss, policy, J, r, Q, gamma) # too slow [?]
257 |         y_train = np.round(y_train, 6)
258 |         epsilon = change_rate( y_old = y_old, y_new = y_train)
259 |         Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
260 |            min_samples_leaf, n_jobs = n_jobs, verbose = 0)
261 |         Q.fit(x_train, y_train.ravel())  # regression function: (s,a) -> v
262 | 
263 |         y_old = y_train.copy()
264 |         rep += 1
265 |             
266 |     def V_func(s):
267 |         dx = s.shape[1]
268 |         a = policy(s[:,(dx - (4 * J - 1)):dx]).reshape(-1,1)
269 |         return Q.predict(np.hstack([s,a]))
270 |     
271 |     return V_func
272 | 
273 | 
274 | 
275 | 


--------------------------------------------------------------------------------
/experiment_func/.ipynb_checkpoints/_utility_RL-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | ##########################################################################
  4 | import os, sys
  5 | package_path = os.path.dirname(os.path.abspath(os.getcwd()))
  6 | 
  7 | sys.path.insert(0, package_path + "/test_func")
  8 | from _core_test_fun import *
  9 | ##########################################################################
 10 | param_grid = {'max_depth': [2, 6, 10], 'min_samples_leaf': [5, 10, 20]}
 11 | n_jobs = multiprocessing.cpu_count() 
 12 | ##########################################################################
 13 | def change_rate(y_old, y_new):
 14 |     return norm(y_old - y_new)**2 / norm(y_old)**2
 15 | 
 16 | def flatten(l): 
 17 |     # list of sublist -> list
 18 |     return [item for sublist in l for item in sublist]
 19 | 
 20 | def cum_r(rewards, gamma):
 21 |     """ rewards -> culmulative reward
 22 |     """
 23 |     return sum(
 24 |         map(operator.mul, [gamma ** j for j in range(len(rewards))], rewards))
 25 | cum_rewards = cum_r
 26 | ##########################################################################
 27 | 
 28 | #%% Prepare training data for the Fitted-Q: based on state (mul-J) transition and observed rewards
 29 | 
 30 | def ObsAct2State(obs, actions, t, J, multiple_N = False):
 31 |     """ Based on our discussion on 12/03, to form a lag-J states from history obs and A
 32 |     For RL purpose. The testing part is clear.
 33 |     To make A_t, we need to define S_t, which is (with lag-J) (e.g., when lag-1, S_t+1 only depneds on X_t and A_t): 
 34 |     O_(t-J + 1), A_(t - J+1), ..., O_t
 35 |     """
 36 |     if not multiple_N:
 37 |         if J == 1:
 38 |             s = obs[t, :].ravel(order='C')
 39 |         else:
 40 |             s = np.hstack([obs[(t - J + 1): t, :], actions[(t - J + 1):t]]).ravel(order='C')
 41 |             s = np.append(s, obs[t, :].ravel())
 42 |         return s
 43 |     else: # obs: 3 * T * N
 44 |         N = obs.shape[2]
 45 |         dim_obs = 3
 46 |         if J == 1:
 47 |             s = obs[:, t, :]
 48 |         else: # target: (4 * J_Q - 1) * N
 49 |             s = np.vstack(([
 50 |                 obs[:, (t - J + 1 ):t, :],
 51 |                 actions[(t - J + 1):t, :].reshape((1, J - 1, N))])) # extend_dim for first one
 52 |             s = s.reshape(((dim_obs + 1) * (J - 1), N), order = 'F')
 53 |             obs_0 = obs[:, t, :] # 3 * N
 54 |             s = np.vstack([s, obs_0])
 55 |         return s # dim * N
 56 |             
 57 | 
 58 | 
 59 | def MDP2Trans(MDPs, J, action_in_states = False, combined = True):
 60 |     """
 61 |     Input: a list (len-N) of trajectory [state matrix [T * 3], actions, rewards] - I need to modify evaluate.ipynb
 62 |     Output: a list of (s,a,s',u) (combined together)
 63 |     """
 64 |     def MDP2Trans_one_traj(i):
 65 |         obs, actions, utilities = MDPs[i]
 66 |         T = obs.shape[0]
 67 |         result = []
 68 |         for t in range(J - 1, T - 1):
 69 |             s = ObsAct2State(obs, actions, t, J)
 70 |             ss = ObsAct2State(obs, actions, t + 1, J)
 71 |             
 72 |             a = actions[t]
 73 |             u = utilities[t]
 74 |             result.append([s, a, ss, u])
 75 |         return result
 76 |     r = rep_seeds(MDP2Trans_one_traj, len(MDPs) - 1)
 77 |     if combined:
 78 |         return flatten(r) # put every patient together; not into a metrix
 79 |     else:
 80 |         return r
 81 | 
 82 | ##########################################################################
 83 | """ Fitted Q
 84 | 1. fit (x,a) -> q(x,a)
 85 | 2. update q(a,x) = max_{a'}(q(a',x')) + gamma * r  # (x',r) is observed
 86 | """
 87 | ##########################################################################
 88 | # %% Main functions for Fitted-Q
 89 | def NFQ(PatternSets, gamma, RF_paras = [3,20], n_trees = 200, threshold = 1e-5, initialize = "mine"):
 90 |     """ Learn optimal Q function from batch data (RF + fitted-Q)
 91 |     Input: a list of (s,a,s',u)
 92 |     Output: Q function
 93 |     """
 94 |     rseed(0); npseed(0)
 95 |     ### Preparing training data
 96 |     s, a, ss, r = [np.array([a[i] for a in PatternSets]) for i in range(4)]
 97 |     a = np.array([a[1] for a in PatternSets]).reshape((-1, 1))
 98 |     range_a = np.unique(a)
 99 |     x_train = np.hstack((s, a))
100 |     
101 |     ### Initialization
102 |     init_y = r * (1 / (1 - gamma)) # based on the series result
103 |     is_CV = False
104 |     if RF_paras == "CV": 
105 |         rseed(0); npseed(0)
106 |         is_CV = True
107 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
108 |         gd = GridSearchCV(estimator = rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose=0)
109 |         gd.fit(x_train, init_y.ravel())
110 |         RF_paras = gd.best_params_
111 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
112 |     
113 |     rseed(0); npseed(0)
114 |     max_depth, min_samples_leaf = RF_paras
115 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
116 |            min_samples_leaf, n_jobs = n_jobs, 
117 |            verbose = 0) 
118 |     Q.fit(x_train, init_y.ravel())
119 |     
120 |     ### Iterations
121 |     y_old = init_y.copy()
122 |     # update the estimated Q
123 |     rep, epsilon = 0, 100
124 |     while(epsilon > threshold and rep < 100): # 200 before
125 |         rseed(0); npseed(0)
126 |         y_train = UpdatedValues(ss, range_a, r, Q, gamma)
127 |         epsilon = change_rate( y_old = y_old, y_new = y_train)
128 |         Q.fit(x_train, y_train.ravel()) 
129 |         y_old = y_train.copy()
130 |         rep += 1
131 |     return Q
132 | 
133 | 
134 | def UpdatedValues(ss, range_a, r, Q, gamma):
135 |     """ Update the estimated optimal v(s,a) with the fitted Q function
136 |     Input: 
137 |         PatternSets = a list of (s,a,s',r), Q
138 |         ss0, ss1: (s', 0), (s', 1) --- just for lasy
139 |         r: observed rewards
140 |         Q: for values at next states
141 |     Output: ((s,a),v), where v = r + gamma * max_a' Q(s',a'); 0/1 action in this example.
142 |     """
143 |     v_as = []
144 |     N = ss.shape[0]
145 |     for a in range_a:
146 |         ss_a = np.hstack((ss, np.ones((N, 1)) * a ))
147 |         v_a = Q.predict(ss_a)
148 |         v_as.append(v_a.reshape(N, 1))
149 |     v_max = np.amax(np.hstack(v_as), 1)
150 |     Q_new = r.reshape(N, 1) + gamma * v_max.reshape(N, 1)
151 |     return Q_new
152 | 
153 | 
154 | def Estpolicy(Q_func, range_a):
155 |     """ Q function to Policy
156 |     Input:
157 |         Q-function and the range of available actions
158 |     Output:
159 |         The optimal action policy  (discrete) at this state [given a state, output an action]
160 |     """
161 |     def policy(s, debug = 0): 
162 |         """
163 |         Input: s [N * dx]
164 |         Output: actions [N * 1]
165 |         """   
166 |         rseed(0); npseed(0)
167 |         N  = s.shape[0]
168 |         v_as = []
169 |         for a in range_a:
170 |             s_a = np.hstack([s,np.repeat(a, N).reshape(-1,1)])
171 |             v_a = Q_func.predict(s_a)
172 |             v_as.append(v_a.reshape(-1, 1))
173 |         v_as = np.round(np.hstack(v_as), 4)
174 |         actions = np.array([range_a[i] for i in np.argmax(v_as, 1)]).reshape(-1, 1)
175 |         if debug == 1:
176 |             print(v_as - v_as[:,1].reshape(-1,1), DASH, actions)
177 |         return actions
178 | 
179 |     return policy
180 | 
181 | ##########################################################################
182 | ##########################################################################
183 | def UpdatedValues_eval(ss, policy, J, r, Q, gamma):
184 |     """ Version of 1-step forward in Evaluations
185 |     """
186 |     dx = ss.shape[1]
187 |     sss = ss[:,(dx - (4 * J - 1)):dx]
188 |     As = policy(sss)
189 |     sa = np.hstack([ss,As])
190 |     return gamma * Q.predict(sa).reshape(-1,1) + r.reshape(-1,1)
191 | 
192 | def FQE(PatternSets, Q_func, J, gamma = 0.9, RF_paras = [3, 20], n_trees = 200, 
193 |                          threshold = 1e-4):
194 |     """ 
195 |     Fitted-Q Evaluation for off-policy evaluation (OPE) in REAL DATA
196 |         
197 |         1. fit RF q: (x,a) -> value
198 |         2. update the value function of policy:
199 |             q_policy(x, a) = gamma * q(x', policy(x'[, (dx - J): dx])) + r
200 |             
201 |     3. q_policy(x, x[, (dx - J): dx])
202 |     
203 |     Input: 
204 |         PatternSets: a list of (s, a, s', u) [have been transformed]
205 |         
206 |     Output: V function
207 | 
208 |     """
209 |     rseed(0); npseed(0)
210 |     
211 |     # Preparing training data
212 |     s_bef, a_bef, ss_bef, r_bef = [np.array([a[i] for a in PatternSets]) for i in range(4)]
213 |     a_bef = a_bef.reshape(-1, 1)
214 |     range_a = np.unique(a_bef)
215 |     
216 |     policy = Estpolicy(Q_func, range_a)
217 |     time = now()
218 |     
219 |     dx = s_bef.shape[1]    
220 |     s1 = s_bef[:,(dx - (4 * J - 1)):dx].copy()
221 |     As = policy(s1)
222 |     selected = (As == a_bef)
223 | 
224 |     s2, a2, ss2, r2 = [], [], [], []
225 |     for i in range(s_bef.shape[0]):
226 |         if selected[i, 0]:
227 |             s2.append(s_bef[i,])
228 |             a2.append(a_bef[i,])
229 |             ss2.append(ss_bef[i,])
230 |             r2.append(r_bef[i,])
231 |     s, a, ss, r = np.vstack(s2).copy(), np.vstack(a2).copy(), np.vstack(ss2).copy(), np.vstack(r2).copy()
232 |     
233 |     
234 |     ### Initialization
235 |     x_train = np.hstack((s, a))
236 |     init_y = r * (1 / (1 - gamma))
237 |     if RF_paras == "CV":
238 |         rseed(0); npseed(0)
239 |         rfqr = RF(random_state = 0, n_estimators = n_trees)
240 |         gd = GridSearchCV(estimator=rfqr, param_grid = param_grid, cv = 3, n_jobs = n_jobs, verbose=0)
241 |         gd.fit(x_train, init_y.ravel())
242 |         RF_paras = gd.best_params_
243 |         RF_paras = [RF_paras['max_depth'], RF_paras['min_samples_leaf']]
244 |             
245 |     max_depth, min_samples_leaf = RF_paras
246 |     rseed(0); npseed(0)
247 |     Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
248 |            min_samples_leaf, n_jobs = n_jobs, verbose = 0) 
249 |     Q.fit(x_train, init_y.ravel())
250 | 
251 |     y_old = init_y.copy()
252 |     # evaluate the policy policy
253 |     rep, epsilon = 0, 100
254 |     while(epsilon > threshold and rep < 100):
255 |         rseed(0); npseed(0)
256 |         y_train = UpdatedValues_eval(ss, policy, J, r, Q, gamma) # too slow [?]
257 |         y_train = np.round(y_train, 6)
258 |         epsilon = change_rate( y_old = y_old, y_new = y_train)
259 |         Q = RF(max_depth = max_depth, random_state = 0, n_estimators = n_trees, min_samples_leaf =   
260 |            min_samples_leaf, n_jobs = n_jobs, verbose = 0)
261 |         Q.fit(x_train, y_train.ravel())  # regression function: (s,a) -> v
262 | 
263 |         y_old = y_train.copy()
264 |         rep += 1
265 |             
266 |     def V_func(s):
267 |         dx = s.shape[1]
268 |         a = policy(s[:,(dx - (4 * J - 1)):dx]).reshape(-1,1)
269 |         return Q.predict(np.hstack([s,a]))
270 |     
271 |     return V_func
272 | 
273 | 
274 | 
275 | 


--------------------------------------------------------------------------------
/test_func/_QRF.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | This file is for the random forest-based method used in the paper "Does MDP Fit the Data?" to estimate conditional characteristic functions. 
  5 | The majority of functions in this file were adapted from the source code of the paper "Quantile Regression Forest" on Github.
  6 |     Date: 10/12/2019.
  7 |     URL:https://github.com/scikit-garden/scikit-garden/tree/master/skgarden
  8 | """
  9 | ##########################################################################
 10 | from  _uti_basic import *
 11 | ##########################################################################
 12 | import warnings
 13 | warnings.filterwarnings('ignore')
 14 | from numpy.random import seed as rseed 
 15 | from numpy.random import randn # randn(d1,d2) is d1*d2 i.i.d N(0,1)
 16 | import numpy as np
 17 | from numpy import ma
 18 | from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
 19 | from sklearn.ensemble.forest import ForestRegressor
 20 | from sklearn.utils import check_array, check_random_state, check_X_y
 21 | from sklearn.tree.tree import BaseDecisionTree
 22 | from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
 23 | import time
 24 | now = time.time
 25 | ##########################################################################
 26 | 
 27 | def weighted_est(y,uv,cos_sin,weights=None):
 28 |     """
 29 |     # weights: array-like, shape=(n_samples,)
 30 |     #    weights[i] is the weight given to point a[i] while computing the
 31 |     #    quantile. If weights[i] is zero, a[i] is simply ignored during the
 32 |     #    percentile computation.
 33 |     
 34 |     Parameters
 35 |     ----------
 36 |     # uv: assume is B * d_
 37 |     
 38 |     Returns
 39 |     -------
 40 |     B * 1, for a given T
 41 |     """
 42 |     if weights is None:
 43 |         return np.mean(cos_sin(y.dot(uv)),axis = 0)
 44 |     return weights.T.dot(cos_sin(y.dot(uv))) # v.T
 45 | 
 46 | def generate_sample_indices(random_state, n_samples):
 47 |     """
 48 |     [Just copied and pasted]
 49 |     Generates bootstrap indices for each tree fit.
 50 | 
 51 |     Parameters
 52 |     ----------
 53 |     random_state: int, RandomState instance or None
 54 |         If int, random_state is the seed used by the random number generator.
 55 |         If RandomState instance, random_state is the random number generator.
 56 |         If None, the random number generator is the RandomState instance used
 57 |         by np.random.
 58 | 
 59 |     n_samples: int
 60 |         Number of samples to generate from each tree.
 61 | 
 62 |     Returns
 63 |     -------
 64 |     sample_indices: array-like, shape=(n_samples), dtype=np.int32
 65 |         Sample indices.
 66 |     """
 67 |     random_instance = check_random_state(random_state)
 68 |     sample_indices = random_instance.randint(0, n_samples, n_samples)
 69 |     return sample_indices
 70 | ##########################################################################   
 71 | # QRF <- QBF,QDT
 72 | 
 73 | class BaseForestQuantileRegressor(ForestRegressor):
 74 |     def fit(self, X, y):
 75 |         """
 76 |         Build a forest from the training set (X, y).
 77 | 
 78 |         Parameters
 79 |         ----------
 80 |         X : array-like or sparse matrix, shape = [n_samples, n_features]
 81 |             The training input samples. Internally, it will be converted to
 82 |             ``dtype=np.float32`` and if a sparse matrix is provided
 83 |             to a sparse ``csc_matrix``.
 84 | 
 85 |         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
 86 |             The target values (class labels) as integers or strings.
 87 |         Returns
 88 |         -------
 89 |         self : object
 90 |             Returns self.
 91 |         """
 92 |         # apply method requires X to be of dtype np.float32
 93 |         X, y = check_X_y(
 94 |             X, y, accept_sparse="csc", dtype=np.float32, multi_output=1)
 95 |         super(BaseForestQuantileRegressor, self).fit(X, y)
 96 | 
 97 |         self.y_train_ = y
 98 |         self.y_train_leaves_ = -np.ones((self.n_estimators, len(y)), dtype=np.int32)
 99 |         self.y_weights_ = np.zeros_like((self.y_train_leaves_), dtype=np.float32)
100 | 
101 |         for i, est in enumerate(self.estimators_):
102 |             bootstrap_indices = generate_sample_indices(est.random_state, len(y))
103 |             est_weights = np.bincount(bootstrap_indices, minlength=len(y))
104 |             y_train_leaves = est.y_train_leaves_
105 |             for curr_leaf in np.unique(y_train_leaves):
106 |                 y_ind = y_train_leaves == curr_leaf
107 |                 self.y_weights_[i, y_ind] = (
108 |                     est_weights[y_ind] / np.sum(est_weights[y_ind]))
109 | 
110 |             self.y_train_leaves_[i, bootstrap_indices] = y_train_leaves[bootstrap_indices]
111 |         return self
112 |     
113 |     def predict(self, X, uv=0): # , cos_sin
114 |         """
115 |         Predict cond. char. values for either forward or backward
116 | 
117 |         Parameters
118 |         ----------
119 |         X : array-like or sparse matrix of shape = [n_samples, n_features]
120 |         uv: [B,dim_y]. can be either u or v
121 |         Returns
122 |         -------
123 |         char_est : array of shape = [n_samples,B]
124 |         """
125 |         # apply method requires X to be of dtype np.float32
126 |         X = check_array(X, dtype=np.float32, accept_sparse="csc") # around N * T
127 |         X_leaves = self.apply(X) # (n_test = N * T, n_tree) 
128 |         weights = np.zeros((X.shape[0], len(self.y_train_)))# N_test * N_train
129 |         begin = now()
130 |         a = now()
131 |         mask_time = 0
132 |         sum_time= 0
133 |         
134 | 
135 |         for i, x_leaf in enumerate(X_leaves): # n_test
136 |             mask = (self.y_train_leaves_ != np.expand_dims(x_leaf, 1))
137 |             x_weights = ma.masked_array(self.y_weights_, mask)# n_tree * n_train. for each n_test
138 |             b = now()
139 |             mask_time += b - a
140 |             weights[i,:] = x_weights.sum(axis = 0)
141 |             a = now()
142 |             sum_time += a - b       
143 |         # print("prediction iteration:", now()- begin, " with mask:", mask_time, "sum:", sum_time)
144 |         if uv is 0: # debug. E(X_t|X_t-1). for CV too.
145 |             return weights.dot(self.y_train_) / np.sum(weights,axis=1)[:,None]
146 |         else:
147 |             char_est_cos = weights.dot(np.cos(self.y_train_.dot(uv.T))) / np.sum(weights,axis=1)[:,None]
148 |             char_est_sin = weights.dot(np.sin(self.y_train_.dot(uv.T))) / np.sum(weights,axis=1)[:,None]
149 |         return char_est_cos, char_est_sin
150 |  
151 | class RandomForestQuantileRegressor(BaseForestQuantileRegressor):
152 |     """
153 |     Based on BaseForestQuantileRegressor. What is the purpose?
154 |     
155 |     The sub-sample size is always the same as the original
156 |     input sample size but the samples are drawn with replacement if
157 |     `bootstrap=True` (default).
158 | 
159 |     """
160 |     def __init__(self,
161 |                  n_estimators=10,
162 |                  criterion='mse',
163 |                  max_depth=None,
164 |                  min_samples_split=2,
165 |                  min_samples_leaf=1,
166 |                  min_weight_fraction_leaf=0.0,
167 |                  max_features='auto',
168 |                  max_leaf_nodes=None,
169 |                  bootstrap=True,
170 |                  oob_score=False,
171 |                  n_jobs=1,
172 |                  random_state=None,
173 |                  verbose=0,
174 |                  warm_start=False):
175 |         super(RandomForestQuantileRegressor, self).__init__(
176 |             base_estimator=DecisionTreeQuantileRegressor(),
177 |             n_estimators=n_estimators,
178 |             estimator_params=("criterion", "max_depth", "min_samples_split",
179 |                               "min_samples_leaf", "min_weight_fraction_leaf",
180 |                               "max_features", "max_leaf_nodes",
181 |                               "random_state"),
182 |             bootstrap=bootstrap,
183 |             oob_score=oob_score,
184 |             n_jobs=n_jobs,
185 |             random_state=random_state,
186 |             verbose=verbose,
187 |             warm_start=warm_start)
188 | 
189 |         self.criterion = criterion
190 |         self.max_depth = max_depth
191 |         self.min_samples_split = min_samples_split
192 |         self.min_samples_leaf = min_samples_leaf
193 |         self.min_weight_fraction_leaf = min_weight_fraction_leaf
194 |         self.max_features = max_features
195 |         self.max_leaf_nodes = max_leaf_nodes
196 | 
197 | class BaseTreeQuantileRegressor(BaseDecisionTree):
198 |     def fit(self, X, y, sample_weight=None, check_input=True,
199 |             X_idx_sorted=None):
200 |         """
201 |         Child of BaseDecisionTree (sklearn), which use a single DecisionTree to do the same kind of Quantile things.
202 | 
203 |         Parameters
204 |         ----------
205 |         X : array-like or sparse matrix, shape = [n_samples, n_features]
206 |             The training input samples. Internally, it will be converted to
207 |             ``dtype=np.float32`` and if a sparse matrix is provided
208 |             to a sparse ``csc_matrix``.
209 | 
210 |         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
211 |             The target values (class labels) as integers or strings.
212 | 
213 |         sample_weight : array-like, shape = [n_samples] or None
214 |             Sample weights. If None, then samples are equally weighted. Splits
215 |             that would create child nodes with net zero or negative weight are
216 |             ignored while searching for a split in each node. Splits are also
217 |             ignored if they would result in any single class carrying a
218 |             negative weight in either child node.
219 | 
220 |         check_input : boolean, (default=True)
221 |             Allow to bypass several input checking.
222 |             Don't use this parameter unless you know what you do.
223 | 
224 | 
225 |         Returns
226 |         -------
227 |         self : object
228 |             Returns self.
229 |         """
230 |         # y passed from a forest is 2-D. This is to silence the
231 |         # annoying data-conversion warnings.
232 |         y = np.asarray(y)
233 |         if np.ndim(y) == 2 and y.shape[1] == 1:
234 |             y = np.ravel(y)
235 | 
236 |         # apply method requires X to be of dtype np.float32
237 |         X, y = check_X_y(
238 |             X, y, accept_sparse="csc", dtype=np.float32, multi_output=1)
239 |         super(BaseTreeQuantileRegressor, self).fit(
240 |             X, y, sample_weight=sample_weight, check_input=check_input,
241 |             X_idx_sorted=X_idx_sorted)
242 |         self.y_train_ = y
243 | 
244 |         # Stores the leaf nodes that the samples lie in.
245 |         self.y_train_leaves_ = self.tree_.apply(X)
246 |         return self
247 |     
248 |     def predict(self, X,u, check_input=False): # ,cos_sin
249 |         """
250 |         Predict regression value for X.
251 | 
252 |         Parameters
253 |         ----------
254 |         X : array-like or sparse matrix of shape = [n_samples, n_features]
255 |             The input samples. Internally, it will be converted to
256 |             ``dtype=np.float32`` and if a sparse matrix is provided
257 |             to a sparse ``csr_matrix``.
258 | 
259 |         quantile : int, optional
260 |             Value ranging from 0 to 100. By default, the mean is returned.
261 | 
262 |         check_input : boolean, (default=True)
263 |             Allow to bypass several input checking.
264 |             Don't use this parameter unless you know what you do.
265 | 
266 |         Returns
267 |         -------
268 |         y : array of shape = [n_samples]
269 |             If quantile is set to None, then return E(Y | X). Else return
270 |             y such that F(Y=y | x) = quantile.
271 |         """
272 |         # apply method requires X to be of dtype np.float32
273 |         X = check_array(X, dtype=np.float32, accept_sparse="csc")
274 |         if quantile is None:
275 |             return super(BaseTreeQuantileRegressor, self).predict(X, check_input=check_input)
276 | 
277 |         B = u.shape[0]
278 |         r_cos, r_sin = np.zeros((X.shape[0],B)), np.zeros((X.shape[0],B))
279 |         X_leaves = self.apply(X)
280 |         unique_leaves = np.unique(X_leaves)
281 | 
282 |         for leaf in unique_leaves:
283 |             # for those X_test in that leaf, we use training in that leaf to cal the quantiles.
284 |             y = self.y_train_[self.y_train_leaves_ == leaf]
285 |             r_cos[X_leaves == leaf,:] = np.mean(np.cos(y.dot(uv.T)),axis = 0)
286 |             r_sin[X_leaves == leaf,:] = np.mean(np.sin(y.dot(uv.T)),axis = 0)
287 |         return r_cos, r_sin
288 | 
289 | class DecisionTreeQuantileRegressor(DecisionTreeRegressor, BaseTreeQuantileRegressor):
290 |     """
291 |     Just combine QBT and DecisionTreeRegressor, and provide _init_
292 |     
293 |     A decision tree regressor that provides quantile estimates.
294 |     """
295 |     def __init__(self,
296 |                  criterion="mse",
297 |                  splitter="best",
298 |                  max_depth=None,
299 |                  min_samples_split=2,
300 |                  min_samples_leaf=1,
301 |                  min_weight_fraction_leaf=0.,
302 |                  max_features=None,
303 |                  random_state=None,
304 |                  max_leaf_nodes=None):
305 |         super(DecisionTreeQuantileRegressor, self).__init__(
306 |             criterion=criterion,
307 |             splitter=splitter,
308 |             max_depth=max_depth,
309 |             min_samples_split=min_samples_split,
310 |             min_samples_leaf=min_samples_leaf,
311 |             min_weight_fraction_leaf=min_weight_fraction_leaf,
312 |             max_features=max_features,
313 |             max_leaf_nodes=max_leaf_nodes,
314 |             random_state=random_state)
315 | 


--------------------------------------------------------------------------------
/test_func/.ipynb_checkpoints/_QRF-checkpoint.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | This file is for the random forest-based method used in the paper "Does MDP Fit the Data?" to estimate conditional characteristic functions. 
  5 | The majority of functions in this file were adapted from the source code of the paper "Quantile Regression Forest" on Github.
  6 |     Date: 10/12/2019.
  7 |     URL:https://github.com/scikit-garden/scikit-garden/tree/master/skgarden
  8 | """
  9 | ##########################################################################
 10 | from  _uti_basic import *
 11 | ##########################################################################
 12 | import warnings
 13 | warnings.filterwarnings('ignore')
 14 | from numpy.random import seed as rseed 
 15 | from numpy.random import randn # randn(d1,d2) is d1*d2 i.i.d N(0,1)
 16 | import numpy as np
 17 | from numpy import ma
 18 | from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
 19 | from sklearn.ensemble.forest import ForestRegressor
 20 | from sklearn.utils import check_array, check_random_state, check_X_y
 21 | from sklearn.tree.tree import BaseDecisionTree
 22 | from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
 23 | import time
 24 | now = time.time
 25 | ##########################################################################
 26 | 
 27 | def weighted_est(y,uv,cos_sin,weights=None):
 28 |     """
 29 |     # weights: array-like, shape=(n_samples,)
 30 |     #    weights[i] is the weight given to point a[i] while computing the
 31 |     #    quantile. If weights[i] is zero, a[i] is simply ignored during the
 32 |     #    percentile computation.
 33 |     
 34 |     Parameters
 35 |     ----------
 36 |     # uv: assume is B * d_
 37 |     
 38 |     Returns
 39 |     -------
 40 |     B * 1, for a given T
 41 |     """
 42 |     if weights is None:
 43 |         return np.mean(cos_sin(y.dot(uv)),axis = 0)
 44 |     return weights.T.dot(cos_sin(y.dot(uv))) # v.T
 45 | 
 46 | def generate_sample_indices(random_state, n_samples):
 47 |     """
 48 |     [Just copied and pasted]
 49 |     Generates bootstrap indices for each tree fit.
 50 | 
 51 |     Parameters
 52 |     ----------
 53 |     random_state: int, RandomState instance or None
 54 |         If int, random_state is the seed used by the random number generator.
 55 |         If RandomState instance, random_state is the random number generator.
 56 |         If None, the random number generator is the RandomState instance used
 57 |         by np.random.
 58 | 
 59 |     n_samples: int
 60 |         Number of samples to generate from each tree.
 61 | 
 62 |     Returns
 63 |     -------
 64 |     sample_indices: array-like, shape=(n_samples), dtype=np.int32
 65 |         Sample indices.
 66 |     """
 67 |     random_instance = check_random_state(random_state)
 68 |     sample_indices = random_instance.randint(0, n_samples, n_samples)
 69 |     return sample_indices
 70 | ##########################################################################   
 71 | # QRF <- QBF,QDT
 72 | 
 73 | class BaseForestQuantileRegressor(ForestRegressor):
 74 |     def fit(self, X, y):
 75 |         """
 76 |         Build a forest from the training set (X, y).
 77 | 
 78 |         Parameters
 79 |         ----------
 80 |         X : array-like or sparse matrix, shape = [n_samples, n_features]
 81 |             The training input samples. Internally, it will be converted to
 82 |             ``dtype=np.float32`` and if a sparse matrix is provided
 83 |             to a sparse ``csc_matrix``.
 84 | 
 85 |         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
 86 |             The target values (class labels) as integers or strings.
 87 |         Returns
 88 |         -------
 89 |         self : object
 90 |             Returns self.
 91 |         """
 92 |         # apply method requires X to be of dtype np.float32
 93 |         X, y = check_X_y(
 94 |             X, y, accept_sparse="csc", dtype=np.float32, multi_output=1)
 95 |         super(BaseForestQuantileRegressor, self).fit(X, y)
 96 | 
 97 |         self.y_train_ = y
 98 |         self.y_train_leaves_ = -np.ones((self.n_estimators, len(y)), dtype=np.int32)
 99 |         self.y_weights_ = np.zeros_like((self.y_train_leaves_), dtype=np.float32)
100 | 
101 |         for i, est in enumerate(self.estimators_):
102 |             bootstrap_indices = generate_sample_indices(est.random_state, len(y))
103 |             est_weights = np.bincount(bootstrap_indices, minlength=len(y))
104 |             y_train_leaves = est.y_train_leaves_
105 |             for curr_leaf in np.unique(y_train_leaves):
106 |                 y_ind = y_train_leaves == curr_leaf
107 |                 self.y_weights_[i, y_ind] = (
108 |                     est_weights[y_ind] / np.sum(est_weights[y_ind]))
109 | 
110 |             self.y_train_leaves_[i, bootstrap_indices] = y_train_leaves[bootstrap_indices]
111 |         return self
112 |     
113 |     def predict(self, X, uv=0): # , cos_sin
114 |         """
115 |         Predict cond. char. values for either forward or backward
116 | 
117 |         Parameters
118 |         ----------
119 |         X : array-like or sparse matrix of shape = [n_samples, n_features]
120 |         uv: [B,dim_y]. can be either u or v
121 |         Returns
122 |         -------
123 |         char_est : array of shape = [n_samples,B]
124 |         """
125 |         # apply method requires X to be of dtype np.float32
126 |         X = check_array(X, dtype=np.float32, accept_sparse="csc") # around N * T
127 |         X_leaves = self.apply(X) # (n_test = N * T, n_tree) 
128 |         weights = np.zeros((X.shape[0], len(self.y_train_)))# N_test * N_train
129 |         begin = now()
130 |         a = now()
131 |         mask_time = 0
132 |         sum_time= 0
133 |         
134 | 
135 |         for i, x_leaf in enumerate(X_leaves): # n_test
136 |             mask = (self.y_train_leaves_ != np.expand_dims(x_leaf, 1))
137 |             x_weights = ma.masked_array(self.y_weights_, mask)# n_tree * n_train. for each n_test
138 |             b = now()
139 |             mask_time += b - a
140 |             weights[i,:] = x_weights.sum(axis = 0)
141 |             a = now()
142 |             sum_time += a - b       
143 |         # print("prediction iteration:", now()- begin, " with mask:", mask_time, "sum:", sum_time)
144 |         if uv is 0: # debug. E(X_t|X_t-1). for CV too.
145 |             return weights.dot(self.y_train_) / np.sum(weights,axis=1)[:,None]
146 |         else:
147 |             char_est_cos = weights.dot(np.cos(self.y_train_.dot(uv.T))) / np.sum(weights,axis=1)[:,None]
148 |             char_est_sin = weights.dot(np.sin(self.y_train_.dot(uv.T))) / np.sum(weights,axis=1)[:,None]
149 |         return char_est_cos, char_est_sin
150 |  
151 | class RandomForestQuantileRegressor(BaseForestQuantileRegressor):
152 |     """
153 |     Based on BaseForestQuantileRegressor. What is the purpose?
154 |     
155 |     The sub-sample size is always the same as the original
156 |     input sample size but the samples are drawn with replacement if
157 |     `bootstrap=True` (default).
158 | 
159 |     """
160 |     def __init__(self,
161 |                  n_estimators=10,
162 |                  criterion='mse',
163 |                  max_depth=None,
164 |                  min_samples_split=2,
165 |                  min_samples_leaf=1,
166 |                  min_weight_fraction_leaf=0.0,
167 |                  max_features='auto',
168 |                  max_leaf_nodes=None,
169 |                  bootstrap=True,
170 |                  oob_score=False,
171 |                  n_jobs=1,
172 |                  random_state=None,
173 |                  verbose=0,
174 |                  warm_start=False):
175 |         super(RandomForestQuantileRegressor, self).__init__(
176 |             base_estimator=DecisionTreeQuantileRegressor(),
177 |             n_estimators=n_estimators,
178 |             estimator_params=("criterion", "max_depth", "min_samples_split",
179 |                               "min_samples_leaf", "min_weight_fraction_leaf",
180 |                               "max_features", "max_leaf_nodes",
181 |                               "random_state"),
182 |             bootstrap=bootstrap,
183 |             oob_score=oob_score,
184 |             n_jobs=n_jobs,
185 |             random_state=random_state,
186 |             verbose=verbose,
187 |             warm_start=warm_start)
188 | 
189 |         self.criterion = criterion
190 |         self.max_depth = max_depth
191 |         self.min_samples_split = min_samples_split
192 |         self.min_samples_leaf = min_samples_leaf
193 |         self.min_weight_fraction_leaf = min_weight_fraction_leaf
194 |         self.max_features = max_features
195 |         self.max_leaf_nodes = max_leaf_nodes
196 | 
197 | class BaseTreeQuantileRegressor(BaseDecisionTree):
198 |     def fit(self, X, y, sample_weight=None, check_input=True,
199 |             X_idx_sorted=None):
200 |         """
201 |         Child of BaseDecisionTree (sklearn), which use a single DecisionTree to do the same kind of Quantile things.
202 | 
203 |         Parameters
204 |         ----------
205 |         X : array-like or sparse matrix, shape = [n_samples, n_features]
206 |             The training input samples. Internally, it will be converted to
207 |             ``dtype=np.float32`` and if a sparse matrix is provided
208 |             to a sparse ``csc_matrix``.
209 | 
210 |         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
211 |             The target values (class labels) as integers or strings.
212 | 
213 |         sample_weight : array-like, shape = [n_samples] or None
214 |             Sample weights. If None, then samples are equally weighted. Splits
215 |             that would create child nodes with net zero or negative weight are
216 |             ignored while searching for a split in each node. Splits are also
217 |             ignored if they would result in any single class carrying a
218 |             negative weight in either child node.
219 | 
220 |         check_input : boolean, (default=True)
221 |             Allow to bypass several input checking.
222 |             Don't use this parameter unless you know what you do.
223 | 
224 | 
225 |         Returns
226 |         -------
227 |         self : object
228 |             Returns self.
229 |         """
230 |         # y passed from a forest is 2-D. This is to silence the
231 |         # annoying data-conversion warnings.
232 |         y = np.asarray(y)
233 |         if np.ndim(y) == 2 and y.shape[1] == 1:
234 |             y = np.ravel(y)
235 | 
236 |         # apply method requires X to be of dtype np.float32
237 |         X, y = check_X_y(
238 |             X, y, accept_sparse="csc", dtype=np.float32, multi_output=1)
239 |         super(BaseTreeQuantileRegressor, self).fit(
240 |             X, y, sample_weight=sample_weight, check_input=check_input,
241 |             X_idx_sorted=X_idx_sorted)
242 |         self.y_train_ = y
243 | 
244 |         # Stores the leaf nodes that the samples lie in.
245 |         self.y_train_leaves_ = self.tree_.apply(X)
246 |         return self
247 |     
248 |     def predict(self, X,u, check_input=False): # ,cos_sin
249 |         """
250 |         Predict regression value for X.
251 | 
252 |         Parameters
253 |         ----------
254 |         X : array-like or sparse matrix of shape = [n_samples, n_features]
255 |             The input samples. Internally, it will be converted to
256 |             ``dtype=np.float32`` and if a sparse matrix is provided
257 |             to a sparse ``csr_matrix``.
258 | 
259 |         quantile : int, optional
260 |             Value ranging from 0 to 100. By default, the mean is returned.
261 | 
262 |         check_input : boolean, (default=True)
263 |             Allow to bypass several input checking.
264 |             Don't use this parameter unless you know what you do.
265 | 
266 |         Returns
267 |         -------
268 |         y : array of shape = [n_samples]
269 |             If quantile is set to None, then return E(Y | X). Else return
270 |             y such that F(Y=y | x) = quantile.
271 |         """
272 |         # apply method requires X to be of dtype np.float32
273 |         X = check_array(X, dtype=np.float32, accept_sparse="csc")
274 |         if quantile is None:
275 |             return super(BaseTreeQuantileRegressor, self).predict(X, check_input=check_input)
276 | 
277 |         B = u.shape[0]
278 |         r_cos, r_sin = np.zeros((X.shape[0],B)), np.zeros((X.shape[0],B))
279 |         X_leaves = self.apply(X)
280 |         unique_leaves = np.unique(X_leaves)
281 | 
282 |         for leaf in unique_leaves:
283 |             # for those X_test in that leaf, we use training in that leaf to cal the quantiles.
284 |             y = self.y_train_[self.y_train_leaves_ == leaf]
285 |             r_cos[X_leaves == leaf,:] = np.mean(np.cos(y.dot(uv.T)),axis = 0)
286 |             r_sin[X_leaves == leaf,:] = np.mean(np.sin(y.dot(uv.T)),axis = 0)
287 |         return r_cos, r_sin
288 | 
289 | class DecisionTreeQuantileRegressor(DecisionTreeRegressor, BaseTreeQuantileRegressor):
290 |     """
291 |     Just combine QBT and DecisionTreeRegressor, and provide _init_
292 |     
293 |     A decision tree regressor that provides quantile estimates.
294 |     """
295 |     def __init__(self,
296 |                  criterion="mse",
297 |                  splitter="best",
298 |                  max_depth=None,
299 |                  min_samples_split=2,
300 |                  min_samples_leaf=1,
301 |                  min_weight_fraction_leaf=0.,
302 |                  max_features=None,
303 |                  random_state=None,
304 |                  max_leaf_nodes=None):
305 |         super(DecisionTreeQuantileRegressor, self).__init__(
306 |             criterion=criterion,
307 |             splitter=splitter,
308 |             max_depth=max_depth,
309 |             min_samples_split=min_samples_split,
310 |             min_samples_leaf=min_samples_leaf,
311 |             min_weight_fraction_leaf=min_weight_fraction_leaf,
312 |             max_features=max_features,
313 |             max_leaf_nodes=max_leaf_nodes,
314 |             random_state=random_state)
315 | 


--------------------------------------------------------------------------------
/test_func/_core_test_fun.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Main functions for the test proposed in the paper "Does MDP Fit the Data?". Refer to the Algorithm 1 and 2 therein.
  5 | """
  6 | 
  7 | ##########################################################################
  8 | #%% 
  9 | from _QRF import *
 10 | from _uti_basic import *
 11 | from _utility import *
 12 | ##########################################################################
 13 | # %%
 14 | n_jobs = multiprocessing.cpu_count()
 15 | param_grid = {'max_depth': [2, 4, 6, 8], 'min_samples_leaf': [5, 10, 20]}
 16 | 
 17 | ##########################################################################
 18 | #%% Algorithm 1
 19 | def test(data, J = 1,
 20 |         B = 200, Q = 10, L = 3, 
 21 |         paras="CV", n_trees = 200, 
 22 |         print_time = False,
 23 |         include_reward = False, fixed_state_comp = None, 
 24 |         method = "QRF"):
 25 |     """
 26 |     The main test function
 27 | 
 28 |     Parameters
 29 |     ----------
 30 |     data: the observed trajectories. A len-N list of [X, A], where X and A are T * dim arrays.
 31 |     J: the null hyphothesis that the MDP is lag-J. Donoted as k in the paper
 32 |     B, Q: required  hyperparameters; The definition of Q is slightly different with the paper. Q_here = Q_paper + 2
 33 |     paras: the parameters [max_depth, min_samples_leaf] used in the random forests.
 34 |     n_trees: the number of trees used in the random forests
 35 |     print_time: whether or not to print out the time cost for each part
 36 |     include_reward: whether or not to include the R_t as part of X_t for our testing
 37 |     fixed_state_comp: to resolve the duplicate S problem in the TIGER
 38 |     method: the estimators used for the conditional characteristic function estimation.
 39 |     
 40 |     Returns
 41 |     -------
 42 |     p-values
 43 |     """
 44 |     N = len(data)
 45 |     data = normalize(data.copy())
 46 |     T = data[0][0].shape[0]
 47 |     a = now()
 48 |     lam = lam_est(data = data, J = J, B = B, Q = Q, paras = paras, n_trees = n_trees, 
 49 |                   include_reward = include_reward, L = L, 
 50 |                   fixed_state_comp = fixed_state_comp, method = method)
 51 |     r, pValues = [], []
 52 |     Sigma_q_s = Sigma_q(lam)  # a list (len = Q-1) 2B * 2B.
 53 |     if print_time:
 54 |         print("RF:", now() - a)
 55 |     a = now()
 56 |     S = S_hat(lam = lam, dims = [N, T], J = J)  # Construct the test statistics
 57 |     pValues = bootstrap_p_value(Sigma_q_s, rep_times = int(1e3), test_stat = S)  # Construct the resampling-based c.v.
 58 |     if print_time:
 59 |         print("Bootstrap:", now() - a)
 60 |     return pValues
 61 | 
 62 | #%% Algorithm 2
 63 | 
 64 | 
 65 | 
 66 | def selectOrder(data, B = 100, Q = 10, L = 3, alpha = 0.01, K = 10, paras="CV", n_trees = 200, 
 67 |                         print_time = False,
 68 |                         include_reward = False, fixed_state_comp = None, 
 69 |                         method = "QRF"): 
 70 |     p_values = []
 71 |     for k in range(1, K + 1):
 72 |         p_value = test(data, J = k,
 73 |                         B = B, Q = Q, L = L, 
 74 |                         paras=paras, n_trees = n_trees, 
 75 |                         print_time = print_time,
 76 |                         include_reward = include_reward, fixed_state_comp = fixed_state_comp, 
 77 |                         method = method)
 78 |         p_values.append(p_value)
 79 |         if p_value > alpha:
 80 |             print("Conclude the system is of order:", k)
 81 |             return p_values
 82 |     print("Conclude the system is a POMDP")
 83 |     return p_values
 84 |             
 85 |     
 86 |     
 87 | 
 88 | ##########################################################################
 89 | #%% Getting data. Helper functions
 90 | def get_pairs( data, is_forward, J = 1, as_array = 1, include_reward = 0, fixed_state_comp = None):
 91 |     """
 92 |     get [(x_{t-1},a_{t-1}),x_t] or [(x_t,a_t),(x_{t-1},a_{t-1})] pairs, only for training[can not distinguish patients]
 93 | 
 94 |     forward: indicator
 95 |     as_array: by default, into pred/response array
 96 |     """
 97 |     def get_pairs_one_traj(i, is_forward, J):
 98 |         """
 99 |         do one patient for <get_pairs>, get trainiinig data
100 |         patient = [X,A]
101 |         X = T * d_x, A = T * d_a
102 |         """
103 |         patient = data[i]
104 |         if include_reward:
105 |             X, A, R = patient
106 |         else:
107 |             X, A = patient
108 |         T = X.shape[0]
109 |         r = []
110 |         dx = X.shape[1]
111 |         XA = np.hstack([X, A])
112 |         if include_reward:
113 |             XR = np.hstack([X, R])
114 | 
115 |         for t in range(T - J):
116 |             if is_forward:
117 |                 if include_reward:
118 |                     pair = [
119 |                         XA[t:t + J, :].reshape(1, J * (dx + 1)), XR[t + J, :]]
120 |                 else:
121 |                     pair = [
122 |                         XA[t:t + J, :].reshape(1, J * (dx + 1)), X[t + J, :]]
123 |             else:
124 |                 pair = [XA[(t + 1):(t + J + 1), :].reshape(1,
125 |                                                            J * (dx + 1)), XA[t, :]]
126 |             if fixed_state_comp is not None:
127 |                 true_state = fixed_state_comp[i]
128 |                 # both pred and response have the true_state before.
129 |                 pair = [
130 |                     np.append(
131 |                         true_state, pair[0]), np.append(
132 |                         true_state, pair[1])]
133 |             r.append(pair)
134 |         return r
135 | 
136 |     # get pairs for each patient and put together
137 |     r = flatten([get_pairs_one_traj(i, is_forward, J)
138 |                  for i in range(len(data))])
139 |     if as_array:
140 |         r = [np.vstack([a[0] for a in r]), np.vstack([a[1] for a in r])]
141 |     return r
142 | 
143 | 
144 | def get_test_data(test_data, J=1, fixed_state_comp=None):
145 |     """
146 |     Get testing predictors
147 |     """
148 |     def patient_2_predictors(i, J=1):
149 |         """
150 |         XA: T * (d_x + d_a)
151 |         Return: T * ((d_x + d_a) * J)
152 |         """
153 |         patient = test_data[i]
154 |         XA = np.hstack([patient[0], patient[1]])
155 |         T = XA.shape[0]
156 |         r = XA.copy()
157 |         for j in range(1, J): 
158 |             r = np.hstack([r, roll(XA, -j, 0)])
159 |         if fixed_state_comp is not None:
160 |             true_state = np.repeat(fixed_state_comp[i], T).reshape(T, 1)
161 |             r = np.hstack((true_state, r))
162 |         return r
163 | 
164 |     return np.vstack([patient_2_predictors(i, J)
165 |                       for i in range(len(test_data))])
166 | 
167 | ##########################################################################
168 | # Functions for estimating the CCF and constructing the conditional covariances. Step 2 - 3 of Algorithm 1.
169 | 
170 | # %% Conditional covariance lam construction
171 | def lam_est(data, J, B, Q, L = 3, 
172 |             paras = [3, 20], n_trees = 200, include_reward = 0, fixed_state_comp = None, method = "QRF"):
173 |     """
174 |     construct the pointwise cov lam (for both test stat and c.v.), by combine the two parts (estimated and observed)
175 | 
176 |     Returns
177 |     -------
178 |     lam: (Q-1)-len list of four lam matrices (n * T-q * B)
179 |     """
180 | 
181 |     dx, da = data[0][0].shape[1], data[0][1].shape[1]
182 |     if fixed_state_comp is not None:
183 |         dx += 1
184 | 
185 |     # generate uv
186 |     rseed(0); npseed(0)
187 |     if include_reward:
188 |         uv = [randn(B, dx + 1), randn(B, dx + da)]
189 |     else:
190 |         uv = [randn(B, dx), randn(B, dx + da)]
191 | 
192 |     # estimate characteristic values (cross-fitting): phi_R, psi_R, phi_I,
193 |     # psi_I
194 |     estimated = cond_char_vaule_est(data = data, uv = uv,
195 |             paras = paras, n_trees = n_trees, L = L, 
196 |                                     J = J, 
197 |             include_reward = include_reward, fixed_state_comp = fixed_state_comp, 
198 |                                    method = method)  # ,obs_ys
199 |     if paras == "CV_once":
200 |         CV_paras = estimated
201 |         return CV_paras
202 |     else:
203 |         estimated_cond_char = estimated
204 |         # cos and sin in batch. (n*T*dx) * (dx* B)  = n * T * B:
205 |         # c_X,s_X,c_XA,s_XA
206 |         observed_cond_char = obs_char(data = data, uv = uv, 
207 |             include_reward = include_reward, fixed_state_comp = fixed_state_comp)
208 |         # combine the above two parts to get cond. corr. estimation.
209 |         lam = lam_formula(estimated_cond_char, observed_cond_char, J, Q)
210 |         return lam
211 | 
212 | 
213 | def cond_char_vaule_est(data, uv,
214 |         paras = "CV_once", n_trees = 200, L = 3, 
215 |         J = 1, include_reward = 0, fixed_state_comp = None, method = "QRF"):
216 |     """
217 |     Cross-fitting-type prediction of the cond. char "values"
218 | 
219 |     Returns
220 |     -------
221 |     phi_R, phi_I, psi_R, psi_I values as [n * T * B] tensors.
222 |     """
223 |     T = data[0][0].shape[0]
224 |     n = N = len(data)
225 |     B = uv[0].shape[0]
226 |     dx, dxa = uv[0].shape[1], uv[1].shape[1]
227 |     char_values, obs_ys = [np.zeros([n, T, B]) for i in range(4)], [
228 |         np.zeros([n, T, B]) for i in range(4)]
229 |     K = L  # num of cross-fitting
230 |     kf = KFold(n_splits=K)
231 |     kf.get_n_splits(zeros(n))
232 | 
233 |     # Just to get CV-based paras
234 |     if paras == "CV_once":
235 |         for train_index, test_index in kf.split(
236 |                 data):  # only do this one time to get paras by using CV
237 |             if fixed_state_comp:
238 |                 true_state_train = [fixed_state_comp[i] for i in train_index]
239 |             else:
240 |                 true_state_train = None
241 |             train_data, test_data = [data[i] for i in train_index], [data[i] for i in test_index]
242 |             CV_paras = char_fun_est(train_data = train_data,
243 |                 paras = "CV_once", n_trees = n_trees, uv = uv, J = J,
244 |                 include_reward=include_reward, fixed_state_comp=true_state_train)
245 |             return CV_paras
246 | 
247 |     # estimate char values by cross-fitting
248 |     for train_index, test_index in kf.split(data):
249 |         if fixed_state_comp:
250 |             true_state_train = [fixed_state_comp[i] for i in train_index]
251 |             true_state_test = [fixed_state_comp[i] for i in test_index]
252 |         else:
253 |             true_state_train, true_state_test = None, None
254 |         train_data, test_data = [data[i] for i in train_index], [data[i] for i in test_index]
255 |         test_pred = get_test_data(test_data = test_data, J = J, fixed_state_comp = true_state_test)
256 |         a = now()
257 |         
258 |         if method == "QRF":
259 |             char_funs = char_fun_est(train_data=train_data, paras=paras, n_trees = n_trees, 
260 |                                      uv=uv, J=J, include_reward=include_reward,
261 |                                      fixed_state_comp=true_state_train) # a list of four estimated fun
262 | 
263 |             for i in range(2):  # forward / backward
264 |                 r = char_funs[i].predict(test_pred, uv[i])  # return: char_est_cos, char_est_sin
265 |                 char_values[0 + i][test_index] = r[0].reshape((len(test_index), T, B))
266 |                 char_values[2 + i][test_index] = r[1].reshape((len(test_index), T, B))
267 |         elif method == "RF":
268 |             char_funs = char_fun_est_RF(train_data = train_data, 
269 |                                         paras = paras, n_trees = n_trees, uv = uv, J = J,
270 |                                        include_reward = include_reward, fixed_state_comp = fixed_state_comp)
271 |             for i in range(2):
272 |                 r = char_funs[i]
273 |                 char_values[0 + i][test_index] = r[0].predict(test_pred).reshape((len(test_index), T, B))
274 |                 char_values[2 + i][test_index] = r[1].predict(test_pred).reshape((len(test_index), T, B))
275 |     return char_values 
276 | 
277 | 
278 | def char_fun_est(
279 |         train_data,
280 |         paras=[3, 20], n_trees = 200, uv = 0, J = 1, include_reward = 0, fixed_state_comp = None):
281 |     """
282 |     For each cross-fitting-task, use QRF to do prediction
283 | 
284 |     paras == "CV_once": use CV_once to fit
285 |     get_CV_paras == True: just to get paras by using CV
286 | 
287 |     Returns
288 |     -------
289 |     a list of four estimated fun, and a list of four true y vectors
290 |     """
291 | 
292 |     char_funs = []
293 | 
294 |     X1, y1 = get_pairs(train_data, is_forward = 1, J = J,
295 |                        include_reward = include_reward, fixed_state_comp = fixed_state_comp)
296 |     X2, y2 = get_pairs(train_data, is_forward = 0, J = J,
297 |                        include_reward = include_reward, fixed_state_comp = fixed_state_comp)
298 | 
299 |     X, y = [X1, X2], [y1, y2]
300 | 
301 |     if paras in ["CV", "CV_once"]:
302 |         for i in range(2):
303 |             rfqr = RandomForestQuantileRegressor(random_state=0, n_estimators = n_trees)
304 |             gd = GridSearchCV(estimator = rfqr, param_grid = param_grid, 
305 |                               cv = 5, n_jobs = n_jobs, verbose=0)
306 |             gd.fit(X[i], y[i])
307 |             best_paras = gd.best_params_
308 | 
309 |             if paras == "CV_once":  # only return forward
310 |                 return [best_paras['max_depth'], best_paras['min_samples_leaf']]
311 | 
312 |             elif paras == "CV":
313 |                 print("best_paras:", best_paras)
314 |                 # use the optimal paras and the whole dataset
315 |                 rfqr1 = RandomForestQuantileRegressor(
316 |                     random_state=0,
317 |                     n_estimators = n_trees, 
318 |                     max_depth=best_paras['max_depth'],
319 |                     min_samples_leaf=best_paras['min_samples_leaf'], 
320 |                     n_jobs = n_jobs)
321 |                 char_funs.append(rfqr1.fit(X[i], y[i]))
322 | 
323 |     else:  # pre-specified paras
324 |         max_depth, min_samples_leaf = paras
325 |         for i in range(2):
326 |             char_funs.append(
327 |                 RandomForestQuantileRegressor(
328 |                     random_state=0, n_estimators = n_trees, 
329 |                     max_depth = max_depth, min_samples_leaf = min_samples_leaf, 
330 |                     n_jobs = n_jobs).fit( X[i], y[i]))
331 | 
332 |     return char_funs
333 | 
334 | 
335 | def obs_char(data, uv, include_reward, fixed_state_comp=None):
336 |     """
337 |     Batchwise calculation for the cos/sin terms, used to define lam
338 |     (n*T*dx) * (dx* B)  = n * T * B
339 |     """
340 |     T = data[0][0].shape[0]
341 |     X_mat = np.array([a[0] for a in data])
342 |     N = X_mat.shape[0]
343 |     if fixed_state_comp:
344 |         true_state = np.repeat(fixed_state_comp, T).reshape(N, T, 1)
345 |         X_mat = np.concatenate([true_state, X_mat], 2)
346 |     A_mat = np.array([a[1] for a in data])
347 |     XA_mat = np.concatenate([X_mat, A_mat], 2)
348 |     if include_reward:
349 |         R_mat = np.array([a[2] for a in data])
350 |         XR_mat = np.concatenate([X_mat, R_mat], 2)
351 |         S = [XR_mat, XA_mat]
352 |     else:
353 |         S = [X_mat, XA_mat]
354 |     r = []
355 |     for i in range(2):
356 |         temp = S[i].dot(uv[i].T)
357 |         r += [cos(temp), sin(temp)]
358 |     return r
359 | 
360 | 
361 | def lam_formula(char_values, c_s_values, J, Q):
362 |     """
363 |     implement the 4 lam formula (point cond. cov)
364 |     # char_values: predict t + J and t - 1; # len-4 list, the  element is len-n [T_i, B]
365 |     Inputs:
366 |         char_values: predicted values, at point t, they are [t, …, t + J - 1] -> [t - 1] and [t + J]
367 |         c_s_values: observed values, t is just t
368 |     Outputs:
369 |         lam: (Q-1)-len list with every entry as [four (n * T-q * B) matries about lam values]
370 |     """
371 |     phi_R, psi_R, phi_I, psi_I = char_values
372 |     c_X, s_X, c_XA, s_XA = c_s_values
373 | 
374 |     # forward, t is the residual at time t
375 |     left_cos_R = c_X - roll(phi_R, J, 1)
376 |     left_sin_I = s_X - roll(phi_I, J, 1)
377 |     # backward, t is the residual at time t
378 |     right_cos_R = c_XA - roll(psi_R, -1, 1)
379 |     right_sin_I = s_XA - roll(psi_I, -1, 1)
380 | 
381 |     lam = []
382 | 
383 |     for q in range(2, Q + 1):
384 |         shift = q + J - 1
385 |         startT = q + J - 1
386 |         lam_RR = multiply(
387 |             left_cos_R, roll(
388 |                 right_cos_R, shift, 1))[
389 |             :, startT:, :]
390 |         lam_II = multiply(
391 |             left_sin_I, roll(
392 |                 right_sin_I, shift, 1))[
393 |             :, startT:, :]
394 |         lam_IR = multiply(
395 |             left_sin_I, roll(
396 |                 right_cos_R, shift, 1))[
397 |             :, startT:, :]
398 |         lam_RI = multiply(
399 |             left_cos_R, roll(
400 |                 right_sin_I, shift, 1))[
401 |             :, startT:, :]
402 |         lam.append([lam_RR, lam_II, lam_IR, lam_RI])
403 |     return lam
404 | 
405 | 
406 | ##########################################################################
407 | # %% The final test statistics and p-values
408 | # only rely on estimated cond. cov [est cond. char v.s. obs cond. char]
409 | # has nothing to do with the char estimation part
410 | ##########################################################################
411 | 
412 | #%% part 2 of Step 3 for Algorithm 1
413 | def S_hat(lam, dims, J = 1):
414 |     """
415 |     Construct the test stat S based on cond. covs.
416 |         1. construct (Q-1 * B) Gammas from lam(sample lag-q covariance functions)
417 |         2. Step3 - aggregate to get S_hat
418 | 
419 |     Inputs:
420 |         lam: (Q-1)-len list of four lam matrices (n * T-q * B)
421 | 
422 |     Ourputs:
423 |     """
424 |     Gamma = [np.array([np.mean(a[i], (0, 1)) for a in lam]) for i in range(4)]
425 |     Gamma_R = Gamma[0] - Gamma[1]  # Gamma_RR - Gamma_II
426 |     Gamma_I = Gamma[2] + Gamma[3]  # Gamma_IR + Gamma_RI
427 | 
428 |     N, T = dims
429 |     Q = Gamma_R.shape[0] + 1
430 |     B = Gamma_R.shape[1]
431 |     r = []
432 | 
433 |     for q in range(2, Q + 1):
434 |         c = sqrt(N * (T + 1 - q - J))
435 |         r.append(c * max(max(Gamma_R[q - 2, :]), max(Gamma_I[q - 2, :])))
436 |     return max(r)
437 | 
438 | #%% Step 4 of Algorithm 1
439 | def Sigma_q(Q_four_lams):
440 |     """
441 |     sample covariance matrix, prepare for resampling
442 |     Paras:
443 |     lams: (Q-1)-len list of four lam matrices (n * T-q * B)
444 |     """
445 |     sigma_q_s_max, sigma_q_s_mean = [], []
446 |     Q = len(Q_four_lams) + 1
447 |     q = 2
448 |     for four_lams in Q_four_lams:  # for each q
449 |         lam_RR, lam_II, lam_IR, lam_RI = four_lams  # (n * T-q * B) matrix
450 | 
451 |         lam = concatenate([lam_RR - lam_II, lam_RI + lam_IR],
452 |                           2)  # into (n * T-q * 2B)
453 |         N, T_q, BB = lam.shape
454 |         sigma_q = np.zeros((BB, BB))
455 |         for i in range(N):
456 |             # aggregate across T with the .dot()
457 |             sigma_q += lam[i].T.dot(lam[i])
458 |         sigma_q_s_max.append(sigma_q / (N * T_q))
459 |         q += 1
460 |     return sigma_q_s_max
461 | 
462 | 
463 | #%% Step 5 of Algorithm 1
464 | def bootstrap_p_value(Q_Sigma_q, rep_times, test_stat=0):
465 |     """
466 |     resampling to get cv/p-values
467 |     """
468 |     BB = Q_Sigma_q[0].shape[0]
469 |     Q = len(Q_Sigma_q) + 1
470 |     Sigma_q_squares = [sqrtm(a) for a in Q_Sigma_q]
471 | 
472 |     def one_time(seed):
473 |         rseed(seed); npseed(seed)
474 |         Z = randn(BB, Q - 1)
475 |         r = []
476 |         for q in range(Q - 1):
477 |             z = Z[:, q]
478 |             r.append(max(Sigma_q_squares[q].dot(z)))
479 |         return max(r)
480 |     # generate boostrapped test stats
481 |     r = rep_seeds(one_time, rep_times)
482 |     p = p_value(test_stat, r)
483 |     return p
484 | ##########################################################################
485 | ############  ARCHIVE   ########################################
486 | ##########################################################################
487 | 
488 | def char_fun_est_RF(train_data, paras=[3, 20], n_trees = 200, uv = 0, J = 1,
489 |                     include_reward = 0, fixed_state_comp = None):
490 |     """ cond. char. fun. estimaton with the alternative estimator (multi-outcome random forests)
491 |     """
492 |     char_funs = []
493 |     X1, y1 = get_pairs(train_data, is_forward = 1, J = J,
494 |                        include_reward = include_reward, fixed_state_comp = fixed_state_comp)
495 |     X2, y2 = get_pairs(train_data, is_forward = 0, J = J,
496 |                        include_reward = include_reward, fixed_state_comp = fixed_state_comp)
497 |     XX, yy = [X1, X2], [y1, y2]
498 | 
499 |     max_depth, min_samples_leaf = paras
500 |     for i in range(2):
501 |         X, y = XX[i], yy[i]
502 |         y_cos, y_sin = cos(y.dot(uv[i].T)), sin(y.dot(uv[i].T))
503 |         regr_cos = RandomForest(random_state = 0, n_estimators = n_trees, 
504 |                 max_depth = max_depth, min_samples_leaf = min_samples_leaf, 
505 |                 n_jobs = n_jobs)
506 |         regr_sin = RandomForest(random_state = 0, n_estimators = n_trees, 
507 |                 max_depth = max_depth, min_samples_leaf = min_samples_leaf, 
508 |                 n_jobs = n_jobs)
509 |         regr_cos.fit(X, y_cos)
510 |         regr_sin.fit(X, y_sin)
511 |         char_funs.append([regr_cos, regr_sin])
512 |     return char_funs
513 |             


--------------------------------------------------------------------------------