├── images
│   ├── box.JPG
│   ├── cat.JPG
│   ├── cube.JPG
│   ├── temp.JPG
│   ├── cc_data.JPG
│   ├── cnn_rnn.JPG
│   ├── curious.JPG
│   ├── drawing.pdf
│   ├── ladder.png
│   ├── mb_task.png
│   ├── mc_tree.JPG
│   ├── objects.JPG
│   ├── cnn_rnn-2.JPG
│   ├── code_rep.png
│   ├── glacier_1.JPG
│   ├── glacier_2.JPG
│   ├── glacier_3.JPG
│   ├── mbrl_results.JPG
│   ├── transporter.JPG
│   ├── cc_challenges.JPG
│   └── child_learners.JPG
├── main.tex
├── commands.tex
├── days
│   ├── tuesday_5_7_19.tex
│   ├── thursday_5_9_19.tex
│   ├── wednesday_5_8_19.tex
│   └── monday_5_6_19.tex
└── iclr.bib
--------------------------------------------------------------------------------
/main.tex:
--------------------------------------------------------------------------------
\documentclass[11pt]{article}

% --- Packages ---
\usepackage[usenames, dvipsnames]{color} % Cool colors
\usepackage{enumerate, amsmath, amsthm, amssymb, mathrsfs, algorithm, algpseudocode, fontawesome, pifont, subfig, fullpage, csquotes, dashrule, tikz, bbm, booktabs, bm, hyperref}
\usepackage[framemethod=TikZ]{mdframed}
\usepackage[numbers]{natbib}
\usepackage[normalem]{ulem}

% --- Misc. ---
\hbadness=10000 % No "underfull hbox" messages.
\setlength{\parindent}{0pt} % Removes all indentation.

% -- Commands --
\input{commands}

\title{ICLR 2019 Notes \\ \Large{New Orleans, LA, USA}}
\author{David Abel\footnote{\durl{http://david-abel.github.io}} \\ \durl{david_abel@brown.edu}}
\date{May 2019}

\begin{document}
\maketitle
\tableofcontents
\newpage


This document contains notes I took during the events I managed to make it to at (my first) ICLR, in New Orleans, LA, USA. Please feel free to distribute it, and shoot me an email at \durl{david_abel@brown.edu} if you find any typos or other items that need correcting.


\section{Conference Highlights}

Sadly, I missed more talks than usual at this conference (and had to fly out a bit early, so I missed a good chunk of Thursday). Some highlights:
\begin{itemize}
\item Lots of continued discussion following the recent wave of conversations around Rich Sutton's bitter lesson\footnote{\url{http://incompleteideas.net/IncIdeas/BitterLesson.html}}. The debate held at the main conference track (see Section~\ref{sec:debate}) and the panel at the SPiRL workshop\footnote{\url{http://spirl.info}} (see Section~\ref{sec:panel}) featured lots of insights about the topic -- I highly recommend checking them out!
\item The SPiRL workshop was {\it outstanding}. The speaker lineup, contributed talks, and panel were all exceptional (see Section~\ref{sec:spirl}). A huge thanks to the organizers for putting on such a great event.
\item Hot topics: 1) meta learning (particularly meta RL), and 2) graph neural networks.
\item Favorite talks: I loved the keynotes! I didn't make it to all of them, but the ones I did catch were all fantastic. Definitely check out Prof. Zeynep Tufekci's amazing talk if you can find a video (my summary is in Section~\ref{sec:keynote_zeynep}). If I happen to find a recording I'll link it here.
\item Some really nice papers on learning abstractions/hierarchies in RL: 1) ``Near-Optimal Representation Learning for Hierarchical Reinforcement Learning" by \citet{nachum2018near}, 2) ``Learning Multi-Level Hierarchies with Hindsight" by \citet{levy2018learning}, and 3) ``Learning Finite State Representations of Recurrent Policy Networks" by~\citet{koul2018learning}.
\item A small thing, but I {\it loved} how large the posters were allowed to be (something like 5ft wide). Encouraged some great designs and easy viewing for the onlookers, even from a distance.
\end{itemize}

% ------------
% -- Monday --
% ------------
\newpage
\section{Monday May 6th: Workshops}
\input{days/monday_5_6_19.tex}



% -------------
% -- Tuesday --
% -------------
\newpage
\section{Tuesday May 7th: Main Conference}
\input{days/tuesday_5_7_19.tex}



% ---------------
% -- Wednesday --
% ---------------
\newpage
\section{Wednesday May 8th: Main Conference}
\input{days/wednesday_5_8_19.tex}



% --------------
% -- Thursday --
% --------------
\newpage
\section{Thursday May 9th: Main Conference}
\input{days/thursday_5_9_19.tex}



% --- Bibliography ---
\newpage
\bibliographystyle{plainnat}
\bibliography{iclr}

\end{document}
--------------------------------------------------------------------------------
/commands.tex:
--------------------------------------------------------------------------------
% COMMANDS:
% - bigmid: Dynamically sized mid bar.
% - spacerule: add a centered dashed line with space above and below
% - \dbox{#1}: Adds a nicely formatted slightly grey box around #1
% - \begin{dproof} ... \end{dproof}: A nicely formatted proof. Use \qedhere to place qed
% - \ddef{#1}{#2}: Makes a definition (and counts defs). #1 goes inside parens at beginning, #2 is actual def.
% - \begin{dtable}{#1} ... \end{dtable}: Makes a minimalist table. #1 is the alignment, for example: {clrr} would be a 4 column, center left right right table.

% Dynamically sized mid bar.
\newcommand{\bigmid}{\mathrel{\Big|}}


% ---- Colors and Notes ----
\definecolor{dblue}{RGB}{98, 140, 190}
\definecolor{dlblue}{RGB}{216, 235, 255}
\definecolor{dgreen}{RGB}{124, 155, 127}
\definecolor{dpink}{RGB}{207, 166, 208}
\definecolor{dyellow}{RGB}{255, 248, 199}
\definecolor{dgray}{RGB}{46, 49, 49}

% TODO
\newcommand{\todo}[1]{\textcolor{red}{TODO: #1}}
\newcommand{\dnote}[1]{\textcolor{dblue}{Dave: #1}}

% URL
\newcommand{\durl}[1]{\textcolor{dblue}{\underline{\url{#1}}}}

% Circled Numbers
\newcommand*\circled[1]{\tikz[baseline=(char.base)]{\node[shape=circle,draw,inner sep=0.7pt] (char) {\footnotesize{#1}};}}
% From: http://tex.stackexchange.com/questions/7032/good-way-to-make-textcircled-numbers

% Under set numbered subset of equation
\newcommand{\numeq}[3]{\underset{\textcolor{#2}{\circled{#1}}}{\textcolor{#2}{#3}}}

% ---- Abbreviations -----
\newcommand{\tc}[2]{\textcolor{#1}{#2}}
\newcommand{\ubr}[1]{\underbrace{#1}}
\newcommand{\uset}[2]{\underset{#1}{#2}}
\newcommand{\eps}{\varepsilon}

% Typical limit:
\newcommand{\nlim}{\underset{n \rightarrow \infty}{\lim}}
\newcommand{\nsum}{\sum_{i = 1}^n}
\newcommand{\nprod}{\prod_{i = 1}^n}

% Add an hrule with some space
\newcommand{\spacerule}{\begin{center}\hdashrule{2cm}{1pt}{1pt}\end{center}}

% Mathcal and Mathbb
\newcommand{\mc}[1]{\mathcal{#1}}
\newcommand{\indic}{\mathbbm{1}}
\newcommand{\bE}{\mathbb{E}}

\newcommand{\ra}{\rightarrow}
\newcommand{\la}{\leftarrow}

% ---- Figures, Boxes, Theorems, Etc. ----
---- 58 | 59 | % Basic Image 60 | \newcommand{\img}[1]{ 61 | \begin{center} 62 | \includegraphics[\width=0.6\textwidth]{#1} 63 | \end{center}} 64 | 65 | % Put a fancy box around things. 66 | \newcommand{\dbox}[1]{ 67 | \begin{mdframed}[roundcorner=4pt, backgroundcolor=gray!5] 68 | \vspace{1mm} 69 | {#1} 70 | \end{mdframed} 71 | } 72 | 73 | % --- PROOFS --- 74 | 75 | % Inner environment for Proofs 76 | \newmdenv[ 77 | topline=false, 78 | bottomline=false, 79 | rightline = false, 80 | leftmargin=10pt, 81 | rightmargin=0pt, 82 | innertopmargin=0pt, 83 | innerbottommargin=0pt 84 | ]{innerproof} 85 | 86 | % Proof Command 87 | %\newenvironment{dproof}{\begin{proof} \text{\vspace{2mm}} \begin{innerproof}}{\end{innerproof}\end{proof}\vspace{4mm}} 88 | \newenvironment{dproof}[1][Proof]{\begin{proof}[#1] \text{\vspace{2mm}} \begin{innerproof}}{\end{innerproof}\end{proof}\vspace{4mm}} 89 | 90 | 91 | % Dave Definition 92 | \newcounter{DaveDefCounter} 93 | \setcounter{DaveDefCounter}{1} 94 | 95 | \newcommand{\ddef}[2] 96 | { 97 | \begin{mdframed}[roundcorner=1pt, backgroundcolor=white] 98 | \vspace{1mm} 99 | {\bf Definition \theDaveDefCounter} (#1): {\it #2} 100 | \stepcounter{DaveDefCounter} 101 | \end{mdframed} 102 | } 103 | 104 | % Block Quote 105 | \newenvironment{dblockquote}[2]{ 106 | \begin{blockquote} 107 | #2 108 | \vspace{-2mm}\hspace{10mm}{#1} \\ 109 | \end{blockquote}} 110 | 111 | % Algorithm 112 | \newenvironment{dalg}[1] 113 | {\begin{algorithm}\caption{#1}\begin{algorithmic}} 114 | {\end{algorithmic}\end{algorithm}} 115 | 116 | 117 | 118 | 119 | % Dave Table 120 | \newenvironment{dtable}[1] 121 | {\begin{figure}[h] 122 | \centering 123 | \begin{tabular}{#1}\toprule} 124 | {\bottomrule 125 | \end{tabular} 126 | \end{figure}} 127 | 128 | % For numbering the last of an align* 129 | \newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}} 130 | 131 | \DeclareMathOperator*{\argmin}{arg\,min} 132 | \DeclareMathOperator*{\argmax}{arg\,max} 133 | 134 | \newtheorem{conjecture}{Conjecture}[section] 135 | \newtheorem{remark}{Remark}[section] 136 | \newtheorem{theorem}{Theorem}[section] 137 | \newtheorem{corollary}{Corollary}[theorem] 138 | \newtheorem{lemma}[theorem]{Lemma} 139 | \newtheorem{assumption}{Assumption} 140 | -------------------------------------------------------------------------------- /days/tuesday_5_7_19.tex: -------------------------------------------------------------------------------- 1 | Onto day two! Today is entirely the main conference. I have a lot of meetings today so I will only be around for a few talks, sadly. 2 | 3 | 4 | \subsection{Keynote: Emily Shuckburgh on ML Conducting a Planetary Healthcheck} 5 | 6 | Remember: NOLA in 2005 post Katrina. We thought this would be a wake up call. \\ 7 | 8 | CO$_2$ in the atmosphere in 2005: 378 parts per million, in 2019; CO$_2$ 415 parts per million. Hurricanes and cyclones in Mumbai, rising seas, wetter skies $\implies$ devastation caused by these events is that much worse. \\ 9 | 10 | {\bf Note:} One {\it million} species at risk of extinction in the next decades (from a recent study on biodiversity). \\ 11 | 12 | $\ra$ We are having a huge impact on our planet. \\ 13 | 14 | \dbox{{\bf Guiding Question:} How can we get a sense of the health of our planet, and turn it around? 
Can we use Machine Learning to make that happen?}

\subsubsection{Challenges for ML in Climate Science}

Key questions, observations, and action items:
\begin{enumerate}
\item We urgently need actionable information on climate risk.

We need to understand the potential risks and outcomes of:
\begin{enumerate}
\item Flooding, heat waves, and other disasters.
\item Effects of changes in biodiversity.
\item Impacts on supply chains (food, water, and beyond).
\item Effects on the natural world (coral reefs, forests, arctic sea ice, permafrost).
\end{enumerate}

\item We have vast data-sets describing how the planet is changing.

This includes data from satellites, robotic instruments under water, networked sensors, massive computer simulations, and crowd sourcing.

$\ra$ We have more data than we know what to do with.

\item {\bf Main Point:} Can we employ advances in data science and machine learning to harness this data (from 2.) to help address the challenges in (1.)?

\end{enumerate}

\begin{figure}[h!]
\centering
\includegraphics[width=0.4\textwidth]{images/cc_challenges.JPG}
\caption{Challenges in bringing tools from ML to bear on problems in Climate Science.}
\label{fig:cc_ml}
\end{figure}

Q: In spite of the challenges (see Figure~\ref{fig:cc_ml}), what can we do? \\

A: Three steps to conduct a planetary healthcheck:
\begin{enumerate}
\item Monitoring the planet
\item Treating the symptoms
\item Curing the disease
\end{enumerate}

\subsubsection{Step One: Monitoring The Planet}

Q: How can we appropriately monitor the health of the planet? It's a huge challenge! Lots of important data is sparse, while less important (or low signal-to-noise ratio) data is abundant. \\

A: More comprehensive testing -- not just temperature, but lots of other properties, too.

\begin{figure}
\centering
\subfloat[Surface temperature over time]{\includegraphics[width=0.4\textwidth]{images/temp.JPG}} \hspace{5mm}
\subfloat[Other climate-relevant data over time]{\includegraphics[width=0.4\textwidth]{images/cc_data.JPG}}
\caption{Changes in properties of the earth's health over time.}
\label{fig:cc_data}
\end{figure}


\subsubsection{Step Two: Treating the Symptoms}

Standard tools: the coordinated international climate modeling project (CMIP6): $\sim$40 petabytes. Around a million lines of code, used to run simulations of surface radiation, changes in solar radiation, and so on. \\

Q: What do these models do for us? \\

A: They make predictions about critical properties in the future, like emissions due to greenhouse gases with and without different interventions, and so on. \\

$\ra$ We can actually predict global average surface temperature extremely well. \\

Q: What will future conditions be like in the world's cities and megacities? How can we predict these things? \\

A: Climate models can project these changes many years into the future! \\

But: 1) they have coarse resolution, 2) they have systematic biases at a local level, and 3) different climate models do better/worse at representing different aspects of the climate system. \\

Example: Consider a climate model making predictions about temperatures in London. \\

$\ra$ Sometimes, the model is systematically wrong (biased). It's too high for long periods, then too low, and so on. So how can we remedy this? \\

{\bf Approach:} Apply probabilistic machine learning to build a new predictive model from actual observed weather data. That is, learn $f : \mc{X} \ra \mc{Y}$, given lots of weather data. \\
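As a concrete illustration of this kind of bias correction -- my own sketch, not code from the talk, with synthetic stand-ins for the data -- one could fit a probabilistic regressor mapping a climate model's output to observed temperatures:

\begin{verbatim}
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

# Synthetic stand-ins: X = (day of year, climate-model temperature),
# y = observed temperature; real data would come from weather records.
rng = np.random.default_rng(0)
day = rng.uniform(0, 365, size=200)
model_temp = 15 + 10 * np.sin(2 * np.pi * day / 365)
observed = model_temp - 2.0 + 0.5 * np.sin(4 * np.pi * day / 365) \
           + rng.normal(0, 0.5, size=200)   # systematic, seasonal bias

X = np.column_stack([day, model_temp])
f = GaussianProcessRegressor(kernel=RBF() + WhiteKernel(), normalize_y=True)
f.fit(X, observed)

# Probabilistic prediction: mean and uncertainty for a new model output.
mean, std = f.predict(np.array([[180.0, 24.0]]), return_std=True)
\end{verbatim}

The learned $f$ absorbs the systematic bias while also reporting its own uncertainty, which is what makes it useful for downstream risk estimates. \\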
Q: Can we go further? Can we extend this model to account for correlated risks, and map to data on impacts? \\

$\ra$ Really, we'd like to regulate sustainable urban drainage and thermal comfort in buildings, and address questions like: how vulnerable is a particular country/region to climate disruption? \\

A similar approach -- consider a task where: \\
\hspace{8mm}{\bf input:} time, space, climate model outputs, meteorological data \\
\hspace{8mm}{\bf output:} future risk of a specific impact occurring, \\
with the {\bf task} of synthesizing and interpolating different datasets and learning mappings between different variables; we may also need to find novel sources of data.\\

\subsubsection{Step Three: Curing the Disease}

{\bf Key Takeaway:} There are many opportunities to improve future projections of climate change to inform policymaking. Here are a few:

\begin{enumerate}
\item Blend data-driven and physics-based approaches

$\ra$ We can combine physics models of ice melt and machine learning models (with our large dataset) to make more accurate predictions of ice melt.

\item Develop data-based simulators of key processes

$\ra$ Given massive datasets of key processes, such as cloud formations, we can help to build more accurate models. Current climate models don't scale well, so we need to find new ways to model climate change.

\item Use ML to better understand the {\it physical processes} involved in key shifts, as in glacier shifts (see Figure~\ref{fig:glacier}).

\end{enumerate}

\begin{figure}
\centering
\subfloat[]{\includegraphics[width=0.29\textwidth]{images/glacier_1.JPG}} \hspace{3mm}
\subfloat[]{\includegraphics[width=0.29\textwidth]{images/glacier_2.JPG}} \hspace{3mm}
\subfloat[]{\includegraphics[width=0.29\textwidth]{images/glacier_3.JPG}} \hspace{3mm}
\caption{Change in glacial structure over time.}
\label{fig:glacier}
\end{figure}

Summary:
\begin{enumerate}
\item Climate change is perhaps the defining issue of our time.
\item To assess risks posed to society and the natural world, we need more information and tools.
\item Vast datasets cover every aspect of the planet's health, but we lack some of the {\it tools} to process them to generate that information.

\item {\bf Takeaway Question:} Can we establish benchmark tasks that drive climate research forward, much like ImageNet has done for vision?
\end{enumerate}

\dnote{Stepping out for meetings the rest of the day, back at it tomorrow!}
--------------------------------------------------------------------------------
/days/thursday_5_9_19.tex:
--------------------------------------------------------------------------------
Last day! I'm flying out this afternoon, so I will only make a few sessions today.


\subsection{Contributed Talks}

Now some contributed talks.

\subsubsection{Felix Wu on Pay Less Attention with Lightweight and Dynamic Convolutions~\cite{wu2019pay}}

{\bf Observation:} Sequence models are almost everywhere in NLP (they have become to NLP what CNNs are to vision). \\
\\ 11 | 12 | This work: 13 | \begin{enumerate} 14 | \item Q1: Is self attention needed for good performance? 15 | \item Q2: Can we do well on a range of NLP tasks with limited context? 16 | \end{enumerate} 17 | 18 | Different models perform very differently on Neural machine Translation (Transformer achieve BLEU of 28, SliceNet of 25, phrase-based of 22). \\ 19 | 20 | $\ra$ Large performance gap of self-attention and convolutional models. \\ 21 | 22 | Background: three ways to encode a sequence 23 | \begin{itemize} 24 | \item RNN: recurrent neural net. 25 | can. 26 | $h_t = f(x-t, h_{t-1})$, with $x_i$ the input at time $i$, $h_i$ the hidden state at time $i$. 27 | 28 | \item CNN: convolutional neural net. 29 | 30 | $h_t = f(x_{t-k}, \ldots, x_{t+k})$ $\ra$ look at a limited window. 31 | 32 | \item Self-attention models: Compute pairwise similarity between words and aggregate them. 33 | 34 | $h_t = \sum_{i,j} a_{i,j}$. 35 | \end{itemize} 36 | 37 | Some pros and cons of each! RNNs can't be parallelized, while CNNs and self-attention, time complexity is higher for self-attention, and so on (see Figure~\ref{cnn_rnn} for full comparisons). \\ 38 | 39 | \begin{figure}[h!] 40 | \centering 41 | \includegraphics[width=0.4\textwidth]{images/cnn_rnn-2.JPG} 42 | \caption{Pros and Cons of CNNs, RNNs, and self-attention for sequence modeling.} 43 | \label{fig:cnn_rnn} 44 | \end{figure} 45 | 46 | {\bf Approach:} {\it dynamic} convolution that addresses the main disadvantage of CNNs (lack of dynamic weighting). \\ 47 | 48 | But, some challenges to dynamic convolution: too many parameters to optimize! \\ 49 | 50 | $\ra$ Response: turn to lightweight convolution, which reduces the number of parameters. \\ 51 | 52 | {\bf Experiments:} Explore the trade-off made between {\it inference speed} measured by sentences per second) vs. {\it BLEU score}, which is a way to measure the quality of output translations. \\ 53 | 54 | $\ra$ Main finding: dynamic convolution achieves same BLEU score as self-attention, but with a 20\% speed up in inference time. \\ 55 | 56 | Conclusion: 57 | \begin{enumerate} 58 | \item Local information is sufficient for several NLP tasks. 59 | \item Introduced dynamic convolution: context-specific kernels. 60 | \item Lightweight convolution: fewer convolution weights still work well. 61 | \end{enumerate} 62 | 63 | 64 | \subsubsection{Jiyauan Mao on Neural-Symbolic Context Learner~\cite{mao2019neuro}} 65 | 66 | {\bf Focus:} Visual concept reasoning. \\ 67 | 68 | $\ra$ Given an input image (of some objects), people can quickly recognize the objects, texture, surface, and so on. \\ 69 | 70 | Visual Question Answering: given an image and a question ``What's the shape of the red object?", output an answer to the question. \\ 71 | 72 | $\ra$ Also, may want to do image captioning: ``there is a green cube behind a red sphere", or instance retrieval (a bounding box on a particular object). \\ 73 | 74 | {\bf Prior Approaches:} End-to-end approaches for solving these three problems. Two things to learn: 1) concepts (colors, shapes), and 2) reasoning (counts). \\ 75 | 76 | $\ra$ Downside to end-to-end: concept learning and reasoning are entangled. Not obvious how to transfer. \\ 77 | 78 | {\bf This Approach:} Incorporate concepts in visual reasoning. Prior methods rely on excpliti concept annotation. \\ 79 | 80 | The idea: 81 | \begin{itemize} 82 | \item Joint learning of concepts and {\it semantic parsing}. 
{\bf Experiments:} Explore the trade-off between {\it inference speed} (measured in sentences per second) and {\it BLEU score}, which is a way to measure the quality of output translations. \\

$\ra$ Main finding: dynamic convolution achieves the same BLEU score as self-attention, but with a 20\% speed-up in inference time. \\

Conclusion:
\begin{enumerate}
\item Local information is sufficient for several NLP tasks.
\item Introduced dynamic convolution: context-specific kernels.
\item Lightweight convolution: fewer convolution weights still work well.
\end{enumerate}


\subsubsection{Jiayuan Mao on the Neuro-Symbolic Concept Learner~\cite{mao2019neuro}}

{\bf Focus:} Visual concept reasoning. \\

$\ra$ Given an input image (of some objects), people can quickly recognize the objects, texture, surface, and so on. \\

Visual Question Answering: given an image and a question ``What's the shape of the red object?", output an answer to the question. \\

$\ra$ Also, we may want to do image captioning: ``there is a green cube behind a red sphere", or instance retrieval (a bounding box on a particular object). \\

{\bf Prior Approaches:} End-to-end approaches for solving these three problems. Two things to learn: 1) concepts (colors, shapes), and 2) reasoning (counts). \\

$\ra$ Downside to end-to-end: concept learning and reasoning are entangled. Not obvious how to transfer. \\

{\bf This Approach:} Incorporate concepts in visual reasoning. Prior methods rely on explicit concept annotation. \\

The idea:
\begin{itemize}
\item Joint learning of concepts and {\it semantic parsing}.
\item Given a scene parser and a semantic parser, learn a program that understands the concepts while parsing both the image and the question.
\end{itemize}

Example: given an image of a red sphere and a green cube, first perform object detection/feature extraction to get a representation. At the same time, do semantic parsing on the text, to output a parsed program that predicts the answer to the question. The full overview is given in Figure~\ref{fig:cube}.

\begin{figure}[h!]
\centering
\includegraphics[width=0.4\textwidth]{images/cube.JPG}
\caption{Overview of the approach for joint semantic and scene parsing.}
\label{fig:cube}
\end{figure}

Two main methods:
\begin{itemize}
\item Learn a program for understanding concepts.
\item Learn concepts that can help facilitate parsing new sentences.
\end{itemize}

{\bf Experiments:} This approach yields several advantages:
\begin{itemize}
\item State-of-the-art performance on the ``CLEVR" data set for visual question answering.
\item Extensions to natural images and natural sentences, as in the VQS dataset: ``what color is the fire hydrant?" given a natural-seeming image of a fire hydrant (correctly guesses ``yellow").
\item The model also supports composition of low-level concepts into high-level concepts, and bounding box detection.
\end{itemize}

{\bf Limitations and Future Directions:}
\begin{itemize}
\item Consider the example of a person with an umbrella hat on, and the question ``what purpose does the thing on this person's head serve?" -- it proves extremely challenging!
\item Recognition of in-the-wild images and beyond (like goals).
\item Interpretation of noisy natural language.
\item Concept learning in a more sample-efficient way.
\end{itemize}

Conclusions:
\begin{itemize}
\item New model: NSCL learns visual concepts from language with no annotation.
\item Advantages of the new model: high accuracy and data efficiency, and concepts transfer to other tasks.
\item Principles: explicit visual grounding of concepts with neuro-symbolic reasoning.
\end{itemize}

\subsubsection{Xiang Li on Smoothing the Geometry of Box Embeddings~\cite{li2018smoothing}}

{\bf Point:} Learning representations is crucial in NLP! These representations are usually vectors, like word2vec or BERT. \\

$\ra$ These vectors define semantic similarity in space (words that are closer together have similar meaning/use). \\

But, consider: rabbit/mammal. They're close to each other in space, but that doesn't capture the full complexity of their relationship (rabbit $\subset$ mammal). \\

$\ra$ One idea: a Gaussian representation of classes like ``mammal". Advantages: 1) region, 2) asymmetry, 3) disjointness; but, one downside: not closed under intersection. Recent work extends this to a probabilistic model that gives up disjointness to achieve closure under intersection. \\

{\bf Their Approach:} An extension of these probabilistic models using a {\it box representation} to account for joint concepts, thereby achieving all four of the desired properties (region, asymmetry, disjointness, and closure under intersection). The box representation is seen in Figure~\ref{fig:box}. \\

\begin{figure}[h!]
\centering
\includegraphics[width=0.4\textwidth]{images/box.JPG}
\caption{Idea behind the new probabilistic box representation}
\label{fig:box}
\end{figure}

Learning problem: boxes represent probability mass, so we try to do distribution matching over concepts. Initialize random concepts ($Pr(\text{deer})$, $Pr(\text{deer} \mid \text{mammal})$). \\

{\bf Experiments:} 1) Matrix factorization on MovieLens, 2) Classification on Imbalanced WordNet

\begin{enumerate}
\item MovieLens market-basket data; a Movie $\times$ Movie matrix: $p(\text{Lion King} \mid \text{Aladdin}) = 0.73$, 286 million pairs.

$\ra$ Regression task: train/dev/test, yields a matrix factorization problem (determine which movies people will like). \\

$\ra$ Forrest Gump ends up being a large box, indicating that everyone likes it!

\item Imbalanced WordNet: shows the model's learning ability for sparse, disjoint data.

$\ra$ Binary classification task: achieves SOTA, even with sparse/disjoint data.
\end{enumerate}

\subsubsection{Best Paper Award Talk: Yikang Shen on Ordered Neurons~\cite{shen2019ordered}}

{\bf Assumption:} Language has a latent tree-like structure. \\

$\ra$ This work: focus on the {\it constituency tree}. \\

Q: Why? \\

A1: Hierarchical representations with increasing levels of abstraction can be captured by these trees! \\

A2: Compositional effects of language, and the long-term dependency problem, can be handled by these trees. \\

{\bf Main Question:} Can we provide a new inductive bias based on this tree structure to achieve higher downstream task performance? \\

Two types of models for answering this in the past:
\begin{enumerate}
\item Recurrent models (SPINN, RL-SPINN, RNN)
\item Recursive models (RvNN, ST-Gumbel, DIORA)
\end{enumerate}

$\ra$ For most prior works: the tree structure is given by an external parser, or hard decisions are made about how to design it. \\

{\bf This Work:} Integrate a tree structure directly into an RNN. \\

$\ra$ The tree structure is defined by: when a larger constituent ends, all nested smaller constituents also end. \\

{\bf Effect:} This yields an inductive bias of ``ordered neurons": when a high-ranking neuron is erased, all lower-ranking neurons should also be erased. \\

To model this structure, introduce a new forget gate based on the $\mathrm{cumax}$ activation:
\begin{equation}
\mathrm{cumax}(x) = \mathrm{cumsum}(\mathrm{softmax}(x)).
\end{equation}
Master gates for the RNN:
\begin{itemize}
\item Master forget gate:
$\tilde{f}_t = \mathrm{cumax}(W_f x_t + \ldots)$
\item Master input gate:
$\tilde{i}_t = 1 - \mathrm{cumax}(W_i x_t + \ldots)$
\end{itemize}

{\bf Experiments:}
\begin{enumerate}
\item Language Modeling: PTB dataset, next-word prediction. Achieves near state of the art.

\item Unsupervised Constituency Parsing: Penn TreeBank data set on a language modeling task.

\item Targeted Syntactic Evaluation: Marvin and Linzen dataset on a language modeling task (given a pair of similar sentences, one ungrammatical, one grammatical, see how the model performs). ON-LSTM is able to pick up on the long-term dependencies.
\end{enumerate}

Summary:
\begin{itemize}
\item Proposed the new Ordered Neuron inductive bias:
\begin{itemize}
\item High-ranking neurons store long-term info
\item Low-ranking neurons store short-term info
\end{itemize}
\item New activation: $\mathrm{cumax}()$ and ON-LSTM
\item Induced structure aligns with human-annotated structure
\item Stronger performance on a lot of experiments.
\end{itemize}
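As a quick illustration of the $\mathrm{cumax}$ activation (my own sketch, using the notation above):

\begin{verbatim}
import torch

def cumax(x, dim=-1):
    # cumax(x) = cumsum(softmax(x)): a non-decreasing gate in [0, 1].
    return torch.cumsum(torch.softmax(x, dim=dim), dim=dim)

gate = cumax(torch.randn(5))  # e.g. tensor([0.03, 0.18, 0.45, 0.80, 1.00])
# Used as a master forget gate, the monotone structure enforces the
# ordered-neuron bias: since the gate only increases along the neuron
# dimension, whenever a neuron's gate is near zero (erased), the gates of
# all lower-ranked neurons are near zero (erased) as well.
\end{verbatim}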
\dnote{And that's a wrap! Just a poster session left and then I'm off to the airport.}
--------------------------------------------------------------------------------
/days/wednesday_5_8_19.tex:
--------------------------------------------------------------------------------
Today I should be around for more talks. The day begins with a keynote!

\subsection{Keynote: Pierre-Yves Oudeyer on AI and Education}

{\bf Note:} Children are extraordinary learners! And they typically learn without an engineer following them around, hand-tuning every aspect of their learning algorithm and environment. \\

\begin{figure}[h!]
\centering
\includegraphics[width=0.4\textwidth]{images/child_learners.JPG}
\caption{Learning and development in human infants.}
\label{fig:child_learners}
\end{figure}

Guiding Fields:
\begin{enumerate}
\item {\it Cognitive Science:} Understanding human development and learning
\item {\it Robotics:} new theory for lifelong and autonomous learning
\item {\it Applications} in education technology.
\end{enumerate}


Example 1: the study of {\it morphology}, body growth, and maturation in designing motor and perceptual primitives in a robot. \\

Example 2: consider language acquisition. Children learn a new language very quickly. \\

Example 3: intrinsic motivation, play, and curiosity. \\

Q: How can we understand these practices, harness them in AI tools, and build new educational tools around them? \\

\subsubsection{Intrinsic Motivation and Curiosity}

Consider {\it active exploration}: a video of a baby playing with a variety of toys in a room over time (reminds me of the playroom domain from RL). \\

$\ra$ Similarly, give a baby a few toys, and a hollow cylinder suspended off the ground with a toy car inside of it. The baby over time tends to put the toy into the cylinder, which knocks the car out of the tube (at which point the parent is very happy!). \\

$\ra$ But! When the car pops out of the tube, the baby also tends to pick up the car and put it back in the tube. \\

Other children experiment in very {\it different} ways; one kid picked up the block and hit the cylinder to make noises, and seemed very pleased by the noises. This was considered a ``failure" in the study, but was pretty sophisticated exploration! \\

{\bf Note:} Theories of intrinsic motivation, curiosity, and active learning involve a drive to reduce uncertainty, and to experience novelty, surprise, or challenge. See~\citet{berlyne1960conflict} and~\citet{berlyne1978curiosity}. \\

{\bf Perspective:} The child is a sense-making organism: they explore to make good predictive models of the world and to control it! \\

Q: Based on this perspective, what kind of modeling/algorithms are needed in order to explain these behaviors? \\

A: We use robotic playgrounds -- place robots in a playroom-like environment, and encourage them to play to learn object models and affordances.
We also place another robot in the playroom that gives ``feedback" (positive/negative reward) to play the role of a parent encouraging/discouraging the baby.\\

Essential ingredients in these robots:
\begin{itemize}
\item Dynamic movement primitives
\item Object-based perceptual primitives (like infants, build on prior perceptual learning)
\item Self-supervised learning of forward/inverse models with hindsight learning
\item Curiosity-driven, self-motivated play and exploration.
\end{itemize}


\subsubsection{The Learning Progress Hypothesis}

Q: What is an {\it interesting} learning experiment for a robot/baby to conduct (to learn)? \\

Lots of answers in the literature: high predictability, high novelty, high uncertainty, knowledge gap, challenge, surprise, free energy, and so on. \\

{\bf This Work:} The Learning Progress Hypothesis~\cite{oudeyer2016intrinsic}:

\ddef{Learning Progress Hypothesis}{The ``interestingness" of an experiment is directly proportional to empirical learning progress (the absolute value of the derivative of the errors).}

$\ra$ Few assumptions on the underlying learning machinery, and on the match between biases and the real world. \\


{\bf Framework:} suppose we have some robots with motion primitives. A robot takes some sequence of actions to yield a trajectory:
\[
\tau = (s_t, a_t, s_{t+1}, \ldots).
\]
From this trajectory, the robot should learn, assuming some behavioral abstraction $\phi$:
\begin{enumerate}
\item Forward model: $F_i : s, \theta \ra \phi_i$, with $\theta$ the parameters of the behavioral policy, $\pi_\theta$.
\item Inverse model: $I_i : s, \phi_i \ra \argmin_\theta ||\phi_i - F_i(s,\theta)||$
\end{enumerate}

Use these two models to measure ``competence progress" as a proxy for the empirical learning progress. \\

$\ra$ Example 1: hierarchical multi-armed bandits. Split a space into subregions, where an agent monitors the errors in each subregion. Use these errors to measure the learning progress over time. Then, in the bandit setting, the agent can explore based on the ratio of these errors over time. \\

$\ra$ Example 2: explore omni-directional locomotion. Look at the diversity (in terms of the spread of states reached in some space) of outcomes from different exploration policies on a robot. Finding: curiosity-driven exploration is less efficient than goal exploration.\\

Q: Why is curiosity-driven exploration less efficient? \\

A: Forward model exploration (curiosity): knowing many ways to produce a few effects; inverse model exploration (goal): knowing a few ways to produce many effects. \\

Example: curiosity-driven discovery of tool use. Videos of a few robots playing with different tools (a robot with a cup learning to interact with a ball, a gripper robot learning to interact with a joystick). \\

$\ra$ Point: focus on playing with and manipulating objects in the world. The gripper robot learns to manipulate the joysticks, which moves the robot that can pick up the ball. The torso robot eventually learns to make a light, play a sound, and hide the ball in the cup. \\

Project: ``MUGL: exploring learned modular goal spaces"~\cite{laversanne2018curiosity}. The main idea is to extend these exploration techniques to high-dimensional input (the robot examples above used a feature vector, not images).
\\

$\ra$ MUGL can be used to discover independently controllable features (learn to control a ball, and so on).

\subsubsection{Models of Child Development Data}

Experiment: modeling vocal development, using the exact same algorithms from before. \\

$\ra$ Goal: design experiments for the infant using the learning progress idea from before. \\

{\bf Finding:} Some self-organization of developmental structure in infants. First the vocal tract is learned (unarticulated sounds), and then articulated sounds are learned. \\

$\ra$ Observe: there are regularities that tend to occur at the same time across different individuals, but some things change dramatically. The interaction between the learning system and body morphology is stochastic, and there is contingency in exploration; it is surprising that many things remain constant. \\

\begin{figure}[h!]
\centering
\includegraphics[width=0.4\textwidth]{images/curious.JPG}
\caption{Curiosity-driven discovery of language}
\label{fig:curiosity}
\end{figure}

The ``Ergo-Robots" (with Mikhail Gromov and David Lynch, I think?) \dnote{Lynch! :o}. A surreal video of robots learning to speak and interact with their environment and companions; see a sample video here: \url{https://www.youtube.com/watch?v=J0gM5i091JQ}. The robots learn to use language in a meaningful way through exploration. \\

$\ra$ Use similar ideas to come up with a realistic model of learning to use a vocal tract. See: {\it Self-Organization in the Evolution of Speech}~\cite{oudeyer2006self}. \\

{\bf Finding:} The distribution of vowels we find in the world's languages matches that of the systems that emerge in these curiosity-driven learning systems. This might explain some regularities of language structure. \\


Q: How is spontaneous exploration structured during free play? \\

A: Experiment! Let subjects play a bunch of games/tasks, with no guidelines. Just do whatever you want (play games like Guitar Hero, free to pick any level/song). \\

$\ra$ People tend to focus on levels of intermediate complexity; exploration follows a controlled growth in complexity, actively controlled by individuals' predictive models.


\subsubsection{Applications in Educational Technologies}


{\bf Goal:} Develop technologies for fostering efficient learning and intrinsic motivation. \\

$\ra$ Project: KidLearn -- allows personalization of intelligent tutoring systems, based on experiments with $>$ 1000 children in 30+ schools. \\

Principle: a graph (usually a DAG) defines the difficulty of each task/exercise type. This allows the system to sample exercises in some sequence (but still give the kids some choice among nodes in the graph). \\

Main study:
\begin{itemize}
\item Examine learning impact based on these interventions.
\item Compare to a typical pedagogical expert (vs. their system).
\item Find that students tend to achieve a higher success rate with certain variations of the algorithm.
\end{itemize}

{\bf Takeaways:} Spontaneous developmental exploration plays a fundamental role, and can be harnessed to develop human-like robots and to empower the learning process.
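To make the learning-progress idea concrete, here is a minimal sketch (my own illustration, not code from the talk) of selecting among subregions by empirical learning progress, with a synthetic stand-in for the robot's prediction errors:

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
n_regions, window = 4, 10
errors = [[] for _ in range(n_regions)]  # recent prediction errors per region

def learning_progress(errs):
    # Interestingness ~ |derivative of the error|: compare the two halves
    # of a sliding window of recent errors.
    if len(errs) < window:
        return float("inf")  # force some initial exploration everywhere
    recent = errs[-window:]
    return abs(np.mean(recent[:window // 2]) - np.mean(recent[window // 2:]))

for t in range(400):
    region = int(np.argmax([learning_progress(e) for e in errors]))
    n = len(errors[region])
    # Synthetic stand-in for "run an experiment there, record the error":
    # region 2 is learnable (error decays with practice); others are noise.
    err = np.exp(-n / 20) if region == 2 else rng.uniform(0.4, 0.6)
    errors[region].append(err)

print([len(e) for e in errors])
# Region 2 attracts trials while its error is still falling; once its
# progress plateaus, attention drifts back to the other regions.
\end{verbatim}

Unlearnable (noisy) regions show no sustained progress, so the agent's time concentrates wherever its model is actually improving.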
\subsection{Contributed Talks}
Next up, some contributed talks.

\subsubsection{Devon Hjelm on Deep InfoMax~\cite{hjelm2018learning}}

{\bf Broad Goal:} Learn unsupervised image representations. \\

Example: a video of a dog catching a snowball. What annotations make sense? (``Cute dog!", ``Good boy!"). \\

$\ra$ Not clear these are the right/useful annotations. \\

{\bf Point:} We don't always want supervised learning of representations. Annotations rarely tell the whole story, the real world doesn't come with labels, and we really want to find the underlying structure (annotations might not enhance this part). \\

Preliminaries:
\begin{itemize}
\item Encoder: $E_\psi : \mc{X} \ra \mc{Y}$, with $Y$ a representation.

\item Mutual info: $I(X;Y) = D_{KL}(P(X,Y) \mid\mid P(X)P(Y))$
\end{itemize}

$\ra$ Introduce a mutual information estimator: encode an image into a representation. Take pairs of representations from images that aren't associated with each other, and treat these as {\it negative} samples. \\

{\bf Approach:}
\begin{enumerate}
\item Encode input $X$ into $Y$ via $E_\psi$.
\item Use the output to estimate $\hat{I}(X;Y)$, and maximize this estimate.
\item Just doing this alone isn't quite enough.

{\it Intuition:} you might not pick up on the relevant locations of an image. Consider a picture of a cat: the background isn't as crucial as the information in the foreground.

\item So: instead of maximizing the mutual info globally, maximize it {\it locally}. Perform this estimation/maximization across all locations simultaneously. See Figure~\ref{fig:cat}.
\item This yields local feature vectors for different regions of the image, which can then be stitched together into a global feature vector.
\end{enumerate}

\begin{figure}
\centering
\includegraphics[width=0.4\textwidth]{images/cat.JPG}
\caption{Extracting local feature maps via local mutual info estimation and maximization}
\label{fig:cat}
\end{figure}

Evaluation: depends heavily on the downstream task. Can measure mutual info, downstream performance on classification/regression tasks, and so on. \\

$\ra$ Deep InfoMax performs very well when the learned representation is used in downstream tasks like classification. \\

Other tasks investigated: prior matching, coordinate prediction, relative coordinate prediction. \\
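To make the estimator concrete, here is a minimal contrastive sketch (my own illustration; Deep InfoMax itself uses a Jensen-Shannon-style estimator) in which matched global/local features are scored against the mismatched, in-batch {\it negative} pairs:

\begin{verbatim}
import torch
import torch.nn.functional as F

def contrastive_mi_bound(global_feats, local_feats):
    # global_feats, local_feats: (batch, dim); row i of each is a positive pair.
    scores = global_feats @ local_feats.t()   # (batch, batch) similarities
    labels = torch.arange(scores.size(0))     # the diagonal holds the positives
    return -F.cross_entropy(scores, labels)   # larger = more estimated MI

g = torch.randn(32, 64)  # e.g. global representation of each image
l = torch.randn(32, 64)  # e.g. a local patch feature from the same image
loss = -contrastive_mi_bound(g, l)  # maximize the bound = minimize this loss
\end{verbatim}

Maximizing this with respect to the encoder pushes matched pairs together and mismatched pairs apart, which is the sense in which the representation ``keeps" information about its input. \\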
\dnote{Off to more meetings.}

\subsection{Keynote: Zeynep Tufekci on the Dangers if ML Works}
\label{sec:keynote_zeynep}
She grew up wanting to be a physicist! But: eventually, many physicists encounter the {\it nuclear} problem: massive ethical issues facing the advancement of a scientific field. \\

$\ra$ At CERN recently: giant projects! 700 people on the Higgs Boson paper. They were concerned about how to divide up the Nobel prize; meanwhile, in CS we're worrying about the impact our tools have on society, security, labor, climate, social infrastructure, and beyond. So: lots of ethical issues in CS, too! \\

Back in Turkey: no internet, limited access to culture/TV from the outside world (she grew up watching Little House on the Prairie). But, she got an internet connection through working at IBM. \\

$\ra$ Wonderful to have unimpeded access to lots of information and connections to people! Lots of optimism about the power of this tool to do good in the world. \\


{\bf This Talk:} big dangers that we're sleepwalking into. If we end up in a scary and unhappy place, we will have built and contributed to part of it. \\
\begin{itemize}
\item Seven years ago: the ``cat" paper, which learned to recognize cats on YouTube. It was hard to even imagine the current state of the field back in 2012.
\item What we need to worry about: what if it works? What are we really introducing into the world?
\end{itemize}


{\bf Themes:}
\begin{enumerate}
\item You are part of this world, and your tools will be too.
\item These tools won't be used the way you think they will be.
\item Alternative paths are possible.
\end{enumerate}

\subsubsection{Examples Of Ways Things Can Go Wrong}

Example 1: social movements in the public sphere. Specifically, the Facebook news feed:
\begin{itemize}
\item Optimized to keep people engaged on the website.
\item Recall: Ferguson, MO, the shooting of an African American teen by a police officer. Similarly: at a McDonald's, some customers were thrown into a van by police officers without any chance to say anything.

$\ra$ Huge discussions about these events on Twitter, but {\it not} Facebook.

\item Point: Friends {\it were} talking about it, but these issues weren't ``engagement friendly" for Facebook (it was trying to populate the news feed with things that would be liked/shared).

$\ra$ At the time, the ALS ice bucket challenge was going on as well. Very likeable/shareworthy, so that dominated most Facebook news feeds.

$\ra$ Chilling moment: the ice bucket challenge dominated the media/Facebook as a result.
\end{itemize}

The difference for social movements isn't whether you hold a protest; what the most effective movements manage to do is get attention. \\

$\ra$ But our algorithms are optimized to keep people engaged, so they tend not to spread certain kinds of information. A new form of censorship. \\

{\bf Classic finding:} People that tend to be into one conspiracy theory tend to be into others (from social science). \\

$\ra$ Instagram would recommend similar conspiracy theories, not because designers recommend them, but just because the algorithm learned this social science fact (someone who likes a ``faked moon landing" post will also be presented with tons of other conspiracy theories). \\

In 2016: these phenomena were {\it widespread}. Watching one certain kind of video would immediately bias future videos (if you watch a jogging video, it then leads you to hardcore marathon training; if you watch a vegetarian video, it then leads you to vegan videos). \\

{\bf Point:} YouTube's algorithm tends to {\it lead individuals down more polarizing and extreme routes}. Not by hand-design, but it does it {\it to increase engagement}. \\

$\ra$ Hate speech, extreme views, and so on, are very ``shareworthy" and capable of improving engagement.
\subsubsection{ML and its Challenges}

Some core challenges of implementing ML systems in the world:
\begin{enumerate}
\item Bias
\item Interpretability
\item Potency at scale
\item Restructuring of power
\item Surveillance and Privacy
\item Transition and Upheaval
\end{enumerate}

Example: suppose you're hiring, and suppose you use ML to hire. \\

(rhetorical) Q: We have methods for predicting depression rates; what if your ML system is using this information for hiring outcomes? \\

Problem: You won't even know that your system is using this information! \\

Similar problem: one can use a ``mirror population" system (identifying similar groups) to target fear-prone individuals with well-designed ads that encourage voting for authoritarians (again coming from findings in social science). \\

$\ra$ The way these systems are built {\it incentivizes} surveillance, since data about individuals/groups is always important. Data {\it will} get out, and will be used. \\

{\bf Path Forward:} build things that can do what we want them to do, without letting them control us or be used for things we don't want. \\

We are in a very particular historic moment: all the companies here are trying to recruit you. That gives you leverage: these companies can't get what they want from you if you {\it insist} on using privacy-preserving methods. \\

{\bf Takeaway:} There is lots of good the talent in this room can do. \\

Some thoughts:
\begin{enumerate}
\item Google gave away money for ML projects. One of them was on detecting suicidal ideation -- finding someone at risk, and intervening.

Seems like a great idea on its face.

\item What will happen: universities will expel kids with mental health issues. They don't want that happening in their dorms.

\item Look at the database of people killed by police officers. Many of them were in a mental health crisis.

$\ra$ A suicide detection program can be used in a very bad way (like laws that prevent at-risk individuals from accessing basic needs/goods).

\item But! Imagine a privacy-preserving system designed to help individuals instead.

\end{enumerate}

On the first Earth Day, pictures were blurry because of smog. Now that's not the case! So, a final note: genuinely consider unionizing. \\

Union: you get a huge number of legal protections. You get some freedom of speech. Organize your own industry ML group. \\

No one here is trying to explicitly go down this dark path. So many good people in this community! But the business model plus the world leads us to the wrong kinds of systems. \\

Final thoughts:
\begin{enumerate}
\item Organize
\item Build alternatives
\item Insist on having a voice
\end{enumerate}

What we have now: when someone uses a phone/app, we're just doing a bad job of what China is doing (in terms of monitoring/surveillance), for the purpose of selling more stuff. \\

$\ra$ We need a real alternative. We need the conveniences and empowerment; we need an option that respects us, that gives us early warning and intervention, but not at the expense of subjecting us to mass surveillance. \\

{\bf Final Example:} Before Diffie-Hellman (public key crypto), we didn't have a way of sending a message without exchanging secret keys.
\begin{itemize}
\item They thought this was a terrible problem. They needed to give people a way to communicate and exchange info without having to exchange keys beforehand.

\item All of public key cryptography comes from that. A really important and individual-empowering tool that has dramatically reshaped the state of the world.

\item So: stock options are cool, but we should be thinking about developing tools like this for the next generation of technology.
\end{itemize}
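As a toy illustration of why that idea was so empowering (my own sketch, with tiny, insecure numbers -- real systems use very large groups):

\begin{verbatim}
# Toy Diffie-Hellman: two parties derive a shared secret over a public
# channel without ever transmitting the secret itself.
p, g = 23, 5                  # public prime modulus and generator

a, b = 6, 15                  # Alice's and Bob's private keys (never sent)
A = pow(g, a, p)              # Alice publishes g^a mod p
B = pow(g, b, p)              # Bob publishes g^b mod p

shared_alice = pow(B, a, p)   # (g^b)^a mod p
shared_bob = pow(A, b, p)     # (g^a)^b mod p
assert shared_alice == shared_bob  # same secret, derived independently
\end{verbatim}

An eavesdropper only sees $p$, $g$, $g^a$, and $g^b$; recovering the shared secret from those is the computational Diffie-Hellman problem, which is believed to be hard.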
\subsubsection{Q \& A}
%({\it Questions}) \\

Q: Could you comment on AI in the military? \\

A: War in 2019: any form of it is going to be terrible. Anything that makes it more convenient will make it even worse. Governments should be accountable to us. \\

Q: This feels a bit US-centric -- any thoughts on the broader international perspective? \\

A: Remember the paper that (claims to) detect sexual orientation. The Ugandan government has outlawed homosexuality, so we need to keep these things in mind. If we have better healthcare, some of these issues will be less damaging. Fixing the politics makes some of these dangers less potent. It's important for Silicon Valley not to be in this bubble. Every time I go to San Francisco now, every other table around me is talking about when stock options vest. Even if you're making great money, if healthcare isn't there and your technology is used to keep people from getting hired, it's problematic. \\

Q: A question about the ``solutions" part, and refusing to build unethical software. Often we build small parts of a big thing; how do you predict what's going to be harmful? \\

A: First, that's why we should organize and unionize. Second, refusal is powerful but will only last so long. The real path forward is the alternative path: we can't get out of oil without solar/wind energy. The surveillance and data economy is our oil economy, and it's cheap. But we can develop the solar equivalent here, and insist on these alternatives that we can embrace. Third, you all might be on the product side: I encourage you to find people on your security team and talk to them. They see the worst. They can warn you how these things might be used in the worst possible way. \\

\dnote{They did the last three questions together}

Q: Lots of surveillance in the US, and a strong culture against whistleblowers, too. Should people at ICLR take a bigger stand? \\

A: Yes, absolutely speak out against that. Encourage and support whistleblowers in your own company. \\


Q: What are the real problems, and what specific things should we be avoiding? We've seen engineers stand up to people working with the army. We've talked about companies that sell ads at any cost. \\

A: If the data exists, it will be sold and used. The great challenge: we need to figure out how to do some of this without surveillance. That includes expiration dates on data, and operating on encrypted data (insights in aggregate, not individualized). The key thing is to kill the surveillance economy. We can't kill phones, since those empower us, but we need an alternative. \\

Q: Any comments on healthcare solutions? \\

A: Lots of things! Skin care diagnosis and other tools that are empowering. We need to figure out a new full-stack trusted method that doesn't feed into the surveillance economy.
There's no way we can collect all this data and work on it without powerful corporations and governments coming for it. We need to stop the surveillance economy.
352 |
353 |
354 | \subsection{Debate led by Leslie Kaelbling}
355 | \label{sec:debate}
356 |
357 | The debaters are: Josh Tenenbaum (JT), Doina Precup (DP), Suchi Saria (SS), and Jeff Clune (JC), with Leslie's comments as LPK. \\
358 |
359 |
360 | LPK: We are interested in questions from the audience! The topic is structure in learning. Each panelist will give a short intro: \\
361 |
362 | DP: I drew the short straw. I'm going to argue that a lot of what we need can and should be learned from data rather than come from priors, especially in the context of general-purpose AI, rather than special-purpose applications. In a special-purpose application we should definitely use structure/priors, but in making general AI, we want our systems to learn from data. See: AlphaGo. First we used the specialized human version with some expert knowledge, but the approach that ended up dominating was the one that learned entirely from data. \\
363 |
364 | JT: Other extreme! I want to emphasize what you can build in. I, like Doina, am interested in general AI. I'm not against learning, but I'm really struck by how much systems like AlphaGo {\it don't} do. We have a general toolkit for building a specific kind of intelligence, but not general intelligence. Transferring to a different board size, for instance, would require retraining from nearly scratch. I'm interested in taking inspiration from the ways humans come into knowledge, like learning from the stage of a baby. In the universe, the only case we have of learning from an early/scratch stage is human infants. We've learned so much, a bit contrary to some of the (bitter) lessons that some people have been drawing. The findings in cognitive science tell a different story: there are lots of inductive biases in human children. Also: the gazelle learns to walk on the savannah very quickly, or it will be eaten. Or think about a bird: the first time it falls out of a nest, it has to fly. Human babies don't start off being able to walk, but before they can do those things, they develop notions of objects, intuitive physics, schemes for defining goals, some notion of space, and so on. It's an exciting opportunity to take those kinds of core systems and learn how to revise and go beyond them. I would like to think about how we can take the right kind of machinery that includes what we know how to do in RL and deep learning, but also what we know about symbolic reasoning and classical methods, so these systems know how to live in a world. \\
365 |
366 | JC: This debate is usually framed as two extremes: 1) build in the right stuff, and 2) learn everything from scratch. I think there's a third alternative here, which is what I call AI-generating algorithms: algorithms that search for an AI agent that, in the inner loop, is very sample efficient (which comes from the outer loop doing the right thing). It's a nice marriage between the two things. When you have a new problem you don't learn from scratch; you deploy this new agent. We know that can work (existence proof: Earth, this happened!). The algorithm is evolution; the inner loop is the human brain. This research direction isn't to say that the outer loop has to be evolution; it could be something else, like meta learning.
If we want to make progress here, there are three pillars:
367 | \begin{enumerate}
368 | \item Meta-learn the architectures
369 | \item Meta-learn the learning algorithms
370 | \item Automate effective learning environments (scaffolding)
371 | \end{enumerate}
372 | There is a clear trend in ML that we're all aware of: hand-designed systems that work okay get surpassed by approaches that are wholly data driven and rely on big compute. If you believe in this trend, you should also believe that it can apply to the discovery of this machinery itself. So: what are the structures that allow for sample-efficient, general-purpose learning? This may be the fastest path for us to pursue these big goals. \\
373 |
374 | SS: I'm going to oversimplify a bit. Each of the three other panelists proposed a solution path, so I want to first highlight some observations:
375 |
376 | \begin{enumerate}
377 | \item Observation 1: Josh and Jeff suggested that if we want to build human-like intelligence, we know learning can get there; the proof is evolution. But evolution is very slow, and in the process of getting to us there were lots of calamities. So, a big issue: we can't afford to let our civilization go extinct or to make societal-level mistakes. What does it mean for us to come up with the right paradigm, given that we can't afford to make mistakes?
378 |
379 | \item Observation 2: What are the levers we have in ML? Algorithms learn by interacting or from past data. So the question is: can we conclude that with data and interactions alone, we can learn anything we want? Unclear!
380 |
381 | \item Observation 3: As a field we focus very hard on incrementing on a solution set. I'm curious if we have answers to these kinds of questions: what can we learn easily, say, starting from a superintelligent being? Do we have a taxonomy for what's easy to learn and what's hard to learn? Can we define where the gaps are? Might be good to define the strategy here!
382 | \end{enumerate}
383 |
384 | So, do we really want to be as slow as evolution, or take into account what we know? Definitely the latter. \\
385 |
386 | \spacerule
387 |
388 | LPK: Really important for us to each define our own objectives. We each have different big- and small-scale objectives. Big scale: understand the math of learning, the biology of the brain, making things work in 10 years or 100 years. So one reason to make this not a knockdown debate is that probably no answer will be true for every objective. So can you each say something more about your objective? \\
389 |
390 | DP: I have multiple objectives. I want to understand how to build general AI agents. Some of the things we do now don't align with that goal: we often train from scratch, but we don't necessarily need to do that! We could load a vision system in and learn downstream aspects. It would be great to emphasize this kind of continual learning a bit more. The other chunk of my time I think about medical applications: it's a space where we don't have a lot of data and we can't always intervene because of ethical considerations. Here we often use structure/priors. Conceptual thing: what are the principles of intelligence? Focusing on learning from data can get us to that. People are wonderful, but there are other intelligent things in nature too. Working on these algorithmic questions can help us understand much of nature. \\
391 |
392 | SS: To me, the biggest challenge is how we proceed when our objective is broken (see the previous keynote!). Lots of research is about asking the right question. Who is going to define the question?
How should we think about defining the right objective? \\
393 |
394 | DP: That's true! We can have different objectives, and that's okay. \\
395 |
396 | JT: I'll say quickly: we're all pretty broad-minded people, but we have our strong, deep views about things. I love the evolutionary view. If we look at the learning algorithms we've seen, evolution is the master algorithm (Pedro Domingos). Gazelles/animals are deeply fascinating. Also cultural evolution, especially once you have language, opens up whole new possibilities. The success of AI tools has occurred in large part due to the cultural evolution that lets us build collective knowledge and share goals and ideas. \\
397 |
398 | JC: I'm interested in how to build intelligence piece by piece. But I'm also interested in building a process that can build intelligent systems. It's also fascinating to think about the set/space of possible intelligent entities. In part we're motivated by what looks like us, but we might miss out on huge regions of what intelligent/sentient systems look like. Also motivated by applications in healthcare, and beyond. A great opportunity for all of us! \\
399 |
400 | LPK: Okay, now we'll go to audience questions. This one is anonymous: ``What kind of results would cause you to change your opinion on how much structure vs. learning is necessary?" --- ``How much structure is necessary" is, by itself, the wrong question. You can contextualize this with respect to a particular objective and get a really fascinating question. It's hard to prove a negative. We're in the midst of a methodological crisis because we're all just putting points in a space. Colleagues? \\
401 |
402 | JT: I know one case study I find interesting about intuitive physics: being able to predict/imagine/plan to causally intervene on objects. It's all about predicting ``what happens if I hit this thing with that thing?". We built a system that didn't do any learning; it just did probabilistic inference using a game engine. Other people around the same time tried tackling the same problems in an end-to-end learning fashion, in some impressive ways. It still required a huge amount of data and didn't seem to generalize well. Others have been trying different systems. We've had some workshops on this topic, and reached a broader conclusion: not an argument for necessity, but an empirical result. Building in some models of objects that might interact via contact/close spatial locality is extremely powerful. We can't prove they're necessary, but they're massively effective. It doesn't change any one person's mind, but it's a case where we've learned some important empirical lessons that turn out to be valid. \\
403 |
404 | DP: It can be very valuable to build these things in, but I'm not convinced that we can't just learn them. The main complaint is usually sample complexity: if we had the right kind of approach for the learning algorithm, it could learn these things. The typical example is causality. If we had the right learning algorithm, we could learn a causal model, not just a predictive model. Another short thing about methodology, also very important for us as a field: it's difficult to make progress when we care a lot about numbers without also understanding what our systems do. So yes, it's great when our algorithms do better, but we need to also understand why. I want to argue that we need to probe our systems: not just measure them quantitatively/qualitatively, but really probe them via hypothesis testing. \\
405 |
406 | JC: I agree with Doina!
I just wanted to ask Josh: building these things in might give us a big lift, but perhaps we can learn even better lifts? How do you see things like HOG and SIFT, where we eventually learned something better? \\
407 |
408 | JT: Yeah, HOG and SIFT is a very instructive example. We see things in intuitive physics with a similar story: right now the best models we have are physics engines (for playing video games, for instance). But we know there are lots of things they don't capture. Recent work is starting to learn physics simulators that can improve on these classical physics simulators. We don't know where that story will end. If we look at vision, not just HOG and SIFT, you see the community, over a few generations of research, repeat the same motifs/ideas (non-linearity, hierarchy, etc.). Many versions of the same idea. HOG and SIFT were one idea, AlexNet was another. We'll see similar ideas recur and build and call back to the same motifs. \\
409 |
410 | SS: Josh, a question for you: do you think the reason we need more than learning is that these systems can't be learned from scratch? Can we learn physics just by learning? \\
411 |
412 | JT: That's what happened on Earth via evolution. The compute for simulating evolution is huge, and we're just barely starting to make progress. So the current route of modern deep RL is not necessarily going to deliver on this promise to learn an entire physical model. Human children are proof that some other route works. \\
413 |
414 | JC: I think lots of routes can get to general AI, but it's partially a question of which is the fastest route. It's an open question whether the manual path will get there faster than the meta-learning or evolutionary approach. \\
415 |
416 | SS: Except you missed this along the way: what about everything that happens along the way? \\
417 |
418 | JC: Fascinating question! There's the general question of whether we should be building a general AI at all. Also, what should we be doing along the way to building it? Very real considerations. What's the fastest way to get there: I say the meta-learning approach above. Regarding ethics? Lots of open questions, too. \\
419 |
420 | LPK: One thing Jeff didn't mention was the physical infrastructure required to make this work. Robots need to interact with the world unless we rely on a perfect simulator. I'm not sure we can get the kind of takeoff that some might suggest.
421 |
422 | \spacerule
423 |
424 | LPK: Next question: ``Symbol manipulation has no place in deep learning. Change my mind" (Yann LeCun). Symbol means something different to everybody; I'd argue that embeddings are roughly symbol manipulations, so maybe you've been doing it and you don't actually know. \\
425 |
426 | JT: It's certainly not my place to define what deep learning is. One reason it's interesting is that deep learning can mean a lot of things; it could mean, and already sort of does mean, doing SGD and symbol manipulation. There's a nice talk tomorrow on this. There's a whole community trying to explore interesting ways for symbols to live inside of systems that do deep representation learning. There are multiple ways to learn a representation, which might include symbols or combinations of these. We'll see systems that do things better with these different techniques than without them. \\
427 |
428 | DP: But will we see systems that do this from scratch? \\
429 |
430 | JT: Having a basic path for compositionality gives us meaning/function that wasn't there from the beginning. \\
431 |
432 | DP: Meaning as in people assign meaning?
Or meaning as in what the machine defines for itself? \\
433 |
434 | JT: Whatever you mean! ... If we want to build systems that we trust, and that we think do have the right kind of meaning, that's one reason to build AI that has these kinds of structures/symbols. \\
435 |
436 | \spacerule
437 |
438 | LPK: Q from Yoshua Bengio: ``We of course need priors but to obtain the most general AI, we'd like the least amount of priors which buy us the most towards AI-tasks, don't we?" \\
439 |
440 | JC: Yes! \\
441 |
442 | SS: Also yes. New question for the audience: can everything be learned? (some hands) Can not everything be learned? (more hands) -- some examples from the audience poll of things that can't be learned: causality, the halting problem, false things. \\
443 |
444 | \dnote{Need to run out for a meeting! Fantastic debate though.}
--------------------------------------------------------------------------------
/days/monday_5_6_19.tex:
--------------------------------------------------------------------------------
1 | The conference begins! Today we have a few keynotes and the workshops.
2 |
3 |
4 | \subsection{Keynote: Cynthia Dwork on Recent Developments in Algorithmic Fairness}
5 |
6 | The field (of algorithmic fairness) began around 2010, but today we'll talk about brand new developments.
7 |
8 | \subsubsection{Algorithmic Fairness}
9 |
10 | {\bf Point 1:} Algorithms can be unfair, data can be unrepresentative, and labels can embody bias. \\
11 |
12 | {\bf Point 2:} Algorithms can have {\it life altering consequences}.
13 | \begin{itemize}
14 | \item Mortgage terms.
15 | \item Detention/release.
16 | \item Medical assessments and care.
17 | \item Deciding if a child is removed or not from a home.
18 | \end{itemize}
19 |
20 | $\ra$ Lots of papers say: ``we're shocked by these examples of algorithmic bias!". But now we're in a position to do something about it. \\
21 |
22 | {\bf Algorithmic Fairness:}
23 | \begin{enumerate}
24 | \item Natural desiderata of fairness conflict with each other
25 | \item A fair algorithm is one piece of an unfair world. Deployment can be unfair, too
26 | \end{enumerate}
27 |
28 | {\bf Goal:} Develop a {\it theory} of algorithmic fairness. Two groups of fairness definitions:
29 | \begin{enumerate}
30 | \item Group fairness
31 | \item Individual fairness
32 | \end{enumerate}
33 |
34 | \ddef{Group Fairness}{Statistical requirements about the relative treatment of two disjoint groups.}
35 |
36 | Example of group fairness: the demographics of students accepted to a college should be equal across groups. Or: balance for the positive/negative class.\\
37 |
38 |
39 | \ddef{Individual Fairness}{People who are similar with respect to a given classification task should be treated similarly}
40 |
41 | $\ra$ Comes from a strong legal foundation.\\
42 |
43 |
44 | Problems:
45 | \begin{itemize}
46 | \item Group notions fail under scrutiny
47 | \item Individual fairness requires a task-specific metric.
48 |
49 | $\ra$ Paucity of work on individual fairness because we need such specific metrics.
50 | \end{itemize}
51 |
52 |
53 | \subsubsection{Approaches to Fairness}
54 |
55 | Metric Learning for Algorithmic Fairness:
56 | \begin{itemize}
57 | \item The adjudicator has an intuitive mapping from a high-dimensional feature vector ($X$) to the important aspects of the problem ($Z$).
58 | \item Relative queries are easy (which of $A$ and $B$ is closer to $C$?)
59 | \item Absolute queries are hard (what is $d(A,B)$?)
60 | $\ra$ Idea: turn to learning theory.
61 | \item Three insights in trying to answer the above absolute queries:
62 | \begin{enumerate}
63 | \item Distance from a single representative element produces useful approximations to the true metric.
64 | \item Parallax can be achieved by aggregating approximations obtained from a small number of representatives.
65 | \item Can generalize to unseen elements.
66 | \end{enumerate}
67 | \item See also: Bridging the Group vs. Individual Gap~\cite{hebert2018multicalibration,kim2018fairness}.
68 | \end{itemize}
69 |
70 |
71 | \subsubsection{Hybrid Group/Individual Fairness Approaches}
72 |
73 | Consider individual probabilities: 1) what is the probability that $P$ will repay a loan? 2) What is the probability that a tumor will metastasize? And so on. \\
74 |
75 | $\ra$ One concern: these events will happen just once. How should we think about these in terms of giving medical/legal recommendations? How can we justify the answer? \\
76 |
77 | Philip Dawid wrote a recent survey of individual fairness definitions~\cite{dawid2017individual}. \\
78 |
79 | One idea: calibration. Consider forecasting the weather. When we say there is a $30\%$ chance of rain, we mean that it rains on $30\%$ of the days where we predict $30\%$, and not on the other $70\%$ (a small sketch of this check, applied per group, appears at the end of this subsection). \\
80 |
81 | {\bf The Tumor Example:} Expectations are obtained from binary outcome data.
82 |
83 | $\ra$ Study A says there is a 40\% chance the tumor will metastasize, and Study B says $70\%$ (no training data/context, just the studies' outputs). \\
84 |
85 | So, given $C = \{S_1, S_2\}$, consider the Venn diagram formed by the recommendations of the two studies. We can choose values for the elements $P = S_1 \setminus S_2$, $Q = S_1 \cap S_2$, and $R = S_2 \setminus S_1$ that retain the given expectations. This can help us clarify the appropriate decision. \\
86 |
87 | But: there are many multi-accurate solutions. If, however, we had ensured calibration, we {\it can} narrow down the expectation to something accurate. \\
88 |
89 | {\bf The Loan Example:}
90 | \begin{itemize}
91 | \item Intersecting demographic/ethnic/age/gender/etc.\ groups.
92 | \item Minimally: policies consistent with expected repayment rates for each group.
93 | \end{itemize}
94 |
95 | Q: Who decides which groups should be prioritized? The culturally dominant? The oppressed? How do we set our scoring function? A really hard question~\cite{jost1994role}. \\
96 |
97 | A: Let's turn to complexity theory! \\
98 | $\ra$ Take all groups identifiable by small circuits acting on the given data.
99 |
100 | \begin{conjecture}
101 | This collection captures all historically disadvantaged groups $S$.
102 | \end{conjecture}
103 |
104 | Multi-accuracy and multi-calibration: we can do it!
105 | \begin{itemize}
106 | \item Multi-Accuracy: The complexity of creating the scoring function depends on the hardness of (agnostic) learning of $C$, but the function itself is efficient.
107 | \item Multi-Calibration: $f$ is calibrated on each set $S \in C$ simultaneously, and accurate in expectation.
108 | \end{itemize}
109 |
110 | {\bf Problem:} The devil is in the collection $C$. \\
111 |
112 | $\ra$ We hope we capture task-specific, semantically significant differences. \\
113 |
114 | Q: What are the sources of information available to child protective services and call screening?
115 |
116 | \subsubsection{Fair Ranking}
117 |
118 | Q: Why? \\
119 |
120 | A1: Ranking is crucial to many endeavors: it is the heart of triage and the underlying impetus for scoring; rank translates to policies or to scores in clinical trials.\\
121 |
122 | A2: Thinking about ranking can help us in thinking about a scoring function more generally. \\
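\dnote{To make the calibration idea above concrete, here is a minimal sketch of my own (not from the talk): audit a scoring function by comparing predicted and empirical rates within score bins, separately for each group $S$ in a collection $C$ -- multi-calibration in spirit. The data and names are made up.}

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical audit data: risk scores in [0,1], binary outcomes, two groups.
n = 10_000
group = rng.integers(0, 2, size=n)     # group membership (stand-in for S in C)
score = rng.uniform(0, 1, size=n)      # f(x): the scoring function under audit
outcome = rng.binomial(1, score)       # outcomes; calibrated by construction here

def worst_bin_gap(score, outcome, mask, n_bins=10):
    """Largest |empirical rate - mean score| over score bins, within one group."""
    gaps = []
    for b in range(n_bins):
        in_bin = mask & (score >= b / n_bins) & (score < (b + 1) / n_bins)
        if in_bin.sum() > 0:
            gaps.append(abs(outcome[in_bin].mean() - score[in_bin].mean()))
    return max(gaps)

# Multi-calibration asks that f be calibrated on every S in C simultaneously.
for g in (0, 1):
    print(f"group {g}: worst bin gap = "
          f"{worst_bin_gap(score, outcome, group == g):.3f}")
\end{verbatim}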
123 |
124 | {\bf Idea:} Let's think about fair ranking from the cryptographic perspective. \\
125 |
126 | Rank Unfairness:
127 | \begin{itemize}
128 | \item Suppose we have two groups of people: $A$ and $B$.
129 | \item Suppose $\bE[A] > \bE[B]$.
130 | \item But! It's silly to then rank everyone in $A$ above everyone in $B$.
131 | \end{itemize}
132 |
133 | Take a cryptographic/complexity-theoretic approach to address this problem!
134 |
135 | $\ra$ If positive and negative examples are computationally indistinguishable, the best one can do is assign everyone a probability according to the base rate.
136 |
137 | \subsubsection{Approaches from Representation Learning}
138 |
139 | {\bf Idea:} Learn a ``fair" representation (in the group-fairness sense).
140 | \begin{itemize}
141 | \item Stamps out sensitive information (``censoring")
142 | \item Retains sufficient information to permit standard training.
143 | \end{itemize}
144 |
145 | Goal: learn a censored mapping to a lower dimensional space $Z$~\cite{edwards2015censoring}.
146 | \begin{itemize}
147 | \item The encoder tries to hide the membership bit while permitting prediction on $Z$.
148 | \item The decoder tries to reconstruct $x$ from $z = Enc(x)$.
149 | \item The adversary $(A)$ tries to distinguish $Enc(x \in S)$ from $Enc(x \in S^c)$.
150 | \end{itemize}
151 |
152 | $\ra$ The approach by~\citet{madras2018learning} ties this objective to scoring, and shows that transfer is possible. \\
153 |
154 | \begin{figure}[h!]
155 | \centering
156 | \includegraphics[width=0.4\textwidth]{images/code_rep.png}
157 | \caption{The cryptographic setup for learning fair representations.}
158 | \label{fig:my_label}
159 | \end{figure}
160 |
161 | {\bf The Harvard-Stanford Problem:} Suppose you're at Stanford, and you build an algorithm for detecting tumors that works really well. Suppose someone else at Harvard does the same. \\
162 |
163 | $\ra$ Claim: the algorithms won't work across the populations due to differences in the groups and in the lab settings. \\
164 |
165 | The goal, then, is to find a way to identify differences/similarities across populations so that these methods {\it can} be transferred across populations.
166 |
167 | Approach (a generative model of the two sites):
168 | \begin{itemize}
169 | \item Choose $y \sim \text{Bernoulli}(\text{base rate})$
170 | \item Choose $x \sim N(\mu, \Sigma)$
171 | \item Retain $x$ if $\text{Bernoulli}(\sigma(f_i(x))) = y$, for a site-specific function $f_i$.
172 | \end{itemize}
173 |
174 |
175 | Summary:
176 | \begin{itemize}
177 | \item A fair algorithm is only one piece of an unfair world.
178 | \item Multiple kinds of fairness: group, individual, multi-X.
179 | \item A breakthrough in metric learning for individual fairness.
180 | \item Individual probabilities are hard to understand, but we can learn from fairness methods to improve their use.
181 | \item Censored representations and out-of-distribution generalization.
182 | \end{itemize}
183 |
184 | \spacerule
185 |
186 |
187 | Now off to the Structure and Priors in Reinforcement Learning workshop!
188 |
189 |
190 | \subsection{SPiRL Workshop}
191 | \label{sec:spirl}
192 |
193 | First, Pieter Abbeel on model-based RL!
194 |
195 | \subsubsection{Pieter Abbeel on Model-Based RL from Meta RL}
196 |
197 | Few-shot RL/learning to RL: we have a {\it family} of environments, $M_1, M_2, \ldots, M_n$. The hope is that when we learn from these $n$ environments, we can learn faster on environment $M_{n+1}$. \\
198 |
199 | Fast Learning:
200 | \begin{equation}
201 | \max_\theta \bE_M \bE_{\tau_M} \left[\sum_i R^i_{\tau_M} \mid \text{RLAgent}_\theta \right].
202 | \end{equation}
203 |
204 | The objective is roughly the above, where we ground $\text{RLAgent}_\theta$ as an RNN (or some other generic computational architecture). \\
205 |
206 | There are other ways to solve this objective, like Model-Agnostic Meta Learning (MAML)~\cite{finn2017model}. \\
207 |
208 | {\bf Point:} This is a family of methods that lets you train quickly in new environments (new $R$, $T$), given interactions with prior environments. \\
209 |
210 | Motivation for simulation:
211 | \begin{itemize}
212 | \item Less expensive (can't break the robot)
213 | \item Faster/more scalable
214 | \item Easier to get lots of labels
215 | \end{itemize}
216 |
217 | Q: How can we leverage {\it crude} simulation through domain randomization? \\
218 |
219 | $\ra$ Think about Minecraft -- there's some visual structure that is useful for learning about the world, but it's not perfect. How can we train in Minecraft (or similar domains) and transfer to the world? \\
220 |
221 | A: Randomize aspects of the simulation in order to make training of the vision system easier. We can then transfer these trained perceptual classifiers to the world, and it works! \\
222 |
223 | A: The same goes for grasping -- we can train to grasp in simulation, then grasp real objects (with around an 80\% success rate). \\
224 |
225 | {\bf Result:} Train a hand controller (for manipulating a block in-hand) in simulation; we can actually deploy it on a real robot. \\
226 |
227 | \ddef{Model-Free RL}{Interact with the world and collect data $D$. Then, use this data to inform $\pi$ or $Q$, and use those to act.}
228 |
229 | \ddef{Model-Based RL}{Interact with the world and collect data $D$. Then, use this data to inform a world simulator, $\widehat{M}$, and perform simulations in order to act.}
230 |
231 | Canonical model-based RL:
232 | \begin{enumerate}
233 | \item For iter $ = 1, 2, \ldots$
234 | \item \hspace{6mm} Collect data under the current policy
235 | \item \hspace{6mm} Improve the learned simulator from all past data
236 | \item \hspace{6mm} Use the simulator to act and collect new data.
237 | \end{enumerate}
238 |
239 | {\bf Problem:} learned models are imperfect! \\
240 |
241 | {\bf Fix:} Learn a better simulator. But this is insufficient: it is extremely hard to learn the {\it right} simulation. \\
242 |
243 | $\ra$ A new overfitting challenge in model-based RL: policy optimization tends to exploit regularities in the simulator, leading to catastrophic failures. \\
244 |
245 | {\bf Key Idea:} We don't need to learn an accurate model, just a set of models representative of the real world (and then do few-shot RL):
246 | \begin{enumerate}
247 | \item For iter $ = 1, 2, \ldots$
248 | \item \hspace{6mm} Collect data under the current adaptive policies, $\pi_1, \ldots, \pi_k$
249 | \item \hspace{6mm} Learn an ENSEMBLE of $k$ simulators from past data.
250 | \item \hspace{6mm} Meta-policy optimization over the ensemble:
251 | \item \hspace{12mm} new meta-policy $\pi_\theta$
252 | \item \hspace{12mm} new adaptive policies $\pi_1, \ldots, \pi_k$.
253 | \end{enumerate}
254 |
255 | Experiments:
256 | \begin{enumerate}
257 | \item {\it MuJoCo:} With about 45 minutes of real interaction with the MuJoCo environment, state-of-the-art model-free methods can't learn, while the meta-learning approach can.
258 | \item {\it Robots:} Similarly, meta model-based RL can be trained in a sample-efficient way to learn to perform robotic grasping tasks.
259 |
260 | $\ra$ Between 10-100$\times$ more efficient than model-free methods {\it to get to the same asymptotic performance.}
261 | \end{enumerate}
262 |
263 | \begin{figure}[h!]
264 | \centering
265 | \includegraphics[width=0.4\textwidth]{images/mbrl_results.JPG}
266 | \caption{Results from meta model-based RL (and a camera + person).}
267 | \label{fig:meta_mbrl}
268 | \end{figure}
269 |
270 |
271 | {\it Challenge Question:} Hierarchical RL promises to solve long-horizon planning problems. \\
272 |
273 | Q1: Are model-free and model-based HRL fundamentally different approaches, or are they the same? \\
274 |
275 | Q2: Do you think there's a way to combine the two? \\
276 |
277 | Pieter A: Yeah, absolutely! The methods we presented do some work to combine these methods. For HRL it might be a bit different, though. In some sense, at the high level, humans don't do RL. We don't have enough ``high level" trials to explore things like ``go to a conference/do a PhD". So, the higher level is probably model-based and more planning-based rather than RL. Another thing that comes to mind is that HRL approaches seem to only have two levels. One interesting direction is to generalize with more depth/levels rather than two. It's still not obvious where the separation between model-based/model-free methods takes place. \\
278 |
279 | Q: Pros and cons of looking for an end-to-end approach as opposed to a more modular approach (with more intermediate structure, like state estimation)? \\
280 |
281 | Pieter A: There's no final answer here -- state estimation sometimes involves human-provided information, and we might lose the wrong data for doing control (what is state for a self-driving car, for instance?). But some priors in this way can probably help!
282 |
283 |
284 | \subsubsection{Contributed Talk: Kate Rakelly on Off-Policy RL via Context Variables}
285 |
286 | {\bf Goal:} Design agents that are skilled at a variety of different tasks. \\
287 |
288 | $\ra$ But training agents on new tasks from scratch is statistically/computationally infeasible, so really we'd like to exploit shared structure across tasks. \\
289 |
290 | {\bf Approach (High-Level):} Meta-RL to learn shared structure across related tasks. \\
291 |
292 | Problems: Rapid adaptation requires efficient exploration strategies, while meta-training requires data from each task, which exacerbates sample inefficiency. \\
293 |
294 | {\bf Approach (Detailed View):} Meta-RL via an {\it off-policy} approach. But this raises a problem with exploration, since we no longer control the experience distribution. \\
295 |
296 | ``Context": task-specific information learned from a task ($z$). Meta-training then has two pieces:
297 | \begin{enumerate}
298 | \item Learn to summarize the context into $z$.
299 | \item Learn to take actions given $s,z$.
300 | \end{enumerate}
301 |
302 | {\bf Algorithm:} \textsc{Pearl}. Given a new task, the main idea is to maintain a probability distribution over which task we're in. This lets us exploit knowledge of uncertainty to adapt efficiently to the new task. \\
303 |
304 | {\bf Experiments:} four different MuJoCo domains (half-cheetah, humanoid, ant, walker). Rewards and dynamics change across tasks (locomotion direction, velocity, joint parameters).
305 |
306 | Summary:
307 | \begin{itemize}
308 | \item \textsc{Pearl} is the first off-policy meta-RL algorithm.
309 | \item 20-100$\times$ improved sample efficiency on the domains tested.
310 | \item Posterior sampling for efficient exploration during adaptation.
311 | \end{itemize}
312 |
313 | Code: \url{github.com/katerakelly/oyster} \\
314 |
315 | Posters now for a bit.
316 |
317 |
318 |
319 |
320 |
321 | \subsubsection{Matt Botvinick on Meta-RL: An Appreciation}
322 |
323 | {\bf Point:} We need some structure to scale RL. What I have in mind is something like relational RL, objects, graph nets, and so on. \\
324 |
325 | Guiding question: What do the algorithms that meta-RL gives rise to actually do? What can't they do? \\
326 |
327 | A recent survey summarizes some of the ideas~\cite{botvinick2019reinforcement}. \\
328 |
329 | $\ra$ The field seems to have moved on from meta-RL, but I can't! Let's really understand these algorithms. \\
330 |
331 | The tendency: let's build a faster race car. This talk: let's understand fast race cars, or why the things we've made recently are fast! \\
332 |
333 | Observations in trying to understand meta-RL:
334 | \begin{itemize}
335 | \item Consider two-armed bandits: An animal chooses between two arms, with payoff determined according to some payoff schedule. Critically, sources of reward get restocked every so often.
336 |
337 | $\ra$ Animals match the frequency of their choices to the frequency with which they get rewards (probability matching).
338 |
339 | $\ra$ An ordinary LSTM figures that out, too! It also figures out the Gittins-optimal strategy in the regular Beta-Bernoulli bandit (see the short sketch at the end of this subsection).
340 |
341 | \item Consider this new bandit task: the probabilities of payoff keep changing, but the volatility changes, too (long intervals where payoffs flip around, etc.).
342 |
343 | $\ra$ The smart thing is to change your learning rate. People do in fact do this (if we fit a model predicting learning rates to people's decisions).
344 |
345 | $\ra$ A meta-learned LSTM can do the same thing!
346 |
347 | \item A monkey sees two colored face-down cups, one of which hides a raisin. It learns to pick up the cup hiding the raisin in general.
348 |
349 | $\ra$ Then the monkey has to transfer to a new task with new objects. It turns out the monkey learns to transfer as well; when no info can be used, the monkey explores uniformly, and when info can be exploited, the monkey learns very quickly.
350 |
351 | $\ra$ An LSTM can do this, too!
352 | \end{itemize}
353 |
354 | This is a clear illustration of a meta-trained model-free algorithm giving rise to model-based RL, based on the model-based tests designed for people/animals by~\citet{daw2014algorithmic}. \\
355 |
356 | \begin{figure}[h!]
357 | \centering
358 | \includegraphics[width=0.4\textwidth]{images/mb_task.png}
359 | \caption{A task for determining whether a decision maker is using model-based techniques or not (left) and results from different approaches on the task (right).}
360 | \label{fig:mb_task}
361 | \end{figure}
362 |
363 | {\bf Point 1:} Meta-RL can also give rise to algorithms that can solve temporal credit assignment problems. \\
364 |
365 | {\bf Point 2:} For LSTMs to solve certain kinds of tasks, they must also be retaining some sufficient statistics in their state (that roughly match what an ideal observer would track). \\
366 |
367 | Lastly: a crazy idea (\dnote{Matt's words, not mine :)}). We have some evidence that with ordinary LSTMs, model-free RL can give rise to model-based RL (in the right situation). \\
368 |
369 | $\ra$ Maybe, if we set up the environments in the right way, LSTMs can work out algorithms for doing (amortized?) Bayesian inference. \\
370 |
371 | {\bf Take Home Message:} Meta-RL is really exciting; let's keep coming up with faster algorithms, but also try to understand what they're doing.
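\dnote{A tiny sketch of my own (not from the talk) of the two-armed bandit meta-task described above, paired with a Thompson-sampling reference agent -- the kind of Bayes-adaptive behavior the meta-trained LSTM is claimed to rediscover. The LSTM itself is omitted; it would consume only the (previous action, previous reward) stream and carry its ``posterior" in its hidden state. All names here are made up.}

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)

def sample_task():
    """One draw from the meta-task distribution: a fresh two-armed bandit."""
    return rng.uniform(0, 1, size=2)   # unknown payoff probability per arm

def run_episode(p, horizon=100):
    """Thompson sampling: keep a Beta posterior per arm, sample it to act."""
    alpha, beta = np.ones(2), np.ones(2)         # Beta(1,1) prior on each arm
    total = 0.0
    for _ in range(horizon):
        arm = int(np.argmax(rng.beta(alpha, beta)))  # posterior sampling
        r = rng.binomial(1, p[arm])
        alpha[arm] += r
        beta[arm] += 1 - r
        total += r
    return total

returns = [run_episode(sample_task()) for _ in range(500)]
print("mean return per 100-step episode:", np.mean(returns))
\end{verbatim}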
372 |
373 | \subsubsection{Katja Hofmann on Challenges \& Directions in Multi-Task RL}
374 |
375 | Q: Why do we focus on structure and priors in RL? \\
376 |
377 | Katja A: Three things!
378 | \begin{enumerate}
379 | \item Improve sample efficiency
380 | \item Improve sample efficiency
381 | \item Improve sample efficiency
382 | \end{enumerate}
383 |
384 | Q: When you think about structure and priors in RL, what kinds of challenges become solvable? \\
385 |
386 | A: Maybe in games, science, transportation, medicine? \\
387 |
388 | $\ra$ For these (and probably other) domains, structure is crucial. \\
389 |
390 | %Q: Who will have access to these solutions? \\
391 |
392 | Kinds of structure:
393 | \begin{itemize}
394 | \item Assume multiple related tasks, where useful relationships between tasks can be learned from data.
395 | \item Q: What types of models allow learning and use of related task structure?
396 | \item Q: What trade-offs can we achieve between prior assumptions, flexibility, and sample efficiency?
397 | \end{itemize}
398 |
399 | First approach: Meta-RL with Latent Variable Gaussian Processes~\cite{saemundsson2018meta}. Idea:
400 | \begin{itemize}
401 | \item Problem: assume $P$ tasks, indexed by $p$, with related dynamics:
402 | \begin{equation}
403 | y_t^p = f(x_t^p, h_p) + \eps,
404 | \end{equation}
405 | \item Observe data from the training tasks.
406 | \item Goal: accurately predict held-out test dynamics with minimal additional data.
407 |
408 | \item Approach: Model-based RL via latent variable Gaussian processes. Place a GP prior on the global function.
409 |
410 | \item Experiments: 1) a toy task, multi-task prediction -- the approach is able to disentangle unseen tasks; 2) multi-task cart-pole -- the system can vary in mass and pendulum length, with many held-out settings of these parameters.
411 | \end{itemize}
412 |
413 | Second approach: (CAVIA) Fast Context Adaptation via Meta-Learning~\cite{zintgraf2018caml}.
414 | \begin{itemize}
415 | \item Problem: distributions over training and test tasks, $p_{train}$, $p_{test}$.
416 | \item During meta-training, sample tasks from $p_{train}$, and get train/test data for that task.
417 | \item Learn how to adapt quickly on the task by splitting the network into: 1) task-specific context parameters $\phi$, and 2) shared parameters $\theta$ (a toy sketch of this split follows at the end of this subsection).
418 | \item Experiments: 1) a supervised learning task; 2) multi-task half-cheetah.
419 |
420 | $\ra$ CAVIA learns an interpretable task embedding, captured in the context parameters.
421 | $\ra$ It adapts to test tasks with updates to only the context parameters -- this sheds new light on meta-learning benchmarks.
422 | $\ra$ Very flexible.
423 | \end{itemize}
424 |
425 | Follow-up: Variational Task Embeddings for Fast Adaptation in Deep RL (VATE). It learns to trade off exploration and exploitation online, while interacting with the environment; VATE can deduce information about the task before seeing any reward. \\
426 |
427 |
428 | {\bf Point:} As we push forward in RL to harder domains, there might be certain generic structure that tends to work well across many domains. Can we find this unifying structure? \\
429 |
430 | $\ra$ One thing that might be needed is a dataset that could give rise to such structure. To this end: \\
431 |
432 | $\ra$ MineRL: a competition (\url{minerl.io/competition}) on sample-efficient RL using human priors (upcoming at NeurIPS this year), built on top of MALMO~\cite{johnson2016malmo}. \\
433 |
434 | Minecraft is massively complex, so it offers a great platform for exploring the use of priors in RL. See Figure~\ref{fig:mc_tree} for a sense of the size of the tech tree.
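\dnote{A toy, first-order sketch of my own (not the paper's) of the CAVIA split on 1-D sine regression: the per-task context $\phi$ is adapted in the inner loop, while the shared parameters are meta-trained in the outer loop. The model is linear in fixed random features so the gradients can be written by hand; CAVIA itself uses neural networks and exact meta-gradients.}

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(2)

# Fixed random Fourier features for 1-D inputs.
W, c0 = rng.normal(size=(1, 20)), rng.uniform(0, 2 * np.pi, 20)
feats = lambda x: np.cos(x @ W + c0)            # (N,1) -> (N,20)

D = 3                                           # context dimension
w = rng.normal(scale=0.1, size=20)              # shared readout weights
U = rng.normal(scale=0.1, size=(20, D))         # shared context-to-weights map
b = 0.0

def predict(F, phi):
    return F @ (w + U @ phi) + b                # context modulates the readout

def sample_task(n=20):
    amp, ph = rng.uniform(0.5, 2.0), rng.uniform(0, np.pi)
    x = rng.uniform(-3, 3, size=(n, 1))
    return feats(x), amp * np.sin(x[:, 0] + ph)

alpha, beta, K = 0.5, 0.02, 5                   # inner lr, outer lr, inner steps
for it in range(3000):
    F, y = sample_task()
    Fs, ys, Fq, yq = F[:10], y[:10], F[10:], y[10:]
    phi = np.zeros(D)                           # context is reset for every task
    for _ in range(K):                          # inner loop: adapt ONLY phi
        err = predict(Fs, phi) - ys
        phi -= alpha * 2 * U.T @ (Fs.T @ err) / len(ys)
    err = predict(Fq, phi) - yq                 # outer loop: shared params only,
    g = 2 * Fq.T @ err / len(yq)                # treating the adapted phi as fixed
    w, U, b = w - beta * g, U - beta * np.outer(g, phi), b - beta * 2 * err.mean()

print("final query MSE:", (err ** 2).mean())
\end{verbatim}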
435 |
436 | \begin{figure}[h!]
437 | \centering
438 | \includegraphics[width=0.4\textwidth]{images/mc_tree.JPG}
439 | \caption{Part of the tech tree in Minecraft}
440 | \label{fig:mc_tree}
441 | \end{figure}
442 |
443 | \subsubsection{Tejas Kulkarni on Object-Centric Representations for RL}
444 |
445 | {\bf Point:} The hard problems of RL:
446 | \begin{enumerate}
447 | \item State estimation
448 |
449 | RL does not prescribe a detailed recipe for representing state. We either hand-specify or learn a state representation.
450 |
451 | \item Exploration
452 |
453 | And: how you explore depends on how you represent the world.
454 | \end{enumerate}
455 |
456 | This work: let's use self-supervised deep learning to learn object structure. \\
457 |
458 | Example: an individual blind from birth can still draw rough object structure, including {\it perspective}~\cite{kennedy2006blind} -- see Figure~\ref{fig:drawing}. \\
459 |
460 | \begin{figure}[h!]
461 | \centering
462 | \includegraphics[width=0.5\textwidth]{images/drawing.pdf}
463 | \caption{Drawings from~\citet{kennedy2006blind}}
464 | \label{fig:drawing}
465 | \end{figure}
466 |
467 | Objects in particular are a fundamental and important abstraction.\\
468 |
469 | Q: So, can we learn them?\\
470 |
471 | Three desired properties of an object-centric representation in physical domains:
472 | \begin{enumerate}
473 | \item Captures spatio-temporal features at the environment's degrees of freedom.
474 | \item Long-term temporal consistency.
475 | \item Captures the basic geometry of the environment.
476 | \end{enumerate}
477 |
478 | A: Yes! The ``Transporter" network (see Figure~\ref{fig:transporter}). \\
479 |
480 | \begin{figure}[h!]
481 | \centering
482 | \subfloat[Transporter Network]{\includegraphics[width=0.4\textwidth]{images/transporter.JPG}} \hspace{5mm}
483 | \subfloat[Object Classification]{\includegraphics[width=0.4\textwidth]{images/objects.JPG}}
484 | \caption{The Transporter network (left) maps a source to a target image via a compressed geometric representation; some candidate objects found (right) in different Atari games.}
485 | \label{fig:transporter}
486 | \end{figure}
487 |
488 | $\ra$ The Transporter network does a good job of capturing salient objects in image-based problems like Montezuma's Revenge. It is trained on a uniform random policy, and the result is a spatial map of where different objects are. \\
489 |
490 | But: the above approach only tracks moving objects, not stationary ones. So, we can pair it with instance-based segmentation for finding stationary objects. \\
491 |
492 | {\bf Next Stage:} Object-centric RL and planning in high-dimensional domains, building on the earlier work by~\citet{diuk2008object}. \\
493 |
494 | $\ra$ Key Problem 1: Structured exploration in the space of objects and relations. \\
495 |
496 | $\ra$ Key Problem 2: Generalization, in the form of self-generated tasks in this space.
497 |
498 | Object-centric HRL architectures~\cite{kulkarni2016hierarchical,dilokthanakul2019feature}. \\
499 |
500 | A major frontier: data-efficient RL in hard exploration settings like Montezuma's Revenge. Idea: systematically explore in the space of objects and relations.
501 |
502 | {\it Challenge Question:} In supervised learning, progress in representation learning and transfer learning for vision has been largely driven by ImageNet. NLP had its ``ImageNet" moment with GPT-2 and BERT. So, will there be an analogous ``ImageNet" moment in RL that allows us to learn general-purpose, data-driven priors?
\\
503 |
504 | A: Yeah, absolutely! I think we're almost there. Lots of folks are working in this direction; I think we are right around the corner.
505 |
506 |
507 | \subsubsection{Tim Lillicrap on Learning Models for Representations and Planning}
508 |
509 | Current state and limitations of deep RL:
510 | \begin{enumerate}
511 | \item We can now solve virtually any single task/problem for which we can:
512 | \begin{enumerate}
513 | \item Formally specify and query the reward function
514 | \item Explore sufficiently and collect lots of data
515 | \end{enumerate}
516 | \item What remains challenging:
517 | \begin{itemize}
518 | \item Learning when a reward function is difficult to specify
519 | \item Data efficiency and multi-task transfer learning
520 | \end{itemize}
521 | \end{enumerate}
522 |
523 | We measure outcomes via $R(\tau) = \sum_{t=0}^T \gamma^t r_t$, with the objective function:
524 | \begin{equation}
525 | J(\theta) = \int_\mathbb{T} p_\theta(\tau) R(\tau) d\tau.
526 | \end{equation}
527 |
528 | But: in model-free RL we tend to throw away what we know about the task in order to solve it. \\
529 |
530 | There is clear structure to introduce: plan with a model. \\
531 |
532 | $\ra$ Tricky! Getting this model is really hard. If we can get it, we know it can be really powerful (see AlphaZero~\cite{silver2018general}). \\
533 |
534 | {\bf Problem:} Planning with learned models is really hard. (Tim said it became a running joke to start up a model-based RL project at DeepMind in the last few years: no one expected it to work.) \\
535 |
536 | Idea: Hybrid model-free and model-based approaches. Augmenting previous algorithms with a learned model did in fact help on a goal-finding task. \\
537 |
538 | Planning with learned models: PETS~\cite{chua2018deep}, Deep Planning Network (PlaNet)~\cite{hafner2018learning}. \\
539 |
540 | Experiments: continuous control from image observations (finger, cheetah, cup, etc.). \\
541 |
542 | $\ra$ Some versions of this end up working well! Around 1k-2k episodes suffice to solve image-based MuJoCo problems. \\
543 |
544 | {\bf Conclusions:}
545 | \begin{itemize}
546 | \item Model-based algorithms hold the promise of addressing data-efficiency and transfer-learning limitations.
547 | \item We are beginning to develop working recipes that allow planning with models in unknown environments.
548 | \item The necessary and sufficient conditions for planning with learned models are unclear.
549 | \item Much work remains!
550 | \end{itemize}
551 |
552 |
553 | {\it Challenge Question:} What are the trade-offs in rolling value estimation and perception into the same architecture? \\
554 |
555 | Tim A: I don't know anyone who's systematically studied this kind of thing, but it's definitely important to study more. Some insights can be gathered from AlphaZero via Elo-rating analysis. Lots more to do!
556 |
557 | \subsubsection{Karthik Narasimhan on Task-Agnostic Priors for RL}
558 |
559 | Current state of RL: success of model-free RL approaches (see: Go, DOTA). \\
560 |
561 | $\ra$ All of these feats require huge amounts of time and samples (like 45,000 years of gameplay for DOTA). \\
562 |
563 | $\ra$ Little to no transfer of knowledge. \\
564 |
565 | Recent approaches:
566 | \begin{itemize}
567 | \item Multi-task policy learning
568 | \item Meta-learning
569 | \item Bayesian RL
570 | \item Successor Representations
571 | \end{itemize}
572 |
573 | Observation: all these approaches tend to learn policies, which are rigid and hard to transfer to other tasks.
\\
574 |
575 | Solution: model-based RL.\\
576 |
577 | $\ra$ Approach: bootstrap model learning with task-agnostic priors. The model is 1) more transferable, and 2) expensive to learn, but this can be made easier with priors. \\
578 |
579 | Q: Can there be a universal set of priors for RL? \\
580 |
581 | A: Look to how humans learn new tasks. These priors seem to come from 1) a notion of intuitive physics, and 2) language. \\
582 |
583 | {\bf Project 1:} Can we learn physics in a task-agnostic way? Moreover, can this physics prior help the sample efficiency of RL? \\
584 |
585 | There is lots of prior work in this area, but it is task-specific. \\
586 |
587 | $\ra$ This work: learn a physics prior from task-independent data, decoupling the model and the policy. \\
588 |
589 |
590 | Overview of the approach:
591 | \begin{itemize}
592 | \item Pre-train a frame predictor on physics videos.
593 | \item Initialize the dynamics model and use it to learn a policy that makes use of future state predictions.
594 | \item Simultaneously fine-tune the dynamics model on the target environments.
595 | \end{itemize}
596 | Two key operations: 1) isolation of the dynamics of each entity in the world, and 2) accurate modeling of local spaces around each entity. \\
597 |
598 | Experiments: PhysWorld and Atari -- in both cases, use videos containing demonstrations of the physics of the environment for pre-training the dynamics model (in Atari, pre-training is still done in PhysWorld). Results show the approach works very well, both at 10-step prediction and at helping sample efficiency in RL. \\
599 |
600 |
601 | {\bf Project 2:} Can we use language as a bridge to connect information we have about one domain to another, new domain? \\
602 |
603 | Overview of the approach:
604 | \begin{itemize}
605 | \item Treat language as a task-invariant and accessible medium.
606 | \item Goal: transfer a model of the environment using text descriptions.
607 |
608 | Example: ``Scorpions can chase you". We might be able to learn a model that places high probability on the scorpion moving closer to the agent's location.
609 |
610 | \item Main technique: transfer knowledge acquired from language to inform a prior on the dynamics model in a new environment.
611 | \end{itemize}
612 |
613 |
614 | Conclusions:
615 | \begin{enumerate}
616 | \item Model-based RL is sample efficient, but learning a model is expensive.
617 | \item Task-agnostic priors over models provide a solution for both sample efficiency and generalization.
618 | \item Two common priors applicable to a variety of tasks: classical mechanics and language.
619 | \end{enumerate}
620 |
621 |
622 | {\it Challenge Question:} Lots of success in deep RL. There's a new push into hybrid approaches like cross-domain reasoning, using knowledge from different tasks to aid learning, and so on. What are the greatest obstacles on the path to mid-level intelligence? \\
623 |
624 | Karthik A: I would emphasize the need for distributional robustness and transfer -- we need to look at agents that can transfer across similar domains. Some obstacles involve \ldots
625 |
626 |
627 | \subsubsection{Ben E., Lisa L., Jacob T. on Priors for Exploration and Robustness}
628 |
629 | Challenges in RL today:
630 | \begin{enumerate}
631 | \item Exploration
632 | \item Reward function design
633 | \item Generalization
634 | \item Safety
635 | \end{enumerate}
636 |
637 | $\ra$ Priors are a powerful tool for handling these challenges. \\
638 |
639 | Q: Can we learn useful priors? \\
640 |
641 | A: Yes! This work is about a general algorithm for learning priors.
The idea is to frame RL as a two-player game, with one player being an adversary that chooses a reward function. \\
642 |
643 | {\bf Project 1:} State marginal matching.\\
644 | $\ra$ The idea is to match the policy's state distribution to some target distribution: minimize the KL divergence between the state distributions of $\pi^*$ and $\pi$. \\
645 |
646 | Experiments: test the exploration and meta-learning capacity of the algorithm, on locomotion and manipulation tasks. Their approach works quite well. \\
647 |
648 | {\bf Project 2:} Priors for robust adaptation. \\
649 |
650 | $\ra$ RL with unknown rewards: assume we're given a distribution over reward functions. Then, sample a new reward function and optimize with respect to it. \\
651 |
652 | Main approach: compute the Bayes-optimal policy, and then perform regular RL.
653 |
654 |
655 |
656 | \subsubsection{Doina Precup on Temporal Abstraction}
657 |
658 | Guiding Q: How can we inject temporal abstraction into RL via options? \\
659 |
660 | $\ra$ Where do options come from? Often, from people (as in robotics). \\
661 |
662 | $\ra$ But what constitutes a good set of options? This is a {\it representation} discovery problem. \\
663 |
664 | Earlier approach: options should be good at optimizing returns, as in the option-critic~\cite{bacon2017option}. The option-critic learns option representations that yield fast in-task learning but also effective transfer across tasks. \\
665 |
666 | {\bf Point:} Length collapse occurs -- options ``dissolve" into primitive actions over time. \\
667 |
668 | {\bf Assumption:} executing a policy is cheap, while deciding what to do is expensive. So, we can use options with an explicit {\it deliberation cost} in mind~\cite{harb2018waiting}. \\
669 |
670 | That is, we can define a new value function based on the deliberation cost:
671 | \[
672 | Q(s, o) = c(s,o) + \sum_{s'} P(s' \mid s,o) \sum_{o'} \mu(o' \mid s') Q(s',o'),
673 | \]
674 | with $c(s,o)$ some cost of deliberation (a small numerical evaluation of this recursion appears at the end of this subsection). \\
675 |
676 |
677 | Experiments: on Atari, with and without the deliberation cost (as a regularizer). They indeed find that options take longer before terminating (which was the intended goal). \\
678 |
679 | Q: Should all option components optimize the same thing? (Should $\mc{I}, \beta, \pi$ all be geared toward maximizing rewards?) \\
680 |
681 | A: Based on the deliberation-cost work, one might think that aspects of the option should take these regularizers into account. See, for instance, the recent work by~\citet{harutyunyan2018learning}, or the termination critic~\cite{harutyunyan2019termination}.\\
682 |
683 | {\bf Idea:} Bottleneck states -- we might want options that take us to these bottlenecks. \\
684 |
685 | $\ra$ Drawback: expensive both in terms of sample size and computation. \\
686 |
687 | Discussion:
688 | \begin{itemize}
689 | \item Priors can be built into option construction via optimization criteria.
690 | \item The termination and internal policies of options could accomplish different goals.
691 | \item {\it **Biggest Open Question:} how should we empirically evaluate lifelong learning AI systems?
692 | \end{itemize}
693 |
694 | How do we assess the capability of a lifelong agent?
695 | \begin{enumerate}
696 | \item It's no longer a single task!
697 | \item Returns are important, but too simple.
698 | \item How well is the agent preserving and enhancing its knowledge?
699 | \end{enumerate}
700 |
701 | $\ra$ Proposal: hypothesis-driven evaluation of continual systems. That is, take inspiration from other fields (psychology, for instance).
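\dnote{A small numerical sketch of my own (not from the talk) evaluating the deliberation-cost recursion above on a made-up problem, with a fixed policy over options $\mu$ and a random transition model. I add a discount factor so the iteration converges; the equation as written omits it.}

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(3)

nS, nO, gamma = 5, 2, 0.95   # states, options, discount (my addition)

P = rng.dirichlet(np.ones(nS), size=(nS, nO))  # P[s, o, s'] transition probs
c = rng.uniform(0.0, 1.0, size=(nS, nO))       # c[s, o] deliberation costs
mu = rng.dirichlet(np.ones(nO), size=nS)       # mu[s, o] fixed policy over options

# Iterate Q(s,o) = c(s,o) + gamma * sum_{s'} P(s'|s,o) sum_{o'} mu(o'|s') Q(s',o')
Q = np.zeros((nS, nO))
for _ in range(10_000):
    V = (mu * Q).sum(axis=1)        # expected deliberation value under mu
    Q_new = c + gamma * (P @ V)     # batched over all (s, o) pairs
    delta = np.abs(Q_new - Q).max()
    Q = Q_new
    if delta < 1e-12:
        break

print(np.round(Q, 3))   # expected discounted deliberation cost per (s, o)
\end{verbatim}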
702 |
703 |
704 | {\it Challenge Question:} Lots of recent work applies deep RL to existing algorithms in HRL and option discovery. What has deep RL brought to the table? Does it fix all of the problems, or do we need some new paradigm shift? \\
705 |
706 | Doina A: Neural nets brought to HRL roughly what they brought to regular RL -- aspects of the feature discovery problem have effectively been solved. On one hand that's great, because we have a solution. On the other hand, we are still lacking methods that truly do knowledge discovery. Deep nets are not really an answer to that process. There's a temptation to take a deep net, throw it at a problem, and put some HRL objectives on top. Yes, that might work, but it doesn't lead to decomposable or modular knowledge. We've made lots of progress, but perhaps it is a good time for us to take a step back and do fancier things in terms of state abstraction and options.
707 |
708 | \subsubsection{Jane Wang on Learning of Structured, Causal Priors}
709 |
710 | {\bf Point:} Structured priors enable faster learning. \\
711 |
712 | $\ra$ {\it Causal} priors in particular can enable faster learning (by improving exploration, generalization, credit assignment, and so on). \\
713 |
714 | Causal reasoning is a rich field, so, some background:
715 | \ddef{Bayes Net}{A probabilistic graphical model that represents a set of variables and their conditional probability distributions in the form of a directed acyclic graph (DAG)}
716 |
717 | \ddef{Causal Bayes Net}{A Bayes net where arrows represent causal semantics}
718 |
719 | \ddef{Intervention}{Fixing the value of a variable, disconnecting it from its parents.}
720 |
721 | \ddef{Do-calculus}{A set of tools for making causal inferences given observational data}
722 |
723 | We can ask three levels of questions:
724 | \begin{enumerate}
725 | \item Association: are drinking wine and me having a headache related?
726 | \item Intervention: If I go drink wine, will I have a headache?
727 | \item Counterfactuals: go back in time and ask, what if I had drunk wine? Would I have a headache?
728 | \end{enumerate}
729 |
730 | \begin{figure}[h!]
731 | \centering
732 | \includegraphics[width=0.3\textwidth]{images/ladder.png}
733 | \caption{Pearl's ladder of causality}
734 | \label{fig:ladder_of_caus}
735 | \end{figure}
736 |
737 | Q: How does causal reasoning manifest in humans? \\
738 |
739 | A (babies): Babies less than a year old do not demonstrate causal knowledge, but do have a sense of physical continuity. \\
740 |
741 | A (2-year-olds): Can learn predictive relationships between events, but can't spontaneously make interventions based on causal understanding. \\
742 |
743 | A (3-4 year olds): Can infer causal maps from observing conditional dependencies. \\
744 |
745 | A (4-5 year olds): Can make informative, targeted interventions based on causal knowledge. \\
746 |
747 | A (adolescents): Strategies for causal learning continue to improve. \\
748 |
749 | A (adults): Evidence of an associative bias, a ``rich-get-richer" principle: one variable is judged more likely to be present if others in the causal model are also present. \\
750 |
751 | {\bf Overall:} the evidence suggests that children display the ability to perform causal inference from observation (roughly consistent with Bayesian inference). More sophisticated forms of reasoning (performing novel, informative interventions) come online later, as a result of experience. \\
752 |
753 | But: there are major deviations from rationality/good inference.
\\
754 |
755 | $\ra$ Reasons for the deviations:
756 | \begin{enumerate}
757 | \item Formal models of causal reasoning optimize different cost functions.
758 | \item Humans do not optimize for a specific causal graph, but a flexible one.
759 | \end{enumerate}
760 |
761 |
762 | {\it Takeaway:} A structured universe of tasks $\implies$ we should use structured priors. \\
763 |
764 | {\bf Idea:} Meta-learning of causally inspired priors. Similarly to previous talks, assume a distribution over tasks, and aim to learn priors that lead to sample-efficient learning in new tasks. \\
765 |
766 | Experiments: 1) learning from observations on causal networks (can the agent learn some causal structure of a DAG?); 2) learning from interventions; 3) learning from instance-specific info. \\
767 |
768 | {\it Challenge Question:} Why does deep RL seem to struggle with out-of-sample generalization compared to other domains? \\
769 |
770 | Jane A: In RL, there are lots of ways to be out of sample (in contrast to supervised learning), so it's much harder. Generalization is just harder because lots of things can change: state, action, policy, transition function, reward function, and so on. RL also requires an ongoing interaction with the environment -- the input is really dependent on the policy, so the input will change as you update the policy.
771 |
772 | \subsubsection{Panel: Matt, Jane, Doina, Sergey, Karthik, Tejas, Tim}
773 | \label{sec:panel}
774 | Q: What is the role of structure vs. data? \\
775 |
776 | Tim: The question is loaded. It depends on what you want to do: if you want to get good at a niche, specify more. If you want a general learning algorithm: specify less. \\
777 |
778 | Tejas: Yes! The article talks about search, but to me the big question is making domains ``computable" (simulatable). The article is misguided: where do the primitives come from? We can't rely on the data to give us primitives. We should radically add structure. There are certain truths we can and should exploit to search effectively (objects, agents, physics). \\
779 |
780 | Karthik: Humans have evolved many things over time that are key to our intelligence. It's definitely about having the right kind of inductive biases and structured priors to get things to work. \\
781 |
782 | Matt: I want to push back on that, because I hear it from psychologists. The assertion is often made that because babies have strong inductive biases, our agents must have inductive biases. But it's not obviously a constraint we need to fight in designing agents. I don't buy the argument that babies tell us that much. I love Rich Sutton, but I do think we have to start with some structure (like CNNs). There is also potentially a formal consideration: the abstractions we need to learn will require an arbitrarily small \ldots \\
783 |
784 | Doina: It's one thing to say we need only data, and another to say that we always have to learn from scratch. Right now we don't incorporate the right kind of structure that would let us avoid learning from scratch. We use some ideas (CNNs, gradient descent), but we want to avoid adding too much structure, too. \\
785 |
786 | Jane: One thing in Rich's essay (which I agree with about 80\% of) is that he said our brains/the environment are irredeemably complex. I don't agree with that. Neuroscience and cognitive science have been making great strides in understanding our brains. \\
787 |
788 | Sergey: This question is different because of a methodological issue. ML is a mathematical/philosophical/scientific field. So, we're good at some things, but not at all of them.
772 | \subsubsection{Panel: Matt, Jane, Doina, Sergey, Karthik, Tejas, Tim}
773 | \label{sec:panel}
774 | Q: What is the role of structure vs. data? \\
775 |
776 | Tim: The question is loaded. Depends on what you want to do: if you want to get good at a niche, specify more. If you want a general learning algorithm, specify less. \\
777 |
778 | Tejas: Yes! The article talks about search, but to me the big question is making domains ``computable'' (simulatable). The article is misguided: where do the primitives come from? Can't rely on the data to give you primitives. We should radically add structure. There are certain truths we can and should exploit to search effectively (objects, agents, physics). \\
779 |
780 | Karthik: Humans have evolved many things over time that are key to our intelligence. We definitely need the right kind of inductive biases and structured priors to get things to work. \\
781 |
782 | Matt: I want to push back on that, because I hear it from psychologists. The assertion is often made that because babies have strong inductive biases, our agents should have inductive biases too. But it's not obviously a constraint we need to fight in designing agents. I don't buy the argument that babies tell us that much. I love Rich Sutton, but I think we have to start with structure (like CNNs). Also potentially a formal consideration; the abstractions we need to learn will require an arbitrarily small. \\
783 |
784 | Doina: It's one thing to say we need only data, and another to say that we always have to learn from scratch. Right now we don't incorporate the right kinds of structure, so we have to learn from scratch. We use some ideas (CNNs, gradient descent), but we want to avoid adding too much structure, too. \\
785 |
786 | Jane: One thing in Rich's essay (which I agree with about 80\%) is that he said our brains/the environment are irredeemably complex. I don't agree with that. Neuroscience and cognitive science have been making great strides in understanding our brains. \\
787 |
788 | Sergey: This question is different because of a methodological issue. ML is a mathematical/philosophical/scientific field. So, we're good at some things, but not at others. We've been great at making computer vision systems, language systems, and so on. In RL we've mostly been focused on solving some problems, but that's been a proxy for a much grander vision that we hope will work. That's where the methodological flaw catches us. It's very easy to get improvement from bias in small problems. \\
789 |
790 | Doina: I think that's true in the rest of ML too (Sergey: I didn't want to offend everyone! \dnote{tongue in cheek :)}). In NLP, yes, we can do some tasks but can't really do all tasks. In all of ML, we make tasks that are examples that help us improve algorithms/models, but ultimately we need to go to a more complex setting. \\
791 |
792 | Matt: At the risk of engaging too much in the philosophical side of ML: we don't want to build in inductive biases that are too concentrated on one domain. In ML, we do tend to have a sense of the general domain we anticipate our agents being deployed in. We just have to make our choices about what ontology we want to buy into. \\
793 |
794 | \spacerule
795 |
796 | Q: We don't really know what tasks to train on in RL, especially in lifelong RL/multitask RL/meta RL. Any thoughts on defining the problem in a more precise way? \\
797 |
798 | Doina: Simulation is obviously necessary. Think about human learning: babies have parents, who have an important role. How do we build interesting and rich simulations that can be used for complex RL evaluation tasks? Well, one thing we can do is look more seriously at multi-modal data. \\
799 |
800 | Sergey: Important to think about what RL would look like as a data-driven field instead of a simulation-driven field. If we don't, we might be stuck in a regime where we don't think much about generalization and other core problems. We could think of RL tasks as starting with {\it data}, which is closer to the kinds of RL algorithms we might want that live in the real world. Kind of crazy to imagine an algorithm could learn a complex task {\it tabula rasa}. \\
801 |
802 | Tejas: I agree with everything that was said. Generalization only matters when your data is limited. One way to think about that is when agents can generate their own tasks. We should think of metrics and measurements that incentivize researchers, and platforms where agents can create lots of tasks and play in those tasks to learn more complex behaviors. \\
803 |
804 | Tim: Question for Sergey: approaching RL in a data-driven mode, how much of that needs to be done with real-world data vs. simulation? \\
805 |
806 | Sergey: I'll answer that with a question: what if we wanted to make a better image classifier? We do it with real data in vision because it's easier to make progress. So, in RL, it's easier too, because of the inherent complexity/diversity in the real data. \\
807 |
808 | Doina: Bit of a different problem because we need trajectories. Lots of trajectories. Trajectories generated by people might be very different, too. Might be really hard to understand/learn from whole trajectories. \\
809 |
810 | Sergey: Might not be that hard. Can collect lots of self-driving car data, grasping data, and so on, relatively easily. \\
811 |
812 | Jane: Tend to agree with you (Sergey). One question about real data: can we make guarantees about moving beyond that laboratory data? \\
813 |
814 | Sergey: That's where you need to be really careful. \\
815 |
816 | Tejas: From first principles, there's no reason we can't make a simulator that's indistinguishable in terms of graphics and physics.
Just a matter of time before we have a simulator that can replace real-world data. \\
817 |
818 | Sergey: Sure! But it might be really slow. Why wait? \\
819 |
820 | \spacerule
821 |
822 | Q: What's the main reason to use model-based vs. model-free learning? \\
823 |
824 | Karthik: Learning a model of the world can give you far more flexibility in what you can accomplish. In model-free learning, you tend to have just one policy/value function; it won't generalize well or transfer across tasks. Can cover roughly 90\% of things that can happen with a (good) model. \\
825 |
826 | Doina: Not so sure the distinction is salient. Models can be thought of as generalized value functions, where things become much more blurry. Might end up with model-based RL that is much less efficient, because learning the mapping from observation to observation is very hard. Contrast understanding the effects of your actions with understanding which other variables are important for the policy/value function: the latter might be much simpler than the former. Might need to rethink the nature of our models. We should build models that are small bricks. \\
827 |
828 | Matt: That resonates! Fascinated by how grey the distinction is between model-free and model-based. For a number of years in neuroscience this distinction was treated as very categorical. But we've realized it's much messier. We're focused on models that can maximize value, but the situation changes when we instead focus on agents that can transmit knowledge to other agents via teaching or communication. We likely need very different models for these kinds of tasks. \\
829 |
830 | Sergey: I think the two approaches are the same. They're similar, at least. RL is about making predictions (usually), but it's important to realize there's a lot of knowledge contained in some of the things agents predict, like the value function. (\dnote{Very clever dollar bill example Sergey used: imagine you wanted to predict the number of dollar bills in front of you, and maximize this value. To do this you basically need to model everything in the world.})
831 |
832 | Tejas: There are lots of domains that have nothing to do with rewards or reward maximization, like coming up with number theory or computability. \\
833 |
834 | Sergey: It's not the only way to develop knowledge about the world. But if there's something that has a meaningful effect on your environment, then you'll need to predict it in order to estimate/maximize your value function. \\
835 |
836 | Jane: One thing about learning these models just from interaction -- in the real world, things are not necessarily disentangled in the way you might like. \dnote{I missed the rest :(} \\
837 |
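\dnote{A minimal sketch of the contrast being discussed (my own illustration; the sizes and updates are assumptions): model-free RL updates a value estimate directly from each transition, while model-based RL fits $P$ and $R$ and then plans with them.}

\begin{verbatim}
import numpy as np

nS, nA, gamma, alpha = 5, 2, 0.9, 0.1
Q = np.zeros((nS, nA))            # model-free: direct value estimate
P_counts = np.ones((nS, nA, nS))  # model-based: transition counts
R_sum, R_n = np.zeros((nS, nA)), np.ones((nS, nA))

def model_free_update(s, a, r, s2):
    # Q-learning: back up the observed reward directly into Q.
    Q[s, a] += alpha * (r + gamma * Q[s2].max() - Q[s, a])

def model_based_update(s, a, r, s2, V):
    # First update the model...
    P_counts[s, a, s2] += 1
    R_sum[s, a] += r; R_n[s, a] += 1
    P = P_counts / P_counts.sum(-1, keepdims=True)
    R = R_sum / R_n
    # ...then plan with it (one Bellman backup over all states).
    return (R + gamma * P @ V).max(-1)

# Example: one transition updates both learners.
model_free_update(0, 1, 1.0, 2)
V = model_based_update(0, 1, 1.0, 2, np.zeros(nS))
\end{verbatim}

Swapping in a new $R$ while keeping the learned $P$ lets the model-based agent re-plan without gathering new experience -- the flexibility Karthik points to -- while the cost is that $P$ (observation to observation) may be much harder to learn than $Q$, which is Doina's worry. \\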
838 | \spacerule
839 |
840 | Q: Is there a general recipe for extracting understanding from human intelligence for RL? \\
841 |
842 | Matt: Meta-learning (mic drop). Look at the history of AI: the AI winter was partially caused by the failure of trying to encode all knowledge into our agents. We definitely want to avoid that when inserting the right structures. \\
843 |
844 | Tejas: Good inductive biases are ones that will never go away. The desiderata for those are that they are truths. How do we find truths? A few scientific ways to do that: notions of agency are true, objects are true. Put these kinds of truths into the agents' heads, and I think we'll have agents that do the right thing. \\
845 |
846 | Q: Is anything really ``True''? \\
847 |
848 | Tejas: Yes, otherwise it's just soup. There's no existence without it. An agent is an object with goals, and objects are true. We can try to learn these invariant truths. \\
849 |
850 | Doina: At the risk of circling back: one of the lessons that we have been learning over and over again is that putting too much stuff into the agent's head can be detrimental. See: AlphaGo. Put in a bit of knowledge, let it find its own knowledge from its own data. Lots of the structures we have in deep RL are useful, but some are probably not. \\
851 |
852 | Tejas: If you don't assume anything, then what? Can we assume a model of an agent? \\
853 |
854 | Sergey: Lots of things are true and obvious. It's okay to learn them from data. Maybe it's better, because then the agent knows how to connect those true things to other ones. Better for my students to figure some things out on their own, for instance. \\
855 |
856 | Tim: I tend toward the side of fewer inductive biases, and we'll tend to keep coming back to that. As we get more data, it might be easy to discover the essential inductive biases from the data we have around. Humans' and animals' genetic code doesn't contain that many bits sitting in our DNA that produce our brains/bodies. Rediscovering those inductive biases might not be that hard, so we should tend to add inductive biases only lightly. \\
857 |
858 | \spacerule
859 |
860 | \dnote{Running out of battery: missed the last question!}
Evidence from AI experts}, 22 | author={Grace, Katja and Salvatier, John and Dafoe, Allan and Zhang, Baobao and Evans, Owain}, 23 | journal={arXiv preprint arXiv:1705.08807}, 24 | year={2017} 25 | } 26 | @inproceedings{brown2017libratus, 27 | title={Libratus: the superhuman AI for no-limit poker}, 28 | author={Brown, Noam and Sandholm, Tuomas}, 29 | booktitle={Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence}, 30 | year={2017} 31 | } 32 | @article{moravvcik2017deepstack, 33 | title={Deepstack: Expert-level artificial intelligence in heads-up no-limit poker}, 34 | author={Morav{\v{c}}{\'\i}k, Matej and Schmid, Martin and Burch, Neil and Lis{\`y}, Viliam and Morrill, Dustin and Bard, Nolan and Davis, Trevor and Waugh, Kevin and Johanson, Michael and Bowling, Michael}, 35 | journal={Science}, 36 | volume={356}, 37 | number={6337}, 38 | pages={508--513}, 39 | year={2017}, 40 | publisher={American Association for the Advancement of Science} 41 | } 42 | @article{silver2016mastering, 43 | title={Mastering the game of Go with deep neural networks and tree search}, 44 | author={Silver, David and Huang, Aja and Maddison, Chris J and Guez, Arthur and Sifre, Laurent and Van Den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others}, 45 | journal={nature}, 46 | volume={529}, 47 | number={7587}, 48 | pages={484--489}, 49 | year={2016}, 50 | publisher={Nature Research} 51 | } 52 | @article{drucker1976pension, 53 | title={Pension fund socialism}, 54 | author={Drucker, Peter F}, 55 | journal={The Public Interest}, 56 | number={42}, 57 | pages={3}, 58 | year={1976}, 59 | publisher={National Affairs, inc.} 60 | } 61 | @article{campbell2002deep, 62 | title={Deep blue}, 63 | author={Campbell, Murray and Hoane Jr, A Joseph and Hsu, Feng-hsiung}, 64 | journal={Artificial intelligence}, 65 | volume={134}, 66 | number={1-2}, 67 | pages={57--83}, 68 | year={2002}, 69 | publisher={Elsevier} 70 | } 71 | @article{silver2017mastering, 72 | title={Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm}, 73 | author={Silver, David and Hubert, Thomas and Schrittwieser, Julian and Antonoglou, Ioannis and Lai, Matthew and Guez, Arthur and Lanctot, Marc and Sifre, Laurent and Kumaran, Dharshan and Graepel, Thore and others}, 74 | journal={arXiv preprint arXiv:1712.01815}, 75 | year={2017} 76 | } 77 | @article{rahwan2016moral, 78 | title={Moral Machine}, 79 | author={Rahwan, Iyad and Bonnefon, Jean-Francois and Shariff, Azim and others}, 80 | journal={Scalable Cooperation, MIT Media Lab. At: http://moralmachine. mit. 
edu (accessed 21 August 2016)}, 81 | year={2016} 82 | } 83 | @article{kim2018computational, 84 | title={A Computational Model of Commonsense Moral Decision Making}, 85 | author={Kim, Richard and Kleiman-Weiner, Max and Abeliuk, Andres and Awad, Edmond and Dsouza, Sohan and Tenenbaum, Josh and Rahwan, Iyad}, 86 | journal={arXiv preprint arXiv:1801.04346}, 87 | year={2018} 88 | } 89 | @article{lerer2017maintaining, 90 | title={Maintaining cooperation in complex social dilemmas using deep reinforcement learning}, 91 | author={Lerer, Adam and Peysakhovich, Alexander}, 92 | journal={arXiv preprint arXiv:1707.01068}, 93 | year={2017} 94 | } 95 | @article{cheng2015global, 96 | title={Global contrast based salient region detection}, 97 | author={Cheng, Ming-Ming and Mitra, Niloy J and Huang, Xiaolei and Torr, Philip HS and Hu, Shi-Min}, 98 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 99 | volume={37}, 100 | number={3}, 101 | pages={569--582}, 102 | year={2015}, 103 | publisher={IEEE} 104 | } 105 | @article{rittel1973dilemmas, 106 | title={Dilemmas in a general theory of planning}, 107 | author={Rittel, Horst WJ and Webber, Melvin M}, 108 | journal={Policy sciences}, 109 | volume={4}, 110 | number={2}, 111 | pages={155--169}, 112 | year={1973}, 113 | publisher={Springer} 114 | } 115 | @article{head2008wicked, 116 | title={Wicked problems in public policy}, 117 | author={Head, Brian W and others}, 118 | journal={Public policy}, 119 | volume={3}, 120 | number={2}, 121 | pages={101}, 122 | year={2008}, 123 | publisher={John Curtin Institute of Public Policy, Curtin University of Technology} 124 | } 125 | @article{camillus2008strategy, 126 | title={Strategy as a wicked problem}, 127 | author={Camillus, John C}, 128 | journal={Harvard business review}, 129 | volume={86}, 130 | number={5}, 131 | pages={98}, 132 | year={2008} 133 | } 134 | @inproceedings{levin2007playing, 135 | title={Playing it forward: Path dependency, progressive incrementalism, and the ‘Super Wicked’problem of global climate change}, 136 | author={Levin, Kelly and Cashore, Benjamin and Bernstein, Steven and Auld, Graeme}, 137 | booktitle={International Studies Association 48th Annual Convention. 
Chicago, February}, 138 | year={2007}, 139 | organization={Citeseer} 140 | } 141 | @book{pearl2009causality, 142 | title={Causality}, 143 | author={Pearl, Judea}, 144 | year={2009}, 145 | publisher={Cambridge university press} 146 | } 147 | @article{mcdermott1976artificial, 148 | title={Artificial intelligence meets natural stupidity}, 149 | author={McDermott, Drew}, 150 | journal={ACM SIGART Bulletin}, 151 | number={57}, 152 | pages={4--9}, 153 | year={1976}, 154 | publisher={ACM} 155 | } 156 | @article{lombrozo2006structure, 157 | title={The structure and function of explanations}, 158 | author={Lombrozo, Tania}, 159 | journal={Trends in cognitive sciences}, 160 | volume={10}, 161 | number={10}, 162 | pages={464--470}, 163 | year={2006}, 164 | publisher={Elsevier} 165 | } 166 | @article{grosz1996collaborative, 167 | title={Collaborative Systems (AAAI-94 Presidential Address)}, 168 | author={Grosz, Barbara J}, 169 | journal={AI magazine}, 170 | volume={17}, 171 | number={2}, 172 | pages={67}, 173 | year={1996} 174 | } 175 | @inproceedings{ciardo2002using, 176 | title={Using edge-valued decision diagrams for symbolic generation of shortest paths}, 177 | author={Ciardo, Gianfranco and Siminiceanu, Radu}, 178 | booktitle={International Conference on Formal Methods in Computer-Aided Design}, 179 | pages={256--273}, 180 | year={2002}, 181 | organization={Springer} 182 | } 183 | @inproceedings{ng1999policy, 184 | title={Policy invariance under reward transformations: Theory and application to reward shaping}, 185 | author={Ng, Andrew Y and Harada, Daishi and Russell, Stuart}, 186 | booktitle={ICML}, 187 | volume={99}, 188 | pages={278--287}, 189 | year={1999} 190 | } 191 | @article{de2017multi, 192 | title={Multi-step reinforcement learning: A unifying algorithm}, 193 | author={De Asis, Kristopher and Hernandez-Garcia, J Fernando and Holland, G Zacharias and Sutton, Richard S}, 194 | journal={arXiv preprint arXiv:1703.01327}, 195 | year={2017} 196 | } 197 | @article{henriques2015high, 198 | title={High-speed tracking with kernelized correlation filters}, 199 | author={Henriques, Jo{\~a}o F and Caseiro, Rui and Martins, Pedro and Batista, Jorge}, 200 | journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, 201 | volume={37}, 202 | number={3}, 203 | pages={583--596}, 204 | year={2015}, 205 | publisher={IEEE} 206 | } 207 | @article{sutton1999between, 208 | title={Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning}, 209 | author={Sutton, Richard S and Precup, Doina and Singh, Satinder}, 210 | journal={Artificial intelligence}, 211 | volume={112}, 212 | number={1-2}, 213 | pages={181--211}, 214 | year={1999}, 215 | publisher={Elsevier} 216 | } 217 | @inproceedings{bacon2017option, 218 | title={The Option-Critic Architecture.}, 219 | author={Bacon, Pierre-Luc and Harb, Jean and Precup, Doina}, 220 | booktitle={AAAI}, 221 | pages={1726--1734}, 222 | year={2017} 223 | } 224 | @article{simon1972theories, 225 | title={Theories of bounded rationality}, 226 | author={Simon, Herbert A}, 227 | journal={Decision and organization}, 228 | volume={1}, 229 | number={1}, 230 | pages={161--176}, 231 | year={1972} 232 | } 233 | @inproceedings{huang2015supporting, 234 | title={Supporting mental model accuracy in trigger-action programming}, 235 | author={Huang, Justin and Cakmak, Maya}, 236 | booktitle={Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing}, 237 | pages={215--225}, 238 | year={2015}, 239 | 
organization={ACM} 240 | } 241 | @inproceedings{subramanian2011learning, 242 | title={Learning options through human interaction}, 243 | author={Subramanian, Kaushik and Isbell, Charles and Thomaz, Andrea}, 244 | booktitle={2011 IJCAI Workshop on Agents Learning Interactively from Human Teachers (ALIHT)}, 245 | year={2011}, 246 | organization={Citeseer} 247 | } 248 | @article{knox2015framing, 249 | title={Framing reinforcement learning from human reward: Reward positivity, temporal discounting, episodicity, and performance}, 250 | author={Knox, W Bradley and Stone, Peter}, 251 | journal={Artificial Intelligence}, 252 | volume={225}, 253 | pages={24--50}, 254 | year={2015}, 255 | publisher={Elsevier} 256 | } 257 | @inproceedings{isbell2001social, 258 | title={A social reinforcement learning agent}, 259 | author={Isbell, Charles and Shelton, Christian R and Kearns, Michael and Singh, Satinder and Stone, Peter}, 260 | booktitle={Proceedings of the fifth international conference on Autonomous agents}, 261 | pages={377--384}, 262 | year={2001}, 263 | organization={ACM} 264 | } 265 | @inproceedings{loftin2014strategy, 266 | title={A Strategy-Aware Technique for Learning Behaviors from Discrete Human Feedback.}, 267 | author={Loftin, Robert Tyler and MacGlashan, James and Peng, Bei and Taylor, Matthew E and Littman, Michael L and Huang, Jeff and Roberts, David L}, 268 | booktitle={AAAI}, 269 | pages={937--943}, 270 | year={2014} 271 | } 272 | @inproceedings{taylor2007transfer, 273 | title={Transfer via inter-task mappings in policy search reinforcement learning}, 274 | author={Taylor, Matthew E and Whiteson, Shimon and Stone, Peter}, 275 | booktitle={Proceedings of the 6th international joint conference on Autonomous agents and multiagent systems}, 276 | pages={37}, 277 | year={2007}, 278 | organization={ACM} 279 | } 280 | @article{wang2017erm, 281 | title={On the ERM Principle with Networked Data}, 282 | author={Wang, Yuanhong and Wang, Yuyi and Liu, Xingwu and Pu, Juhua}, 283 | journal={arXiv preprint arXiv:1711.04297}, 284 | year={2017} 285 | } 286 | @inproceedings{abraham2010highway, 287 | title={Highway dimension, shortest paths, and provably efficient algorithms}, 288 | author={Abraham, Ittai and Fiat, Amos and Goldberg, Andrew V and Werneck, Renato F}, 289 | booktitle={Proceedings of the twenty-first annual ACM-SIAM symposium on Discrete Algorithms}, 290 | pages={782--793}, 291 | year={2010}, 292 | organization={Society for Industrial and Applied Mathematics} 293 | } 294 | @inproceedings{funke2011path, 295 | title={Path shapes: an alternative method for map matching and fully autonomous self-localization}, 296 | author={Funke, Stefan and Storandt, Sabine}, 297 | booktitle={Proceedings of the 19th ACM SIGSPATIAL International Conference on Advances in Geographic Information Systems}, 298 | pages={319--328}, 299 | year={2011}, 300 | organization={ACM} 301 | } 302 | @inproceedings{bast2007transit, 303 | title={In transit to constant time shortest-path queries in road networks}, 304 | author={Bast, Holger and Funke, Stefan and Matijevic, Domagoj and Sanders, Peter and Schultes, Dominik}, 305 | booktitle={Proceedings of the Meeting on Algorithm Engineering \& Expermiments}, 306 | pages={46--59}, 307 | year={2007}, 308 | organization={Society for Industrial and Applied Mathematics} 309 | } 310 | @inproceedings{ramirez2010probabilistic, 311 | title={Probabilistic plan recognition using off-the-shelf classical planners}, 312 | author={Ram{\i}rez, Miquel and Geffner, Hector}, 313 | 
booktitle={Proceedings of the Conference of the Association for the Advancement of Artificial Intelligence (AAAI 2010)}, 314 | pages={1121--1126}, 315 | year={2010} 316 | } 317 | @article{kaye2000minesweeper, 318 | title={Minesweeper is NP-complete}, 319 | author={Kaye, Richard}, 320 | journal={The Mathematical Intelligencer}, 321 | volume={22}, 322 | number={2}, 323 | pages={9--15}, 324 | year={2000}, 325 | publisher={Springer} 326 | } 327 | @inproceedings{newson2009hidden, 328 | title={Hidden Markov map matching through noise and sparseness}, 329 | author={Newson, Paul and Krumm, John}, 330 | booktitle={Proceedings of the 17th ACM SIGSPATIAL international conference on advances in geographic information systems}, 331 | pages={336--343}, 332 | year={2009}, 333 | organization={ACM} 334 | } 335 | @inproceedings{cheng2011taxisim, 336 | title={Taxisim: A multiagent simulation platform for evaluating taxi fleet operations}, 337 | author={Cheng, Shih-Fen and Nguyen, Thi Duong}, 338 | booktitle={Web Intelligence and Intelligent Agent Technology (WI-IAT), 2011 IEEE/WIC/ACM International Conference on}, 339 | volume={2}, 340 | pages={14--21}, 341 | year={2011}, 342 | organization={IEEE} 343 | } 344 | @article{foerster2017counterfactual, 345 | title={Counterfactual multi-agent policy gradients}, 346 | author={Foerster, Jakob and Farquhar, Gregory and Afouras, Triantafyllos and Nardelli, Nantas and Whiteson, Shimon}, 347 | journal={arXiv preprint arXiv:1705.08926}, 348 | year={2017} 349 | } 350 | @inproceedings{freedman2018adapting, 351 | title={Adapting a kidney exchange algorithm to align with human values}, 352 | author={Freedman, Rachel and Borg, Jana Schaich and Sinnott-Armstrong, Walter and Dickerson, John P and Conitzer, Vincent}, 353 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 354 | year={2018} 355 | } 356 | @inproceedings{marom2018belief, 357 | title={Belief Reward Shaping in Reinforcement Learning}, 358 | author={Marom, Ofir and Rosman, Benjamin}, 359 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 360 | year={2018} 361 | } 362 | @inproceedings{ribeiro2018anchors, 363 | title={Anchors: High-Precision Model-Agnostic Explanations}, 364 | author={Marco Tulio Ribeiro, Sameer Singh, Carlos Guestrin}, 365 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 366 | year={2018} 367 | } 368 | @inproceedings{li2018file, 369 | title={FILE: A Novel Framework for Predicting Social Status in Signed Networks}, 370 | author={Xiaoming Li, Hui Fang, Jie Zhang}, 371 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 372 | year={2018} 373 | } 374 | @inproceedings{freedman2018adapting, 375 | title={Adapting a kidney exchange algorithm to align with human values}, 376 | author={Freedman, Rachel and Borg, Jana Schaich and Sinnott-Armstrong, Walter and Dickerson, John P and Conitzer, Vincent}, 377 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 378 | year={2018} 379 | } 380 | @inproceedings{kahng2018ranking, 381 | title={Ranking wily people who rank each other}, 382 | author={Kahng, Anson and Kotturi, Yasmine and Kulkarni, Chinmay and Kurokawa, David and Procaccia, Ariel D}, 383 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 384 | year={2018}, 385 | } 386 | @inproceedings{fulton2018safe, 387 | title={Safe Reinforcement Learning via Formal Methods}, 388 | 
author={Fulton, Nathan and Platzer, Andr{\'e}}, 389 | year={2018}, 390 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 391 | } 392 | @inproceedings{hessel2018rainbow, 393 | title={Rainbow: Combining Improvements in Deep Reinforcement Learning}, 394 | author={Hessel, Matteo and Modayil, Joseph and Van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David}, 395 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 396 | year={2018} 397 | } 398 | @inproceedings{foerster2018counterfactual, 399 | title={Counterfactual multi-agent policy gradients}, 400 | author={Foerster, Jakob and Farquhar, Gregory and Afouras, Triantafyllos and Nardelli, Nantas and Whiteson, Shimon}, 401 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 402 | year={2018} 403 | } 404 | @inproceedings{gaspers18, 405 | title={Minesweeper with Limited Moves}, 406 | author={Serge Gaspers, Stefan Rümmele, Abdallah Saffidine, Kevin Tran}, 407 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 408 | year={2018} 409 | } 410 | @inproceedings{sohrabi2018ai, 411 | title={An AI Planning Solution to Scenario Generation for Enterprise Risk Management}, 412 | author={Sohrabi, Shirin and Riabov, Anton V and Katz, Michael and Udrea, Octavian}, 413 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 414 | year={2018} 415 | } 416 | @inproceedings{asai2018classical, 417 | title={Classical Planning in Deep Latent Space: Bridging the Subsymbolic-Symbolic Boundary}, 418 | author={Asai, Masataro and Fukunaga, Alex}, 419 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 420 | year={2018} 421 | } 422 | @inproceedings{song18risk, 423 | title={Risk-aware Proactive Scheduling via Conditional Value-at-Risk}, 424 | author={Wen Song, Donghun Kang, Jie Zhang, Hui Xi}, 425 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 426 | year={2018} 427 | } 428 | @inproceedings{blum2018sublin, 429 | title={Sublinear Search Spaces for Shortest Path Planning in Grid and Road Networks}, 430 | author={Johannes Blum, Stefan Funke, Sabine Storandt}, 431 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 432 | year={2018} 433 | } 434 | @inproceedings{blum2018algorithms, 435 | title={Algorithms for Generalized Topic Modeling}, 436 | author={Blum, Avrim and Haghtalab, Nika}, 437 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 438 | year={2018} 439 | } 440 | @inproceedings{ashtiani2018sample, 441 | title={Sample-Efficient Learning of Mixtures}, 442 | author={Ashtiani, Hassan and Ben-David, Shai and Mehrabian, Abbas}, 443 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 444 | year={2018} 445 | } 446 | @inproceedings{nguyen2018provable, 447 | title={A Provable Approach for Double-Sparse Coding}, 448 | author={Nguyen, Thanh and Wong, Raymond KW and Hegde, Chinmay}, 449 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 450 | year={2018} 451 | } 452 | @inproceedings{wang2018erm, 453 | title={On the ERM Principle with Networked Data}, 454 | author={Wang, Yuanhong and Wang, Yuyi and Liu, Xingwu and Pu, Juhua}, 455 | booktitle={Proceedings of the Thirty-Second AAAI 
Conference on Artificial Intelligence}, 456 | year={2018} 457 | } 458 | @inproceedings{harb2018waiting, 459 | title={When waiting is not an option: Learning options with a deliberation cost}, 460 | author={Harb, Jean and Bacon, Pierre-Luc and Klissarov, Martin and Precup, Doina}, 461 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 462 | year={2018} 463 | } 464 | @inproceedings{li2018optimal, 465 | title={An Optimal Online Method of Selecting Source Policies for Reinforcement Learning}, 466 | author={Li, Siyuan and Zhang, Chongjie}, 467 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 468 | year={2018} 469 | } 470 | @inproceedings{de2018multi, 471 | title={Multi-step reinforcement learning: A unifying algorithm}, 472 | author={De Asis, Kristopher and Hernandez-Garcia, J Fernando and Holland, G Zacharias and Sutton, Richard S}, 473 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 474 | year={2018} 475 | } 476 | @inproceedings{tavakoli2018action, 477 | title={Action Branching Architectures for Deep Reinforcement Learning}, 478 | author={Tavakoli, Arash and Pardo, Fabio and Kormushev, Petar}, 479 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 480 | year={2018} 481 | } 482 | @inproceedings{sharma2018phase, 483 | title={Phase-Parametric Policies for Reinforcement Learning in Cyclic Environments}, 484 | author={Sharma, Arjun and Kitani, Kris M}, 485 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 486 | year={2018} 487 | } 488 | @inproceedings{xinrun2018catching, 489 | title={Catching Captain Jack: Efficient Time and Space Dependent Patrols to Combat Oil-Siphoning in International Waters}, 490 | author={Xinrun Wang, Bo An and Strobel, Martin and Kong, Fookwai}, 491 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 492 | year={2018} 493 | } 494 | @inproceedings{tang2018multi, 495 | title={Multi-Entity Dependence Learning with Rich Context via Conditional Variational Auto-encoder}, 496 | author={Tang, Luming and Xue, Yexiang and Chen, Di and Gomes, Carla P}, 497 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 498 | year={2018} 499 | } 500 | @inproceedings{yang18poiss, 501 | title={A Poisson Gamma Probabilistic Model for Latent Node-group Memberships in Dynamic Networks}, 502 | author={Sikun Yang, Heinz Koeppl}, 503 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 504 | year={2018} 505 | } 506 | @inproceedings{wang2018kernel, 507 | title={Kernel Cross-Correlator}, 508 | author={Wang, Chen and Zhang, Le and Xie, Lihua and Yuan, Junsong}, 509 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 510 | year={2018} 511 | } 512 | @inproceedings{sharon2018traffic, 513 | title={Traffic Optimization For a Mixture of Self-interested and Compliant Agents}, 514 | author={Sharon, Guni and Albert, Michael and Rambha, Tarun and Boyles, Stephen and Stone, Peter}, 515 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 516 | year={2018} 517 | } 518 | @inproceedings{kumar2018load, 519 | title={Load Scheduling of Simple Temporal Networks Under Dynamic Resource Pricing}, 520 | author={Kumar, TK Satish and Wang, Zhi and Craig, Anoop Kumar Craig Milo Rogers and Knoblock, A}, 521 | booktitle={Proceedings of the 
Thirty-Second AAAI Conference on Artificial Intelligence}, 522 | year={2018} 523 | } 524 | @inproceedings{liu2017change, 525 | title={A Change-Detection based Framework for Piecewise-stationary Multi-Armed Bandit Problem}, 526 | author={Liu, Fang and Lee, Joohyun and Shroff, Ness}, 527 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 528 | year={2018} 529 | } 530 | @inproceedings{benavent2018experimental, 531 | title={An Experimental Study of Advice in Sequential Decision-Making under Uncertainty}, 532 | author={Benavent, Florian and Zanuttini, Bruno}, 533 | booktitle={32nd AAAI Conference on Artificial Intelligence}, 534 | year={2018} 535 | } 536 | @inproceedings{paul2018alternating, 537 | title={Alternating Optimisation and Quadrature for Robust Control}, 538 | author={Paul, Supratik and Chatzilygeroudis, Konstantinos and Ciosek, Kamil and Mouret, Jean-Baptiste and Osborne, Michael and Whiteson, Shimon and Ivaldi, Serena and Ugurlu, Barkan}, 539 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 540 | year={2018} 541 | } 542 | @inproceedings{jiang2018pac, 543 | title={PAC Reinforcement Learning with an Imperfect Model}, 544 | author={Jiang, Nan}, 545 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 546 | year={2018} 547 | } 548 | @inproceedings{harutyunyan2018learning, 549 | title={Learning with options that terminate off-policy}, 550 | author={Harutyunyan, Anna and Vrancx, Peter and Bacon, Pierre-Luc and Precup, Doina and Nowe, Ann}, 551 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 552 | year={2018} 553 | } 554 | @inproceedings{brown18irl, 555 | title={Efficient Probabilistic Performance Bounds for Inverse Reinforcement Learning}, 556 | author={Daniel Brown, Scott Niekum}, 557 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 558 | year={2018} 559 | } 560 | @inproceedings{mattmueller18, 561 | title={On the Relationship Between State-Dependent Action Costs and Conditional Effects in Planning}, 562 | author={Robert Mattmueller, Florian Geißer, Benedict Wright, Bernhard Nebel}, 563 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 564 | year={2018} 565 | } 566 | @inproceedings{hooker18toward, 567 | title={Toward Non-Intuition-Based Machine Ethics}, 568 | author={John Hooker and Tae Wan Kim}, 569 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 570 | year={2018} 571 | } 572 | @inproceedings{larosa18trust, 573 | title={Impacts on Trust of Healthcare AI}, 574 | author={Emily Larosa and David Danks}, 575 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 576 | year={2018} 577 | } 578 | @inproceedings{erdelyi18, 579 | title={Regulating Artificial Intelligence Proposal for a Global Solution}, 580 | author={Olivia J. 
Erdelyi, Judy Goldsmith.}, 581 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 582 | year={2018} 583 | } 584 | @inproceedings{scheessele18frame, 585 | title={A framework for grounding the moral status of intelligent machines}, 586 | author={Michael Scheessele}, 587 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 588 | year={2018} 589 | } 590 | @inproceedings{estrada18val, 591 | title={Value Alignment, Fair Play, and the Rights of Service Robots}, 592 | author={Daniel Estrada}, 593 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 594 | year={2018} 595 | } 596 | @inproceedings{kramer18when, 597 | title={When Do People Want AI to Make Decisions?}, 598 | author={Max Kramer, Jana Schaich Borg, Vincent Conitzer and Walter Sinnott-Armstrong}, 599 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 600 | year={2018} 601 | } 602 | @inproceedings{gruze18wicked, 603 | title={Rethinking AI Strategy and Policy as Entangled Super Wicked Problems}, 604 | author={Ross Gruetzemacher}, 605 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 606 | year={2018} 607 | } 608 | @inproceedings{london18reg, 609 | title={Regulating Autonomous Vehicles: A Policy Proposal}, 610 | author={Alex John London and David Danks}, 611 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 612 | year={2018} 613 | } 614 | @inproceedings{iyer18explan, 615 | title={Transparency and Explanation in Deep Reinforcement Learning Neural Networks}, 616 | author={Rahul Iyer, Yuezhang Li, Huao Li, Michael Lewis, Ramitha Sundar and Katia Sycara} 617 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 618 | year={2018} 619 | } 620 | 621 | @inproceedings{sun18shape, 622 | title={Designing Non-greedy Reinforcement Learning Agents with Diminishing Reward Shaping}, 623 | author={Fan-Yun Sun, Yen-Yu Chang, Yueh-Hua Wu and Shou-De Lin}, 624 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 625 | year={2018} 626 | } 627 | @inproceedings{kim18comp, 628 | title={A Computational Model of Commonsense Moral Decision Making}, 629 | author={Richard Kim, Max Kleiman-Weiner, Andres Abeliuk, Edmond Awad, Sohan Dsouza, Josh Tenenbaum and Iyad Rahwan}, 630 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 631 | year={2018} 632 | } 633 | @inproceedings{vanderelst18, 634 | title={The Dark Side of Ethical Robots}, 635 | author={Dieter Vanderelst and Alan Winfield}, 636 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 637 | year={2018} 638 | } 639 | @inproceedings{henderson18ethics, 640 | title={Ethical Challenges in Data-Driven Dialogue Systems}, 641 | author={Peter Henderson, Koustuv Sinha, Nicolas Angelard-Gontier, Nan Rosemary Ke, Genevieve Fried, Ryan Lowe, Joelle Pineau}, 642 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 643 | year={2018} 644 | } 645 | @inproceedings{eicher18watson, 646 | title={Jill Watson Doesn’t Care if You’re Pregnant: 647 | Grounding AI Ethics in Empirical Studies}, 648 | author={Bobbie Eicher, Lalith Polepeddi, Ashok Goel}, 649 | 
booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 650 | year={2018} 651 | } 652 | @inproceedings{cave18risk, 653 | title={An AI Race for Strategic Advantage: Rhetoric and Risks}, 654 | author={Stephen Cave and Seán S ÓhÉigeartaigh}, 655 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 656 | year={2018} 657 | } 658 | @inproceedings{wallach2018agile, 659 | title={An Agile Ethical/Legal Model for the International and National Governance of AI and Robotics}, 660 | authors={Wendell Wallach and Gary E. Marchant}, 661 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 662 | year={2018} 663 | } 664 | @inproceedings{maas18reg, 665 | title={Regulating for ‘normal AI accidents’— Operational lessons for the responsible governance of AI deployment}, 666 | author={Matthijs M. Maas}, 667 | booktitle={Proceedings of the First AAAI/ACM Conference on Artificial Intelligence, Ethics & Society}, 668 | year={2018} 669 | } 670 | @inproceedings{katzsemi2018, 671 | title={Semi-Black Box: Rapid Development of Planning Based Solutions}, 672 | author={Michael Katz*, Dany Moshkovich, Erez Karpas}, 673 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence}, 674 | year={2018} 675 | } 676 | 677 | @article{correa2018generalized, 678 | title={Generalized Adjustment Under Confounding and Selection Biases}, 679 | author={Correa, Juan D and Tian, Jin and Bareinboim, Elias}, 680 | year={2018} 681 | } 682 | @inproceedings{xiaommc2018, 683 | title={Memory-Augmented Monte Carlo Tree Search}, 684 | author={Chenjun Xiao and Jincheng Mei and Martin Muller}, 685 | year={2018}, 686 | booktitle={Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence} 687 | } 688 | @inproceedings{abel2018solar, 689 | title={Bandit-Based Solar Panel Control}, 690 | author={David Abel and Edward C. Willams and Stephen Brawner and Emily Reif and Michael L. 
Littman}, 691 | booktitle={Proceedings of the Thirtieth Conference on Innovative Applications of Artificial Intelligence}, 692 | year={2018} 693 | } 694 | @inproceedings{shah2018indi, 695 | title={A water demand prediction model for Central Indiana}, 696 | author={Setu Shah, Mahmood Hosseini, Zina Ben-Miled, Rebecca Shafer and Steve Berube}, 697 | booktitle={Proceedings of the Thirtieth Conference on Innovative Applications of Artificial Intelligence}, 698 | year={2018} 699 | } 700 | @inproceedings{Hoshino2018, 701 | title={Optimal Pricing for Distance-Based Transit Fares}, 702 | author={Richard Hoshino and Jeneva Beairsto}, 703 | booktitle={Proceedings of the Thirtieth Conference on Innovative Applications of Artificial Intelligence}, 704 | year={2018} 705 | } 706 | @inproceedings{shekhar2018, 707 | title={Upping the Game of Taxi Driving in the Age of Uber}, 708 | author={Shashi Shekhar Jha, Shih-Fen Cheng, Meghna Lowalekar, Nicholas Wong Wai Hin, Rishikeshan Rajendram, Tran Trong Khiem, Pradeep Varakantham, Truong Trong Nghia and Firmansyah Rahman}, 709 | booktitle={Proceedings of the Thirtieth Conference on Innovative Applications of Artificial Intelligence}, 710 | year={2018} 711 | } 712 | @inproceedings{dwork2012fairness, 713 | title={Fairness through awareness}, 714 | author={Dwork, Cynthia and Hardt, Moritz and Pitassi, Toniann and Reingold, Omer and Zemel, Richard}, 715 | booktitle={Proceedings of the 3rd innovations in theoretical computer science conference}, 716 | pages={214--226}, 717 | year={2012}, 718 | organization={ACM} 719 | } 720 | @article{chouldechova2017fair, 721 | title={Fair prediction with disparate impact: A study of bias in recidivism prediction instruments}, 722 | author={Chouldechova, Alexandra}, 723 | journal={Big data}, 724 | volume={5}, 725 | number={2}, 726 | pages={153--163}, 727 | year={2017}, 728 | publisher={Mary Ann Liebert, Inc. 
140 Huguenot Street, 3rd Floor New Rochelle, NY 10801 USA} 729 | } 730 | @article{kleinberg2016inherent, 731 | title={Inherent trade-offs in the fair determination of risk scores}, 732 | author={Kleinberg, Jon and Mullainathan, Sendhil and Raghavan, Manish}, 733 | journal={arXiv preprint arXiv:1609.05807}, 734 | year={2016} 735 | } 736 | @inproceedings{zemel2013learning, 737 | title={Learning fair representations}, 738 | author={Zemel, Rich and Wu, Yu and Swersky, Kevin and Pitassi, Toni and Dwork, Cynthia}, 739 | booktitle={International Conference on Machine Learning}, 740 | pages={325--333}, 741 | year={2013} 742 | } 743 | @article{antoniou2017data, 744 | title={Data Augmentation Generative Adversarial Networks}, 745 | author={Antoniou, Antreas and Storkey, Amos and Edwards, Harrison}, 746 | journal={arXiv preprint arXiv:1711.04340}, 747 | year={2017} 748 | } 749 | @incollection{williams1992simple, 750 | title={Simple statistical gradient-following algorithms for connectionist reinforcement learning}, 751 | author={Williams, Ronald J}, 752 | booktitle={Reinforcement Learning}, 753 | pages={5--32}, 754 | year={1992}, 755 | publisher={Springer} 756 | } 757 | @article{wang2015dueling, 758 | title={Dueling network architectures for deep reinforcement learning}, 759 | author={Wang, Ziyu and Schaul, Tom and Hessel, Matteo and Van Hasselt, Hado and Lanctot, Marc and De Freitas, Nando}, 760 | journal={arXiv preprint arXiv:1511.06581}, 761 | year={2015} 762 | } 763 | @inproceedings{hasselt2010double, 764 | title={Double Q-learning}, 765 | author={Hasselt, Hado V}, 766 | booktitle={Advances in Neural Information Processing Systems}, 767 | pages={2613--2621}, 768 | year={2010} 769 | } 770 | @article{schaul2015prioritized, 771 | title={Prioritized experience replay}, 772 | author={Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David}, 773 | journal={arXiv preprint arXiv:1511.05952}, 774 | year={2015} 775 | } 776 | @article{fortunato2017noisy, 777 | title={Noisy networks for exploration}, 778 | author={Fortunato, Meire and Azar, Mohammad Gheshlaghi and Piot, Bilal and Menick, Jacob and Osband, Ian and Graves, Alex and Mnih, Vlad and Munos, Remi and Hassabis, Demis and Pietquin, Olivier and others}, 779 | journal={arXiv preprint arXiv:1706.10295}, 780 | year={2017} 781 | } 782 | @article{bellemare2017distributional, 783 | title={A distributional perspective on reinforcement learning}, 784 | author={Bellemare, Marc G and Dabney, Will and Munos, R{\'e}mi}, 785 | journal={arXiv preprint arXiv:1707.06887}, 786 | year={2017} 787 | } 788 | @article{sutton1988learning, 789 | title={Learning to predict by the methods of temporal differences}, 790 | author={Sutton, Richard S}, 791 | journal={Machine learning}, 792 | volume={3}, 793 | number={1}, 794 | pages={9--44}, 795 | year={1988}, 796 | publisher={Springer} 797 | } 798 | @inproceedings{mnih2016asynchronous, 799 | title={Asynchronous methods for deep reinforcement learning}, 800 | author={Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray}, 801 | booktitle={International Conference on Machine Learning}, 802 | pages={1928--1937}, 803 | year={2016} 804 | } 805 | @article{eykholt2017note, 806 | title={Note on Attacking Object Detectors with Adversarial Stickers}, 807 | author={Eykholt, Kevin and Evtimov, Ivan and Fernandes, Earlence and Li, Bo and Song, Dawn and Kohno, Tadayoshi and Rahmati, Amir and Prakash, Atul and Tramer, Florian}, 
808 | journal={arXiv preprint arXiv:1712.08062}, 809 | year={2017} 810 | } 811 | 812 | @article{goodfellow2014explaining, 813 | title={Explaining and harnessing adversarial examples}, 814 | author={Goodfellow, Ian J and Shlens, Jonathon and Szegedy, Christian}, 815 | journal={arXiv preprint arXiv:1412.6572}, 816 | year={2014} 817 | } 818 | @article{kolter2017provable, 819 | title={Provable defenses against adversarial examples via the convex outer adversarial polytope}, 820 | author={Kolter, J Zico and Wong, Eric}, 821 | journal={arXiv preprint arXiv:1711.00851}, 822 | year={2017} 823 | } 824 | @article{roth2004kidney, 825 | title={Kidney exchange}, 826 | author={Roth, Alvin E and S{\"o}nmez, Tayfun and {\"U}nver, M Utku}, 827 | journal={The Quarterly Journal of Economics}, 828 | volume={119}, 829 | number={2}, 830 | pages={457--488}, 831 | year={2004}, 832 | publisher={MIT Press} 833 | } 834 | @inproceedings{abraham2007clearing, 835 | title={Clearing algorithms for barter exchange markets: Enabling nationwide kidney exchanges}, 836 | author={Abraham, David J and Blum, Avrim and Sandholm, Tuomas}, 837 | booktitle={Proceedings of the 8th ACM conference on Electronic commerce}, 838 | pages={295--304}, 839 | year={2007}, 840 | organization={ACM} 841 | } 842 | @article{mcdermott1998pddl, 843 | title={PDDL-the planning domain definition language}, 844 | author={McDermott, Drew and Ghallab, Malik and Howe, Adele and Knoblock, Craig and Ram, Ashwin and Veloso, Manuela and Weld, Daniel and Wilkins, David}, 845 | year={1998} 846 | } 847 | @article{asai2017classical, 848 | title={Classical Planning in Deep Latent Space: Bridging the Subsymbolic-Symbolic Boundary}, 849 | author={Asai, Masataro and Fukunaga, Alex}, 850 | journal={arXiv preprint arXiv:1705.00154}, 851 | year={2017} 852 | } 853 | @article{zou2005regularization, 854 | title={Regularization and variable selection via the elastic net}, 855 | author={Zou, Hui and Hastie, Trevor}, 856 | journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)}, 857 | volume={67}, 858 | number={2}, 859 | pages={301--320}, 860 | year={2005}, 861 | publisher={Wiley Online Library} 862 | } 863 | @inproceedings{pineda2014planning, 864 | title={Planning Under Uncertainty Using Reduced Models: Revisiting Determinization.}, 865 | author={Pineda, Luis Enrique and Zilberstein, Shlomo}, 866 | booktitle={ICAPS}, 867 | year={2014} 868 | } 869 | @inproceedings{yoon2007ff, 870 | title={FF-Replan: A Baseline for Probabilistic Planning.}, 871 | author={Yoon, Sung Wook and Fern, Alan and Givan, Robert}, 872 | booktitle={ICAPS}, 873 | volume={7}, 874 | pages={352--359}, 875 | year={2007} 876 | } 877 | @inproceedings{keren2015goal, 878 | title={Goal Recognition Design for Non-Optimal Agents.}, 879 | author={Keren, Sarah and Gal, Avigdor and Karpas, Erez}, 880 | booktitle={AAAI}, 881 | pages={3298--3304}, 882 | year={2015} 883 | } 884 | @article{eisenberg2009settlement, 885 | title={What is the settlement rate and why should we care?}, 886 | author={Eisenberg, Theodore and Lanvers, Charlotte}, 887 | journal={Journal of Empirical Legal Studies}, 888 | volume={6}, 889 | number={1}, 890 | pages={111--146}, 891 | year={2009}, 892 | publisher={Wiley Online Library} 893 | } 894 | @inproceedings{mell2016iago, 895 | title={IAGO: interactive arbitration guide online}, 896 | author={Mell, Johnathan and Gratch, Jonathan}, 897 | booktitle={Proceedings of the 2016 International Conference on Autonomous Agents \& Multiagent Systems}, 898 | pages={1510--1512}, 899 
| year={2016}, 900 | organization={International Foundation for Autonomous Agents and Multiagent Systems} 901 | } 902 | @article{sharon2012meta, 903 | title={Meta-Agent Conflict-Based Search For Optimal Multi-Agent Path Finding.}, 904 | author={Sharon, Guni and Stern, Roni and Felner, Ariel and Sturtevant, Nathan R}, 905 | journal={SoCS}, 906 | volume={1}, 907 | pages={39--40}, 908 | year={2012} 909 | } 910 | @inproceedings{park2017telling, 911 | title={Telling stories to robots: The effect of backchanneling on a child's storytelling}, 912 | author={Park, Hae Won and Gelsomini, Mirko and Lee, Jin Joo and Breazeal, Cynthia}, 913 | booktitle={Proceedings of the 2017 ACM/IEEE international conference on human-robot interaction}, 914 | pages={100--108}, 915 | year={2017}, 916 | organization={ACM} 917 | } 918 | @inproceedings{westlund2018measuring, 919 | title={Measuring young children's long-term relationships with social robots}, 920 | author={Westlund, Jacqueline M Kory and Park, Hae Won and Williams, Randi and Breazeal, Cynthia}, 921 | booktitle={Proceedings of the 17th ACM Conference on Interaction Design and Children}, 922 | pages={207--218}, 923 | year={2018}, 924 | organization={ACM} 925 | } 926 | @article{juba2017precision, 927 | title={Precision-Recall versus Accuracy and the Role of Large Data Sets}, 928 | author={Juba, Brendan and Le, Hai S}, 929 | journal={AAAI}, 930 | year={2019} 931 | } 932 | @inproceedings{liu2019nearneighbor, 933 | title={Nearneighbor methods in random preference completion}, 934 | author={Liu, Ao and Wu, Qiong and Zhenming, L and Xia, Lirong}, 935 | booktitle={Proceedings of 33rd AAAI Conference on Artifical Intelligence (AAAI-19)}, 936 | year={2019} 937 | } 938 | @article{katz2017nonparametric, 939 | title={Nonparametric Preference Completion}, 940 | author={Katz-Samuels, Julian and Scott, Clayton}, 941 | journal={arXiv preprint arXiv:1705.08621}, 942 | year={2017} 943 | } 944 | @article{kaban2019dimension, 945 | title={Dimension-Free Error Bounds from Random Projections}, 946 | author={Kab{\'a}n, Ata}, 947 | journal={AAAI}, 948 | year={2019} 949 | } 950 | @article{karras2017progressive, 951 | title={Progressive growing of gans for improved quality, stability, and variation}, 952 | author={Karras, Tero and Aila, Timo and Laine, Samuli and Lehtinen, Jaakko}, 953 | journal={arXiv preprint arXiv:1710.10196}, 954 | year={2017} 955 | } 956 | @inproceedings{goodfellow2014generative, 957 | title={Generative adversarial nets}, 958 | author={Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua}, 959 | booktitle={Advances in neural information processing systems}, 960 | pages={2672--2680}, 961 | year={2014} 962 | } 963 | @article{zhu2017unpaired, 964 | title={Unpaired image-to-image translation using cycle-consistent adversarial networks}, 965 | author={Zhu, Jun-Yan and Park, Taesung and Isola, Phillip and Efros, Alexei A}, 966 | journal={arXiv preprint}, 967 | year={2017} 968 | } 969 | @inproceedings{wang2018non, 970 | title={Non-local neural networks}, 971 | author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming}, 972 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 973 | pages={7794--7803}, 974 | year={2018} 975 | } 976 | @article{brock2018large, 977 | title={Large scale gan training for high fidelity natural image synthesis}, 978 | author={Brock, Andrew and Donahue, Jeff and Simonyan, Karen}, 979 | 
journal={arXiv preprint arXiv:1809.11096}, 980 | year={2018} 981 | } 982 | @article{ganin2018synthesizing, 983 | title={Synthesizing Programs for Images using Reinforced Adversarial Learning}, 984 | author={Ganin, Yaroslav and Kulkarni, Tejas and Babuschkin, Igor and Eslami, SM and Vinyals, Oriol}, 985 | journal={arXiv preprint arXiv:1804.01118}, 986 | year={2018} 987 | } 988 | @article{samuel1959some, 989 | title={Some studies in machine learning using the game of checkers}, 990 | author={Samuel, Arthur L}, 991 | journal={IBM Journal of research and development}, 992 | volume={3}, 993 | number={3}, 994 | pages={210--229}, 995 | year={1959}, 996 | publisher={IBM} 997 | } 998 | @article{miyato2018virtual, 999 | title={Virtual adversarial training: a regularization method for supervised and semi-supervised learning}, 1000 | author={Miyato, Takeru and Maeda, Shin-ichi and Ishii, Shin and Koyama, Masanori}, 1001 | journal={IEEE transactions on pattern analysis and machine intelligence}, 1002 | year={2018}, 1003 | publisher={IEEE} 1004 | } 1005 | @article{ganin2016domain, 1006 | title={Domain-adversarial training of neural networks}, 1007 | author={Ganin, Yaroslav and Ustinova, Evgeniya and Ajakan, Hana and Germain, Pascal and Larochelle, Hugo and Laviolette, Fran{\c{c}}ois and Marchand, Mario and Lempitsky, Victor}, 1008 | journal={The Journal of Machine Learning Research}, 1009 | volume={17}, 1010 | number={1}, 1011 | pages={2096--2030}, 1012 | year={2016}, 1013 | publisher={JMLR. org} 1014 | } 1015 | @article{edwards2015censoring, 1016 | title={Censoring representations with an adversary}, 1017 | author={Edwards, Harrison and Storkey, Amos}, 1018 | journal={arXiv preprint arXiv:1511.05897}, 1019 | year={2015} 1020 | } 1021 | @article{shi2018virtual, 1022 | title={Virtual-Taobao: Virtualizing Real-world Online Retail Environment for Reinforcement Learning}, 1023 | author={Shi, Jing-Cheng and Yu, Yang and Da, Qing and Chen, Shi-Yong and Zeng, An-Xiang}, 1024 | journal={arXiv preprint arXiv:1805.10000}, 1025 | year={2018} 1026 | } 1027 | @article{zhang2018quota, 1028 | title={QUOTA: The Quantile Option Architecture for Reinforcement Learning}, 1029 | author={Zhang, Shangtong and Mavrin, Borislav and Yao, Hengshuai and Kong, Linglong and Liu, Bo}, 1030 | journal={arXiv preprint arXiv:1811.02073}, 1031 | year={2018} 1032 | } 1033 | @article{franccois2018combined, 1034 | title={Combined Reinforcement Learning via Abstract Representations}, 1035 | author={Fran{\c{c}}ois-Lavet, Vincent and Bengio, Yoshua and Precup, Doina and Pineau, Joelle}, 1036 | journal={arXiv preprint arXiv:1809.04506}, 1037 | year={2018} 1038 | } 1039 | @article{he2018detecting, 1040 | title={Detecting Vehicle Illegal Parking Events using Sharing Bikes’ Trajectories}, 1041 | author={He, Tianfu and Bao, Jie and Li, Ruiyuan and Ruan, Sijie and Li, Yanhua and Tian, Chao and Zheng, Yu}, 1042 | journal={KDD}, 1043 | year={2018} 1044 | } 1045 | @inproceedings{zhang2017deep, 1046 | title={Deep Spatio-Temporal Residual Networks for Citywide Crowd Flows Prediction.}, 1047 | author={Zhang, Junbo and Zheng, Yu and Qi, Dekang}, 1048 | booktitle={AAAI}, 1049 | pages={1655--1661}, 1050 | year={2017} 1051 | } 1052 | @inproceedings{zheng2013u, 1053 | title={U-Air: When urban air quality inference meets big data}, 1054 | author={Zheng, Yu and Liu, Furui and Hsieh, Hsun-Ping}, 1055 | booktitle={Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining}, 1056 | pages={1436--1444}, 1057 | year={2013}, 1058 
| organization={ACM} 1059 | } 1060 | @article{chakraborty2019testing, 1061 | title={On Testing of Uniform Samplers}, 1062 | author={Chakraborty, Sourav and Meel, Kuldeep S}, 1063 | journal={AAAI}, 1064 | year={2019} 1065 | } 1066 | @article{pitis2019rethinking, 1067 | title={Rethinking the Discount Factor in Reinforcement Learning: A Decision Theoretic Approach}, 1068 | author={Pitis, Silviu}, 1069 | journal={AAAI}, 1070 | year={2019} 1071 | } 1072 | @article{liao2018finding, 1073 | title={Finding All Bayesian Network Structures within a Factor of Optimal}, 1074 | author={Liao, Zhenyu A and Sharma, Charupriya and Cussens, James and van Beek, Peter}, 1075 | journal={AAAI}, 1076 | year={2019} 1077 | } 1078 | @article{teyssier2012ordering, 1079 | title={Ordering-based search: A simple and effective algorithm for learning Bayesian networks}, 1080 | author={Teyssier, Marc and Koller, Daphne}, 1081 | journal={arXiv preprint arXiv:1207.1429}, 1082 | year={2012} 1083 | } 1084 | @inproceedings{ng2000algorithms, 1085 | title={Algorithms for inverse reinforcement learning.}, 1086 | author={Ng, Andrew Y and Russell, Stuart J and others}, 1087 | booktitle={Icml}, 1088 | pages={663--670}, 1089 | year={2000} 1090 | } 1091 | @article{gilpin2007lossless, 1092 | title={Lossless abstraction of imperfect information games}, 1093 | author={Gilpin, Andrew and Sandholm, Tuomas}, 1094 | journal={Journal of the ACM (JACM)}, 1095 | volume={54}, 1096 | number={5}, 1097 | pages={25}, 1098 | year={2007}, 1099 | publisher={ACM} 1100 | } 1101 | @inproceedings{sandholm2012lossy, 1102 | title={Lossy stochastic game abstraction with bounds}, 1103 | author={Sandholm, Tuomas and Singh, Satinder}, 1104 | booktitle={Proceedings of the 13th ACM Conference on Electronic Commerce}, 1105 | pages={880--897}, 1106 | year={2012}, 1107 | organization={ACM} 1108 | } 1109 | @inproceedings{waugh2009abstraction, 1110 | title={Abstraction pathologies in extensive games}, 1111 | author={Waugh, Kevin and Schnizlein, David and Bowling, Michael and Szafron, Duane}, 1112 | booktitle={Proceedings of The 8th International Conference on Autonomous Agents and Multiagent Systems-Volume 2}, 1113 | pages={781--788}, 1114 | year={2009}, 1115 | organization={International Foundation for Autonomous Agents and Multiagent Systems} 1116 | } 1117 | @inproceedings{lanctot2009monte, 1118 | title={Monte Carlo sampling for regret minimization in extensive games}, 1119 | author={Lanctot, Marc and Waugh, Kevin and Zinkevich, Martin and Bowling, Michael}, 1120 | booktitle={Advances in neural information processing systems}, 1121 | pages={1078--1086}, 1122 | year={2009} 1123 | } 1124 | @article{farina2018online, 1125 | title={Online convex optimization for sequential decision processes and extensive-form games}, 1126 | author={Farina, Gabriele and Kroer, Christian and Sandholm, Tuomas}, 1127 | journal={arXiv preprint arXiv:1809.03075}, 1128 | year={2018} 1129 | } 1130 | @article{brown2018depth, 1131 | title={Depth-Limited Solving for Imperfect-Information Games}, 1132 | author={Brown, Noam and Sandholm, Tuomas and Amos, Brandon}, 1133 | journal={NeurIPS}, 1134 | year={2018} 1135 | } 1136 | @article{pang2018reinforcement, 1137 | title={On reinforcement learning for full-length game of starcraft}, 1138 | author={Pang, Zhen-Jia and Liu, Ruo-Ze and Meng, Zhou-Yu and Zhang, Yi and Yu, Yang and Lu, Tong}, 1139 | journal={AAAI}, 1140 | year={2019} 1141 | } 1142 | @article{annasamy2018towards, 1143 | title={Towards Better Interpretability in Deep Q-Networks}, 1144 | 
@inproceedings{song2018diversity,
  title={Diversity-Driven Extensible Hierarchical Reinforcement Learning},
  author={Song, Yuhang and Wang, Jianyi and Lukasiewicz, Thomas and Xu, Zhenghua and Xu, Mai},
  booktitle={AAAI},
  year={2019}
}

@article{cobbe2018quantifying,
  title={Quantifying Generalization in Reinforcement Learning},
  author={Cobbe, Karl and Klimov, Oleg and Hesse, Chris and Kim, Taehoon and Schulman, John},
  journal={arXiv preprint arXiv:1812.02341},
  year={2018}
}

@article{zhang2018dissection,
  title={A Dissection of Overfitting and Generalization in Continuous Reinforcement Learning},
  author={Zhang, Amy and Ballas, Nicolas and Pineau, Joelle},
  journal={arXiv preprint arXiv:1806.07937},
  year={2018}
}

@inproceedings{shih2019compiling,
  title={Compiling Bayesian Network Classifiers into Decision Graphs},
  author={Shih, Andy and Choi, Arthur and Darwiche, Adnan},
  booktitle={AAAI},
  year={2019}
}

@incollection{baird1995residual,
  title={Residual algorithms: Reinforcement learning with function approximation},
  author={Baird, Leemon},
  booktitle={Machine Learning Proceedings 1995},
  pages={30--37},
  year={1995},
  publisher={Elsevier}
}

@inproceedings{hebert2018multicalibration,
  title={Multicalibration: Calibration for the (computationally-identifiable) masses},
  author={H{\'e}bert-Johnson, {\'U}rsula and Kim, Michael and Reingold, Omer and Rothblum, Guy},
  booktitle={International Conference on Machine Learning},
  pages={1944--1953},
  year={2018}
}

@inproceedings{kim2018fairness,
  title={Fairness through computationally-bounded awareness},
  author={Kim, Michael and Reingold, Omer and Rothblum, Guy},
  booktitle={Advances in Neural Information Processing Systems},
  pages={4842--4852},
  year={2018}
}

@article{dawid2017individual,
  title={On individual risk},
  author={Dawid, Philip},
  journal={Synthese},
  volume={194},
  number={9},
  pages={3445--3474},
  year={2017},
  publisher={Springer}
}

@article{jost1994role,
  title={The role of stereotyping in system-justification and the production of false consciousness},
  author={Jost, John T and Banaji, Mahzarin R},
  journal={British Journal of Social Psychology},
  volume={33},
  number={1},
  pages={1--27},
  year={1994},
  publisher={Wiley Online Library}
}

@article{kim2019preference,
  title={Preference-Informed Fairness},
  author={Kim, Michael P and Korolova, Aleksandra and Rothblum, Guy N and Yona, Gal},
  journal={arXiv preprint arXiv:1904.01793},
  year={2019}
}

@article{madras2018learning,
  title={Learning adversarially fair and transferable representations},
  author={Madras, David and Creager, Elliot and Pitassi, Toniann and Zemel, Richard},
  journal={arXiv preprint arXiv:1802.06309},
  year={2018}
}

@inproceedings{finn2017model,
  title={Model-agnostic meta-learning for fast adaptation of deep networks},
  author={Finn, Chelsea and Abbeel, Pieter and Levine, Sergey},
  booktitle={Proceedings of the 34th International Conference on Machine Learning-Volume 70},
  pages={1126--1135},
  year={2017},
  organization={JMLR.org}
}
@article{botvinick2019reinforcement,
  title={Reinforcement Learning, Fast and Slow},
  author={Botvinick, Matthew and Ritter, Sam and Wang, Jane X and Kurth-Nelson, Zeb and Blundell, Charles and Hassabis, Demis},
  journal={Trends in Cognitive Sciences},
  year={2019},
  publisher={Elsevier}
}

@article{daw2014algorithmic,
  title={The algorithmic anatomy of model-based evaluation},
  author={Daw, Nathaniel D and Dayan, Peter},
  journal={Philosophical Transactions of the Royal Society B: Biological Sciences},
  volume={369},
  number={1655},
  pages={20130478},
  year={2014},
  publisher={The Royal Society}
}

@article{saemundsson2018meta,
  title={Meta reinforcement learning with latent variable {G}aussian processes},
  author={S{\ae}mundsson, Steind{\'o}r and Hofmann, Katja and Deisenroth, Marc Peter},
  journal={arXiv preprint arXiv:1803.07551},
  year={2018}
}

@article{zintgraf2018caml,
  title={CAML: Fast Context Adaptation via Meta-Learning},
  author={Zintgraf, Luisa M and Shiarlis, Kyriacos and Kurin, Vitaly and Hofmann, Katja and Whiteson, Shimon},
  journal={arXiv preprint arXiv:1810.03642},
  year={2018}
}

@inproceedings{johnson2016malmo,
  title={The Malmo Platform for Artificial Intelligence Experimentation},
  author={Johnson, Matthew and Hofmann, Katja and Hutton, Tim and Bignell, David},
  booktitle={IJCAI},
  pages={4246--4247},
  year={2016}
}

@article{kennedy2006blind,
  title={Blind man draws using diminution in three dimensions},
  author={Kennedy, John M and Juricevic, Igor},
  journal={Psychonomic Bulletin \& Review},
  volume={13},
  number={3},
  pages={506--509},
  year={2006},
  publisher={Springer}
}

@inproceedings{diuk2008object,
  title={An object-oriented representation for efficient reinforcement learning},
  author={Diuk, Carlos and Cohen, Andre and Littman, Michael L},
  booktitle={Proceedings of the 25th international conference on Machine learning},
  pages={240--247},
  year={2008},
  organization={ACM}
}

@inproceedings{kulkarni2016hierarchical,
  title={Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation},
  author={Kulkarni, Tejas D and Narasimhan, Karthik and Saeedi, Ardavan and Tenenbaum, Josh},
  booktitle={Advances in Neural Information Processing Systems},
  pages={3675--3683},
  year={2016}
}

@article{dilokthanakul2019feature,
  title={Feature control as intrinsic motivation for hierarchical reinforcement learning},
  author={Dilokthanakul, Nat and Kaplanis, Christos and Pawlowski, Nick and Shanahan, Murray},
  journal={IEEE Transactions on Neural Networks and Learning Systems},
  year={2019},
  publisher={IEEE}
}

@article{silver2018general,
  title={A general reinforcement learning algorithm that masters chess, shogi, and Go through self-play},
  author={Silver, David and Hubert, Thomas and Schrittwieser, Julian and Antonoglou, Ioannis and Lai, Matthew and Guez, Arthur and Lanctot, Marc and Sifre, Laurent and Kumaran, Dharshan and Graepel, Thore and others},
  journal={Science},
  volume={362},
  number={6419},
  pages={1140--1144},
  year={2018},
  publisher={American Association for the Advancement of Science}
}
@inproceedings{chua2018deep,
  title={Deep reinforcement learning in a handful of trials using probabilistic dynamics models},
  author={Chua, Kurtland and Calandra, Roberto and McAllister, Rowan and Levine, Sergey},
  booktitle={Advances in Neural Information Processing Systems},
  pages={4754--4765},
  year={2018}
}

@article{hafner2018learning,
  title={Learning latent dynamics for planning from pixels},
  author={Hafner, Danijar and Lillicrap, Timothy and Fischer, Ian and Villegas, Ruben and Ha, David and Lee, Honglak and Davidson, James},
  journal={arXiv preprint arXiv:1811.04551},
  year={2018}
}

@inproceedings{harutyunyan2018learning,
  title={Learning with options that terminate off-policy},
  author={Harutyunyan, Anna and Vrancx, Peter and Bacon, Pierre-Luc and Precup, Doina and Now{\'e}, Ann},
  booktitle={Thirty-Second AAAI Conference on Artificial Intelligence},
  year={2018}
}

@article{harutyunyan2019termination,
  title={The Termination Critic},
  author={Harutyunyan, Anna and Dabney, Will and Borsa, Diana and Heess, Nicolas and Munos, Remi and Precup, Doina},
  journal={arXiv preprint arXiv:1902.09996},
  year={2019}
}

@book{berlyne1960conflict,
  title={Conflict, arousal, and curiosity},
  author={Berlyne, Daniel E},
  year={1960},
  publisher={McGraw-Hill Book Company}
}

@article{berlyne1978curiosity,
  title={Curiosity and learning},
  author={Berlyne, Daniel E},
  journal={Motivation and Emotion},
  volume={2},
  number={2},
  pages={97--175},
  year={1978},
  publisher={Springer}
}

@incollection{oudeyer2016intrinsic,
  title={Intrinsic motivation, curiosity, and learning: Theory and applications in educational technologies},
  author={Oudeyer, P-Y and Gottlieb, Jacqueline and Lopes, Manuel},
  booktitle={Progress in Brain Research},
  volume={229},
  pages={257--284},
  year={2016},
  publisher={Elsevier}
}

@article{laversanne2018curiosity,
  title={Curiosity driven exploration of learned disentangled goal spaces},
  author={Laversanne-Finot, Adrien and P{\'e}r{\'e}, Alexandre and Oudeyer, Pierre-Yves},
  journal={arXiv preprint arXiv:1807.01521},
  year={2018}
}

@book{oudeyer2006self,
  title={Self-organization in the evolution of speech},
  author={Oudeyer, Pierre-Yves},
  volume={6},
  year={2006},
  publisher={OUP Oxford}
}

@article{hjelm2018learning,
  title={Learning deep representations by mutual information estimation and maximization},
  author={Hjelm, R Devon and Fedorov, Alex and Lavoie-Marchildon, Samuel and Grewal, Karan and Trischler, Adam and Bengio, Yoshua},
  journal={arXiv preprint arXiv:1808.06670},
  year={2018}
}

@inproceedings{li2018smoothing,
  title={Smoothing the Geometry of Probabilistic Box Embeddings},
  author={Li, Xiang and Vilnis, Luke and Zhang, Dongxu and Boratko, Michael and McCallum, Andrew},
  booktitle={ICLR},
  year={2019}
}
@article{mao2019neuro,
  title={The Neuro-Symbolic Concept Learner: Interpreting Scenes, Words, and Sentences from Natural Supervision},
  author={Mao, Jiayuan and Gan, Chuang and Kohli, Pushmeet and Tenenbaum, Joshua B and Wu, Jiajun},
  journal={arXiv preprint arXiv:1904.12584},
  year={2019}
}

@article{wu2019pay,
  title={Pay Less Attention with Lightweight and Dynamic Convolutions},
  author={Wu, Felix and Fan, Angela and Baevski, Alexei and Dauphin, Yann N and Auli, Michael},
  journal={arXiv preprint arXiv:1901.10430},
  year={2019}
}

@inproceedings{shen2019ordered,
  title={Ordered Neurons: Integrating Tree Structures into Recurrent Neural Networks},
  author={Shen, Yikang and Tan, Shawn and Sordoni, Alessandro and Courville, Aaron},
  booktitle={ICLR},
  year={2019}
}

@inproceedings{nachum2018near,
  title={Near-Optimal Representation Learning for Hierarchical Reinforcement Learning},
  author={Nachum, Ofir and Gu, Shixiang and Lee, Honglak and Levine, Sergey},
  booktitle={ICLR},
  year={2019}
}

@inproceedings{levy2018learning,
  title={Learning Multi-Level Hierarchies with Hindsight},
  author={Levy, Andrew and Konidaris, George and Platt, Robert and Saenko, Kate},
  booktitle={ICLR},
  year={2019}
}

@inproceedings{koul2018learning,
  title={Learning Finite State Representations of Recurrent Policy Networks},
  author={Koul, Anurag and Greydanus, Sam and Fern, Alan},
  booktitle={ICLR},
  year={2019}
}
--------------------------------------------------------------------------------