├── README.md └── ASIMOV_Datasets.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # code -------------------------------------------------------------------------------- /ASIMOV_Datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "toc_visible": true 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "source": [ 21 | "# ASIMOV datasets\n", 22 | "https://asimov-benchmark.github.io/" 23 | ], 24 | "metadata": { 25 | "id": "UtRnuvdjnqMu" 26 | } 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "source": [ 31 | "## Display Code" 32 | ], 33 | "metadata": { 34 | "id": "8qpjnsRInaLB" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "%pip install mediapy\n", 41 | "%pip install tfds-nightly # to get most up-to-date registered datasets\n", 42 | "%pip install apache_beam" 43 | ], 44 | "metadata": { 45 | "id": "kfRN3Q_lS7M9" 46 | }, 47 | "execution_count": null, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "source": [ 53 | "from IPython.display import display\n", 54 | "from IPython.display import Markdown\n", 55 | "import tensorflow as tf\nimport tensorflow_datasets as tfds # used by get_single_example below\n", 56 | "\n", 57 | "print_fn = lambda x: display(Markdown(x))\n", 58 | "\n", 59 | "try:\n", 60 | " import mediapy as mpy\n", 61 | "except ModuleNotFoundError:\n", 62 | " print('Not displaying images.')\n", 63 | " mpy = None\n", 64 | "\n", 65 | "\n", 66 | "class Sample():\n", 67 | " def __init__(self, example, display_one_instruction=True):\n", 68 | " self.example = example\n", 69 | " self.display_one_instruction = display_one_instruction\n", 70 | "\n", 71 | " def display(self):\n", 72 | " newline = '<br>'\n", 73 | " for k, v in self.example.items():\n", 74 | " if 'image' in k:\n", 75 | " print_fn(f'**{k}**:')\n", 76 | " print_fn(f'image of size {v.numpy().shape}')\n", 77 | " if mpy:\n", 78 | " mpy.show_image(v.numpy())\n", 79 | " elif k == 'instructions':\n", 80 | " for i in range(len(v['instruction'])):\n", 81 | " sample_dict = {}\n", 82 | " for ik, iv in v.items():\n", 83 | " sample_dict[ik] = iv[i]\n", 84 | " sample = Sample(sample_dict)\n", 85 | " print_fn('---')\n", 86 | " print_fn(f'## Sample Entry {i+1}{newline}')\n", 87 | " sample.display()\n", 88 | " if self.display_one_instruction:\n", 89 | " break\n", 90 | " else:\n", 91 | " if isinstance(v, tf.Tensor) and v.dtype == tf.string:\n", 92 | " v = v.numpy()\n", 93 | " if isinstance(v, bytes):\n", 94 | " v = v.decode('utf-8')\n", 95 | " print_fn(f'**{k}**: {v}{newline}{newline}')\n", 96 | "\n", 97 | "def get_single_example(dataset_name: str):\n", 98 | " builder = tfds.builder_from_directory(\n", 99 | " f'gs://gresearch/robotics/{dataset_name}/0.1.0/'\n", 100 | " )\n", 101 | " any_split = list(builder.info.splits.keys())[0]\n", 102 | " ds = builder.as_dataset(split=any_split)\n", 103 | " it = iter(ds)\n", 104 | " example = next(it)\n", 105 | " return example" 106 | ], 107 | "metadata": { 108 | "id": "6UCJ7QmWIORW" 109 | }, 110 | "execution_count": null, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "source": [ 116 | "## Loading the datasets from the GCS bucket\n", 117 | "\n", 118 | "A sanity check featuring how to load each dataset from GCS bucket."
119 | ], 120 | "metadata": { 121 | "id": "Iu3paI2udOyn" 122 | } 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "id": "oqr1iUQF9EE-" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "import tensorflow as tf\n", 133 | "import tensorflow_datasets as tfds\n", 134 | "\n", 135 | "DATASETS = [\n", 136 | " 'asimov_injury_val', # Situations generated from real hospital injury reports (validation set).\n", 137 | " 'asimov_dilemmas_auto_val', # Binary dilemma questions generated from counterfactual situations used to auto-amend generated constitutions (validation set).\n", 138 | " 'asimov_dilemmas_scifi_train', # Multiple-choice ethical questions (with desirable and undesirable answers) based on situations inspired from Science Fiction literature (training set).\n", 139 | " 'asimov_dilemmas_scifi_val', # Multiple-choice ethical questions (with desirable and undesirable answers) based on situations inspired from Science Fiction literature (validation set).\n", 140 | " 'asimov_multimodal_auto_val', # (Image, context, instruction) triplets generated from real images (from RoboVQA dataset) which are modified to contain undesirable elements, generated instructions can be desirable or undesirable (validation set).\n", 141 | " 'asimov_multimodal_manual_val', # (Image, context, instruction) triplets manually taken and written by humans while ensuring that the instruction desirability can only be determined by looking at the image (validation set).\n", 142 | "]\n", 143 | "\n", 144 | "\n", 145 | "for ds_name in DATASETS:\n", 146 | " builder = tfds.builder_from_directory(\n", 147 | " f'gs://gresearch/robotics/{ds_name}/0.1.0/'\n", 148 | " )\n", 149 | " for split in builder.info.splits.keys():\n", 150 | " ds = builder.as_dataset(split=split)\n", 151 | " it = iter(ds)\n", 152 | " example = next(it)\n", 153 | " assert example is not None" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "source": [ 159 | "## 
Loading the datasets from the TFDS Catalog\n", 160 | "\n", 161 | "A sanity check featuring how to load each dataset registered in TFDS Catalog. This will download and cache the datasets to the local disk for fast access." 162 | ], 163 | "metadata": { 164 | "id": "GqEqEG0MdIm0" 165 | } 166 | }, 167 | { 168 | "cell_type": "code", 169 | "source": [ 170 | "import tensorflow_datasets as tfds\n", 171 | "DOWNLOAD_DIR = '/tmp/tensorflow_datasets'\n", 172 | "\n", 173 | "for ds_name in DATASETS:\n", 174 | " print(f'Loading the dataset {ds_name}')\n", 175 | " ds = tfds.load(ds_name, data_dir=DOWNLOAD_DIR)\n", 176 | " for split in ds: # tfds.load without split= returns a dict {split_name: tf.data.Dataset}\n", 177 | " split_ds = ds[split]\n", 178 | " it = iter(split_ds)\n", 179 | " example = next(it)\n", 180 | " assert example is not None" 181 | ], 182 | "metadata": { 183 | "id": "pl5gj2KSb0j9" 184 | }, 185 | "execution_count": null, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "source": [ 191 | "## Display `asimov_multimodal_auto_val` dataset" 192 | ], 193 | "metadata": { 194 | "id": "DNsfzlHXTl1U" 195 | } 196 | }, 197 | { 198 | "cell_type": "code", 199 | "source": [ 200 | "example = get_single_example('asimov_multimodal_auto_val')\n", 201 | "sample = Sample(example)\n", 202 | "\n", 203 | "sample.display()" 204 | ], 205 | "metadata": { 206 | "id": "d2dypIv5ULFZ" 207 | }, 208 | "execution_count": null, 209 | "outputs": [] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "source": [ 214 | "## Display `asimov_dilemmas_auto_val` dataset" 215 | ], 216 | "metadata": { 217 | "id": "gduWUUjLWOSh" 218 | } 219 | }, 220 | { 221 | "cell_type": "code", 222 | "source": [ 223 | "example = get_single_example('asimov_dilemmas_auto_val')\n", 224 | "sample = Sample(example)\n", 225 | "\n", 226 | "sample.display()" 227 | ], 228 | "metadata": { 229 | "id": "PiBnwxXlWaA5" 230 | }, 231 | "execution_count": null, 232 | "outputs": [] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "source": [ 237 | 
"## Display `asimov_injury_val` dataset" 238 | ], 239 | "metadata": { 240 | "id": "Z_3j1scqWgcc" 241 | } 242 | }, 243 | { 244 | "cell_type": "code", 245 | "source": [ 246 | "example = get_single_example('asimov_injury_val')\n", 247 | "sample = Sample(example)\n", 248 | "\n", 249 | "sample.display()" 250 | ], 251 | "metadata": { 252 | "id": "AA-F1KWoWb4e" 253 | }, 254 | "execution_count": null, 255 | "outputs": [] 256 | } 257 | ] 258 | } --------------------------------------------------------------------------------