├── README.md └── ASIMOV_Datasets.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # code -------------------------------------------------------------------------------- /ASIMOV_Datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "toc_visible": true 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "source": [ 21 | "# ASIMOV datasets\n", 22 | "https://asimov-benchmark.github.io/" 23 | ], 24 | "metadata": { 25 | "id": "UtRnuvdjnqMu" 26 | } 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "source": [ 31 | "## Display Code" 32 | ], 33 | "metadata": { 34 | "id": "8qpjnsRInaLB" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "%pip install mediapy\n", 41 | "%pip install tfds-nightly # to get most up-to-date registered datasets\n", 42 | "%pip install apache_beam" 43 | ], 44 | "metadata": { 45 | "id": "kfRN3Q_lS7M9" 46 | }, 47 | "execution_count": null, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "source": [ 53 | "from IPython.display import display\n", 54 | "from IPython.display import Markdown\n", 55 | "import tensorflow as tf\nimport tensorflow_datasets as tfds # used by get_single_example below\n", 56 | "\n", 57 | "print_fn = lambda x: display(Markdown(x))\n", 58 | "\n", 59 | "try:\n", 60 | " import mediapy as mpy\n", 61 | "except ModuleNotFoundError:\n", 62 | " print('Not displaying images.')\n", 63 | " mpy = None\n", 64 | "\n", 65 | "\n", 66 | "class Sample():\n", 67 | " def __init__(self, example, display_one_instruction=True):\n", 68 | " self.example = example\n", 69 | " self.display_one_instruction = display_one_instruction\n", 70 | "\n", 71 | " def display(self):\n", 72 | " newline = '<br>'\n", 73 | " for k, v in self.example.items():\n", 74 | " if 'image' in k:\n", 75 | " print_fn(f'**{k}**:')\n", 76 | " print_fn(f'image of size {v.numpy().shape}')\n", 77 | " if mpy:\n", 78 | " mpy.show_image(v.numpy())\n", 79 | " elif k == 'instructions':\n", 80 | " for i in range(len(v['instruction'])):\n", 81 | " sample_dict = {}\n", 82 | " for ik, iv in v.items():\n", 83 | " sample_dict[ik] = iv[i]\n", 84 | " sample = Sample(sample_dict)\n", 85 | " print_fn('---')\n", 86 | " print_fn(f'## Sample Entry {i+1}{newline}')\n", 87 | " sample.display()\n", 88 | " if self.display_one_instruction:\n", 89 | " break\n", 90 | " else:\n", 91 | " if isinstance(v, tf.Tensor) and v.dtype == tf.string:\n", 92 | " v = v.numpy()\n", 93 | " if isinstance(v, bytes):\n", 94 | " v = v.decode('utf-8')\n", 95 | " print_fn(f'**{k}**: {v}{newline}{newline}')\n", 96 | "\n", 97 | "def get_single_example(dataset_name: str):\n", 98 | " builder = tfds.builder_from_directory(\n", 99 | " f'gs://gresearch/robotics/{dataset_name}/0.1.0/'\n", 100 | " )\n", 101 | " any_split = list(builder.info.splits.keys())[0]\n", 102 | " ds = builder.as_dataset(split=any_split)\n", 103 | " it = iter(ds)\n", 104 | " example = next(it)\n", 105 | " return example" 106 | ], 107 | "metadata": { 108 | "id": "6UCJ7QmWIORW" 109 | }, 110 | "execution_count": null, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "source": [ 116 | "## Loading the datasets from the GCS bucket\n", 117 | "\n", 118 | "A sanity check featuring how to load each dataset from GCS bucket."
119 | ], 120 | "metadata": { 121 | "id": "Iu3paI2udOyn" 122 | } 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "id": "oqr1iUQF9EE-" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "import tensorflow as tf\n", 133 | "import tensorflow_datasets as tfds\n", 134 | "\n", 135 | "DATASETS = [\n", 136 | " 'asimov_injury_val', # Situations generated from real hospital injury reports (validation set).\n", 137 | " 'asimov_dilemmas_auto_val', # Binary dilemma questions generated from counterfactual situations used to auto-amend generated constitutions (validation set).\n", 138 | " 'asimov_dilemmas_scifi_train', # Multiple-choice ethical questions (with desirable and undesirable answers) based on situations inspired from Science Fiction literature (training set).\n", 139 | " 'asimov_dilemmas_scifi_val', # Multiple-choice ethical questions (with desirable and undesirable answers) based on situations inspired from Science Fiction literature (validation set).\n", 140 | " 'asimov_multimodal_auto_val', # (Image, context, instruction) triplets generated from real images (from RoboVQA dataset) which are modified to contain undesirable elements, generated instructions can be desirable or undesirable (validation set).\n", 141 | " 'asimov_multimodal_manual_val', # (Image, context, instruction) triplets manually taken and written by humans while ensuring that the instruction desirability can only be determined by looking at the image (validation set).\n", 142 | "]\n", 143 | "\n", 144 | "\n", 145 | "for ds_name in DATASETS:\n", 146 | " builder = tfds.builder_from_directory(\n", 147 | " f'gs://gresearch/robotics/{ds_name}/0.1.0/'\n", 148 | " )\n", 149 | " for split in builder.info.splits.keys():\n", 150 | " ds = builder.as_dataset(split=split)\n", 151 | " it = iter(ds)\n", 152 | " example = next(it)\n", 153 | " assert example is not None" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "source": [ 159 | "## 
Loading the datasets from the TFDS Catalog\n", 160 | "\n", 161 | "A sanity check featuring how to load each dataset registered in TFDS Catalog. This will download and cache the datasets to the local disk for fast access." 162 | ], 163 | "metadata": { 164 | "id": "GqEqEG0MdIm0" 165 | } 166 | }, 167 | { 168 | "cell_type": "code", 169 | "source": [ 170 | "import tensorflow_datasets as tfds\n", 171 | "DOWNLOAD_DIR = '/tmp/tensorflow_datasets'\n", 172 | "\n", 173 | "for ds_name in DATASETS:\n", 174 | " print(f'Loading the dataset {ds_name}')\n", 175 | " ds = tfds.load(ds_name, data_dir=DOWNLOAD_DIR)\n", 176 | " for split in ds: # tfds.load without split= returns a dict {split_name: tf.data.Dataset}\n", 177 | " split_ds = ds[split]\n", 178 | " it = iter(split_ds)\n", 179 | " example = next(it)\n", 180 | " assert example is not None" 181 | ], 182 | "metadata": { 183 | "id": "pl5gj2KSb0j9" 184 | }, 185 | "execution_count": null, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "source": [ 191 | "## Display `asimov_multimodal_auto_val` dataset" 192 | ], 193 | "metadata": { 194 | "id": "DNsfzlHXTl1U" 195 | } 196 | }, 197 | { 198 | "cell_type": "code", 199 | "source": [ 200 | "example = get_single_example('asimov_multimodal_auto_val')\n", 201 | "sample = Sample(example)\n", 202 | "\n", 203 | "sample.display()" 204 | ], 205 | "metadata": { 206 | "id": "d2dypIv5ULFZ" 207 | }, 208 | "execution_count": null, 209 | "outputs": [] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "source": [ 214 | "## Display `asimov_dilemmas_auto_val` dataset" 215 | ], 216 | "metadata": { 217 | "id": "gduWUUjLWOSh" 218 | } 219 | }, 220 | { 221 | "cell_type": "code", 222 | "source": [ 223 | "example = get_single_example('asimov_dilemmas_auto_val')\n", 224 | "sample = Sample(example)\n", 225 | "\n", 226 | "sample.display()" 227 | ], 228 | "metadata": { 229 | "id": "PiBnwxXlWaA5" 230 | }, 231 | "execution_count": null, 232 | "outputs": [] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "source": [ 237 | 
"## Display `asimov_injury_val` dataset" 238 | ], 239 | "metadata": { 240 | "id": "Z_3j1scqWgcc" 241 | } 242 | }, 243 | { 244 | "cell_type": "code", 245 | "source": [ 246 | "example = get_single_example('asimov_injury_val')\n", 247 | "sample = Sample(example)\n", 248 | "\n", 249 | "sample.display()" 250 | ], 251 | "metadata": { 252 | "id": "AA-F1KWoWb4e" 253 | }, 254 | "execution_count": null, 255 | "outputs": [] 256 | } 257 | ] 258 | } --------------------------------------------------------------------------------