├── LICENSE ├── README.md ├── categories.txt ├── examples ├── binary_file_parser.py └── nodejs │ ├── .gitignore │ ├── binary-parser.js │ ├── ndjson.md │ ├── package.json │ └── simplified-parser.js └── preview.jpg /LICENSE: -------------------------------------------------------------------------------- 1 | This data made available by Google, Inc. under the Creative Commons Attribution 4.0 International license. 2 | https://creativecommons.org/licenses/by/4.0/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Quick, Draw! Dataset 2 | ![preview](preview.jpg) 3 | 4 | The Quick Draw Dataset is a collection of 50 million drawings across [345 categories](categories.txt), contributed by players of the game [Quick, Draw!](https://quickdraw.withgoogle.com). The drawings were captured as timestamped vectors, tagged with metadata including what the player was asked to draw and in which country the player was located. You can browse the recognized drawings on [quickdraw.withgoogle.com/data](https://quickdraw.withgoogle.com/data). 5 | 6 | We're sharing them here for developers, researchers, and artists to explore, study, and learn from. If you create something with this dataset, please let us know [by e-mail](mailto:quickdraw-support@google.com) or at [A.I. Experiments](https://aiexperiments.withgoogle.com/submit). 7 | 8 | We have also released a tutorial and model for training your own drawing classifier on [tensorflow.org](https://github.com/tensorflow/docs/blob/master/site/en/r1/tutorials/sequences/recurrent_quickdraw.md). 9 | 10 | Please keep in mind that while this collection of drawings was individually moderated, it may still contain inappropriate content. 
11 | 12 | ## Content 13 | - [The raw moderated dataset](#the-raw-moderated-dataset) 14 | - [Preprocessed dataset](#preprocessed-dataset) 15 | - [Get the data](#get-the-data) 16 | - [Projects using the dataset](#projects-using-the-dataset) 17 | - [Changes](#changes) 18 | - [License](#license) 19 | 20 | 21 | ## The raw moderated dataset 22 | The raw data is available as [`ndjson`](https://github.com/ndjson) files separated by category, in the following format: 23 | 24 | | Key | Type | Description | 25 | | ------------ | -----------------------| -------------------------------------------- | 26 | | key_id | 64-bit unsigned integer| A unique identifier across all drawings. | 27 | | word | string | Category the player was prompted to draw. | 28 | | recognized | boolean | Whether the word was recognized by the game. | 29 | | timestamp | datetime | When the drawing was created. | 30 | | countrycode | string | A two-letter country code ([ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)) of where the player was located. | 31 | | drawing | string | A JSON array representing the vector drawing | 32 | 33 | 34 | Each line contains one drawing. Here's an example of a single drawing: 35 | 36 | ```javascript 37 | { 38 | "key_id":"5891796615823360", 39 | "word":"nose", 40 | "countrycode":"AE", 41 | "timestamp":"2017-03-01 20:41:36.70725 UTC", 42 | "recognized":true, 43 | "drawing":[[[129,128,129,129,130,130,131,132,132,133,133,133,133,...]]] 44 | } 45 | ``` 46 | 47 | The format of the drawing array is as follows: 48 | 49 | ```javascript 50 | [ 51 | [ // First stroke 52 | [x0, x1, x2, x3, ...], 53 | [y0, y1, y2, y3, ...], 54 | [t0, t1, t2, t3, ...] 55 | ], 56 | [ // Second stroke 57 | [x0, x1, x2, x3, ...], 58 | [y0, y1, y2, y3, ...], 59 | [t0, t1, t2, t3, ...] 60 | ], 61 | ... // Additional strokes 62 | ] 63 | ``` 64 | 65 | Where `x` and `y` are the pixel coordinates, and `t` is the time in milliseconds since the first point. 
`x` and `y` are real-valued while `t` is an integer. The raw drawings can have vastly different bounding boxes and number of points due to the different devices used for display and input. 66 | 67 | ## Preprocessed dataset 68 | We've preprocessed and split the dataset into different files and formats to make it faster and easier to download and explore. 69 | 70 | #### Simplified Drawing files (`.ndjson`) 71 | We've simplified the vectors, removed the timing information, and positioned and scaled the data into a 256x256 region. The data is exported in [`ndjson`](https://github.com/ndjson) format with the same metadata as the raw format. The simplification process was: 72 | 73 | 1. Align the drawing to the top-left corner, to have minimum values of 0. 74 | 2. Uniformly scale the drawing, to have a maximum value of 255. 75 | 3. Resample all strokes with a 1 pixel spacing. 76 | 4. Simplify all strokes using the [Ramer–Douglas–Peucker algorithm](https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm) with an epsilon value of 2.0. 77 | 78 | There is an example in [examples/nodejs/simplified-parser.js](examples/nodejs/simplified-parser.js) showing how to read ndjson files in NodeJS. 79 | Additionally, the [examples/nodejs/ndjson.md](examples/nodejs/ndjson.md) document details a set of command-line tools that can help explore subsets of these quite large files. 80 | 81 | #### Binary files (`.bin`) 82 | The simplified drawings and metadata are also available in a custom binary format for efficient compression and loading. 83 | 84 | There is an example in [examples/binary_file_parser.py](examples/binary_file_parser.py) showing how to load the binary files in Python. 85 | There is also an example in [examples/nodejs/binary-parser.js](examples/nodejs/binary-parser.js) showing how to read the binary files in NodeJS. 86 | 87 | #### Numpy bitmaps (`.npy`) 88 | All the simplified drawings have been rendered into a 28x28 grayscale bitmap in numpy `.npy` format. 
The files can be loaded with [`np.load()`](https://docs.scipy.org/doc/numpy-1.12.0/reference/generated/numpy.load.html). These images were generated from the simplified data, but are aligned to the center of the drawing's bounding box rather than the top-left corner. [See here for the code snippet used for generation](https://github.com/googlecreativelab/quickdraw-dataset/issues/19#issuecomment-402247262). 89 | 90 | ## Get the data 91 | The dataset is available on Google Cloud Storage as [`ndjson`](https://github.com/ndjson) files separated by category. See the list of files in [Cloud 92 | ](https://console.cloud.google.com/storage/browser/quickdraw_dataset/), or read more about [accessing public datasets](https://cloud.google.com/storage/docs/access-public-data) using other methods. As an example, to easily download all simplified drawings, one way is to run the command `gsutil -m cp 'gs://quickdraw_dataset/full/simplified/*.ndjson' .` 93 | 94 | #### Full dataset separated by categories 95 | - [Raw files](https://console.cloud.google.com/storage/browser/quickdraw_dataset/full/raw) (`.ndjson`) 96 | - [Simplified drawings files](https://console.cloud.google.com/storage/browser/quickdraw_dataset/full/simplified) (`.ndjson`) 97 | - [Binary files](https://console.cloud.google.com/storage/browser/quickdraw_dataset/full/binary) (`.bin`) 98 | - [Numpy bitmap files](https://console.cloud.google.com/storage/browser/quickdraw_dataset/full/numpy_bitmap) (`.npy`) 99 | 100 | #### Sketch-RNN QuickDraw Dataset 101 | This data is also used for training the [Sketch-RNN](https://arxiv.org/abs/1704.03477) model. An open source, TensorFlow implementation of this model is available in the [Magenta Project](https://magenta.tensorflow.org/sketch_rnn), (link to GitHub [repo](https://github.com/tensorflow/magenta/tree/master/magenta/models/sketch_rnn)). 
You can also read more about this model in this Google Research [blog post](https://research.googleblog.com/2017/04/teaching-machines-to-draw.html). The data is stored in compressed `.npz` files, in a format suitable for inputs into a recurrent neural network. 102 | 103 | In this dataset, 75K samples (70K Training, 2.5K Validation, 2.5K Test) have been randomly selected from each category, processed with [RDP](https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm) line simplification with an `epsilon` parameter of 2.0. Each category will be stored in its own `.npz` file, for example, `cat.npz`. 104 | 105 | We have also provided the full data for each category, if you want to use more than 70K training examples. These are stored with the `.full.npz` extensions. 106 | 107 | - [Numpy .npz files](https://console.cloud.google.com/storage/browser/quickdraw_dataset/sketchrnn) 108 | 109 | *Note:* For Python3, load the `npz` files using `np.load(data_filepath, encoding='latin1', allow_pickle=True)` 110 | 111 | Instructions for converting Raw `ndjson` files to this `npz` format are available in this [notebook](https://github.com/hardmaru/quickdraw-ndjson-to-npz). 112 | 113 | ## Projects using the dataset 114 | Here are some projects and experiments that are using or featuring the dataset in interesting ways. Got something to add? 
[Let us know!](mailto:quickdraw-support@google.com) 115 | 116 | *Creative and artistic projects* 117 | 118 | - [Letter collages](http://frauzufall.de/en/2017/google-quick-draw/) by [Deborah Schmidt](http://frauzufall.de/) 119 | - [Face tracking experiment](https://www.instagram.com/p/BUU8TuQD6_v/) by [Neil Mendoza](http://www.neilmendoza.com/) 120 | - [Faces of Humanity](http://project.laboiteatortue.com/facesofhumanity/) by [Tortue](www.laboiteatortue.com) 121 | - [Infinite QuickDraw](https://kynd.github.io/infinite_quickdraw/) by [kynd.info](http://kynd.info) 122 | - [Misfire.io](http://misfire.io/) by Matthew Collyer 123 | - [Draw This](http://danmacnish.com/2018/07/01/draw-this/) by [Dan Macnish](http://danmacnish.com/) 124 | - [Scribbling Speech](http://xinyue.de/scribbling-speech.html) by [Xinyue Yang](http://xinyue.de/) 125 | - illustrAItion by [Ling Chen](https://github.com/lingchen42/illustrAItion) 126 | - [Dreaming of Electric Sheep](https://medium.com/@libreai/dreaming-of-electric-sheep-d1aca32545dc) by [ 127 | Dr. 
Ernesto Diaz-Aviles](http://ernesto.diazaviles.com/) 128 | 129 | *Data analyses* 130 | 131 | - [How do you draw a circle?](https://qz.com/994486/the-way-you-draw-circles-says-a-lot-about-you/) by [Quartz](https://qz.com/) 132 | - [Forma Fluens](http://formafluens.io/) by [Mauro Martino](http://www.mamartino.com/), [Hendrik Strobelt](http://hendrik.strobelt.com/) and [Owen Cornec](http://www.byowen.com/) 133 | - [How Long Does it Take to (Quick) Draw a Dog?](http://vallandingham.me/quickdraw/) by [Jim Vallandingham](http://vallandingham.me/) 134 | - [Finding bad flamingo drawings with recurrent neural networks](http://colinmorris.github.io/blog/bad_flamingos) by [Colin Morris](http://colinmorris.github.io/) 135 | - [Facets Dive x Quick, Draw!](https://pair-code.github.io/facets/quickdraw.html) by [People + AI Research Initiative (PAIR), Google](https://ai.google/pair) 136 | - [Exploring and Visualizing an Open Global Dataset](https://research.googleblog.com/2017/08/exploring-and-visualizing-open-global.html) by Google Research 137 | - [Machine Learning for Visualization](https://medium.com/@enjalot/machine-learning-for-visualization-927a9dff1cab) - Talk / article by Ian Johnson 138 | 139 | *Papers* 140 | - [A Neural Representation of Sketch Drawings](https://arxiv.org/pdf/1704.03477.pdf) by [David Ha](https://scholar.google.com/citations?user=J1j92GsxVUMC&hl=en), [Douglas Eck](https://scholar.google.com/citations?user=bLb3VdIAAAAJ&hl=en), ICLR 2018. [code](https://github.com/tensorflow/magenta/tree/master/magenta/models/sketch_rnn) 141 | - [Sketchmate: Deep hashing for million-scale human sketch retrieval](http://openaccess.thecvf.com/content_cvpr_2018/papers/Xu_SketchMate_Deep_Hashing_CVPR_2018_paper.pdf) by [Peng Xu](http://www.pengxu.net/) et al., CVPR 2018. 
142 | - [Multi-graph transformer for free-hand sketch recognition](https://arxiv.org/pdf/1912.11258.pdf) by [Peng Xu](http://www.pengxu.net/), [Chaitanya K Joshi](https://chaitjo.github.io/), [Xavier Bresson](https://www.ntu.edu.sg/home/xbresson/), ArXiv 2019. [code](https://github.com/PengBoXiangShang/multigraph_transformer) 143 | - [Deep Self-Supervised Representation Learning for Free-Hand Sketch](https://arxiv.org/pdf/2002.00867.pdf) by [Peng Xu](http://www.pengxu.net/) et al., ArXiv 2020. [code](https://github.com/zzz1515151/self-supervised_learning_sketch) 144 | - [SketchTransfer: A Challenging New Task for Exploring Detail-Invariance and the Abstractions Learned by Deep Networks](https://arxiv.org/pdf/1912.11570.pdf) by [Alex Lamb](https://sites.google.com/view/alexmlamb), [Sherjil Ozair](https://sherjilozair.github.io/), [Vikas Verma](https://scholar.google.com/citations?user=wo_M4uQAAAAJ&hl=en), [David Ha](https://scholar.google.com/citations?user=J1j92GsxVUMC&hl=en), WACV 2020. 145 | - [Deep Learning for Free-Hand Sketch: A Survey](https://arxiv.org/pdf/2001.02600.pdf) by [Peng Xu](http://www.pengxu.net/), ArXiv 2020. 146 | - [A Novel Sketch Recognition Model based on Convolutional Neural Networks](https://ieeexplore.ieee.org/document/9152911) by [Abdullah Talha Kabakus](https://www.linkedin.com/in/talhakabakus), 2nd International Congress on Human-Computer Interaction, Optimization and Robotic Applications, pp. 101-106, 2020. 147 | 148 | *Guides & Tutorials* 149 | - [TensorFlow tutorial for drawing classification](https://github.com/tensorflow/docs/blob/master/site/en/r1/tutorials/sequences/recurrent_quickdraw.md) 150 | - [Train a model in tf.keras with Colab, and run it in the browser with TensorFlow.js](https://medium.com/tensorflow/train-on-google-colab-and-run-on-the-browser-a-case-study-8a45f9b1474e) by Zaid Alyafeai 151 | 152 | *Code and tools* 153 | - [Quick, Draw! 
Polymer Component & Data API](https://github.com/googlecreativelab/quickdraw-component) by Nick Jonas 154 | - [Quick, Draw for Processing](https://github.com/codybenlewis/Quick-Draw-for-Processing) by [Cody Ben Lewis](https://twitter.com/CodyBenLewis) 155 | - [Quick, Draw! prediction model](https://github.com/keisukeirie/quickdraw_prediction_model) by Keisuke Irie 156 | - [Random sample tool](http://learning.statistics-is-awesome.org/draw/) by [Learning statistics is awesome](http://learning.statistics-is-awesome.org/) 157 | - [SVG rendering in d3.js example](https://bl.ocks.org/enjalot/a2b28f0ed18b891f9fb70910f1b8886d) by [Ian Johnson](http://enja.org/) (read more about the process [here](https://gist.github.com/enjalot/54c4342eb7527ea523884dbfa52d174b)) 158 | - [Sketch-RNN Classification](https://github.com/payalbajaj/sketch_rnn_classification) by Payal Bajaj 159 | - [quickdraw.js](https://github.com/wagenaartje/quickdraw.js) by Thomas Wagenaar 160 | - [~ Doodler ~](https://github.com/krishnasriSomepalli/cs50-project/) by [ 161 | Krishna Sri Somepalli](https://krishnasrisomepalli.github.io/) 162 | - [quickdraw Python API](http://quickdraw.readthedocs.io) by [Martin O'Hanlon](https://github.com/martinohanlon) 163 | - [RealTime QuickDraw](https://github.com/akshaybahadur21/QuickDraw) by [Akshay Bahadur](http://akshaybahadur.com/) 164 | - [DataFlow processing](https://github.com/gxercavins/dataflow-samples/tree/master/quick-draw) by Guillem Xercavins 165 | - [QuickDrawGH Rhino Plugin](https://www.food4rhino.com/app/quickdrawgh) by [James Dalessandro](https://github.com/DalessandroJ) 166 | - [QuickDrawBattle](https://andri.io/quickdrawbattle/) by [Andri Soone](https://github.com/ndri) 167 | 168 | 169 | ## Changes 170 | 171 | May 25, 2017: Updated Sketch-RNN QuickDraw dataset, created `.full.npz` complementary sets. 172 | 173 | ## License 174 | This data made available by Google, Inc. 
under the [Creative Commons Attribution 4.0 International license.](https://creativecommons.org/licenses/by/4.0/) 175 | 176 | ## Dataset Metadata 177 | The following table is necessary for this dataset to be indexed by search 178 | engines such as Google Dataset Search. 179 |
180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 211 | 212 | 213 | 214 | 232 | 233 | 234 | 235 | 253 | 254 |
propertyvalue
nameThe Quick, Draw! Dataset
alternateNameQuick Draw Dataset
alternateNamequickdraw-dataset
url
sameAshttps://github.com/googlecreativelab/quickdraw-dataset
descriptionThe Quick Draw Dataset is a collection of 50 million drawings across 345 categories, contributed by players of the game "Quick, Draw!". The drawings were captured as timestamped vectors, tagged with metadata including what the player was asked to draw and in which country the player was located.\n 208 | \n 209 | Example drawings: 210 | ![preview](https://raw.githubusercontent.com/googlecreativelab/quickdraw-dataset/master/preview.jpg)
provider 215 |
216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 |
propertyvalue
nameGoogle
sameAshttps://en.wikipedia.org/wiki/Google
230 |
231 |
license 236 |
237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 |
propertyvalue
nameCC BY 4.0
url
251 |
252 |
255 |
256 | -------------------------------------------------------------------------------- /categories.txt: -------------------------------------------------------------------------------- 1 | aircraft carrier 2 | airplane 3 | alarm clock 4 | ambulance 5 | angel 6 | animal migration 7 | ant 8 | anvil 9 | apple 10 | arm 11 | asparagus 12 | axe 13 | backpack 14 | banana 15 | bandage 16 | barn 17 | baseball 18 | baseball bat 19 | basket 20 | basketball 21 | bat 22 | bathtub 23 | beach 24 | bear 25 | beard 26 | bed 27 | bee 28 | belt 29 | bench 30 | bicycle 31 | binoculars 32 | bird 33 | birthday cake 34 | blackberry 35 | blueberry 36 | book 37 | boomerang 38 | bottlecap 39 | bowtie 40 | bracelet 41 | brain 42 | bread 43 | bridge 44 | broccoli 45 | broom 46 | bucket 47 | bulldozer 48 | bus 49 | bush 50 | butterfly 51 | cactus 52 | cake 53 | calculator 54 | calendar 55 | camel 56 | camera 57 | camouflage 58 | campfire 59 | candle 60 | cannon 61 | canoe 62 | car 63 | carrot 64 | castle 65 | cat 66 | ceiling fan 67 | cello 68 | cell phone 69 | chair 70 | chandelier 71 | church 72 | circle 73 | clarinet 74 | clock 75 | cloud 76 | coffee cup 77 | compass 78 | computer 79 | cookie 80 | cooler 81 | couch 82 | cow 83 | crab 84 | crayon 85 | crocodile 86 | crown 87 | cruise ship 88 | cup 89 | diamond 90 | dishwasher 91 | diving board 92 | dog 93 | dolphin 94 | donut 95 | door 96 | dragon 97 | dresser 98 | drill 99 | drums 100 | duck 101 | dumbbell 102 | ear 103 | elbow 104 | elephant 105 | envelope 106 | eraser 107 | eye 108 | eyeglasses 109 | face 110 | fan 111 | feather 112 | fence 113 | finger 114 | fire hydrant 115 | fireplace 116 | firetruck 117 | fish 118 | flamingo 119 | flashlight 120 | flip flops 121 | floor lamp 122 | flower 123 | flying saucer 124 | foot 125 | fork 126 | frog 127 | frying pan 128 | garden 129 | garden hose 130 | giraffe 131 | goatee 132 | golf club 133 | grapes 134 | grass 135 | guitar 136 | hamburger 137 | hammer 138 | hand 139 | harp 140 | hat 141 | 
headphones 142 | hedgehog 143 | helicopter 144 | helmet 145 | hexagon 146 | hockey puck 147 | hockey stick 148 | horse 149 | hospital 150 | hot air balloon 151 | hot dog 152 | hot tub 153 | hourglass 154 | house 155 | house plant 156 | hurricane 157 | ice cream 158 | jacket 159 | jail 160 | kangaroo 161 | key 162 | keyboard 163 | knee 164 | knife 165 | ladder 166 | lantern 167 | laptop 168 | leaf 169 | leg 170 | light bulb 171 | lighter 172 | lighthouse 173 | lightning 174 | line 175 | lion 176 | lipstick 177 | lobster 178 | lollipop 179 | mailbox 180 | map 181 | marker 182 | matches 183 | megaphone 184 | mermaid 185 | microphone 186 | microwave 187 | monkey 188 | moon 189 | mosquito 190 | motorbike 191 | mountain 192 | mouse 193 | moustache 194 | mouth 195 | mug 196 | mushroom 197 | nail 198 | necklace 199 | nose 200 | ocean 201 | octagon 202 | octopus 203 | onion 204 | oven 205 | owl 206 | paintbrush 207 | paint can 208 | palm tree 209 | panda 210 | pants 211 | paper clip 212 | parachute 213 | parrot 214 | passport 215 | peanut 216 | pear 217 | peas 218 | pencil 219 | penguin 220 | piano 221 | pickup truck 222 | picture frame 223 | pig 224 | pillow 225 | pineapple 226 | pizza 227 | pliers 228 | police car 229 | pond 230 | pool 231 | popsicle 232 | postcard 233 | potato 234 | power outlet 235 | purse 236 | rabbit 237 | raccoon 238 | radio 239 | rain 240 | rainbow 241 | rake 242 | remote control 243 | rhinoceros 244 | rifle 245 | river 246 | roller coaster 247 | rollerskates 248 | sailboat 249 | sandwich 250 | saw 251 | saxophone 252 | school bus 253 | scissors 254 | scorpion 255 | screwdriver 256 | sea turtle 257 | see saw 258 | shark 259 | sheep 260 | shoe 261 | shorts 262 | shovel 263 | sink 264 | skateboard 265 | skull 266 | skyscraper 267 | sleeping bag 268 | smiley face 269 | snail 270 | snake 271 | snorkel 272 | snowflake 273 | snowman 274 | soccer ball 275 | sock 276 | speedboat 277 | spider 278 | spoon 279 | spreadsheet 280 | square 281 | squiggle 282 | 
squirrel 283 | stairs 284 | star 285 | steak 286 | stereo 287 | stethoscope 288 | stitches 289 | stop sign 290 | stove 291 | strawberry 292 | streetlight 293 | string bean 294 | submarine 295 | suitcase 296 | sun 297 | swan 298 | sweater 299 | swing set 300 | sword 301 | syringe 302 | table 303 | teapot 304 | teddy-bear 305 | telephone 306 | television 307 | tennis racquet 308 | tent 309 | The Eiffel Tower 310 | The Great Wall of China 311 | The Mona Lisa 312 | tiger 313 | toaster 314 | toe 315 | toilet 316 | tooth 317 | toothbrush 318 | toothpaste 319 | tornado 320 | tractor 321 | traffic light 322 | train 323 | tree 324 | triangle 325 | trombone 326 | truck 327 | trumpet 328 | t-shirt 329 | umbrella 330 | underwear 331 | van 332 | vase 333 | violin 334 | washing machine 335 | watermelon 336 | waterslide 337 | whale 338 | wheel 339 | windmill 340 | wine bottle 341 | wine glass 342 | wristwatch 343 | yoga 344 | zebra 345 | zigzag 346 | -------------------------------------------------------------------------------- /examples/binary_file_parser.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
import struct
from struct import unpack

# Fixed-size header of one drawing record. '<' selects little-endian with
# standard sizes and no padding, so the layout is the same on every host:
#   key_id (uint64), country code (2 bytes), recognized (int8),
#   timestamp (uint32), number of strokes (uint16).
_HEADER = struct.Struct('<Q2sbIH')
# Every stroke begins with its point count (uint16, little-endian).
_POINT_COUNT = struct.Struct('<H')


def unpack_drawing(file_handle):
    """Read one drawing record from a binary file positioned at its start.

    Args:
        file_handle: a binary file-like object; only ``read(size)`` is used.

    Returns:
        A dict with the keys ``key_id`` (int), ``country_code`` (2-byte
        ``bytes``), ``recognized`` (int), ``timestamp`` (int) and ``image``,
        a list with one ``(x, y)`` pair per stroke where ``x`` and ``y`` are
        tuples of ints in the range 0-255.

    Raises:
        struct.error: if the stream ends before the record is complete,
            including when it is already at end of file.
    """
    key_id, country_code, recognized, timestamp, n_strokes = _HEADER.unpack(
        file_handle.read(_HEADER.size))

    image = []
    for _ in range(n_strokes):
        n_points, = _POINT_COUNT.unpack(file_handle.read(_POINT_COUNT.size))
        # All x coordinates of the stroke come first, then all y coordinates,
        # one unsigned byte per point.
        fmt = str(n_points) + 'B'
        x = unpack(fmt, file_handle.read(n_points))
        y = unpack(fmt, file_handle.read(n_points))
        image.append((x, y))

    return {
        'key_id': key_id,
        'country_code': country_code,
        'recognized': recognized,
        'timestamp': timestamp,
        'image': image
    }


def unpack_drawings(filename):
    """Yield every drawing stored in the binary file ``filename``.

    Iteration stops at the first record that cannot be read completely,
    which is how the end of the file is detected; a truncated trailing
    record is therefore dropped without raising.
    """
    with open(filename, 'rb') as f:
        while True:
            try:
                yield unpack_drawing(f)
            except struct.error:
                break


if __name__ == '__main__':
    # Run the demo only when executed as a script, so importing this module
    # does not try to open 'nose.bin'.
    for drawing in unpack_drawings('nose.bin'):
        # do something with the drawing
        print(drawing['country_code'])
/*
Copyright 2017 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Demonstration of parsing binary files from Quick, Draw! dataset with NodeJS.

https://github.com/googlecreativelab/quickdraw-dataset
https://quickdraw.withgoogle.com/data

This demo assumes you've put the file "face.bin" into a folder called "data"
in the same directory as this script.
*/
var fs = require('fs');
var Parser = require('binary-parser').Parser;
var BigInteger = require('javascript-biginteger').BigInteger;

// Layout of a single drawing record. The key is read as 8 raw bytes because
// a 64-bit integer does not fit in a JavaScript number; it is reassembled
// by decodeKeyId() below.
var Drawing = Parser.start()
  .endianess('little')
  .array('key_id', {
    type: 'uint8',
    length: 8
  })
  .string('countrycode', { length: 2, encoding: 'ascii' })
  .bit1('recognized')
  .uint32le('timestamp') // unix timestamp in seconds
  .uint16le('n_strokes')
  .array('strokes', {
    // Each stroke: a point count, then all x values, then all y values.
    type: Parser.start()
      .uint16le('n_points')
      .array('x', {
        type: 'uint8',
        length: 'n_points'
      })
      .array('y', {
        type: 'uint8',
        length: 'n_points'
      }),
    length: 'n_strokes'
  });

// Rebuilds the key from its 8 bytes (least significant byte first) and
// returns it as a decimal string.
function decodeKeyId(bytes) {
  var key = BigInteger(0);
  for (var i = 7; i >= 0; i--) {
    key = key.multiply(256);
    key = key.add(bytes[i]);
  }
  return key.toString();
}

// Converts one parsed record into the object handed to the caller.
function toDrawing(record) {
  var strokes = record.strokes.map(function(stroke) {
    return [ stroke.x, stroke.y ];
  });
  return {
    'key_id': decodeKeyId(record.key_id),
    'countrycode': record.countrycode,
    'recognized': !!record.recognized, // convert to boolean
    'timestamp': record.timestamp * 1000, // turn it into milliseconds
    'drawing': strokes
  };
}

/**
 * Reads a whole binary drawing file into memory and parses every record.
 *
 * @param {string} fileName - path of the .bin file to read.
 * @param {function(Error, Array=)} callback - called once, either with an
 *   error (the file could not be read or parsed) or with null and the array
 *   of drawings: { key_id, countrycode, recognized, timestamp, drawing }.
 */
function parseBinaryDrawings(fileName, callback) {
  fs.readFile(fileName, function(err, buffer) {
    // Without this check a read failure would reach parse() with an
    // undefined buffer and throw inside this asynchronous callback.
    if (err) return callback(err);

    var drawings;
    try {
      var unpacked = Parser.start()
        .array('drawings', {
          type: Drawing,
          readUntil: 'eof'
        }).parse(buffer);
      drawings = unpacked.drawings.map(toDrawing);
    } catch (parseErr) {
      return callback(parseErr);
    }
    // Called outside the try block so an exception thrown by the caller's
    // own callback is not reported back to it as a parse error.
    callback(null, drawings);
  });
}

parseBinaryDrawings("data/face.bin", function(err, drawings) {
  if(err) return console.error(err);
  drawings.forEach(function(d) {
    // Do something with the drawing
    console.log(d.key_id, d.countrycode);
  });
  console.log("# of drawings:", drawings.length);
});
24 | 25 | # Let's explore the `face` collection! 26 | 27 | One nice thing that you can do with `.ndjson` files is to quickly peek at the data using some simple Unix commands: 28 | 29 | ```bash 30 | # look at the first 5 lines 31 | cat face.ndjson | head -n 5 32 | # look at the last 5 lines 33 | cat face.ndjson | tail -n 5 34 | ``` 35 | 36 | ## Filtering 37 | 38 | Now let's take our first subset of the data by filtering: 39 | ```bash 40 | # let's filter down to only the recognized drawings 41 | cat face.ndjson | ndjson-filter 'd.recognized == true' | head -n 5 42 | # How many recognized drawings are there? 43 | cat face.ndjson | ndjson-filter 'd.recognized == true' | wc -l 44 | # How about unrecognized? 45 | cat face.ndjson | ndjson-filter 'd.recognized == false' | wc -l 46 | 47 | # We can also filter down to a country we are interested in 48 | cat face.ndjson | ndjson-filter 'd.recognized == true && d.countrycode == "CA"' | wc -l 49 | ``` 50 | 51 | ## Sorting 52 | 53 | For sorting, you can make things easier by including d3. This means you'll need to `npm install d3` in the directory from which you are calling these commands. 
54 | ```bash 55 | # sort by when the drawing was created 56 | cat face.ndjson | ndjson-sort -r d3 'd3.ascending(a.timestamp, b.timestamp)' | head -n 5 57 | 58 | # sort from the most complex drawings to the simplest (judged by how many strokes they use to draw) 59 | cat face.ndjson | ndjson-sort -r d3 'd3.descending(a.drawing.length, b.drawing.length)' | head -n 5 60 | ``` 61 | 62 | ## Saving to JSON 63 | If you want to save out a subset as a regular JSON file, you can use `ndjson-reduce`: 64 | ```bash 65 | # save to the file "canadian-faces.json" 66 | cat face.ndjson | ndjson-filter 'd.recognized == true && d.countrycode == "CA"' | ndjson-reduce > canadian-faces.json 67 | 68 | # You can combine these utilities to further filter down your data 69 | cat face.ndjson | ndjson-filter 'd.recognized == true && d.countrycode == "CA"' | head -n 1000 | ndjson-reduce > canadian-faces.json 70 | 71 | cat face.ndjson | ndjson-filter 'd.recognized == true && d.countrycode == "CA"' | ndjson-sort -r d3 'd3.descending(a.drawing.length, b.drawing.length)' | head -n 100 | ndjson-reduce > complex-faces.json 72 | ``` 73 | -------------------------------------------------------------------------------- /examples/nodejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "quickdraw-node-demos", 3 | "version": "0.0.1", 4 | "description": "Sample code for parsing Quick, Draw! 
dataset in NodeJS", 5 | "main": "simplified-parser.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "Ian Johnson (enjalot@google.com)", 10 | "license": "Apache-2.0", 11 | "dependencies": { 12 | "binary-parser": "^1.1.5", 13 | "javascript-biginteger": "^0.9.2", 14 | "ndjson": "^1.5.0" 15 | }, 16 | "devDependencies": { 17 | "d3": "^4.9.1", 18 | "ndjson-cli": "^0.3.0" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/nodejs/simplified-parser.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Google Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | https://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | /* 17 | Demonstration of parsing simplified ndjson files from Quick, Draw! dataset with node.js. 18 | Read in all of the simplified drawings into memory and log out some properties. 19 | 20 | https://github.com/googlecreativelab/quickdraw-dataset 21 | https://quickdraw.withgoogle.com/data 22 | 23 | This demo assumes you've put the file "face-simple.ndjson" into a folder called "data" 24 | in the same directory as this script. 
/*
Reads every drawing of a simplified ndjson file into memory and logs the
key and country code of each one.
*/
var fs = require('fs');
var ndjson = require('ndjson'); // npm install ndjson

/**
 * Streams an ndjson file and collects every parsed line in an array.
 *
 * @param {string} fileName - path of the .ndjson file to read.
 * @param {function(Error, Array=)} callback - called exactly once, either
 *   with an error (from reading the file or from parsing a line) or with
 *   null and the array of parsed drawings.
 */
function parseSimplifiedDrawings(fileName, callback) {
  var drawings = [];
  var finished = false;

  // Reports the outcome once; later events are ignored so the caller never
  // sees both an error and a result.
  function done(err) {
    if (finished) return;
    finished = true;
    if (err) return callback(err);
    callback(null, drawings);
  }

  var fileStream = fs.createReadStream(fileName);
  // pipe() does not forward errors from the source stream, so a missing or
  // unreadable file has to be handled on the read stream itself.
  fileStream.on("error", done);
  fileStream
    .pipe(ndjson.parse())
    .on('data', function(obj) {
      drawings.push(obj);
    })
    .on("error", done)
    .on("end", function() {
      done(null);
    });
}

parseSimplifiedDrawings("data/face-simple.ndjson", function(err, drawings) {
  if(err) return console.error(err);
  drawings.forEach(function(d) {
    // Do something with the drawing
    console.log(d.key_id, d.countrycode);
  });
  console.log("# of drawings:", drawings.length);
});