├── tensorflow_datasets ├── testing │ ├── test_data │ │ ├── fake_examples │ │ │ ├── mnist │ │ │ │ ├── test-label │ │ │ │ ├── train-label │ │ │ │ ├── test-image │ │ │ │ └── train-image │ │ │ ├── fashion_mnist │ │ │ │ ├── test-label │ │ │ │ ├── train-label │ │ │ │ ├── test-image │ │ │ │ └── train-image │ │ │ ├── bair_robot_pushing_small │ │ │ │ ├── README.md │ │ │ │ └── softmotion30_44k │ │ │ │ │ ├── test │ │ │ │ │ └── traj_0_to_255.tfrecords │ │ │ │ │ └── train │ │ │ │ │ └── traj_1792_to_2047.tfrecords │ │ │ ├── imdb_reviews │ │ │ │ ├── aclImdb │ │ │ │ │ ├── train │ │ │ │ │ │ ├── neg │ │ │ │ │ │ │ ├── 0_7.txt │ │ │ │ │ │ │ ├── 1_8.txt │ │ │ │ │ │ │ └── 2_9.txt │ │ │ │ │ │ └── pos │ │ │ │ │ │ │ ├── 0_7.txt │ │ │ │ │ │ │ └── 1_8.txt │ │ │ │ │ └── test │ │ │ │ │ │ ├── pos │ │ │ │ │ │ ├── 777_7.txt │ │ │ │ │ │ └── 778_7.txt │ │ │ │ │ │ └── neg │ │ │ │ │ │ ├── 888_7.txt │ │ │ │ │ │ └── 889_7.txt │ │ │ │ ├── aclImdb_v1.tar.gz │ │ │ │ └── README.md │ │ │ ├── wmt_translate_ende │ │ │ │ ├── task │ │ │ │ │ └── dev │ │ │ │ │ │ ├── newstest2014.en │ │ │ │ │ │ └── newstest2014.de │ │ │ │ ├── crawl │ │ │ │ │ ├── commoncrawl.de-en.en │ │ │ │ │ └── commoncrawl.de-en.de │ │ │ │ ├── validation │ │ │ │ │ └── dev │ │ │ │ │ │ ├── newstest2013.en │ │ │ │ │ │ └── newstest2013.de │ │ │ │ ├── europarl │ │ │ │ │ └── training │ │ │ │ │ │ ├── europarl-v7.de-en.de │ │ │ │ │ │ └── europarl-v7.de-en.en │ │ │ │ └── nc_v13 │ │ │ │ │ └── training-parallel-nc-v13 │ │ │ │ │ ├── news-commentary-v13.de-en.en │ │ │ │ │ └── news-commentary-v13.de-en.de │ │ │ ├── celeb_a │ │ │ │ ├── list_eval_partition.txt │ │ │ │ ├── img_align_celeba │ │ │ │ │ ├── 000001.jpg │ │ │ │ │ ├── 000002.jpg │ │ │ │ │ ├── 000003.jpg │ │ │ │ │ ├── 000004.jpg │ │ │ │ │ ├── 000005.jpg │ │ │ │ │ └── 000006.jpg │ │ │ │ ├── list_landmarks_align_celeba.txt │ │ │ │ └── list_attr_celeba.txt │ │ │ ├── cifar10 │ │ │ │ └── cifar-10-batches-bin │ │ │ │ │ ├── batches.meta.txt │ │ │ │ │ ├── data_batch_1.bin │ │ │ │ │ ├── data_batch_2.bin │ │ │ │ │ ├── data_batch_3.bin │ │ │ │ │ ├── data_batch_4.bin │ │ │ │ │ ├── data_batch_5.bin │ │ │ │ │ └── test_batch.bin │ │ │ ├── quickdraw_bitmap │ │ │ │ ├── angel.npy │ │ │ │ ├── airplane.npy │ │ │ │ └── ambulance.npy │ │ │ ├── svhn_cropped │ │ │ │ ├── test_32x32.mat │ │ │ │ ├── extra_32x32.mat │ │ │ │ ├── train_32x32.mat │ │ │ │ └── generate_data.py │ │ │ ├── lm1b │ │ │ │ └── 1-billion-word-language-modeling-benchmark-r13output │ │ │ │ │ ├── heldout-monolingual.tokenized.shuffled │ │ │ │ │ └── news.en.heldout-00000-of-00001 │ │ │ │ │ └── training-monolingual.tokenized.shuffled │ │ │ │ │ └── news.en-00000-of-00001 │ │ │ ├── starcraft_video │ │ │ │ ├── test.tfrecords │ │ │ │ ├── valid.tfrecords │ │ │ │ ├── 128_test.tfrecords │ │ │ │ ├── 128_valid.tfrecords │ │ │ │ ├── train_0.tfrecords │ │ │ │ ├── train_1.tfrecords │ │ │ │ ├── 128_train_0.tfrecords │ │ │ │ └── 128_train_1.tfrecords │ │ │ ├── lsun │ │ │ │ ├── classroom_val_lmdb │ │ │ │ │ ├── data.mdb │ │ │ │ │ └── lock.mdb │ │ │ │ └── classroom_train_lmdb │ │ │ │ │ ├── data.mdb │ │ │ │ │ └── lock.mdb │ │ │ ├── cifar100 │ │ │ │ └── cifar-100-binary │ │ │ │ │ ├── test.bin │ │ │ │ │ ├── train.bin │ │ │ │ │ ├── coarse_label_names.txt │ │ │ │ │ └── fine_label_names.txt │ │ │ ├── imagenet2012 │ │ │ │ ├── ILSVRC2012_img_val.tar │ │ │ │ └── ILSVRC2012_img_train.tar │ │ │ ├── open_images_v4 │ │ │ │ ├── s3-tar_test_sha2.tar │ │ │ │ ├── s3-tar_train_sha1_0.tar │ │ │ │ ├── s3-tar_train_sha1_1.tar │ │ │ │ ├── s3-tar_train_sha1_2.tar │ │ │ │ ├── s3-tar_train_sha1_3.tar │ │ │ │ ├── 
s3-tar_train_sha1_4.tar │ │ │ │ ├── s3-tar_train_sha1_5.tar │ │ │ │ ├── s3-tar_train_sha1_6.tar │ │ │ │ ├── s3-tar_train_sha1_7.tar │ │ │ │ ├── s3-tar_train_sha1_8.tar │ │ │ │ ├── s3-tar_train_sha1_9.tar │ │ │ │ ├── s3-tar_train_sha1_a.tar │ │ │ │ ├── s3-tar_train_sha1_b.tar │ │ │ │ ├── s3-tar_train_sha1_c.tar │ │ │ │ ├── s3-tar_train_sha1_d.tar │ │ │ │ ├── s3-tar_train_sha1_e.tar │ │ │ │ ├── s3-tar_train_sha1_f.tar │ │ │ │ ├── s3-tar_validation_sha3.tar │ │ │ │ └── class_descriptions.csv │ │ │ ├── image_folder_data │ │ │ │ ├── test │ │ │ │ │ ├── label1 │ │ │ │ │ │ ├── 000001.jpg │ │ │ │ │ │ └── 000006.jpg │ │ │ │ │ ├── label2 │ │ │ │ │ │ ├── 000002.jpg │ │ │ │ │ │ ├── 000003.jpg │ │ │ │ │ │ └── 000004.jpg │ │ │ │ │ └── label3 │ │ │ │ │ │ └── 000005.jpg │ │ │ │ └── train │ │ │ │ │ └── label2 │ │ │ │ │ ├── 000001.jpg │ │ │ │ │ └── 000006.jpg │ │ │ ├── diabetic_retinopathy_detection │ │ │ │ ├── test │ │ │ │ │ ├── 1_left.jpeg │ │ │ │ │ ├── 3_left.jpeg │ │ │ │ │ ├── 5_left.jpeg │ │ │ │ │ ├── 7_left.jpeg │ │ │ │ │ ├── 9_left.jpeg │ │ │ │ │ ├── 11_left.jpeg │ │ │ │ │ ├── 11_right.jpeg │ │ │ │ │ ├── 1_right.jpeg │ │ │ │ │ ├── 3_right.jpeg │ │ │ │ │ ├── 5_right.jpeg │ │ │ │ │ ├── 7_right.jpeg │ │ │ │ │ └── 9_right.jpeg │ │ │ │ ├── sample │ │ │ │ │ ├── 1_left.jpeg │ │ │ │ │ ├── 1_right.jpeg │ │ │ │ │ ├── 5_left.jpeg │ │ │ │ │ └── 5_right.jpeg │ │ │ │ ├── train │ │ │ │ │ ├── 10_left.jpeg │ │ │ │ │ ├── 10_right.jpeg │ │ │ │ │ ├── 12_left.jpeg │ │ │ │ │ ├── 12_right.jpeg │ │ │ │ │ ├── 2_left.jpeg │ │ │ │ │ ├── 2_right.jpeg │ │ │ │ │ ├── 4_left.jpeg │ │ │ │ │ ├── 4_right.jpeg │ │ │ │ │ ├── 6_left.jpeg │ │ │ │ │ ├── 6_right.jpeg │ │ │ │ │ ├── 8_left.jpeg │ │ │ │ │ └── 8_right.jpeg │ │ │ │ ├── trainLabels.csv │ │ │ │ └── README.md │ │ │ └── squad │ │ │ │ ├── dev-v1.1.json │ │ │ │ └── train-v1.1.json │ │ ├── foo.csv │ │ ├── dataset_info │ │ │ └── mnist │ │ │ │ └── 1.0.0 │ │ │ │ └── image.image.json │ │ ├── 6pixels.png │ │ ├── archives │ │ │ ├── arch1.tar │ │ │ ├── arch1.zip │ │ │ ├── foo.csv.gz │ │ │ └── arch1.tar.gz │ │ ├── lsun_examples │ │ │ ├── 1.jpg │ │ │ ├── 2.jpg │ │ │ ├── 3.jpg │ │ │ └── 4.jpg │ │ ├── README.md │ │ └── lorem_ipsum_zh.txt │ ├── generate_archives.sh │ ├── __init__.py │ ├── test_case.py │ ├── e2e_binary.py │ ├── _utils.py │ ├── lsun.py │ └── bair_robot_pushing.py ├── core │ ├── proto │ │ ├── README.md │ │ ├── install_protoc.sh │ │ ├── __init__.py │ │ ├── dataset_info.proto │ │ └── generate_py_proto.sh │ ├── utils │ │ ├── __init__.py │ │ ├── version_test.py │ │ ├── tf_utils_test.py │ │ └── version.py │ ├── constants.py │ ├── download │ │ └── __init__.py │ ├── lazy_imports_test.py │ ├── features │ │ ├── text │ │ │ └── __init__.py │ │ ├── bounding_boxes_test.py │ │ ├── video_feature_test.py │ │ ├── __init__.py │ │ ├── video_feature.py │ │ ├── audio_feature.py │ │ ├── audio_feature_test.py │ │ └── bounding_boxes.py │ ├── units_test.py │ ├── __init__.py │ ├── units.py │ ├── test_utils_test.py │ ├── lazy_imports.py │ └── naming.py ├── audio │ └── __init__.py ├── translate │ ├── __init__.py │ └── wmt_ende_test.py ├── video │ ├── __init__.py │ ├── bair_robot_pushing_test.py │ └── starcraft_test.py ├── text │ ├── __init__.py │ ├── lm1b_test.py │ ├── imdb_test.py │ └── squad_test.py ├── import_test.py ├── image │ ├── imagenet_test.py │ ├── quickdraw_test.py │ ├── lsun_test.py │ ├── svhn_test.py │ ├── cifar_test.py │ ├── diabetic_retinopathy_detection_test.py │ ├── celeba_test.py │ ├── __init__.py │ ├── mnist_test.py │ ├── image_folder_test.py │ └── open_images_test.py └── public_api.py ├── 
docs ├── dataset_layers.png ├── api_docs │ └── python │ │ ├── _redirects.yaml │ │ └── tfds │ │ ├── core │ │ ├── get_tfds_path.md │ │ ├── lazy_imports.md │ │ ├── Version.md │ │ ├── BuilderConfig.md │ │ ├── SplitInfo.md │ │ ├── SplitGenerator.md │ │ └── SplitDict.md │ │ ├── list_builders.md │ │ ├── dataset_as_numpy.md │ │ ├── percent.md │ │ ├── download │ │ ├── iter_archive.md │ │ ├── ExtractMethod.md │ │ ├── GenerateMode.md │ │ └── DownloadConfig.md │ │ ├── units │ │ └── size_str.md │ │ ├── features │ │ ├── text │ │ │ ├── TextEncoderConfig.md │ │ │ ├── ByteTextEncoder.md │ │ │ ├── TextEncoder.md │ │ │ └── Tokenizer.md │ │ ├── TensorInfo.md │ │ ├── BBox.md │ │ └── text_lib.md │ │ ├── as_numpy.md │ │ ├── file_adapter.md │ │ ├── units.md │ │ ├── download.md │ │ ├── file_adapter │ │ ├── FileFormatAdapter.md │ │ ├── TFRecordExampleAdapter.md │ │ └── CSVAdapter.md │ │ ├── builder.md │ │ ├── core.md │ │ ├── Split.md │ │ └── features.md ├── README.md ├── _project.yaml ├── _book.yaml ├── _index.ipynb └── _index.yaml ├── AUTHORS ├── .gitignore ├── .travis.yml ├── .github └── ISSUE_TEMPLATE │ ├── dataset-request.md │ ├── feature_request.md │ ├── question-or-help.md │ └── bug_report.md ├── oss_scripts ├── oss_pip_install.sh ├── oss_tests.sh └── oss_release.sh └── CONTRIBUTING.md /tensorflow_datasets/testing/test_data/fake_examples/mnist/test-label: -------------------------------------------------------------------------------- 1 | 11111111 -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/fashion_mnist/test-label: -------------------------------------------------------------------------------- 1 | 11111111 -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/mnist/train-label: -------------------------------------------------------------------------------- 1 | 11111111  -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/fashion_mnist/train-label: -------------------------------------------------------------------------------- 1 | 11111111 -------------------------------------------------------------------------------- /docs/dataset_layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/docs/dataset_layers.png -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/bair_robot_pushing_small/README.md: -------------------------------------------------------------------------------- 1 | Empty for now... 
2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/train/neg/0_7.txt: -------------------------------------------------------------------------------- 1 | Goodbye world 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/train/pos/0_7.txt: -------------------------------------------------------------------------------- 1 | Hello world 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/train/pos/1_8.txt: -------------------------------------------------------------------------------- 1 | Hello world2 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/test/pos/777_7.txt: -------------------------------------------------------------------------------- 1 | Test Hello world 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/test/pos/778_7.txt: -------------------------------------------------------------------------------- 1 | Test Hello world2 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/train/neg/1_8.txt: -------------------------------------------------------------------------------- 1 | Goodbye world2 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/train/neg/2_9.txt: -------------------------------------------------------------------------------- 1 | Goodbye world3 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/foo.csv: -------------------------------------------------------------------------------- 1 | image,label 2 | 1.jpeg,0 3 | 2.jpeg,1 4 | 3.jpeg,1 5 | 3.jpeg,2 6 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/test/neg/888_7.txt: -------------------------------------------------------------------------------- 1 | Test Goodbye world 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb/test/neg/889_7.txt: -------------------------------------------------------------------------------- 1 | Test Goodbye world2 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/dataset_info/mnist/1.0.0/image.image.json: -------------------------------------------------------------------------------- 1 | {"encoding_format": "png", "shape": [28, 28, 1]} -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/task/dev/newstest2014.en: -------------------------------------------------------------------------------- 1 | my own plane 2 | I am flying 3 | -------------------------------------------------------------------------------- 
/tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/crawl/commoncrawl.de-en.en: -------------------------------------------------------------------------------- 1 | I am running 2 | 3 | I am swimming 4 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/task/dev/newstest2014.de: -------------------------------------------------------------------------------- 1 | mein eigenes Flugzeug 2 | ich fliege 3 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/validation/dev/newstest2013.en: -------------------------------------------------------------------------------- 1 | my own plane 2 | I am flying 3 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/validation/dev/newstest2013.de: -------------------------------------------------------------------------------- 1 | mein eigenes Flugzeug 2 | ich fliege 3 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/crawl/commoncrawl.de-en.de: -------------------------------------------------------------------------------- 1 | ich renne 2 | es verschwand 3 | ich schwimme 4 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/europarl/training/europarl-v7.de-en.de: -------------------------------------------------------------------------------- 1 | Dies ist das wichtigste Gesetz 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/europarl/training/europarl-v7.de-en.en: -------------------------------------------------------------------------------- 1 | This is the most important law 2 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/6pixels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/6pixels.png -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/archives/arch1.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/archives/arch1.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/archives/arch1.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/archives/arch1.zip -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/archives/foo.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/archives/foo.csv.gz -------------------------------------------------------------------------------- 
/tensorflow_datasets/testing/test_data/lsun_examples/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/lsun_examples/1.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/lsun_examples/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/lsun_examples/2.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/lsun_examples/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/lsun_examples/3.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/lsun_examples/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/lsun_examples/4.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/archives/arch1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/archives/arch1.tar.gz -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/nc_v13/training-parallel-nc-v13/news-commentary-v13.de-en.en: -------------------------------------------------------------------------------- 1 | my car 2 | missing 3 | translation 4 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/wmt_translate_ende/nc_v13/training-parallel-nc-v13/news-commentary-v13.de-en.de: -------------------------------------------------------------------------------- 1 | mein Auto 2 | 3 | fehlende Übersetzung 4 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/list_eval_partition.txt: -------------------------------------------------------------------------------- 1 | 000001.jpg 0 2 | 000002.jpg 0 3 | 000003.jpg 0 4 | 000004.jpg 1 5 | 000005.jpg 1 6 | 000006.jpg 2 7 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/mnist/test-image: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/mnist/test-image -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/mnist/train-image: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/mnist/train-image -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/batches.meta.txt: 
-------------------------------------------------------------------------------- 1 | airplane 2 | automobile 3 | bird 4 | cat 5 | deer 6 | dog 7 | frog 8 | horse 9 | ship 10 | truck -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/fashion_mnist/test-image: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/fashion_mnist/test-image -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/fashion_mnist/train-image: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/fashion_mnist/train-image -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/quickdraw_bitmap/angel.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/quickdraw_bitmap/angel.npy -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/test_32x32.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/test_32x32.mat -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb_v1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/aclImdb_v1.tar.gz -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/lm1b/1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en.heldout-00000-of-00001: -------------------------------------------------------------------------------- 1 | test sentence 2 | test sentence 2 3 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/quickdraw_bitmap/airplane.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/quickdraw_bitmap/airplane.npy -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/quickdraw_bitmap/ambulance.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/quickdraw_bitmap/ambulance.npy -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/test.tfrecords: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/test.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/extra_32x32.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/extra_32x32.mat -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/train_32x32.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/train_32x32.mat -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_val_lmdb/data.mdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_val_lmdb/data.mdb -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_val_lmdb/lock.mdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_val_lmdb/lock.mdb -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/valid.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/valid.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000001.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000002.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000003.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000004.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000004.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000005.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/celeb_a/img_align_celeba/000006.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar100/cifar-100-binary/test.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar100/cifar-100-binary/test.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar100/cifar-100-binary/train.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar100/cifar-100-binary/train.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imagenet2012/ILSVRC2012_img_val.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/imagenet2012/ILSVRC2012_img_val.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_train_lmdb/data.mdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_train_lmdb/data.mdb -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_train_lmdb/lock.mdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/lsun/classroom_train_lmdb/lock.mdb -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_test_sha2.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_test_sha2.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_test.tfrecords: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_test.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_valid.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_valid.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/train_0.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/train_0.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/train_1.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/train_1.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imagenet2012/ILSVRC2012_img_train.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/imagenet2012/ILSVRC2012_img_train.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_train_0.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_train_0.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_train_1.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/starcraft_video/128_train_1.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label1/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label1/000001.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label1/000006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label1/000006.jpg -------------------------------------------------------------------------------- 
/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label2/000002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label2/000002.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label2/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label2/000003.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label2/000004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label2/000004.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label3/000005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/test/label3/000005.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/lm1b/1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/news.en-00000-of-00001: -------------------------------------------------------------------------------- 1 | hello world line 1 2 | hello world line 2 3 | hello world line 3 -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_0.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_0.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_1.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_1.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_2.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_2.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_3.tar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_3.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_4.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_4.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_5.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_5.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_6.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_6.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_7.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_7.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_8.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_8.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_9.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_9.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_a.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_a.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_b.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_b.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_c.tar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_c.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_d.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_d.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_e.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_e.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_f.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_train_sha1_f.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/train/label2/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/train/label2/000001.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/train/label2/000006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/image_folder_data/train/label2/000006.jpg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_validation_sha3.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/s3-tar_validation_sha3.tar -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_1.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_1.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_2.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_2.bin 
-------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_3.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_3.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_4.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_4.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_5.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/data_batch_5.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/test_batch.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/cifar10/cifar-10-batches-bin/test_batch.bin -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/1_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/1_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/3_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/3_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/5_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/5_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/7_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/7_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/9_left.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/9_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/1_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/1_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/1_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/1_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/5_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/5_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/5_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/sample/5_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/11_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/11_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/11_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/11_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/1_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/1_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/3_right.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/3_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/5_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/5_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/7_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/7_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/9_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/test/9_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/10_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/10_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/10_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/10_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/12_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/12_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/12_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/12_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/2_left.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/2_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/2_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/2_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/4_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/4_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/4_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/4_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/6_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/6_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/6_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/6_right.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/8_left.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/8_left.jpeg -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/8_right.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/train/8_right.jpeg -------------------------------------------------------------------------------- /docs/api_docs/python/_redirects.yaml: -------------------------------------------------------------------------------- 1 | redirects: 2 | - from: /datasets/api_docs/python/tfds/GenerateMode 3 | to: /datasets/api_docs/python/tfds/download/GenerateMode 4 | - from: /datasets/api_docs/python/tfds/features/text 5 | to: 
/datasets/api_docs/python/tfds/features/text_lib 6 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/bair_robot_pushing_small/softmotion30_44k/test/traj_0_to_255.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/bair_robot_pushing_small/softmotion30_44k/test/traj_0_to_255.tfrecords -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/trainLabels.csv: -------------------------------------------------------------------------------- 1 | image,level 2 | 2_left,0 3 | 2_right,0 4 | 4_left,0 5 | 4_right,0 6 | 6_left,1 7 | 6_right,2 8 | 8_left,4 9 | 8_right,4 10 | 10_left,3 11 | 10_right,0 12 | 12_left,1 13 | 12_right,4 14 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/bair_robot_pushing_small/softmotion30_44k/train/traj_1792_to_2047.tfrecords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/0101011/datasets/master/tensorflow_datasets/testing/test_data/fake_examples/bair_robot_pushing_small/softmotion30_44k/train/traj_1792_to_2047.tfrecords -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow Datasets 2 | 3 | * [List of datasets](datasets.md) 4 | * [Colab Tutorial](https://github.com/tensorflow/datasets/tree/master/docs/overview.ipynb) 5 | * [API Documentation](api_docs/python/tfds.md) 6 | * [Splits](splits.md) 7 | * [Adding a new dataset](add_dataset.md) 8 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/README.md: -------------------------------------------------------------------------------- 1 | * 6pixels.png: a 2x3-pixel image. 2 | * First row: 3 | * green (0, 255, 0) 4 | * red (255, 0, 0) 5 | * purple (255, 0, 255) 6 | * Second row: 7 | * blue (0, 0, 255) 8 | * yellow (255, 255, 0) 9 | * grey (126, 127, 128) 10 | 11 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of TensorFlow Datasets authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 6 | 7 | Google Inc. 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled python modules. 2 | *.pyc 3 | 4 | # Byte-compiled 5 | __pycache__/ 6 | .cache/ 7 | 8 | # Python egg metadata, regenerated from source files by setuptools. 9 | /*.egg-info 10 | .eggs/ 11 | 12 | # PyPI distribution artifacts. 
13 | build/ 14 | dist/ 15 | 16 | # Tests 17 | .pytest_cache/ 18 | 19 | # Other 20 | *.DS_Store -------------------------------------------------------------------------------- /docs/_project.yaml: -------------------------------------------------------------------------------- 1 | name: TensorFlow Datasets 2 | breadcrumb_name: Datasets 3 | home_url: /datasets/ 4 | parent_project_metadata_path: /_project.yaml 5 | description: > 6 | A collection of datasets ready to use with TensorFlow. 7 | use_site_branding: true 8 | hide_from_products_list: true 9 | content_license: cc3-apache2 10 | buganizer_id: 473701 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | language: python 3 | git: 4 | depth: 10 5 | quiet: true 6 | notifications: 7 | email: 8 | - tensorflow-datasets+travis@google.com 9 | python: 10 | - "2.7" 11 | - "3.6" 12 | env: 13 | matrix: 14 | - TF_VERSION="tf-nightly" 15 | - TF_VERSION="1.13.0rc0" 16 | - TF_VERSION="tf2" 17 | install: 18 | - ./oss_scripts/oss_pip_install.sh 19 | script: 20 | - ./oss_scripts/oss_tests.sh 21 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar100/cifar-100-binary/coarse_label_names.txt: -------------------------------------------------------------------------------- 1 | aquatic_mammals 2 | fish 3 | flowers 4 | food_containers 5 | fruit_and_vegetables 6 | household_electrical_devices 7 | household_furniture 8 | insects 9 | large_carnivores 10 | large_man-made_outdoor_things 11 | large_natural_outdoor_scenes 12 | large_omnivores_and_herbivores 13 | medium_mammals 14 | non-insect_invertebrates 15 | people 16 | reptiles 17 | small_mammals 18 | trees 19 | vehicles_1 20 | vehicles_2 -------------------------------------------------------------------------------- /tensorflow_datasets/core/proto/README.md: -------------------------------------------------------------------------------- 1 | # Protobuf 2 | 3 | This page describes how to update the generated protobuf Python file. By 4 | default, the protobuf is already compiled into a Python file, so you won't have to 5 | do anything. These steps are required only if you update the `.proto` file. The 6 | instructions are for Linux. 
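For orientation, the two helper scripts referenced below boil down to fetching `protoc` and running it once over `dataset_info.proto`; a minimal, hypothetical sketch of that invocation (the canonical flags live in `generate_py_proto.sh`):

```python
# Sketch only; assumes protoc 3.6.1 is on PATH and the working directory
# is tensorflow_datasets/core/proto/.
import subprocess

subprocess.check_call(["protoc", "--python_out=.", "dataset_info.proto"])
```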
7 | 8 | Install the proto compiler (version 3.6.1): 9 | 10 | ``` 11 | ./install_protoc.sh 12 | ``` 13 | 14 | Re-generate the Python file: 15 | 16 | ``` 17 | ./generate_py_proto.sh 18 | ``` 19 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/proto/install_protoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install the protoc compiler on Linux 3 | 4 | 5 | # Download the pinned protoc release (3.6.1) 6 | curl -OL https://github.com/google/protobuf/releases/download/v3.6.1/protoc-3.6.1-linux-x86_64.zip 7 | 8 | # Unzip 9 | unzip protoc-3.6.1-linux-x86_64.zip -d protoc3 10 | 11 | # Move protoc to /usr/local/bin/ 12 | sudo mv protoc3/bin/* /usr/local/bin/ 13 | 14 | # Move protoc3/include to /usr/local/include/ 15 | sudo mv protoc3/include/* /usr/local/include/ 16 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/imdb_reviews/README.md: -------------------------------------------------------------------------------- 1 | aclImdb_v1.tar.gz contains the following file tree. 2 | It was produced with: 3 | 4 | ``` 5 | tar -zcvf aclImdb_v1.tar.gz aclImdb 6 | ``` 7 | 8 | * aclImdb/ 9 | * train/ 10 | * pos/ 11 | * 0_7.txt 12 | * 1_8.txt 13 | * neg/ 14 | * 0_7.txt 15 | * 1_8.txt 16 | * 2_9.txt 17 | * test/ 18 | * pos/ 19 | * 777_7.txt 20 | * 778_7.txt 21 | * neg/ 22 | * 888_7.txt 23 | * 889_7.txt 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/dataset-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Dataset request 3 | about: Request a dataset to be added 4 | title: "[data request] " 5 | labels: dataset request 6 | assignees: '' 7 | 8 | --- 9 | 10 | * Name of dataset: 11 | * URL of dataset: 12 | * License of dataset: 13 | * Short description of dataset and use case(s): 14 | 15 | Folks who would also like to see this dataset in `tensorflow/datasets`, please +1/thumbs-up so the developers can know which requests to prioritize. 16 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/squad/dev-v1.1.json: -------------------------------------------------------------------------------- 1 | {"data": [{"title": "Paris_France", "paragraphs": [ 2 | {"context": "Paris is the largest city in France with over 2 million inhabitants. It is the capital of France.", 3 | "qas": [ 4 | {"answers": [{"answer_start": 1, "text": "Paris"}, {"answer_start": 1, "text": "Paris"}], "question": "What is the capital of France?", "id": "1e35"}, 5 | {"answers": [{"answer_start": 7, "text": "France"}], "question": "In which country is Paris located?", "id": "1e36"} 6 | ] 7 | }]} 8 | ]} 9 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/get_tfds_path.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # tfds.core.get_tfds_path 7 | 8 | ``` python 9 | tfds.core.get_tfds_path(relative_path) 10 | ``` 11 | 12 | 13 | 14 | Defined in [`core/utils/py_utils.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/utils/py_utils.py). 15 | 16 | Returns absolute path to file given path relative to tfds root. -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/list_landmarks_align_celeba.txt: -------------------------------------------------------------------------------- 1 | 6 2 | lefteye_x lefteye_y righteye_x righteye_y nose_x nose_y leftmouth_x leftmouth_y rightmouth_x rightmouth_y 3 | 000001.jpg 69 109 106 113 77 142 73 152 108 154 4 | 000002.jpg 69 110 107 112 81 135 70 151 108 153 5 | 000003.jpg 76 112 104 106 108 128 74 156 98 158 6 | 000004.jpg 72 113 108 108 101 138 71 155 101 151 7 | 000005.jpg 66 114 112 112 86 119 71 147 104 150 8 | 000006.jpg 71 111 106 110 94 131 74 154 102 153 9 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/list_builders.md: -------------------------------------------------------------------------------- 1 |
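A usage sketch for the `tfds.core.get_tfds_path` entry above (the relative path argument is illustrative):

```python
import tensorflow_datasets as tfds

# Absolute path to a file shipped inside the tensorflow_datasets package.
print(tfds.core.get_tfds_path("core/utils/py_utils.py"))
```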
2 | 3 | 4 |
5 | 6 | # tfds.list_builders 7 | 8 | ``` python 9 | tfds.list_builders() 10 | ``` 11 | 12 | 13 | 14 | Defined in [`core/registered.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/registered.py). 15 | 16 | Returns the string names of all tfds.core.DatasetBuilders. -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/dataset_as_numpy.md: -------------------------------------------------------------------------------- 1 |
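A usage sketch for `tfds.list_builders` documented above (the printed names are illustrative):

```python
import tensorflow_datasets as tfds

# A list of registered builder names, e.g. ["celeb_a", "cifar10", "mnist", ...].
print(tfds.list_builders())
```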
2 | 3 | 4 |
5 | 6 | # tfds.dataset_as_numpy 7 | 8 | ``` python 9 | tfds.dataset_as_numpy( 10 | *args, 11 | **kwargs 12 | ) 13 | ``` 14 | 15 | 16 | 17 | Defined in [`core/dataset_utils.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/dataset_utils.py). 18 | 19 | DEPRECATED. Renamed tfds.as_numpy. -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/percent.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # tfds.percent 7 | 8 | ## Class `percent` 9 | 10 | 11 | 12 | 13 | 14 | Defined in [`core/splits.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/splits.py). 15 | 16 | Syntactic sugar for defining slice subsplits: `tfds.percent[75:-5]`. 17 | 18 | See the 19 | [guide on splits](https://github.com/tensorflow/datasets/tree/master/docs/splits.md) 20 | for more information. 21 | 22 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/generate_archives.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Generate the archives in test_data/archives. 3 | 4 | ROOT=`dirname $0` 5 | PWD=`pwd` 6 | ABSROOT="${PWD}/${ROOT}" 7 | DEST="archives" 8 | F1="6pixels.png" 9 | F2="foo.csv" 10 | 11 | cd "${ROOT}/test_data" 12 | tar -cvf "${DEST}/arch1.tar" ${F1} ${F2} 13 | tar -zcvf "${DEST}/arch1.tar.gz" ${F1} ${F2} 14 | zip "${DEST}/arch1.zip" ${F1} ${F2} 15 | gzip -cv ${F2} > "${DEST}/foo.csv.gz" 16 | 17 | # Archive with absolute link. 18 | # PWD needs to be at / for this to work. 19 | cp -L "${F2}" "/tmp/${F2}" 20 | cd / 21 | tar --hard-dereference --absolute-names -cvf "${ABSROOT}/test_data/${DEST}/absolute_path.tar" "/tmp/${F2}" 22 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/download/iter_archive.md: -------------------------------------------------------------------------------- 1 |
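A usage sketch for the `tfds.percent` page above, assuming the `subsplit` API described in the splits guide (the method name and exact slicing semantics are assumptions against this version):

```python
import tensorflow_datasets as tfds

# Keep only the first 75% of the train split; negative indices such as
# tfds.percent[75:-5] work the same way. subsplit() is assumed per the guide.
train_75 = tfds.load("mnist", split=tfds.Split.TRAIN.subsplit(tfds.percent[:75]))
```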
2 | 3 | 4 |
5 | 6 | # tfds.download.iter_archive 7 | 8 | ``` python 9 | tfds.download.iter_archive( 10 | path, 11 | method 12 | ) 13 | ``` 14 | 15 | 16 | 17 | Defined in [`core/download/extractor.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/download/extractor.py). 18 | 19 | Yields (path_in_archive, f_obj) for archive at path using tfds.download.ExtractMethod. -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/lazy_imports.md: -------------------------------------------------------------------------------- 1 |
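A usage sketch for the `tfds.download.iter_archive` entry above, pointed at one of the fake test archives from `test_data/archives` (path and archive choice are illustrative):

```python
import tensorflow_datasets as tfds

# Stream (path_in_archive, file_object) pairs out of a .tar archive
# without extracting it to disk.
archive = tfds.download.iter_archive(
    "tensorflow_datasets/testing/test_data/archives/arch1.tar",
    tfds.download.ExtractMethod.TAR)
for path_in_archive, f_obj in archive:
  print(path_in_archive, len(f_obj.read()))
```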
2 | 3 | 4 |
6 | 7 | # tfds.core.lazy_imports 8 | 9 | ## Class `lazy_imports` 10 | 11 | 12 | 13 | 14 | 15 | Defined in [`core/lazy_imports.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/lazy_imports.py). 16 | 17 | Lazy importer for heavy dependencies. 18 | 19 | Some datasets require heavy dependencies for data generation. To allow for 20 | the default installation to remain lean, those heavy dependencies are 21 | lazily imported here. 22 | 23 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/open_images_v4/class_descriptions.csv: -------------------------------------------------------------------------------- 1 | /m/016x0,name 0 2 | /m/016x1,name 1 3 | /m/016x2,name 2 4 | /m/016x3,name 3 5 | /m/016x4,name 4 6 | /m/016x5,name 5 7 | /m/016x6,name 6 8 | /m/016x7,name 7 9 | /m/016x8,name 8 10 | /m/016x9,name 9 11 | /m/016x10,name 10 12 | /m/016x11,name 11 13 | /m/016x12,name 12 14 | /m/016x13,name 13 15 | /m/016x14,name 14 16 | /m/016x15,name 15 17 | /m/016x16,name 16 18 | /m/016x17,name 17 19 | /m/016x18,name 18 20 | /m/016x19,name 19 21 | /m/016x20,name 20 22 | /m/016x21,name 21 23 | /m/016x22,name 22 24 | /m/016x23,name 23 25 | /m/016x24,name 24 26 | /m/016x25,name 25 27 | /m/016x26,name 26 28 | /m/016x27,name 27 29 | /m/016x28,name 28 30 | /m/016x29,name 29 31 | /m/016x30,name 30 32 | /m/016x31,name 31 -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/units/size_str.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # tfds.units.size_str 7 | 8 | ``` python 9 | tfds.units.size_str(size_in_bytes) 10 | ``` 11 | 12 | 13 | 14 | Defined in [`core/units.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/units.py). 15 | 16 | Returns a human readable size string. 17 | 18 | If size_in_bytes is None, then returns "?? GiB". 19 | 20 | For example `size_str(1.5 * tfds.units.GiB) == "1.50 GiB"`. 21 | 22 | #### Args: 23 | 24 | * `size_in_bytes`: `int` or `None`, the size, in bytes, that we want to 25 | format as a human-readable size string. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-or-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question or help 3 | about: Ask a question or ask for some help 4 | title: '' 5 | labels: help 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What I need help with / What I was wondering** 11 | Your question, or a clear description of what you're looking for help with. 12 | 13 | **What I've tried so far** 14 | A description of what you've tried so far to solve your problem. 15 | 16 | **It would be nice if...** 17 | Could we have done anything to make things better (documentation, etc.)? 18 | 19 | **Environment information** 20 | (if applicable) 21 | * Operating System: 22 | * Python version: 23 | * `tensorflow-datasets`/`tfds-nightly` version: 24 | * `tensorflow`/`tensorflow-gpu`/`tf-nightly`/`tf-nightly-gpu` version: 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Short description** 11 | Description of the bug. 12 | 13 | **Environment information** 14 | * Operating System: 15 | * Python version: 16 | * `tensorflow-datasets`/`tfds-nightly` version: 17 | * `tensorflow`/`tensorflow-gpu`/`tf-nightly`/`tf-nightly-gpu` version: 18 | 19 | **Reproduction instructions** 20 | 21 | ``` 22 | 23 | ``` 24 | 25 | **Link to logs** 26 | If applicable, 27 | 28 | **Expected behavior** 29 | What you expected to happen. 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 
33 | -------------------------------------------------------------------------------- /docs/_book.yaml: -------------------------------------------------------------------------------- 1 | upper_tabs: 2 | # Tabs left of dropdown menu 3 | - include: /_upper_tabs_left.yaml 4 | - include: /api_docs/_upper_tabs_api.yaml 5 | # Dropdown menu 6 | - name: Resources 7 | path: /resources 8 | is_default: true 9 | menu: 10 | - include: /resources/_menu_toc.yaml 11 | lower_tabs: 12 | # Subsite tabs 13 | other: 14 | - name: Datasets 15 | contents: 16 | - title: Datasets 17 | path: /datasets/datasets 18 | - name: Guide 19 | contents: 20 | - title: Overview 21 | path: /datasets/overview 22 | - title: Splits 23 | path: /datasets/splits 24 | - title: Add a dataset 25 | path: /datasets/add_dataset 26 | - name: API 27 | skip_translation: true 28 | contents: 29 | - include: /datasets/api_docs/python/_toc.yaml 30 | 31 | - include: /_upper_tabs_right.yaml 32 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/text/TextEncoderConfig.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 |
6 | 7 | # tfds.features.text.TextEncoderConfig 8 | 9 | ## Class `TextEncoderConfig` 10 | 11 | 12 | 13 | 14 | 15 | Defined in [`core/features/text/text_encoder.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/text/text_encoder.py). 16 | 17 | Configuration for tfds.features.Text. 18 | 19 |
## `__init__`
20 | 21 | ``` python 22 | __init__( 23 | encoder=None, 24 | encoder_cls=None, 25 | vocab_size=None, 26 | name=None 27 | ) 28 | ``` 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /tensorflow_datasets/audio/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Audio datasets.""" 17 | 18 | from tensorflow_datasets.audio.librispeech import Librispeech 19 | from tensorflow_datasets.audio.librispeech import LibrispeechConfig 20 | from tensorflow_datasets.audio.nsynth import Nsynth 21 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/squad/train-v1.1.json: -------------------------------------------------------------------------------- 1 | {"data": [{"title": "Zurich_Switzerland", "paragraphs": [ 2 | {"context": "Zurich is the largest city in Switzerland with over 400000 inhabitants. In spite of this, it is not the capital of Switzerland, which is Bern.", 3 | "qas": [ 4 | {"answers": [{"answer_start": 1, "text": "Zurich"}, {"answer_start": 20, "text": "Bern"}], "question": "What is the capital of Switzerland?", "id": "1e11"}, 5 | {"answers": [{"answer_start": 10, "text": "400000"}], "question": "How many inhabitants does Zurich have?", "id": "1e13"} 6 | ] 7 | }, 8 | {"context": "Switzerland is a country in Europe with 26 cantons. Zurich canton has the largest population of 1.5 million.", 9 | "qas": [ 10 | {"answers": [{"answer_start": 8, "text": "26"}], "question": "How many cantons does Switzerland have?", "id": "1e16"} 11 | ] 12 | }]} 13 | ]} 14 | -------------------------------------------------------------------------------- /tensorflow_datasets/translate/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Translation datasets.""" 17 | 18 | from tensorflow_datasets.translate.wmt import WMTConfig 19 | from tensorflow_datasets.translate.wmt_ende import WmtTranslateEnde 20 | from tensorflow_datasets.translate.wmt_enfr import WmtTranslateEnfr 21 | -------------------------------------------------------------------------------- /tensorflow_datasets/video/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Video datasets.""" 17 | 18 | from tensorflow_datasets.video.bair_robot_pushing import BairRobotPushingSmall 19 | from tensorflow_datasets.video.starcraft import StarcraftVideo 20 | from tensorflow_datasets.video.starcraft import StarcraftVideoConfig 21 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Util import.""" 17 | 18 | # pylint: disable=wildcard-import 19 | from tensorflow_datasets.core.utils.py_utils import * 20 | from tensorflow_datasets.core.utils.tf_utils import * 21 | from tensorflow_datasets.core.utils.version import Version 22 | # pylint: enable=wildcard-import 23 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/proto/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Public API of the proto package.""" 17 | 18 | # pylint: disable=reimported,g-bad-import-order 19 | from tensorflow_datasets.core.proto import dataset_info_generated_pb2 as dataset_info_pb2 20 | from tensorflow_datasets.core.proto.dataset_info_generated_pb2 import SplitInfo 21 | # pylint: enable=reimported,g-bad-import-order 22 | -------------------------------------------------------------------------------- /tensorflow_datasets/text/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Text datasets: sentiment analysis, language modeling and question answering.""" 17 | 18 | from tensorflow_datasets.text.imdb import IMDBReviews 19 | from tensorflow_datasets.text.imdb import IMDBReviewsConfig 20 | from tensorflow_datasets.text.lm1b import Lm1b 21 | from tensorflow_datasets.text.lm1b import Lm1bConfig 22 | from tensorflow_datasets.text.squad import Squad 23 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/TensorInfo.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | # tfds.features.TensorInfo 10 | 11 | ## Class `TensorInfo` 12 | 13 | 14 | 15 | 16 | 17 | Defined in [`core/features/feature.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/feature.py). 18 | 19 | TensorInfo(shape, dtype) 20 | 21 |
## `__new__`
22 | 23 | ``` python 24 | @staticmethod 25 | __new__( 26 | _cls, 27 | shape, 28 | dtype 29 | ) 30 | ``` 31 | 32 | Create new instance of TensorInfo(shape, dtype) 33 | 34 | 35 | 36 | ## Properties 37 | 38 |
### `shape`
39 | 40 | 41 | 42 |
### `dtype`
43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/as_numpy.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # tfds.as_numpy 7 | 8 | ``` python 9 | tfds.as_numpy( 10 | dataset, 11 | graph=None 12 | ) 13 | ``` 14 | 15 | 16 | 17 | Defined in [`core/dataset_utils.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/dataset_utils.py). 18 | 19 | Converts a `tf.data.Dataset` to an iterable of NumPy arrays. 20 | 21 | `as_numpy` converts a possibly nested structure of `tf.data.Dataset`s 22 | and `tf.Tensor`s to iterables of NumPy arrays and NumPy arrays, respectively. 23 | 24 | #### Args: 25 | 26 | * `dataset`: a possibly nested structure of `tf.data.Dataset`s and/or 27 | `tf.Tensor`s. 28 | * `graph`: `tf.Graph`, optional, explicitly set the graph to use. 29 | 30 | 31 | #### Returns: 32 | 33 | A structure matching `dataset` where `tf.data.Dataset`s are converted to 34 | generators of NumPy arrays and `tf.Tensor`s are converted to NumPy arrays. -------------------------------------------------------------------------------- /tensorflow_datasets/import_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Test import.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | import tensorflow_datasets as tfds # pylint: disable=unused-import 24 | 25 | 26 | class ImportTest(tf.test.TestCase): 27 | 28 | def test_import(self): 29 | pass 30 | 31 | 32 | if __name__ == '__main__': 33 | tf.test.main() 34 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/download/ExtractMethod.md: -------------------------------------------------------------------------------- 1 |
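A usage sketch for the `tfds.as_numpy` entry above, mirroring the `tfds.load` smoke test used in `oss_scripts/oss_pip_install.sh`:

```python
import tensorflow_datasets as tfds

dataset = tfds.load("mnist", split=tfds.Split.TRAIN)
for example in tfds.as_numpy(dataset):
  image, label = example["image"], example["label"]  # plain NumPy arrays
  break
```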
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | # tfds.download.ExtractMethod 13 | 14 | ## Class `ExtractMethod` 15 | 16 | 17 | 18 | 19 | 20 | Defined in [`core/download/resource.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/download/resource.py). 21 | 22 | The extraction method to use to pre-process a downloaded file. 23 | 24 | ## Class Members 25 | 26 |
### `GZIP`
27 | 28 |
### `NO_EXTRACT`
29 | 30 |
### `TAR`
31 | 32 |
### `TAR_GZ`
33 | 34 |
### `ZIP`
35 | 36 |
### `__members__`
37 | 38 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Default values for some parameters of the API when no values are passed.""" 17 | # IMPORTANT: when changing values here, update docstrings. 18 | 19 | import os 20 | 21 | # Directory where to store processed datasets. 22 | DATA_DIR = os.path.join("~", "tensorflow_datasets") 23 | 24 | # GCS bucket with dataset info and metadata files 25 | DATASET_INFO_BUCKET = "gs://tfds-data/dataset_info" 26 | 27 | # Suffix of files / directories which aren't finished downloading / extracting. 28 | INCOMPLETE_SUFFIX = ".incomplete" 29 | 30 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/Version.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | # tfds.core.Version 12 | 13 | ## Class `Version` 14 | 15 | 16 | 17 | 18 | 19 | Defined in [`core/utils/version.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/utils/version.py). 20 | 21 | Dataset version MAJOR.MINOR.PATCH. 22 | 23 |
## `__new__`
24 | 25 | ``` python 26 | @staticmethod 27 | __new__( 28 | cls, 29 | *args, 30 | **kwargs 31 | ) 32 | ``` 33 | 34 | 35 | 36 | 37 | 38 | ## Properties 39 | 40 |
### `major`
41 | 42 | 43 | 44 |
### `minor`
45 | 46 | 47 | 48 |
### `patch`
49 | 50 | 51 | 52 | 53 | 54 | ## Class Members 55 | 56 |
### `LATEST`
57 | 58 | -------------------------------------------------------------------------------- /tensorflow_datasets/text/lm1b_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for lm1b dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.testing import dataset_builder_testing 23 | from tensorflow_datasets.text import lm1b 24 | 25 | 26 | class Lm1bTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = lm1b.Lm1b 28 | SPLITS = { 29 | "train": 3, 30 | "test": 2, 31 | } 32 | 33 | 34 | if __name__ == "__main__": 35 | dataset_builder_testing.main() 36 | -------------------------------------------------------------------------------- /oss_scripts/oss_pip_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -vx # print command from file as well as evaluated command 4 | set -e # fail and exit on any command erroring 5 | 6 | : "${TF_VERSION:?}" 7 | 8 | # Install ffmpeg for Audio FeatureConnector tests 9 | if command -v ffmpeg 2>/dev/null 10 | then 11 | echo "Using installed ffmpeg" 12 | else 13 | echo "Installing ffmpeg" 14 | sudo add-apt-repository -y ppa:mc3man/trusty-media 15 | sudo apt-get -qq update 16 | sudo apt-get install -y ffmpeg 17 | fi 18 | 19 | if [[ "$TF_VERSION" == "tf-nightly" ]] 20 | then 21 | pip install -q tf-nightly; 22 | elif [[ "$TF_VERSION" == "tf2" ]] 23 | then 24 | pip install -q "tf-nightly-2.0-preview" 25 | else 26 | pip install -q "tensorflow==$TF_VERSION" 27 | fi 28 | 29 | # Make sure we have the latest version of numpy - avoid problems we were 30 | # seeing with Python 3 31 | pip install -q -U numpy 32 | 33 | # First ensure that the base dependencies are sufficient for a full import and 34 | # data load 35 | pip install -q -e . 36 | python -c "import tensorflow_datasets as tfds" 37 | python -c "import tensorflow_datasets as tfds; tfds.load('mnist', split=tfds.Split.TRAIN)" 38 | 39 | # Then install the test dependencies 40 | pip install -q -e .[tests] 41 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/BBox.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | # tfds.features.BBox 12 | 13 | ## Class `BBox` 14 | 15 | 16 | 17 | 18 | 19 | Defined in [`core/features/bounding_boxes.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/bounding_boxes.py). 20 | 21 | BBox(ymin, xmin, ymax, xmax) 22 | 23 |
## `__new__`
24 | 25 | ``` python 26 | @staticmethod 27 | __new__( 28 | _cls, 29 | ymin, 30 | xmin, 31 | ymax, 32 | xmax 33 | ) 34 | ``` 35 | 36 | Create new instance of BBox(ymin, xmin, ymax, xmax) 37 | 38 | 39 | 40 | ## Properties 41 | 42 |
### `ymin`
43 | 44 | 45 | 46 |
### `xmin`
47 | 48 | 49 | 50 |
### `ymax`
51 | 52 | 53 | 54 |
### `xmax`
55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/BuilderConfig.md: -------------------------------------------------------------------------------- 1 |
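A small construction sketch for the `tfds.features.BBox` tuple documented above (the `[0, 1]`-normalized coordinate convention is an assumption here, not stated on the page):

```python
import tensorflow_datasets as tfds

# A centered box: (ymin, xmin, ymax, xmax), assuming normalized coordinates.
box = tfds.features.BBox(ymin=0.25, xmin=0.25, ymax=0.75, xmax=0.75)
print(box.ymin, box.xmax)  # 0.25 0.75
```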
2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 10 | # tfds.core.BuilderConfig 11 | 12 | ## Class `BuilderConfig` 13 | 14 | 15 | 16 | 17 | 18 | Defined in [`core/dataset_builder.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/dataset_builder.py). 19 | 20 | Base class for `DatasetBuilder` data configuration. 21 | 22 | DatasetBuilder subclasses with data configuration options should subclass 23 | `BuilderConfig` and add their own properties. 24 | 25 |
## `__init__`
26 | 27 | ``` python 28 | __init__( 29 | name, 30 | version=None, 31 | description=None 32 | ) 33 | ``` 34 | 35 | 36 | 37 | 38 | 39 | ## Properties 40 | 41 |
### `description`
42 | 43 | 44 | 45 |
### `name`
46 | 47 | 48 | 49 |
### `version`
50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /tensorflow_datasets/text/imdb_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for imdb dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.testing import dataset_builder_testing 23 | from tensorflow_datasets.text import imdb 24 | 25 | 26 | class IMDBReviewsTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = imdb.IMDBReviews 28 | SPLITS = { 29 | "train": 5, 30 | "test": 4, 31 | } 32 | DL_EXTRACT_RESULT = "aclImdb_v1.tar.gz" 33 | 34 | 35 | if __name__ == "__main__": 36 | dataset_builder_testing.main() 37 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/imagenet_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for imagenet dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import imagenet 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | class Imagenet2012Test(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = imagenet.Imagenet2012 28 | SPLITS = { # Expected number of examples on each split. 
29 | "train": 100, 30 | "validation": 10, 31 | } 32 | 33 | 34 | if __name__ == "__main__": 35 | dataset_builder_testing.main() 36 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/celeb_a/list_attr_celeba.txt: -------------------------------------------------------------------------------- 1 | 6 2 | 5_o_Clock_Shadow Arched_Eyebrows Attractive Bags_Under_Eyes Bald Bangs Big_Lips Big_Nose Black_Hair Blond_Hair Blurry Brown_Hair Bushy_Eyebrows Chubby Double_Chin Eyeglasses Goatee Gray_Hair Heavy_Makeup High_Cheekbones Male Mouth_Slightly_Open Mustache Narrow_Eyes No_Beard Oval_Face Pale_Skin Pointy_Nose Receding_Hairline Rosy_Cheeks Sideburns Smiling Straight_Hair Wavy_Hair Wearing_Earrings Wearing_Hat Wearing_Lipstick Wearing_Necklace Wearing_Necktie Young 3 | 000001.jpg -1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 1 1 -1 1 -1 1 -1 -1 1 4 | 000002.jpg -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 1 5 | 000003.jpg -1 -1 -1 -1 -1 -1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 1 1 -1 -1 1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 1 6 | 000004.jpg -1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 1 -1 -1 -1 -1 1 -1 1 -1 1 1 -1 1 7 | 000005.jpg -1 1 1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 -1 -1 1 8 | 000006.jpg -1 1 1 -1 -1 -1 1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 -1 1 -1 -1 1 -1 -1 1 -1 -1 -1 -1 -1 -1 -1 -1 1 1 -1 1 -1 -1 1 9 | -------------------------------------------------------------------------------- /oss_scripts/oss_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -vx # print command from file as well as evaluated command 4 | 5 | # Instead of exiting on any failure with "set -e", we'll call set_status after 6 | # each command and exit $STATUS at the end. 7 | STATUS=0 8 | function set_status() { 9 | local last_status=$? 10 | if [[ $last_status -ne 0 ]] 11 | then 12 | echo "<<<<<>>>>> Exit code: $last_status" 13 | fi 14 | STATUS=$(($last_status || $STATUS)) 15 | } 16 | 17 | # Certain datasets/tests don't work with TF2 18 | # Skip them here, and link to a GitHub issue that explains why it doesn't work 19 | # and what the plan is to support it. 20 | TF2_IGNORE_TESTS="" 21 | if [[ "$TF_VERSION" == "tf2" ]] 22 | then 23 | # * lsun_test: https://github.com/tensorflow/datasets/issues/34 24 | TF2_IGNORE_TESTS=" 25 | tensorflow_datasets/image/lsun_test.py 26 | " 27 | fi 28 | TF2_IGNORE=$(for test in $TF2_IGNORE_TESTS; do echo "--ignore=$test "; done) 29 | 30 | # Run Tests 31 | pytest $TF2_IGNORE --ignore="tensorflow_datasets/core/test_utils.py" 32 | set_status 33 | 34 | # Test notebooks 35 | NOTEBOOKS=" 36 | docs/overview.ipynb 37 | " 38 | for notebook in $NOTEBOOKS 39 | do 40 | jupyter nbconvert --ExecutePreprocessor.timeout=600 --to notebook --execute $notebook 41 | set_status 42 | done 43 | 44 | exit $STATUS 45 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/svhn_cropped/generate_data.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | r"""Generate fake data for SVHN. 17 | 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import numpy as np 25 | import scipy.io 26 | 27 | for split_name, num_examples in [ 28 | ('train', 3), 29 | ('test', 2), 30 | ('extra', 1), 31 | ]: 32 | img_shape = (32, 32, 3, num_examples) 33 | scipy.io.savemat('{}_32x32.mat'.format(split_name), { 34 | 'X': np.random.randint(255, size=img_shape, dtype=np.uint8), 35 | 'y': np.random.randint(1, 10, size=(num_examples, 1)), 36 | }) 37 | -------------------------------------------------------------------------------- /tensorflow_datasets/video/bair_robot_pushing_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.video.bair_robot_pushing.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.testing import dataset_builder_testing 23 | from tensorflow_datasets.video import bair_robot_pushing 24 | 25 | 26 | class BairRobotPushingTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = bair_robot_pushing.BairRobotPushingSmall 28 | 29 | SPLITS = { 30 | "train": 1, 31 | "test": 1, 32 | } 33 | 34 | 35 | if __name__ == "__main__": 36 | dataset_builder_testing.main() 37 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/quickdraw_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from tensorflow_datasets.image import quickdraw 21 | from tensorflow_datasets.testing import dataset_builder_testing 22 | 23 | 24 | class QuickdrawTest(dataset_builder_testing.TestCase): 25 | 26 | DATASET_CLASS = quickdraw.QuickdrawBitmap 27 | SPLITS = { 28 | "train": 9, 29 | } 30 | DL_EXTRACT_RESULT = { 31 | "airplane": "airplane.npy", 32 | "ambulance": "ambulance.npy", 33 | "angel": "angel.npy", 34 | } 35 | 36 | if __name__ == "__main__": 37 | dataset_builder_testing.main() 38 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/lsun_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.image.lsun.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import lsun 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | class LsunTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = lsun.Lsun 28 | BUILDER_CONFIG_NAMES_TO_TEST = ["classroom"] 29 | 30 | SPLITS = { 31 | "train": 3, 32 | "validation": 1, 33 | } 34 | 35 | DL_EXTRACT_RESULT = {"train": "", "val": ""} 36 | 37 | 38 | if __name__ == "__main__": 39 | dataset_builder_testing.main() 40 | -------------------------------------------------------------------------------- /tensorflow_datasets/text/squad_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for squad dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.testing import dataset_builder_testing 23 | from tensorflow_datasets.text import squad 24 | 25 | 26 | class SquadTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = squad.Squad 28 | 29 | DL_EXTRACT_RESULT = { 30 | "train": "train-v1.1.json", 31 | "dev": "dev-v1.1.json", 32 | } 33 | 34 | SPLITS = { 35 | "train": 3, 36 | "validation": 2, 37 | } 38 | 39 | 40 | if __name__ == "__main__": 41 | dataset_builder_testing.main() 42 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/cifar100/cifar-100-binary/fine_label_names.txt: -------------------------------------------------------------------------------- 1 | apple 2 | aquarium_fish 3 | baby 4 | bear 5 | beaver 6 | bed 7 | bee 8 | beetle 9 | bicycle 10 | bottle 11 | bowl 12 | boy 13 | bridge 14 | bus 15 | butterfly 16 | camel 17 | can 18 | castle 19 | caterpillar 20 | cattle 21 | chair 22 | chimpanzee 23 | clock 24 | cloud 25 | cockroach 26 | couch 27 | crab 28 | crocodile 29 | cup 30 | dinosaur 31 | dolphin 32 | elephant 33 | flatfish 34 | forest 35 | fox 36 | girl 37 | hamster 38 | house 39 | kangaroo 40 | keyboard 41 | lamp 42 | lawn_mower 43 | leopard 44 | lion 45 | lizard 46 | lobster 47 | man 48 | maple_tree 49 | motorcycle 50 | mountain 51 | mouse 52 | mushroom 53 | oak_tree 54 | orange 55 | orchid 56 | otter 57 | palm_tree 58 | pear 59 | pickup_truck 60 | pine_tree 61 | plain 62 | plate 63 | poppy 64 | porcupine 65 | possum 66 | rabbit 67 | raccoon 68 | ray 69 | road 70 | rocket 71 | rose 72 | sea 73 | seal 74 | shark 75 | shrew 76 | skunk 77 | skyscraper 78 | snail 79 | snake 80 | spider 81 | squirrel 82 | streetcar 83 | sunflower 84 | sweet_pepper 85 | table 86 | tank 87 | telephone 88 | television 89 | tiger 90 | tractor 91 | train 92 | trout 93 | tulip 94 | turtle 95 | wardrobe 96 | whale 97 | willow_tree 98 | wolf 99 | woman 100 | worm -------------------------------------------------------------------------------- /tensorflow_datasets/core/download/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """`tfds.download.DownloadManager` API.""" 17 | 18 | from tensorflow_datasets.core.download.download_manager import DownloadConfig 19 | from tensorflow_datasets.core.download.download_manager import DownloadManager 20 | from tensorflow_datasets.core.download.extractor import iter_archive 21 | from tensorflow_datasets.core.download.resource import ExtractMethod 22 | from tensorflow_datasets.core.download.resource import Resource 23 | from tensorflow_datasets.core.download.util import GenerateMode 24 | 25 | __all__ = [ 26 | "DownloadConfig", 27 | "DownloadManager", 28 | "GenerateMode", 29 | "Resource", 30 | "ExtractMethod", 31 | "iter_archive", 32 | ] 33 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/file_adapter.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: tfds.file_adapter 7 | 8 | 9 | 10 | Defined in [`core/file_format_adapter.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/file_format_adapter.py). 11 | 12 | tfds.file_adapter.FileFormatAdapters for GeneratorBasedBuilder. 13 | 14 | FileFormatAdapters implement methods to write and read data from a 15 | particular file format. 16 | 17 | Currently, two FileAdapters are available: 18 | * TFRecordExampleAdapter: To store the pre-processed dataset as a .tfrecord file 19 | * CSVAdapter: To store the dataset as a CSV file 20 | 21 | ```python 22 | return TFRecordExampleAdapter({ 23 | "x": tf.FixedLenFeature(tuple(), tf.int64) 24 | }) 25 | ``` 26 | 27 | ## Classes 28 | 29 | [`class FileFormatAdapter`](../tfds/file_adapter/FileFormatAdapter.md): Provides writing and reading methods for a file format. 30 | 31 | [`class TFRecordExampleAdapter`](../tfds/file_adapter/TFRecordExampleAdapter.md): Writes/Reads serialized Example protos to/from TFRecord files. 32 | 33 | [`class CSVAdapter`](../tfds/file_adapter/CSVAdapter.md): Writes/reads features to/from CSV files. 34 | 35 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/units.md: -------------------------------------------------------------------------------- 1 |
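The inline snippet above, expanded into a self-contained sketch (importing from `core/file_format_adapter.py`, where the page says the classes are defined):

```python
import tensorflow as tf
from tensorflow_datasets.core.file_format_adapter import TFRecordExampleAdapter

# Adapter that (de)serializes examples holding one scalar int64 feature "x".
adapter = TFRecordExampleAdapter({
    "x": tf.FixedLenFeature(tuple(), tf.int64),
})
```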
13 | 14 | # Module: tfds.units 15 | 16 | 17 | 18 | Defined in [`core/units.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/units.py). 19 | 20 | Defines convenience constants/functions for converting various units. 21 | 22 | ## Functions 23 | 24 | [`size_str(...)`](../tfds/units/size_str.md): Returns a human readable size string. 25 | 26 | ## Other Members 27 | 28 |
### `GiB`

### `KiB`

### `MiB`

### `PiB`

### `TiB`

### `absolute_import`

### `division`
41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /oss_scripts/oss_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -vx # print command from file as well as evaluated command 4 | set -e # fail and exit on any command erroring 5 | 6 | function setup_env() { 7 | local py_version=$1 8 | local venv_path="tfds_env_${py_version}" 9 | virtualenv -p $py_version $venv_path 10 | source $venv_path/bin/activate 11 | pip install -q --upgrade setuptools pip 12 | pip install wheel twine pyopenssl 13 | } 14 | 15 | GIT_COMMIT_ID=${1:-""} 16 | [[ -z $GIT_COMMIT_ID ]] && echo "Must provide a commit" && exit 1 17 | SETUP_ARGS="" 18 | if [ "$GIT_COMMIT_ID" = "nightly" ] 19 | then 20 | GIT_COMMIT_ID="master" 21 | SETUP_ARGS="--nightly" 22 | fi 23 | 24 | TMP_DIR=$(mktemp -d) 25 | pushd $TMP_DIR 26 | 27 | echo "Cloning tensorflow/datasets and checking out commit $GIT_COMMIT_ID" 28 | git clone https://github.com/tensorflow/datasets.git 29 | cd datasets 30 | git checkout $GIT_COMMIT_ID 31 | 32 | setup_env python2 33 | 34 | echo "Building source distribution" 35 | python setup.py sdist $SETUP_ARGS 36 | 37 | # Build the wheels 38 | python setup.py bdist_wheel $SETUP_ARGS 39 | setup_env python3 40 | python setup.py bdist_wheel $SETUP_ARGS 41 | 42 | # Publish to PyPI 43 | read -p "Publish? (y/n) " -r 44 | echo 45 | if [[ $REPLY =~ ^[Yy]$ ]] 46 | then 47 | echo "Publishing to PyPI" 48 | twine upload dist/* 49 | else 50 | echo "Skipping upload" 51 | exit 1 52 | fi 53 | 54 | popd 55 | rm -rf $TMP_DIR 56 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/svhn_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tfds.image.svhn.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import svhn 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | class SvhnTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = svhn.SvhnCropped 28 | SPLITS = { # Number of examples. 29 | "train": 3, 30 | "test": 2, 31 | "extra": 1, 32 | } 33 | DL_EXTRACT_RESULT = { 34 | "train": "train_32x32.mat", 35 | "test": "test_32x32.mat", 36 | "extra": "extra_32x32.mat", 37 | } 38 | 39 | 40 | if __name__ == "__main__": 41 | dataset_builder_testing.main() 42 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/SplitInfo.md: -------------------------------------------------------------------------------- 1 |
10 | 11 | # tfds.core.SplitInfo 12 | 13 | ## Class `SplitInfo` 14 | 15 | 16 | 17 | 18 | 19 | Defined in [`core/utils/py_utils.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/utils/py_utils.py). 20 | 21 | Wraps `proto.SplitInfo` with an additional property. 22 | 23 |
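For orientation, a minimal usage sketch (hedged: it assumes the standard `tfds.builder` workflow, that `info.splits` behaves like a dict of split name to `SplitInfo`, and that split sizes are populated once the dataset has been prepared):

```python
import tensorflow_datasets as tfds

builder = tfds.builder("mnist")
builder.download_and_prepare()

# `builder.info.splits` maps split names to `SplitInfo` objects.
train_info = builder.info.splits["train"]
print(train_info.num_examples)  # `num_examples` is documented below.
```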
### `__init__`
24 | 25 | ``` python 26 | __init__( 27 | *args, 28 | **kwargs 29 | ) 30 | ``` 31 | 32 | 33 | 34 | 35 | 36 | ## Properties 37 | 38 |
### `num_examples`
39 | 40 | 41 | 42 | 43 | 44 | ## Methods 45 | 46 |
### `__eq__`
47 | 48 | ``` python 49 | __eq__(other) 50 | ``` 51 | 52 | 53 | 54 |
### `__getattr__`
55 | 56 | ``` python 57 | __getattr__(attr_name) 58 | ``` 59 | 60 | 61 | 62 |
### `get_proto`
63 | 64 | ``` python 65 | get_proto() 66 | ``` 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/text_lib.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # Module: tfds.features.text 7 | 8 | 9 | 10 | Defined in [`core/features/text/__init__.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/text/__init__.py). 11 | 12 | Text utilities. 13 | 14 | tfds includes a set of `TextEncoder`s as well as a `Tokenizer` to enable 15 | expressive, performant, and reproducible natural language research. 16 | 17 | ## Classes 18 | 19 | [`class ByteTextEncoder`](../../tfds/features/text/ByteTextEncoder.md): Byte-encodes text. 20 | 21 | [`class SubwordTextEncoder`](../../tfds/features/text/SubwordTextEncoder.md): Invertible `TextEncoder` using word pieces with a byte-level fallback. 22 | 23 | [`class TextEncoder`](../../tfds/features/text/TextEncoder.md): Abstract base class for converting between text and integers. 24 | 25 | [`class TextEncoderConfig`](../../tfds/features/text/TextEncoderConfig.md): Configuration for tfds.features.Text. 26 | 27 | [`class Tokenizer`](../../tfds/features/text/Tokenizer.md): Splits a string into tokens, and joins them back. 28 | 29 | [`class TokenTextEncoder`](../../tfds/features/text/TokenTextEncoder.md): TextEncoder backed by a list of tokens. 30 | 31 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/cifar_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for cifar dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import cifar 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | # testing/cifar.py generates fake input data 27 | 28 | 29 | class Cifar10Test(dataset_builder_testing.TestCase): 30 | DATASET_CLASS = cifar.Cifar10 31 | SPLITS = { 32 | "train": 10, 33 | "test": 2, 34 | } 35 | 36 | 37 | class Cifar100Test(dataset_builder_testing.TestCase): 38 | DATASET_CLASS = cifar.Cifar100 39 | SPLITS = { 40 | "train": 10, 41 | "test": 2, 42 | } 43 | 44 | 45 | if __name__ == "__main__": 46 | dataset_builder_testing.main() 47 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/download.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # Module: tfds.download 7 | 8 | 9 | 10 | Defined in [`core/download/__init__.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/download/__init__.py). 11 | 12 | tfds.download.DownloadManager API. 13 | 14 | ## Classes 15 | 16 | [`class DownloadConfig`](../tfds/download/DownloadConfig.md): Configuration for tfds.core.DatasetBuilder.download_and_prepare. 17 | 18 | [`class DownloadManager`](../tfds/download/DownloadManager.md): Manages the download and extraction of files, as well as caching. 19 | 20 | [`class GenerateMode`](../tfds/download/GenerateMode.md): `Enum` for how to treat pre-existing downloads and data. 21 | 22 | [`class Resource`](../tfds/download/Resource.md): Represents a resource to download, extract, or both. 23 | 24 | [`class ExtractMethod`](../tfds/download/ExtractMethod.md): The extraction method to use to pre-process a downloaded file. 25 | 26 | ## Functions 27 | 28 | [`iter_archive(...)`](../tfds/download/iter_archive.md): Yields (path_in_archive, f_obj) for archive at path using tfds.download.ExtractMethod. 29 | 30 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/diabetic_retinopathy_detection_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | #     http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for diabetic_retinopathy_detection dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import diabetic_retinopathy_detection 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | class DiabeticRetinopathyDetectionTest(dataset_builder_testing.TestCase): 27 |   DATASET_CLASS = diabetic_retinopathy_detection.DiabeticRetinopathyDetection 28 |   SPLITS = {  # Expected number of examples in each split. 29 |       "sample": 4, 30 |       "train": 12, 31 |       "test": 12, 32 |   } 33 |   OVERLAPPING_SPLITS = ["sample"]  # contains examples from the other splits 34 | 35 | 36 | if __name__ == "__main__": 37 |   dataset_builder_testing.main() 38 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/lazy_imports_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | #     http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.core.lazy_imports.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from absl.testing import parameterized 23 | import tensorflow as tf 24 | import tensorflow_datasets as tfds 25 | 26 | 27 | class LazyImportsTest(parameterized.TestCase, tf.test.TestCase): 28 | 29 | @parameterized.parameters( 30 | "matplotlib", 31 | "os", 32 | "pydub", 33 | "pyplot", 34 | "scipy", 35 | "scipy_io", 36 | ) 37 | def test_import(self, module_name): 38 | getattr(tfds.core.lazy_imports, module_name) 39 | 40 | def test_bad_import(self): 41 | with self.assertRaisesRegex(ImportError, "extras_require"): 42 | _ = tfds.core.lazy_imports.test_foo 43 | 44 | 45 | if __name__ == "__main__": 46 | tf.test.main() 47 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/SplitGenerator.md: -------------------------------------------------------------------------------- 1 |
6 | 7 | # tfds.core.SplitGenerator 8 | 9 | ## Class `SplitGenerator` 10 | 11 | 12 | 13 | 14 | 15 | Defined in [`core/splits.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/splits.py). 16 | 17 | Defines the split information for the generator. 18 | 19 | This should be used as the return value of 20 | `GeneratorBasedBuilder._split_generators`. 21 | See `GeneratorBasedBuilder._split_generators` for more info and an example 22 | of usage. 23 | 24 | 
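A minimal sketch of that usage (hedged: the URL, download key, and `gen_kwargs` names are placeholders, not part of any real dataset):

```python
def _split_generators(self, dl_manager):
  # Hypothetical download; "train_file" and the URL are illustrative only.
  paths = dl_manager.download_and_extract({
      "train_file": "https://example.com/train.txt",
  })
  return [
      tfds.core.SplitGenerator(
          name=tfds.Split.TRAIN,
          num_shards=10,
          gen_kwargs={"data_path": paths["train_file"]},
      ),
  ]
```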
### `__init__`
25 | 26 | ``` python 27 | __init__( 28 | name, 29 | num_shards=1, 30 | gen_kwargs=None 31 | ) 32 | ``` 33 | 34 | Constructs a `SplitGenerator`. 35 | 36 | #### Args: 37 | 38 | * `name`: `str` or `list`, name of the Split for which the generator will 39 | create the examples. If a list is given, the generator examples will be 40 | distributed among the splits proportionally to the num_shards. 41 | * `num_shards`: `int` or `list`, number of shards between which the 42 | generated examples will be written. If name is a list, then num_shards 43 | should be a list with the same number of elements. 44 | * `gen_kwargs`: `dict`, kwargs to forward to the _generate_examples() method 45 | of the builder. 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/celeba_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.image.celeba.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import celeba 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | class CelebATest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = celeba.CelebA 28 | 29 | SPLITS = { 30 | "train": 3, 31 | "validation": 2, 32 | "test": 1, 33 | } 34 | 35 | DL_EXTRACT_RESULT = { 36 | "img_align_celeba": "", # Code looks into 'img_align_celeba' subdir. 37 | "list_eval_partition": "list_eval_partition.txt", 38 | "list_attr_celeba": "list_attr_celeba.txt", 39 | "landmarks_celeba": "list_landmarks_align_celeba.txt", 40 | } 41 | 42 | 43 | if __name__ == "__main__": 44 | dataset_builder_testing.main() 45 | -------------------------------------------------------------------------------- /tensorflow_datasets/translate/wmt_ende_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
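# Note on the conventions used by `dataset_builder_testing.TestCase`
# subclasses such as the one below: `DL_EXTRACT_RESULT` maps the keys a
# builder requests from the download manager to fake files/directories under
# `testing/test_data/fake_examples/<dataset>/`, and `SPLITS` lists the
# expected number of fake examples per split.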
15 | 16 | """Tests for WMT translate dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.testing import dataset_builder_testing 23 | from tensorflow_datasets.translate import wmt_ende 24 | 25 | 26 | class TranslateEndeWMTTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = wmt_ende.WmtTranslateEnde 28 | BUILDER_CONFIG_NAMES_TO_TEST = ["ende_plain_text_t2t", "ende_subwords8k_t2t"] 29 | OVERLAPPING_SPLITS = ["validation"] 30 | 31 | DL_EXTRACT_RESULT = { 32 | "train_0": "nc_v13", 33 | "train_1": "crawl", 34 | "train_2": "europarl", 35 | "dev_0": "validation", 36 | } 37 | 38 | SPLITS = { 39 | "train": 5, 40 | "validation": 2, 41 | } 42 | 43 | 44 | if __name__ == "__main__": 45 | dataset_builder_testing.main() 46 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/text/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Text utilities. 17 | 18 | `tfds` includes a set of `TextEncoder`s as well as a `Tokenizer` to enable 19 | expressive, performant, and reproducible natural language research. 20 | """ 21 | 22 | from tensorflow_datasets.core.features.text.subword_text_encoder import SubwordTextEncoder 23 | from tensorflow_datasets.core.features.text.text_encoder import ByteTextEncoder 24 | from tensorflow_datasets.core.features.text.text_encoder import TextEncoder 25 | from tensorflow_datasets.core.features.text.text_encoder import TextEncoderConfig 26 | from tensorflow_datasets.core.features.text.text_encoder import Tokenizer 27 | from tensorflow_datasets.core.features.text.text_encoder import TokenTextEncoder 28 | 29 | __all__ = [ 30 | "ByteTextEncoder", 31 | "SubwordTextEncoder", 32 | "TextEncoder", 33 | "TextEncoderConfig", 34 | "Tokenizer", 35 | "TokenTextEncoder", 36 | ] 37 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Image datasets.""" 17 | 18 | from tensorflow_datasets.image.celeba import CelebA 19 | from tensorflow_datasets.image.cifar import Cifar10 20 | from tensorflow_datasets.image.cifar import Cifar100 21 | from tensorflow_datasets.image.coco import Coco2014 22 | from tensorflow_datasets.image.diabetic_retinopathy_detection import DiabeticRetinopathyDetection 23 | from tensorflow_datasets.image.image_folder import ImageLabelFolder 24 | from tensorflow_datasets.image.imagenet import Imagenet2012 25 | from tensorflow_datasets.image.lsun import Lsun 26 | from tensorflow_datasets.image.mnist import FashionMNIST 27 | from tensorflow_datasets.image.mnist import MNIST 28 | from tensorflow_datasets.image.omniglot import Omniglot 29 | from tensorflow_datasets.image.open_images import OpenImagesV4 30 | from tensorflow_datasets.image.quickdraw import QuickdrawBitmap 31 | from tensorflow_datasets.image.svhn import SvhnCropped 32 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/file_adapter/FileFormatAdapter.md: -------------------------------------------------------------------------------- 1 |
8 | 9 | # tfds.file_adapter.FileFormatAdapter 10 | 11 | ## Class `FileFormatAdapter` 12 | 13 | 14 | 15 | 16 | 17 | Defined in [`core/file_format_adapter.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/file_format_adapter.py). 18 | 19 | Provides writing and reading methods for a file format. 20 | 21 | ## Properties 22 | 23 |
### `filetype_suffix`
24 | 25 | Returns a str file type suffix (e.g. "csv"). 26 | 27 | 28 | 29 | ## Methods 30 | 31 |
### `dataset_from_filename`
32 | 33 | ``` python 34 | dataset_from_filename(filename) 35 | ``` 36 | 37 | Returns a `tf.data.Dataset` whose elements are dicts given a filename. 38 | 39 |
### `write_from_generator`
40 | 41 | ``` python 42 | write_from_generator( 43 |     generator_fn, 44 |     output_files 45 | ) 46 | ``` 47 | 48 | Writes to `output_files` from the given generator. 49 | 50 | #### Args: 51 | 52 | * `generator_fn`: returns generator yielding dictionaries of feature name to 53 |   value. 54 | * `output_files`: `list`, output files to write records to. 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/fake_examples/diabetic_retinopathy_detection/README.md: -------------------------------------------------------------------------------- 1 | Files in this directory have been created manually. 2 | They exhibit the following properties seen in the original dataset: 3 | 4 | - a: notch 5 |   1. none 6 |   2. triangle 7 |   3. round 8 |   4. rectangle 9 | - b: contrast 10 |   1. high 11 |   2. low 12 | - c: crop 13 |   1. larger than retina circle 14 |   2. just around the retina circle 15 |   3. smaller than retina circle 16 | 17 | Common properties: 18 | 19 | - notch is always on the top right side 20 | - retina (circle) is about centered on the picture 21 | 22 | So we have the following mix of images: 23 | - a1, b1, c1: 1_left.jpeg -> sample 24 | - a1, b1, c2: 1_right.jpeg -> sample 25 | - a1, b1, c3: 2_left.jpeg 26 | - a1, b2, c1: 2_right.jpeg 27 | - a1, b2, c2: 3_left.jpeg 28 | - a1, b2, c3: 3_right.jpeg 29 | 30 | - a2, b1, c1: 4_left.jpeg 31 | - a2, b1, c2: 4_right.jpeg 32 | - a2, b1, c3: 5_left.jpeg -> sample 33 | - a2, b2, c1: 5_right.jpeg -> sample 34 | - a2, b2, c2: 6_left.jpeg 35 | - a2, b2, c3: 6_right.jpeg 36 | 37 | - a3, b1, c1: 7_left.jpeg 38 | - a3, b1, c2: 7_right.jpeg 39 | - a3, b1, c3: 8_left.jpeg 40 | - a3, b2, c1: 8_right.jpeg 41 | - a3, b2, c2: 9_left.jpeg 42 | - a3, b2, c3: 9_right.jpeg 43 | 44 | - a4, b1, c1: 10_left.jpeg 45 | - a4, b1, c2: 10_right.jpeg 46 | - a4, b1, c3: 11_left.jpeg 47 | - a4, b2, c1: 11_right.jpeg 48 | - a4, b2, c2: 12_left.jpeg 49 | - a4, b2, c3: 12_right.jpeg 50 | 51 | There are 4 pictures in the `sample` split (marked above). All odd-numbered pictures 52 | are in the `test` split, all even-numbered pictures are in the `train` split. 53 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/units_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | #     http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.core.units.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from tensorflow_datasets.core import units 25 | 26 | 27 | class UnitsTest(tf.test.TestCase): 28 | 29 |   def test_none(self): 30 |     self.assertEqual("?? 
GiB", units.size_str(None)) 31 | 32 | def test_normal_sizes(self): 33 | self.assertEqual("1.50 PiB", units.size_str(1.5 * units.PiB)) 34 | self.assertEqual("1.50 TiB", units.size_str(1.5 * units.TiB)) 35 | self.assertEqual("1.50 GiB", units.size_str(1.5 * units.GiB)) 36 | self.assertEqual("1.50 MiB", units.size_str(1.5 * units.MiB)) 37 | self.assertEqual("1.50 KiB", units.size_str(1.5 * units.KiB)) 38 | 39 | def test_bytes(self): 40 | self.assertEqual("150 bytes", units.size_str(150)) 41 | 42 | 43 | if __name__ == "__main__": 44 | tf.test.main() 45 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/mnist_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for mnist dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import mnist 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | # testing/mnist.py generates fake input data 27 | 28 | mnist._TRAIN_EXAMPLES = 10 29 | mnist._TEST_EXAMPLES = 2 30 | 31 | 32 | class MNISTTest(dataset_builder_testing.TestCase): 33 | DATASET_CLASS = mnist.MNIST 34 | SPLITS = { 35 | "train": 10, 36 | "test": 2, 37 | } 38 | DL_EXTRACT_RESULT = { 39 | "train_data": "train-image", 40 | "train_labels": "train-label", 41 | "test_data": "test-image", 42 | "test_labels": "test-label", 43 | } 44 | 45 | 46 | class FashionMNISTTest(MNISTTest): 47 | DATASET_CLASS = mnist.FashionMNIST 48 | 49 | 50 | if __name__ == "__main__": 51 | dataset_builder_testing.main() 52 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_data/lorem_ipsum_zh.txt: -------------------------------------------------------------------------------- 1 | 表子市案成先写走提増形予。業知今加例認中変属棋旅崔設潮一郎夜両。料村表護立導戦優報合江定特度必。意自官健英問衛内今薄解掲隣通配国。企於仕税差演際集界場臼夕聞齢転関渡体写。聴特洗来広仙特様出被政惑益南忘了報。価助第遣真毎誠最白新選名野選実暮。会街棋級本辞再園済演好試来紀者宣記健係。意部変毎比年速文朝東約露加覚。 2 | 3 | 加索更世実主女暮断速現知東図春軍部真付。税対辞無記放舞京検勝示子率誕。衝数質療華皇米重合陽収季春毎応厳謝並先止。療券財形変事公平語図次半玉質就席掲必歴見。掲族前中示滋旧伊官安動年間朝目様魅。債鳴畑線親別白郎毎船漠重同型。別卒提庫図横乗団遣交面要応安下生。信記点間新回撲質提図殺画暗。約報苦奏勝申物沖周王万闇前爺暮。 4 | 5 | 号阪観生栄阜規生国必話写。値山線料料委六責備映県市変毎過握。勢辞堀績今鳴朗民岐文統聞満需利京金藤。検地業容宇椅野点断市揮息。職域芸批後意変線企谷何点済先無要加迎指。森業真案害宇友経車拝仕章寺発終芸除。米唐案財来女脱低前済面騰管棋選購事試切。枝科顕原次整道作向米来誉軽録格住回応下。瓶岸命転測顧車把争祉響図。 6 | 7 | 渡水第相済歌野問申前泉回一芸百並掲作点戦。教貯始接駅死足戦任達化郎刺愛一。子限投芸臨憲野皇天平提術再神来下同必覧集。星知芸台熊発場康色都扱助文巨響級社身。解植備迫深身送堀苦東参作告村脱犯断著歳。悪受題栃検子現川事識青子南止短視北車車状。受渡写天現任日分続応味談貞溶午。若平低野郎海関問界社更木展下兆組事厳。 8 | 9 | 怡載査検突能湾価更竹駐有。反資文障測異込報注止合務舗政。横場健川共合紙位索果腔見。竹速視再以録氷円国縄鹿二挙幼属納年経。発子政無回済場散治回録部方土。造被更弾愛所教自当愛見族有出関。読勢将配断販化辞必思都芸掲猿博。来体給話藤細覧隆禎答広大任講。勝全電害区闘好監会断採治決検専診安別。帯発篤覧部要垣就使軽現写抱別規治。 10 | 11 | 多研弟題殖中題講点気化福古登。身日堀木際男守果負夫口誌書分陣者要術体。歌更応県事著名験市円考蕗鹿糠。放社中竹完遺集親検民凝必出件在六痴湾。素液塁活支逮配毅入用夢都情改線謎寄外替賞。参本破長計期芸画集消維情大独。載記信行神氏父経説報日位正水節産国。文走展含人提韓会関江賞彦腎。問氷強裁之直雰本中都構賞。 
12 | 13 | 念会軽容学世四聞検了乳賀起賞。碁連極遅良竹者歳興委防皇営職読本問能野。望愛票毎森議校手真猪党家外南間写検国。協視更新情帽出対化報急告売帳料。酢代作北約般気参容託竹目権日。上対請安猛石庫規声球世作。棋能方田航子平約相福労度健羽社明。流方写最萩能気著提地影重媛産案火。一続者能魅紅月祖戦部動況西好汚。 14 | 15 | 待新松程教込不銀写国歯界単。本革良年査聞国間談感高館読納辺率証。東格連査中時生能田田養表質。旅質投他今込真風幅東請対出。近換梨選覧社員種過雅先疑記部次。殖気年隆次討安禁白今要九経要後。容途色解再操月移屋下意提価恵集。関前速万埼詰真言市州養婦必集会様帰青学玉。際佳独避新良初開際覧代目新大当門裁務散送。 16 | 17 | 新和報広供投右神止港韓質更明信読。能思画本著福歓挙状継広経詳交警押策。時鳴児金意大宝策件校二音真景化。貸率製会充初最岳違減能講活充的川。自廊安記調果成愛統晴曲難抜第奇原。根光訴者無創能店面監育車。料邸投賞禁必都利児優親容社組交援状回試。頼者教未量雪発者破属家無然。部側聞特少主円続議論小香智氷岐悩育録導。 18 | 19 | 参強真溝本時方学旅石員政。棄供請盆絵就局神不現市済一間。像告掲者記党並先側供転崎主年養案周安。部瀞馬位掲一問登世感意禁保力業今賀時購。松表土教熊送維著転表与旅西競単故通逆。投休頬健禁職表者月社営侍聞奈空打匹気望。判車現済唆元七主援百趣回。係豊高求供外月高当容百触文。談勢紙描販万繊武載献免鶴。端意端規旬毎務完号亡当業顕。 20 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/builder.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # tfds.builder 7 | 8 | ``` python 9 | tfds.builder( 10 | name, 11 | **builder_init_kwargs 12 | ) 13 | ``` 14 | 15 | 16 | 17 | Defined in [`core/registered.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/registered.py). 18 | 19 | Fetches a tfds.core.DatasetBuilder by string name. 20 | 21 | #### Args: 22 | 23 | * `name`: `str`, the registered name of the `DatasetBuilder` (the snake case 24 | version of the class name). This can be either `"dataset_name"` or 25 | `"dataset_name/config_name"` for datasets with `BuilderConfig`s. 26 | As a convenience, this string may contain comma-separated keyword 27 | arguments for the builder. For example `"foo_bar/a=True,b=3"` would use 28 | the `FooBar` dataset passing the keyword arguments `a=True` and `b=3` 29 | (for builders with configs, it would be `"foo_bar/zoo/a=True,b=3"` to 30 | use the `"zoo"` config and pass to the builder keyword arguments `a=True` 31 | and `b=3`). 32 | * `**builder_init_kwargs`: `dict` of keyword arguments passed to the 33 | `DatasetBuilder`. These will override keyword arguments passed in `name`, 34 | if any. 35 | 36 | 37 | #### Returns: 38 | 39 | A tfds.core.DatasetBuilder. 40 | 41 | 42 | #### Raises: 43 | 44 | * `DatasetNotFoundError`: if `name` is unrecognized. -------------------------------------------------------------------------------- /tensorflow_datasets/testing/test_case.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Base TestCase to use test_data.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | import tempfile 24 | 25 | from absl.testing import absltest 26 | import tensorflow as tf 27 | 28 | 29 | class TestCase(absltest.TestCase): 30 | """Base TestCase for tests using test_data or tmp_dir. 31 | 32 | `test_data` class attribute: path to the directory with test data. 33 | `tmp_dir` attribute: path to temp directory reset before every test. 34 | """ 35 | 36 | @classmethod 37 | def setUpClass(cls): # pylint: disable=g-missing-super-call 38 | cls.test_data = os.path.join(os.path.dirname(__file__), "test_data") 39 | 40 | def setUp(self): 41 | super(TestCase, self).setUp() 42 | # get_temp_dir is actually the same for all tests, so create a temp sub-dir. 43 | self.tmp_dir = tempfile.mkdtemp(dir=tf.compat.v1.test.get_temp_dir()) 44 | 45 | 46 | main = tf.test.main 47 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/download/GenerateMode.md: -------------------------------------------------------------------------------- 1 |
9 | 10 | # tfds.download.GenerateMode 11 | 12 | ## Class `GenerateMode` 13 | 14 | 15 | 16 | ### Aliases: 17 | 18 | * Class `tfds.GenerateMode` 19 | * Class `tfds.download.GenerateMode` 20 | 21 | 22 | 23 | Defined in [`core/download/util.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/download/util.py). 24 | 25 | `Enum` for how to treat pre-existing downloads and data. 26 | 27 | The default mode is `REUSE_DATASET_IF_EXISTS`, which will reuse both 28 | raw downloads and the prepared dataset if they exist. 29 | 30 | The generation modes: 31 | 32 | | | Downloads | Dataset | 33 | | -----------------------------------|-----------|---------| 34 | | `REUSE_DATASET_IF_EXISTS` (default)| Reuse | Reuse | 35 | | `REUSE_CACHE_IF_EXISTS` | Reuse | Fresh | 36 | | `FORCE_REDOWNLOAD` | Fresh | Fresh | 37 | 38 | 
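For example, to regenerate a dataset from fresh downloads (a hedged sketch: it assumes `download_and_prepare` accepts a `download_config` keyword, as suggested by `tfds.download.DownloadConfig`):

```python
import tensorflow_datasets as tfds

config = tfds.download.DownloadConfig(
    download_mode=tfds.GenerateMode.FORCE_REDOWNLOAD)
builder = tfds.builder("mnist")
builder.download_and_prepare(download_config=config)
```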
## Class Members

### `FORCE_REDOWNLOAD`

### `REUSE_CACHE_IF_EXISTS`

### `REUSE_DATASET_IF_EXISTS`

### `__members__`
47 | 48 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/e2e_binary.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | r"""Binary exercising critical workflow of tensorflow datasets. 17 | 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | from absl import app 25 | import tensorflow as tf 26 | import tensorflow_datasets as tfds 27 | 28 | tf.enable_eager_execution() 29 | 30 | 31 | def main(argv): 32 | del argv 33 | mnist, info = tfds.load('mnist', with_info=True) 34 | print(mnist, info) 35 | mnist_train = tfds.load('mnist', split='train') 36 | print(mnist_train) 37 | mnist_subsplit = tfds.Split.TRAIN.subsplit(tfds.percent[:10]) 38 | mnist_train2 = tfds.load('mnist', split=mnist_subsplit) 39 | print(mnist_train2) 40 | for i, unused_row in enumerate(mnist_train2): 41 | if i > 10: 42 | break 43 | print(i) 44 | cifar10, info = tfds.load('cifar10', with_info=True) 45 | print(cifar10, info) 46 | cifar10_np = tfds.as_numpy(cifar10) 47 | print(cifar10_np) 48 | 49 | 50 | if __name__ == '__main__': 51 | app.run(main) 52 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core.md: -------------------------------------------------------------------------------- 1 |
5 | 6 | # Module: tfds.core 7 | 8 | 9 | 10 | Defined in [`core/__init__.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/__init__.py). 11 | 12 | API to define datasets. 13 | 14 | ## Classes 15 | 16 | [`class lazy_imports`](../tfds/core/lazy_imports.md): Lazy importer for heavy dependencies. 17 | 18 | [`class BuilderConfig`](../tfds/core/BuilderConfig.md): Base class for `DatasetBuilder` data configuration. 19 | 20 | [`class DatasetBuilder`](../tfds/core/DatasetBuilder.md): Abstract base class for all datasets. 21 | 22 | [`class GeneratorBasedBuilder`](../tfds/core/GeneratorBasedBuilder.md): Base class for datasets with data generation based on dict generators. 23 | 24 | [`class DatasetInfo`](../tfds/core/DatasetInfo.md): Information about a dataset. 25 | 26 | [`class NamedSplit`](../tfds/core/NamedSplit.md): Descriptor corresponding to a named split (train, test, ...). 27 | 28 | [`class SplitBase`](../tfds/core/SplitBase.md): Abstract base class for Split compositionality. 29 | 30 | [`class SplitDict`](../tfds/core/SplitDict.md): Split info object. 31 | 32 | [`class SplitGenerator`](../tfds/core/SplitGenerator.md): Defines the split information for the generator. 33 | 34 | [`class SplitInfo`](../tfds/core/SplitInfo.md): Wraps `proto.SplitInfo` with an additional property. 35 | 36 | [`class Version`](../tfds/core/Version.md): Dataset version MAJOR.MINOR.PATCH. 37 | 38 | ## Functions 39 | 40 | [`get_tfds_path(...)`](../tfds/core/get_tfds_path.md): Returns absolute path to file given path relative to tfds root. 41 | 42 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/image_folder_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | #     http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for image_folder dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import functools 23 | 24 | from tensorflow_datasets.core import registered 25 | from tensorflow_datasets.image import image_folder 26 | from tensorflow_datasets.testing import dataset_builder_testing 27 | 28 | 29 | class ImageLabelFolderTest(dataset_builder_testing.TestCase): 30 |   DATASET_CLASS = functools.partial( 31 |       image_folder.ImageLabelFolder, dataset_name="image_folder_data") 32 |   # The above construct forces us to disable these checks: 33 |   MOCK_OUT_FORBIDDEN_OS_FUNCTIONS = False 34 |   SPLITS = { 35 |       "train": 2,  # Number of examples. 
36 | "test": 6, 37 | } 38 | 39 | def test_info(self): 40 | pass 41 | 42 | def test_registered(self): 43 | self.assertIn("image_label_folder", registered.list_builders(), 44 | "Dataset was not registered.") 45 | 46 | 47 | if __name__ == "__main__": 48 | dataset_builder_testing.main() 49 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/download/DownloadConfig.md: -------------------------------------------------------------------------------- 1 |
6 | 7 | # tfds.download.DownloadConfig 8 | 9 | ## Class `DownloadConfig` 10 | 11 | 12 | 13 | 14 | 15 | Defined in [`core/download/download_manager.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/download/download_manager.py). 16 | 17 | Configuration for tfds.core.DatasetBuilder.download_and_prepare. 18 | 19 |
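A short construction sketch (hedged: the `download_config=` keyword follows from the class description above, and the values are illustrative):

```python
import tensorflow_datasets as tfds

config = tfds.download.DownloadConfig(
    compute_stats=False,         # skip statistics generation
    max_examples_per_split=100,  # cap each split, e.g. while debugging
)
tfds.builder("mnist").download_and_prepare(download_config=config)
```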
### `__init__`
20 | 21 | ``` python 22 | __init__( 23 |     extract_dir=None, 24 |     manual_dir=None, 25 |     download_mode=None, 26 |     compute_stats=True, 27 |     max_examples_per_split=None 28 | ) 29 | ``` 30 | 31 | Constructs a `DownloadConfig`. 32 | 33 | #### Args: 34 | 35 | * `extract_dir`: `str`, directory where extracted files are stored. 36 |   Defaults to "<download_dir>/extracted". 37 | * `manual_dir`: `str`, read-only directory where manually downloaded/extracted 38 |   data is stored. Defaults to 39 |   "<download_dir>/manual". 40 | * `download_mode`: tfds.GenerateMode, how to deal with downloads or data 41 |   that already exists. Defaults to `REUSE_DATASET_IF_EXISTS`, which will 42 |   reuse both downloads and data if it already exists. 43 | * `compute_stats`: `bool`, whether to compute statistics over the generated 44 |   data. 45 | * `max_examples_per_split`: `int`, optional max number of examples to write 46 |   into each split. 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | #     http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """API to define datasets.""" 17 | 18 | from tensorflow_datasets.core.dataset_builder import BuilderConfig 19 | from tensorflow_datasets.core.dataset_builder import DatasetBuilder 20 | from tensorflow_datasets.core.dataset_builder import GeneratorBasedBuilder 21 | 22 | from tensorflow_datasets.core.dataset_info import DatasetInfo 23 | 24 | from tensorflow_datasets.core.lazy_imports import lazy_imports 25 | 26 | from tensorflow_datasets.core.splits import NamedSplit 27 | from tensorflow_datasets.core.splits import SplitBase 28 | from tensorflow_datasets.core.splits import SplitDict 29 | from tensorflow_datasets.core.splits import SplitGenerator 30 | from tensorflow_datasets.core.splits import SplitInfo 31 | from tensorflow_datasets.core.utils import get_tfds_path 32 | from tensorflow_datasets.core.utils import Version 33 | 34 | __all__ = [ 35 |     "lazy_imports", 36 |     "BuilderConfig", 37 |     "DatasetBuilder", 38 |     "GeneratorBasedBuilder", 39 |     "get_tfds_path", 40 |     "DatasetInfo", 41 |     "NamedSplit", 42 |     "SplitBase", 43 |     "SplitDict", 44 |     "SplitGenerator", 45 |     "SplitInfo", 46 |     "Version", 47 | ] 48 | -------------------------------------------------------------------------------- /tensorflow_datasets/public_api.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Public API of tfds, without the registered dataset.""" 17 | 18 | # pylint: disable=unused-import 19 | 20 | from tensorflow_datasets import core 21 | from tensorflow_datasets.core import download 22 | from tensorflow_datasets.core import features 23 | from tensorflow_datasets.core import file_format_adapter as file_adapter 24 | from tensorflow_datasets.core import units 25 | from tensorflow_datasets.core.dataset_utils import as_numpy 26 | from tensorflow_datasets.core.dataset_utils import dataset_as_numpy 27 | from tensorflow_datasets.core.download import GenerateMode 28 | from tensorflow_datasets.core.registered import builder 29 | from tensorflow_datasets.core.registered import list_builders 30 | from tensorflow_datasets.core.registered import load 31 | from tensorflow_datasets.core.splits import percent 32 | from tensorflow_datasets.core.splits import Split 33 | 34 | __all__ = [ 35 | "core", 36 | "as_numpy", 37 | "dataset_as_numpy", 38 | "download", 39 | "features", 40 | "file_adapter", 41 | "units", 42 | "GenerateMode", 43 | "builder", 44 | "list_builders", 45 | "load", 46 | "percent", 47 | "Split", 48 | ] 49 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Utility library to generate dataset-like files.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import random 23 | import tempfile 24 | 25 | import numpy as np 26 | import tensorflow as tf 27 | 28 | from tensorflow_datasets.core import utils 29 | 30 | MIN_HEIGHT_WIDTH = 10 31 | MAX_HEIGHT_WIDTH = 15 32 | CHANNELS_NB = 3 33 | 34 | 35 | def get_random_picture(height=None, width=None): 36 | """Returns random picture as np.ndarray (int).""" 37 | height = random.randrange(MIN_HEIGHT_WIDTH, MAX_HEIGHT_WIDTH) 38 | width = random.randrange(MIN_HEIGHT_WIDTH, MAX_HEIGHT_WIDTH) 39 | return np.random.randint( 40 | 256, size=(height, width, CHANNELS_NB), dtype=np.uint8) 41 | 42 | 43 | def get_random_jpeg(height=None, width=None): 44 | """Returns path to JPEG picture.""" 45 | image = get_random_picture(height, width) 46 | jpeg = tf.image.encode_jpeg(image) 47 | with utils.nogpu_session() as sess: 48 | res = sess.run(jpeg) 49 | fobj = tempfile.NamedTemporaryFile(delete=False, mode='wb', suffix='.JPEG') 50 | fobj.write(res) 51 | fobj.close() 52 | return fobj.name 53 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/bounding_boxes_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for bounding_boxes.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from tensorflow_datasets.core import features 25 | from tensorflow_datasets.core import test_utils 26 | 27 | tf.compat.v1.enable_eager_execution() 28 | 29 | 30 | class BBoxFeatureTest(test_utils.FeatureExpectationsTestCase): 31 | 32 | @property 33 | def expectations(self): 34 | 35 | return [ 36 | test_utils.FeatureExpectation( 37 | name='bbox', 38 | feature=features.BBoxFeature(), 39 | shape=(4,), 40 | dtype=tf.float32, 41 | tests=[ 42 | # Numpy array 43 | test_utils.FeatureExpectationItem( 44 | value=features.BBox( 45 | ymin=0.0, 46 | xmin=0.25, 47 | ymax=1.0, 48 | xmax=0.75, 49 | ), 50 | expected=[0.0, 0.25, 1.0, 0.75], 51 | ), 52 | ], 53 | ), 54 | ] 55 | 56 | 57 | if __name__ == '__main__': 58 | tf.test.main() 59 | -------------------------------------------------------------------------------- /docs/_index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "gQskE9NgL-ZB" 8 | }, 9 | "source": [ 10 | "Copyright 2018 The TensorFlow Datasets Authors, Licensed under the Apache License, Version 2.0" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 0, 16 | "metadata": { 17 | "colab": {}, 18 | "colab_type": "code", 19 | "id": "RPo1Cw2p83pb" 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "!pip install -q tfds-nightly tf-nightly" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 0, 29 | "metadata": { 30 | "colab": {}, 31 | "colab_type": "code", 32 | "id": "S-RWB9G48uJA" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import tensorflow as tf\n", 37 | "import tensorflow_datasets as tfds\n", 38 | "\n", 39 | "# tfds works in both Eager and Graph modes\n", 40 | "tf.enable_eager_execution()\n", 41 | "\n", 42 | "# See available datasets\n", 43 | "print(tfds.list_builders())\n", 44 | "\n", 45 | "# Construct a tf.data.Dataset\n", 46 | "dataset = tfds.load(name=\"mnist\", split=tfds.Split.TRAIN)\n", 47 | "\n", 48 | "# Build your input pipeline\n", 49 | "dataset = dataset.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)\n", 50 | "for features in dataset.take(1):\n", 51 | " image, label = features[\"image\"], features[\"label\"]" 52 | ] 53 | } 54 | ], 55 | "metadata": { 56 | "colab": { 57 | "collapsed_sections": [], 58 | "name": "tensorflow/datasets", 59 | "provenance": [], 60 | "version": "0.3.2" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 0 65 | } 66 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/units.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Defines convenience constants/functions for converting various units.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | # The constants below are used for conveniently defining memory quantities. 23 | # pylint: disable=invalid-name 24 | KiB = 2**10 25 | MiB = 2**20 26 | GiB = 2**30 27 | TiB = 2**40 28 | PiB = 2**50 29 | 30 | _NAME_LIST = [("PiB", PiB), ("TiB", TiB), ("GiB", GiB), ("MiB", MiB), 31 |               ("KiB", KiB)] 32 | 33 | 34 | def size_str(size_in_bytes): 35 |   """Returns a human readable size string. 36 | 37 |   If size_in_bytes is None, then returns "?? GiB". 38 | 39 |   For example `size_str(1.5 * tfds.units.GiB) == "1.50 GiB"`. 40 | 41 |   Args: 42 |     size_in_bytes: `int` or `None`, the size, in bytes, that we want to 43 |       format as a human-readable size string. 44 |   """ 45 |   if not size_in_bytes: 46 |     return "?? GiB" 47 | 48 |   size_in_bytes = float(size_in_bytes) 49 |   for (name, size_bytes) in _NAME_LIST: 50 |     value = size_in_bytes / size_bytes 51 |     if value >= 1.0: 52 |       return "{:.2f} {}".format(value, name) 53 |   return "{} {}".format(int(size_in_bytes), "bytes") 54 | 55 | 56 | # pylint: enable=invalid-name 57 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | ## Datasets 4 | 5 | Adding a public dataset to `tensorflow_datasets` is a great way of making it 6 | more accessible to the TensorFlow community. 7 | 8 | See our 9 | [Add a dataset doc](https://github.com/tensorflow/datasets/tree/master/docs/add_dataset.md) 10 | to learn how to add a dataset. 11 | 12 | ## Docstrings 13 | 14 | Methods and classes should have clear and complete docstrings. 15 | Most methods (and all publicly-facing API methods) should have an `Args:` 16 | section that documents the name, type, and description of each argument. 17 | Argument lines should be formatted as 18 | `` arg_name: (`arg_type`) Description of arg. `` 19 | 20 | References to `tfds` methods or classes within a docstring should go in 21 | backticks and use the publicly accessible path to that symbol. For example 22 | `` `tfds.core.DatasetBuilder` ``. 23 | Doing so ensures that the API documentation will insert a link to the 24 | documentation for that symbol. 25 | 26 | # Pull Requests 27 | 28 | All contributions are done through Pull Requests here on GitHub. 29 | 30 | ## Contributor License Agreement 31 | 32 | Contributions to this project must be accompanied by a Contributor License 33 | Agreement. You (or your employer) retain the copyright to your contribution; 34 | this simply gives us permission to use and redistribute your contributions as 35 | part of the project. Head over to <https://cla.developers.google.com/> to see 36 | your current agreements on file or to sign a new one. 37 | 38 | You generally only need to submit a CLA once, so if you've already submitted one 39 | (even if it was for a different project), you probably don't need to do it 40 | again. 41 | 42 | ## Code reviews 43 | 44 | All submissions, including submissions by project members, require review. We 45 | use GitHub pull requests for this purpose. Consult 46 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 47 | information on using pull requests. 
48 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/video_feature_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.core.features.video_feature.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import numpy as np 23 | import tensorflow as tf 24 | 25 | from tensorflow_datasets.core import features 26 | from tensorflow_datasets.core import test_utils 27 | 28 | tf.compat.v1.enable_eager_execution() 29 | 30 | 31 | class VideoFeatureTest(test_utils.FeatureExpectationsTestCase): 32 | 33 | @property 34 | def expectations(self): 35 | 36 | np_video = np.random.randint(256, size=(128, 64, 64, 3), dtype=np.uint8) 37 | 38 | return [ 39 | test_utils.FeatureExpectation( 40 | name='video', 41 | feature=features.Video(shape=(None, 64, 64, 3)), 42 | shape=(None, 64, 64, 3), 43 | dtype=tf.uint8, 44 | tests=[ 45 | # Numpy array 46 | test_utils.FeatureExpectationItem( 47 | value=np_video, 48 | expected=np_video, 49 | ), 50 | # File path (Gif) 51 | # File path (.mp4) 52 | ], 53 | ), 54 | ] 55 | 56 | 57 | if __name__ == '__main__': 58 | tf.test.main() 59 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/Split.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | # tfds.Split 12 | 13 | ## Class `Split` 14 | 15 | 16 | 17 | 18 | 19 | Defined in [`core/splits.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/splits.py). 20 | 21 | `Enum` for dataset splits. 22 | 23 | Datasets are typically split into different subsets to be used at various 24 | stages of training and evaluation. 25 | 26 | * `TRAIN`: the training data. 27 | * `VALIDATION`: the validation data. If present, this is typically used as 28 | evaluation data while iterating on a model (e.g. changing hyperparameters, 29 | model architecture, etc.). 30 | * `TEST`: the testing data. This is the data to report metrics on. Typically 31 | you do not want to use this during model iteration as you may overfit to it. 32 | * `ALL`: Special value corresponding to all existing splits of a dataset 33 | merged together. 34 | 35 | Note: All splits, including compositions, inherit from `tfds.core.SplitBase`. 36 | 37 | See the 38 | [guide on splits](https://github.com/tensorflow/datasets/tree/master/docs/splits.md) 39 | for more information. 40 | 41 |
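For illustration, a minimal sketch of selecting splits with `tfds.load` (using `mnist` as an example dataset; the `ALL` line assumes the loader accepts the merged split, per the description above):

``` python
import tensorflow_datasets as tfds

# Standard splits are enum members.
train_ds = tfds.load(name="mnist", split=tfds.Split.TRAIN)
test_ds = tfds.load(name="mnist", split=tfds.Split.TEST)

# ALL merges every split the dataset defines into one dataset.
full_ds = tfds.load(name="mnist", split=tfds.Split.ALL)
```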

### `__new__`

42 | 43 | ``` python 44 | @staticmethod 45 | __new__( 46 | cls, 47 | name 48 | ) 49 | ``` 50 | 51 | Create a custom split with tfds.Split('custom_name'). 52 | 53 | 54 | 55 | ## Class Members 56 | 57 |

### `ALL`

58 | 59 |

### `TEST`

60 | 61 |

### `TRAIN`

62 | 63 |

### `VALIDATION`

64 | 65 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/file_adapter/TFRecordExampleAdapter.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 |
10 | 11 | # tfds.file_adapter.TFRecordExampleAdapter 12 | 13 | ## Class `TFRecordExampleAdapter` 14 | 15 | Inherits From: [`FileFormatAdapter`](../../tfds/file_adapter/FileFormatAdapter.md) 16 | 17 | 18 | 19 | Defined in [`core/file_format_adapter.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/file_format_adapter.py). 20 | 21 | Writes/Reads serialized Example protos to/from TFRecord files. 22 | 23 | Constraints on generators: 24 | 25 | * The generator must yield feature dictionaries (`dict<feature name, 25 | feature value>`). 26 | * The allowed feature types are `int`, `float`, and `str` (or `bytes` in 27 | Python 3; `unicode` strings will be encoded in `utf-8`), or lists thereof. 28 | 29 |
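For illustration, a minimal construction sketch (the feature names here are hypothetical; the spec is forwarded to `tf.parse_single_example` as described under `__init__` below):

``` python
import tensorflow as tf
import tensorflow_datasets as tfds

# Map each feature name to how it should be parsed from the records.
adapter = tfds.file_adapter.TFRecordExampleAdapter(
    example_reading_spec={
        "image": tf.FixedLenFeature(shape=(), dtype=tf.string),
        "label": tf.FixedLenFeature(shape=(), dtype=tf.int64),
    })
```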

### `__init__`

30 | 31 | ``` python 32 | __init__(example_reading_spec) 33 | ``` 34 | 35 | Constructs a TFRecordExampleAdapter. 36 | 37 | #### Args: 38 | 39 | example_reading_spec (dict): feature name to tf.FixedLenFeature or 40 | tf.VarLenFeature. Passed to tf.parse_single_example. 41 | 42 | 43 | 44 | ## Properties 45 | 46 |

### `filetype_suffix`

47 | 48 | 49 | 50 | 51 | 52 | ## Methods 53 | 54 |

### `dataset_from_filename`

55 | 56 | ``` python 57 | dataset_from_filename(filename) 58 | ``` 59 | 60 | 61 | 62 |

### `write_from_generator`

63 | 64 | ``` python 65 | write_from_generator( 66 | generator_fn, 67 | output_files 68 | ) 69 | ``` 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/lsun.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tool for putting given images into a single LMDB database. 17 | 18 | This is the format used by the LSUN dataset (which also stores 19 | WebP-encoded images inside). 20 | 21 | To generate the example dataset: 22 | 23 | python lsun.py --input_files=test_data/lsun_examples/1.webp, 24 | test_data/lsun_examples/2.webp, 25 | test_data/lsun_examples/3.webp 26 | --output_file=/tmp/lsun/train 27 | 28 | 29 | """ 30 | 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | from absl import app 36 | from absl import flags 37 | import lmdb 38 | import tensorflow as tf 39 | 40 | FLAGS = flags.FLAGS 41 | 42 | flags.DEFINE_string("input_files", None, 43 | "Comma-separated list of files to put into the database.") 44 | flags.DEFINE_string("output_file", None, "Path to the output file.") 45 | 46 | 47 | def main(argv): 48 | if len(argv) > 1: 49 | raise app.UsageError("Too many command-line arguments.") 50 | 51 | db = lmdb.open(FLAGS.output_file) 52 | with db.begin(write=True) as txn: 53 | for index, path in enumerate(FLAGS.input_files.split(",")): 54 | data = tf.io.gfile.GFile(path, "rb").read() 55 | txn.put(str(index).encode("utf-8"), data) 56 | 57 | 58 | if __name__ == "__main__": 59 | app.run(main) 60 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """`tfds.features.FeatureConnector` API defining feature types.""" 17 | 18 | from tensorflow_datasets.core.features import text 19 | 20 | from tensorflow_datasets.core.features.audio_feature import Audio 21 | from tensorflow_datasets.core.features.bounding_boxes import BBox 22 | from tensorflow_datasets.core.features.bounding_boxes import BBoxFeature 23 | from tensorflow_datasets.core.features.class_label_feature import ClassLabel 24 | from tensorflow_datasets.core.features.feature import FeatureConnector 25 | from tensorflow_datasets.core.features.feature import FeaturesDict 26 | from tensorflow_datasets.core.features.feature import Tensor 27 | from tensorflow_datasets.core.features.feature import TensorInfo 28 | from tensorflow_datasets.core.features.image_feature import Image 29 | from tensorflow_datasets.core.features.sequence_feature import Sequence 30 | from tensorflow_datasets.core.features.sequence_feature import SequenceDict 31 | from tensorflow_datasets.core.features.text_feature import Text 32 | from tensorflow_datasets.core.features.video_feature import Video 33 | 34 | __all__ = [ 35 | "text", 36 | "Audio", 37 | "BBox", 38 | "BBoxFeature", 39 | "ClassLabel", 40 | "FeatureConnector", 41 | "FeaturesDict", 42 | "Tensor", 43 | "TensorInfo", 44 | "Sequence", 45 | "SequenceDict", 46 | "Image", 47 | "Text", 48 | "Video", 49 | ] 50 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 |
5 | 6 | # Module: tfds.features 7 | 8 | 9 | 10 | Defined in [`core/features/__init__.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/__init__.py). 11 | 12 | tfds.features.FeatureConnector API defining feature types. 13 | 14 | ## Modules 15 | 16 | [`text`](../tfds/features/text.md) module: Text utilities. 17 | 18 | ## Classes 19 | 20 | [`class Audio`](../tfds/features/Audio.md): `FeatureConnector` for audio, encoded as raw integer wave form. 21 | 22 | [`class BBox`](../tfds/features/BBox.md): BBox(ymin, xmin, ymax, xmax) 23 | 24 | [`class BBoxFeature`](../tfds/features/BBoxFeature.md): `FeatureConnector` for a normalized bounding box. 25 | 26 | [`class ClassLabel`](../tfds/features/ClassLabel.md): `FeatureConnector` for integer class labels. 27 | 28 | [`class FeatureConnector`](../tfds/features/FeatureConnector.md): Abstract base class for feature types. 29 | 30 | [`class FeaturesDict`](../tfds/features/FeaturesDict.md): Composite `FeatureConnector`; each feature in `dict` has its own connector. 31 | 32 | [`class Tensor`](../tfds/features/Tensor.md): `FeatureConnector` for generic data of arbitrary shape and type. 33 | 34 | [`class TensorInfo`](../tfds/features/TensorInfo.md): TensorInfo(shape, dtype) 35 | 36 | [`class Sequence`](../tfds/features/Sequence.md): Similar to `tfds.features.SequenceDict`, but only contains a single feature. 37 | 38 | [`class SequenceDict`](../tfds/features/SequenceDict.md): Composite `FeatureConnector` for a `dict` where each value is a list. 39 | 40 | [`class Image`](../tfds/features/Image.md): `FeatureConnector` for images. 41 | 42 | [`class Text`](../tfds/features/Text.md): `FeatureConnector` for text, encoding to integers with a `TextEncoder`. 43 | 44 | [`class Video`](../tfds/features/Video.md): `FeatureConnector` for videos, png-encoding frames on disk. 45 | 46 | -------------------------------------------------------------------------------- /tensorflow_datasets/video/starcraft_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """Tests for starcraft video dataset.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.testing import dataset_builder_testing 23 | from tensorflow_datasets.video import starcraft 24 | 25 | 26 | class StarcraftVideoDatasetTest(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = starcraft.StarcraftVideo 28 | BUILDER_CONFIG_NAMES_TO_TEST = ["brawl_64"] 29 | 30 | DL_EXTRACT_RESULT = { 31 | "valid": "valid.tfrecords", 32 | "test": "test.tfrecords", 33 | "train_0": "train_0.tfrecords", 34 | "train_1": "train_1.tfrecords" 35 | } 36 | 37 | SPLITS = { 38 | "train": 2, 39 | "test": 1, 40 | "validation": 1, 41 | } 42 | 43 | 44 | class StarcraftVideoDataset128Test(dataset_builder_testing.TestCase): 45 | """Separate test to cover the 128x128 resolution videos.""" 46 | DATASET_CLASS = starcraft.StarcraftVideo 47 | BUILDER_CONFIG_NAMES_TO_TEST = ["brawl_128"] 48 | 49 | DL_EXTRACT_RESULT = { 50 | "valid": "128_valid.tfrecords", 51 | "test": "128_test.tfrecords", 52 | "train_0": "128_train_0.tfrecords", 53 | "train_1": "128_train_1.tfrecords" 54 | } 55 | 56 | SPLITS = { 57 | "train": 2, 58 | "test": 1, 59 | "validation": 1, 60 | } 61 | 62 | 63 | if __name__ == "__main__": 64 | dataset_builder_testing.main() 65 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/text/ByteTextEncoder.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | # tfds.features.text.ByteTextEncoder 14 | 15 | ## Class `ByteTextEncoder` 16 | 17 | Inherits From: [`TextEncoder`](../../../tfds/features/text/TextEncoder.md) 18 | 19 | 20 | 21 | Defined in [`core/features/text/text_encoder.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/text/text_encoder.py). 22 | 23 | Byte-encodes text. 24 | 25 |
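A minimal round-trip sketch (the values are illustrative; every id is >= 1 because id 0 is reserved for padding, per the `TextEncoder` contract):

``` python
import tensorflow_datasets as tfds

encoder = tfds.features.text.ByteTextEncoder()
ids = encoder.encode("abc")  # one id per byte, all ids >= 1
assert encoder.decode(ids) == "abc"
```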

### `__init__`

26 | 27 | ``` python 28 | __init__(additional_tokens=None) 29 | ``` 30 | 31 | Constructs ByteTextEncoder. 32 | 33 | #### Args: 34 | 35 | * `additional_tokens`: `list`, list of additional tokens. These will be 36 | assigned vocab ids `[1, 1+len(additional_tokens)]`. Useful for things 37 | like "end-of-string" tokens (e.g. "<EOS>"). 38 | 39 | 40 | 41 | ## Properties 42 | 43 |

### `additional_tokens`

44 | 45 | 46 | 47 |

### `vocab_size`

48 | 49 | 50 | 51 | 52 | 53 | ## Methods 54 | 55 |

### `decode`

56 | 57 | ``` python 58 | decode(ids) 59 | ``` 60 | 61 | 62 | 63 |

### `encode`

64 | 65 | ``` python 66 | encode(s) 67 | ``` 68 | 69 | 70 | 71 |

### `load_from_file`

72 | 73 | ``` python 74 | @classmethod 75 | load_from_file( 76 | cls, 77 | filename_prefix 78 | ) 79 | ``` 80 | 81 | 82 | 83 |

### `save_to_file`

84 | 85 | ``` python 86 | save_to_file(filename_prefix) 87 | ``` 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/text/TextEncoder.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | # tfds.features.text.TextEncoder 12 | 13 | ## Class `TextEncoder` 14 | 15 | 16 | 17 | 18 | 19 | Defined in [`core/features/text/text_encoder.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/text/text_encoder.py). 20 | 21 | Abstract base class for converting between text and integers. 22 | 23 | **A note on padding**: 24 | 25 | Because text data is typically variable length and nearly always requires 26 | padding during training, ID 0 is always reserved for padding. To accommodate 27 | this, all `TextEncoder`s behave in certain ways: 28 | 29 | * `encode`: never returns id 0 (all ids are 1+) 30 | * `decode`: drops 0 in the input ids 31 | * `vocab_size`: includes ID 0 32 | 33 | New subclasses should be careful to match this behavior. 34 | 35 | ## Properties 36 | 37 |

### `vocab_size`

38 | 39 | Size of the vocabulary. Decode produces ints [1, vocab_size). 40 | 41 | 42 | 43 | ## Methods 44 | 45 |

### `decode`

46 | 47 | ``` python 48 | decode(ids) 49 | ``` 50 | 51 | Decodes a list of integers into text. 52 | 53 |

### `encode`

54 | 55 | ``` python 56 | encode(s) 57 | ``` 58 | 59 | Encodes text into a list of integers. 60 | 61 |

### `load_from_file`

62 | 63 | ``` python 64 | @classmethod 65 | load_from_file( 66 | cls, 67 | filename_prefix 68 | ) 69 | ``` 70 | 71 | Load from file. Inverse of save_to_file. 72 | 73 |

### `save_to_file`

74 | 75 | ``` python 76 | save_to_file(filename_prefix) 77 | ``` 78 | 79 | Store to file. Inverse of load_from_file. 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /tensorflow_datasets/image/open_images_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for open_images dataset module.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.image import open_images 23 | from tensorflow_datasets.testing import dataset_builder_testing 24 | 25 | 26 | class OpenImagesV42012Test(dataset_builder_testing.TestCase): 27 | DATASET_CLASS = open_images.OpenImagesV4 28 | SPLITS = { # Expected number of examples on each split. 29 | 'train': 512, 30 | 'test': 36, 31 | 'validation': 12, 32 | } 33 | DL_EXTRACT_RESULT = { 34 | 'train_images': ['s3-tar_train_sha1_%s.tar' % i 35 | for i in '0123456789abcdef'], 36 | 'test_images': 's3-tar_test_sha2.tar', 37 | 'validation_images': 's3-tar_validation_sha3.tar', 38 | 'train_human_labels': 'train-human-labels.csv', 39 | 'train_machine_labels': 'train-machine-labels.csv', 40 | 'test_human_labels': 'test-human-labels.csv', 41 | 'test_machine_labels': 'test-machine-labels.csv', 42 | 'validation_human_labels': 'validation-human-labels.csv', 43 | 'validation_machine_labels': 'validation-machine-labels.csv', 44 | 'train-annotations-bbox': 'train-annotations-bbox.csv', 45 | 'test-annotations-bbox': 'test-annotations-bbox.csv', 46 | 'validation-annotations-bbox': 'validation-annotations-bbox.csv', 47 | 'class_descriptions': 'class_descriptions.csv', 48 | } 49 | 50 | 51 | if __name__ == '__main__': 52 | dataset_builder_testing.main() 53 | -------------------------------------------------------------------------------- /tensorflow_datasets/testing/bair_robot_pushing.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tool for preparing test example of BAIR dataset. 
17 | 18 | mkdir test/ 19 | mkdir train/ 20 | 21 | ./bair_robot_pushing --output_file=train/traj_1792_to_2047.tfrecords 22 | ./bair_robot_pushing --output_file=test/traj_0_to_255.tfrecords 23 | """ 24 | 25 | from __future__ import absolute_import 26 | from __future__ import division 27 | from __future__ import print_function 28 | 29 | from absl import app 30 | from absl import flags 31 | import numpy as np 32 | import tensorflow as tf 33 | 34 | FLAGS = flags.FLAGS 35 | 36 | flags.DEFINE_string("output_file", None, "Path to the output file.") 37 | 38 | 39 | def main(argv): 40 | if len(argv) > 1: 41 | raise app.UsageError("Too many command-line arguments.") 42 | 43 | writer = tf.io.TFRecordWriter(FLAGS.output_file) 44 | 45 | feature = {} 46 | 47 | for frame in range(30): 48 | feature["%d/action" % frame] = tf.train.Feature( 49 | float_list=tf.train.FloatList(value=np.random.uniform(size=(4)))) 50 | feature["%d/endeffector_pos" % frame] = tf.train.Feature( 51 | float_list=tf.train.FloatList(value=np.random.uniform(size=(3)))) 52 | feature["%d/image_aux1/encoded" % frame] = tf.train.Feature( 53 | bytes_list=tf.train.BytesList(value=[b"\x00\xff\x00" * 64 * 64])) 54 | feature["%d/image_main/encoded" % frame] = tf.train.Feature( 55 | bytes_list=tf.train.BytesList(value=[b"\x00\x00\xff" * 64 * 64])) 56 | example = tf.train.Example(features=tf.train.Features(feature=feature)) 57 | writer.write(example.SerializeToString()) 58 | writer.close() 59 | 60 | 61 | if __name__ == "__main__": 62 | app.run(main) 63 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/utils/version_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """Tests for tensorflow_datasets.core.utils.version.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | from tensorflow_datasets.core.utils import version 24 | 25 | 26 | class VersionTest(tf.test.TestCase): 27 | 28 | def test_version(self): 29 | """Test the zip nested function.""" 30 | 31 | self.assertEqual(version.Version(), version.Version(0, 0, 0)) 32 | self.assertEqual(version.Version('1.3.534'), version.Version(1, 3, 534)) 33 | self.assertEqual( 34 | version.Version(major=1, minor=3, patch=5), version.Version(1, 3, 5)) 35 | 36 | self.assertEqual(version.Version('latest'), version.Version.LATEST) 37 | self.assertEqual( 38 | version.Version(version.Version('1.3.5')), version.Version(1, 3, 5)) 39 | 40 | self.assertEqual(str(version.Version(10, 2, 3)), '10.2.3') 41 | self.assertEqual(str(version.Version()), '0.0.0') 42 | 43 | with self.assertRaisesWithPredicateMatch(ValueError, 'Format should be '): 44 | version.Version('1.3.-534') 45 | with self.assertRaisesWithPredicateMatch(ValueError, 'Format should be '): 46 | version.Version('1.3') 47 | with self.assertRaisesWithPredicateMatch(ValueError, 'Format should be '): 48 | version.Version('1.3.') 49 | with self.assertRaisesWithPredicateMatch(ValueError, 'Format should be '): 50 | version.Version('1..5') 51 | with self.assertRaisesWithPredicateMatch(ValueError, 'Format should be '): 52 | version.Version('a.b.c') 53 | 54 | 55 | if __name__ == '__main__': 56 | tf.test.main() 57 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/video_feature.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Video Feature.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | from tensorflow_datasets.core.features import image_feature 23 | from tensorflow_datasets.core.features import sequence_feature 24 | 25 | # TODO(tfds): Support more formats (gifs, mp4,...) 26 | 27 | 28 | class Video(sequence_feature.Sequence): 29 | """`FeatureConnector` for videos, png-encoding frames on disk. 30 | 31 | Video: The image connector accepts as input: 32 | * uint8 array representing an video. 33 | 34 | Output: 35 | video: tf.Tensor of type tf.uint8 and shape [num_frames, height, width, 3] 36 | 37 | Example: 38 | * In the DatasetInfo object: 39 | features=features.FeatureDict({ 40 | 'video': features.Video(shape=(None, 64, 64, 3)), 41 | }) 42 | 43 | * During generation: 44 | yield { 45 | 'input': np.ones(shape=(128, 64, 64, 3), dtype=np.uint8), 46 | } 47 | """ 48 | 49 | def __init__(self, shape): 50 | """Construct the connector. 
51 | 52 | Args: 53 | shape: tuple of ints, the shape of the video (num_frames, height, width, 54 | channels=3). 55 | 56 | Raises: 57 | ValueError: If the shape is invalid 58 | """ 59 | shape = tuple(shape) 60 | if len(shape) != 4: 61 | raise ValueError('Video shape should be of rank 4') 62 | if shape.count(None) > 1: 63 | raise ValueError('Video shape cannot have more than 1 unknown dim') 64 | 65 | super(Video, self).__init__( 66 | image_feature.Image(shape=shape[1:], encoding_format='png'), 67 | length=shape[0], 68 | ) 69 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/core/SplitDict.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |
14 | 15 | # tfds.core.SplitDict 16 | 17 | ## Class `SplitDict` 18 | 19 | 20 | 21 | 22 | 23 | Defined in [`core/splits.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/splits.py). 24 | 25 | Split info object. 26 | 27 |
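A minimal usage sketch (here `train_info` stands for a `SplitInfo` describing the "train" split, and lookup by split name is assumed; building `SplitInfo` objects is outside this class and not shown):

``` python
import tensorflow_datasets as tfds

split_dict = tfds.core.SplitDict()
split_dict.add(train_info)  # registers the split under its name

info = split_dict["train"]             # look up a split by name
total = split_dict.total_num_examples  # summed over all registered splits
```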

### `__init__`

28 | 29 | ``` python 30 | __init__() 31 | ``` 32 | 33 | 34 | 35 | 36 | 37 | ## Properties 38 | 39 |

### `total_num_examples`

40 | 41 | Return the total number of examples. 42 | 43 | 44 | 45 | ## Methods 46 | 47 |

### `__getitem__`

48 | 49 | ``` python 50 | __getitem__(key) 51 | ``` 52 | 53 | 54 | 55 |

### `__setitem__`

56 | 57 | ``` python 58 | __setitem__( 59 | key, 60 | value 61 | ) 62 | ``` 63 | 64 | 65 | 66 |

### `add`

67 | 68 | ``` python 69 | add(split_info) 70 | ``` 71 | 72 | Add the split info. 73 | 74 |

### `copy`

75 | 76 | ``` python 77 | copy() 78 | ``` 79 | 80 | 81 | 82 |

### `from_proto`

83 | 84 | ``` python 85 | @classmethod 86 | from_proto( 87 | cls, 88 | repeated_split_infos 89 | ) 90 | ``` 91 | 92 | Returns a new SplitDict initialized from the `repeated_split_infos`. 93 | 94 |

### `to_proto`

95 | 96 | ``` python 97 | to_proto() 98 | ``` 99 | 100 | Returns a list of SplitInfo protos that we have. 101 | 102 |

### `update`

103 | 104 | ``` python 105 | update(other) 106 | ``` 107 | 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/utils/tf_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Tests for tensorflow_datasets.core.utils.tf_utils.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from tensorflow_datasets.core import test_utils 25 | from tensorflow_datasets.core.utils import tf_utils 26 | 27 | tf.compat.v1.enable_eager_execution() 28 | 29 | 30 | class TfUtilsTest(tf.test.TestCase): 31 | 32 | @test_utils.run_in_graph_and_eager_modes() 33 | def test_graph_runner(self): 34 | graph_runner = tf_utils.TFGraphRunner() 35 | 36 | output = graph_runner.run(tf.nn.relu, [1, 1, -1, -1, 1]) 37 | self.assertAllEqual(output, [1, 1, 0, 0, 1]) 38 | 39 | output = graph_runner.run(tf.nn.relu, [-1, -1, -1, 1, 1]) 40 | self.assertAllEqual(output, [0, 0, 0, 1, 1]) 41 | 42 | # The cache should have been re-used, so it should only contain one 43 | # GraphRun. Ideally there would be two separate @tf.eager.run_test_in_graph() 44 | # and @tf.eager.run_test_in_eager() decorators to avoid this logic in the 45 | # test, but I haven't found them. 46 | if not tf.executing_eagerly(): 47 | self.assertEqual(len(graph_runner._graph_run_cache), 1) 48 | else: 49 | self.assertEqual(len(graph_runner._graph_run_cache), 0) 50 | 51 | # Different signature (different shape), so new GraphRun created 52 | output = graph_runner.run(tf.nn.relu, [-1, 1, 1]) 53 | self.assertAllEqual(output, [0, 1, 1]) 54 | if not tf.executing_eagerly(): 55 | self.assertEqual(len(graph_runner._graph_run_cache), 2) 56 | else: 57 | self.assertEqual(len(graph_runner._graph_run_cache), 0) 58 | 59 | 60 | if __name__ == '__main__': 61 | tf.test.main() 62 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/audio_feature.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | """Audio feature.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import numpy as np 23 | import tensorflow as tf 24 | 25 | from tensorflow_datasets.core import api_utils 26 | from tensorflow_datasets.core.features import feature 27 | from tensorflow_datasets.core.lazy_imports import lazy_imports 28 | 29 | 30 | class Audio(feature.Tensor): 31 | """`FeatureConnector` for audio, encoded as raw integer wave form.""" 32 | 33 | @api_utils.disallow_positional_args 34 | def __init__(self, file_format=None, shape=(None,)): 35 | """Constructs the connector. 36 | 37 | Args: 38 | file_format: `str`, the audio file format. Can be any format ffmpeg 39 | understands. If `None`, will attempt to infer from the file extension. 40 | shape: `tuple`, shape of the data. 41 | """ 42 | self._file_format = file_format 43 | if len(shape) != 1: 44 | raise TypeError( 45 | "Audio feature currently only supports 1-D values, got %s." % shape) 46 | self._shape = shape 47 | super(Audio, self).__init__(shape=shape, dtype=tf.int64) 48 | 49 | def encode_example(self, audio_or_path_or_fobj): 50 | audio = audio_or_path_or_fobj 51 | if isinstance(audio, (np.ndarray, list)): 52 | return audio 53 | 54 | with tf.io.gfile.GFile(audio, "rb") as audio_f: 55 | file_format = self._file_format or audio.split(".")[-1] 56 | audio_segment = lazy_imports.pydub.AudioSegment.from_file( 57 | audio_f, format=file_format) 58 | return super(Audio, self).encode_example( 59 | np.array(audio_segment.get_array_of_samples()).astype(np.int64)) 60 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/utils/version.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Version utils. 
17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | import collections 24 | 25 | 26 | class Version(collections.namedtuple("Version", ["major", "minor", "patch"])): 27 | """Dataset version MAJOR.MINOR.PATCH.""" 28 | 29 | LATEST = "latest" 30 | 31 | def __new__(cls, *args, **kwargs): 32 | if len(args) == 1: 33 | if kwargs: 34 | raise ValueError( 35 | "Only one of version str or major/minor/patch can be set") 36 | version_str = args[0] 37 | if isinstance(version_str, cls): 38 | return version_str 39 | elif version_str == cls.LATEST: 40 | return version_str 41 | return super(Version, cls).__new__(cls, *_str_to_version(version_str)) 42 | elif not args and not kwargs: 43 | return super(Version, cls).__new__(cls, 0, 0, 0) 44 | else: 45 | return super(Version, cls).__new__(cls, *args, **kwargs) 46 | 47 | def __str__(self): 48 | return "{}.{}.{}".format(self.major, self.minor, self.patch) 49 | 50 | 51 | def _str_to_version(version_str): 52 | """Return the tuple (major, minor, patch) version extracted from the str.""" 53 | version_ids = version_str.split(".") 54 | if len(version_ids) != 3 or "-" in version_str: 55 | raise ValueError( 56 | "Could not convert the {} to version. Format should be x.y.z".format( 57 | version_str)) 58 | try: 59 | version_ids = tuple(int(v) for v in version_ids) 60 | except ValueError: 61 | raise ValueError( 62 | "Could not convert the {} to version. Format should be x.y.z".format( 63 | version_str)) 64 | return version_ids 65 | -------------------------------------------------------------------------------- /docs/_index.yaml: -------------------------------------------------------------------------------- 1 | book_path: /datasets/_book.yaml 2 | project_path: /datasets/_project.yaml 3 | description: 4 | landing_page: 5 | custom_css_path: /site-assets/css/style.css 6 | rows: 7 | - heading: A collection of datasets ready to use with TensorFlow. 8 | items: 9 | - description: > 10 | TensorFlow Datasets is a collection of datasets ready to use with 11 | TensorFlow. 12 | 13 | All datasets are exposed as 14 | 15 | tf.data.Datasets 16 | , 17 | enabling easy-to-use and high-performance 18 | input pipelines. 19 | 20 | To get started see the 21 | guide 22 | and our 23 | list of datasets. 24 | - code_block: | 25 |
26 |         import tensorflow as tf
27 |         import tensorflow_datasets as tfds
28 | 
29 |         # tfds works in both Eager and Graph modes
30 |         tf.enable_eager_execution()
31 | 
32 |         # See available datasets
33 |         print(tfds.list_builders())
34 | 
35 |         # Construct a tf.data.Dataset
36 |         dataset = tfds.load(name="mnist", split=tfds.Split.TRAIN)
37 | 
38 |         # Build your input pipeline
39 |         dataset = dataset.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
40 |         for features in dataset.take(1):
41 |           image, label = features["image"], features["label"]
42 |         
43 | 44 | {% dynamic if request.tld != 'cn' %} 45 | Run in a Notebook 46 | {% dynamic endif %} 47 | - classname: devsite-landing-row-cards 48 | items: 49 | - heading: Introducing TensorFlow Datasets 50 | image_path: /resources/images/tf-logo-card-16x9.png 51 | path: https://www.tensorflow.org/datasets/ 52 | buttons: 53 | - label: Coming soon... 54 | path: https://www.tensorflow.org/datasets/ 55 | - heading: TensorFlow Datasets on GitHub 56 | image_path: /resources/images/github-card-16x9.png 57 | path: https://github.com/tensorflow/datasets 58 | buttons: 59 | - label: View on GitHub 60 | path: https://github.com/tensorflow/datasets 61 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/file_adapter/CSVAdapter.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 10 | # tfds.file_adapter.CSVAdapter 11 | 12 | ## Class `CSVAdapter` 13 | 14 | Inherits From: [`FileFormatAdapter`](../../tfds/file_adapter/FileFormatAdapter.md) 15 | 16 | 17 | 18 | Defined in [`core/file_format_adapter.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/file_format_adapter.py). 19 | 20 | Writes/reads features to/from CSV files. 21 | 22 | Constraints on generators: 23 | 24 | * The generator must yield feature dictionaries (`dict<feature name, 25 | feature value>`). 26 | * The allowed feature types are `int`, `float`, and `str`. By default, only 27 | scalar features are supported (that is, not lists). 28 | 29 | You can modify how records are written by passing `csv_writer_ctor`. 30 | 31 | You can modify how records are read by passing `csv_dataset_kwargs`. 32 | 33 | Note that all CSV files produced will have a header row. 34 | 35 |
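For illustration, a minimal usage sketch (file names and feature names are hypothetical; `feature_types` maps each CSV column to its dtype, as described under `__init__` below):

``` python
import tensorflow as tf
import tensorflow_datasets as tfds

adapter = tfds.file_adapter.CSVAdapter(
    feature_types={"id": tf.int64, "label": tf.string})

def generator_fn():
  yield {"id": 1, "label": "cat"}
  yield {"id": 2, "label": "dog"}

# Write records to a (hypothetical) shard, then read them back.
adapter.write_from_generator(generator_fn, ["/tmp/foo.csv-00000-of-00001"])
dataset = adapter.dataset_from_filename("/tmp/foo.csv-00000-of-00001")
```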

### `__init__`

36 | 37 | ``` python 38 | __init__( 39 | feature_types, 40 | csv_dataset_kwargs=None, 41 | csv_writer_ctor=csv.writer 42 | ) 43 | ``` 44 | 45 | Constructs CSVAdapter. 46 | 47 | #### Args: 48 | 49 | feature_types (dict): specifies the dtypes of each of the 50 | features (columns in the CSV file). 51 | csv_dataset_kwargs (dict): forwarded to `tf.data.experimental.CsvDataset`. 52 | csv_writer_ctor (function): takes file handle and returns writer. 53 | 54 | 55 | #### Raises: 56 | 57 | * `ValueError`: if csv_dataset_kwargs["header"] is present. 58 | 59 | 60 | 61 | ## Properties 62 | 63 |

### `filetype_suffix`

64 | 65 | 66 | 67 | 68 | 69 | ## Methods 70 | 71 |

### `dataset_from_filename`

72 | 73 | ``` python 74 | dataset_from_filename(filename) 75 | ``` 76 | 77 | 78 | 79 |

### `write_from_generator`

80 | 81 | ``` python 82 | write_from_generator( 83 | generator_fn, 84 | output_files 85 | ) 86 | ``` 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/proto/dataset_info.proto: -------------------------------------------------------------------------------- 1 | // Definitions for metadata related to whole datasets and their instances and 2 | // splits. 3 | 4 | syntax = "proto3"; 5 | option cc_enable_arenas = true; 6 | 7 | package tensorflow_datasets; 8 | 9 | import "tensorflow_metadata/proto/v0/statistics.proto"; 10 | import "tensorflow_metadata/proto/v0/schema.proto"; 11 | 12 | // Message representing the location of a dataset; for now it just has a URL 13 | // field, but it can internally have folders etc. 14 | message DatasetLocation { 15 | repeated string urls = 1; 16 | } 17 | 18 | // This is a serialization of tensorflow_datasets.core.SplitInfo -- this is 19 | // supposed to encapsulate the information specific to a particular instance 20 | // of this dataset, so attributes that are common to this dataset go directly 21 | // in DatasetInfo (name, location, schema), but attributes specific to an 22 | // instance go here. 23 | message SplitInfo { 24 | // A string identifying this SplitInfo, e.g. "TRAIN", "TEST", "v18" etc. 25 | string name = 1; 26 | 27 | // The number of shards in this split's on-disk representation. 28 | int64 num_shards = 2; 29 | 30 | // The concrete statistics about this split. 31 | tensorflow.metadata.v0.DatasetFeatureStatistics statistics = 3; 32 | } 33 | 34 | // This message indicates which feature in the dataset schema is the input and 35 | // which one is the output. 36 | message SupervisedKeys { 37 | string input = 1; 38 | string output = 2; 39 | } 40 | 41 | // This is a serialization of tensorflow_datasets.core.DatasetInfo. 42 | message DatasetInfo { 43 | string name = 1; 44 | string description = 2; 45 | 46 | // Version string of the dataset (ex: '1.0.5') 47 | string version = 9; 48 | 49 | // A citation string if one exists for this dataset. 50 | string citation = 3; 51 | 52 | // *Approximate* size in bytes of this dataset on disk. 53 | int64 size_in_bytes = 4; 54 | 55 | // Canonical location of the dataset. 56 | DatasetLocation location = 5; 57 | 58 | // Checksums of resources: URL to checksum of resource at URL. 59 | map<string, string> download_checksums = 10; 60 | 61 | // The schema of the dataset. 62 | tensorflow.metadata.v0.Schema schema = 6; 63 | 64 | // The information about the specific splits. 65 | repeated SplitInfo splits = 7; 66 | 67 | // If this is a supervised learning problem, then the input and output 68 | // features can be specified using this. 69 | SupervisedKeys supervised_keys = 8; 70 | 71 | // Next available: 11 72 | } 73 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/proto/generate_py_proto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script uses the protoc compiler to generate the Python code for 4 | # all of our proto files. 5 | 6 | 7 | # Ensure we have the desired protoc version. 8 | if [[ $(protoc --version) != 'libprotoc 3.6.1' ]]; then 9 | echo 'Please use version 3.6.1 of protoc for compatibility with Python 2 and 3.' 10 | echo 'Please run install_protoc.sh to install it.' 11 | exit 12 | fi 13 | 14 | # Function to prepend a pylint directive to skip the generated python file.
15 | function pylint_skip_file() { 16 | local file_name=$1 17 | printf "%s\n%s" "# pylint: skip-file" "$(cat ${file_name})" > ${file_name} 18 | } 19 | 20 | 21 | # Setup tmp directories 22 | TMP_DIR=$(mktemp -d) 23 | TMP_TFDS_DIR="$PWD" 24 | TMP_METADATA_DIR=${TMP_DIR}/metadata 25 | 26 | echo "Temporary directory created: " 27 | echo ${TMP_DIR} 28 | 29 | 30 | TMP_TFDS_PROTO_DIR="${TMP_TFDS_DIR}/tensorflow_datasets/core/proto" 31 | DATASET_INFO_PROTO="${TMP_TFDS_PROTO_DIR}/dataset_info.proto" 32 | if [ ! -f ${DATASET_INFO_PROTO} ]; then 33 | echo "${DATASET_INFO_PROTO} not found." 34 | echo "Please run this script from the appropriate root directory." 35 | fi 36 | 37 | # Clone tf.metadata 38 | git clone https://github.com/tensorflow/metadata.git ${TMP_METADATA_DIR} 39 | 40 | # Invoke protoc compiler on dataset_info.proto 41 | protoc ${DATASET_INFO_PROTO} \ 42 | --python_out=${TMP_TFDS_PROTO_DIR} \ 43 | --proto_path=${TMP_METADATA_DIR} \ 44 | --proto_path=${TMP_TFDS_PROTO_DIR} 45 | 46 | # Add pylint ignore and name the file as generated. 47 | GENERATED_DATASET_INFO_PY="${TMP_TFDS_PROTO_DIR}/dataset_info_generated_pb2.py" 48 | mv ${TMP_TFDS_PROTO_DIR}/dataset_info_pb2.py \ 49 | ${GENERATED_DATASET_INFO_PY} 50 | pylint_skip_file "${GENERATED_DATASET_INFO_PY}" 51 | 52 | 53 | LICENSING_TEXT=$(cat <<-END 54 | # coding=utf-8 55 | # Copyright 2018 The TensorFlow Datasets Authors. 56 | # 57 | # Licensed under the Apache License, Version 2.0 (the "License"); 58 | # you may not use this file except in compliance with the License. 59 | # You may obtain a copy of the License at 60 | # 61 | # http://www.apache.org/licenses/LICENSE-2.0 62 | # 63 | # Unless required by applicable law or agreed to in writing, software 64 | # distributed under the License is distributed on an "AS IS" BASIS, 65 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 66 | # See the License for the specific language governing permissions and 67 | # limitations under the License. 68 | END 69 | ) 70 | 71 | printf "%s\n%s" "${LICENSING_TEXT}" "$(cat ${GENERATED_DATASET_INFO_PY})" > \ 72 | ${GENERATED_DATASET_INFO_PY} 73 | 74 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/test_utils_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for tensorflow_datasets.core.test_utils.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import tensorflow as tf 23 | 24 | from tensorflow_datasets.core import test_utils 25 | 26 | tf.compat.v1.enable_eager_execution() 27 | 28 | 29 | class RunInGraphAndEagerTest(tf.test.TestCase): 30 | 31 | def test_run_in_graph_and_eager_modes(self): 32 | l = [] 33 | def inc(self, with_brackets): 34 | del self # self argument is required by run_in_graph_and_eager_modes. 35 | mode = "eager" if tf.executing_eagerly() else "graph" 36 | with_brackets = "with_brackets" if with_brackets else "without_brackets" 37 | l.append((with_brackets, mode)) 38 | 39 | f = test_utils.run_in_graph_and_eager_modes(inc) 40 | f(self, with_brackets=False) 41 | f = test_utils.run_in_graph_and_eager_modes()(inc) 42 | f(self, with_brackets=True) 43 | 44 | self.assertEqual(len(l), 4) 45 | self.assertEqual(set(l), { 46 | ("with_brackets", "graph"), 47 | ("with_brackets", "eager"), 48 | ("without_brackets", "graph"), 49 | ("without_brackets", "eager"), 50 | }) 51 | 52 | def test_run_in_graph_and_eager_modes_setup_in_same_mode(self): 53 | modes = [] 54 | mode_name = lambda: "eager" if tf.executing_eagerly() else "graph" 55 | 56 | class ExampleTest(tf.test.TestCase): 57 | 58 | def runTest(self): 59 | pass 60 | 61 | def setUp(self): 62 | modes.append("setup_" + mode_name()) 63 | 64 | @test_utils.run_in_graph_and_eager_modes 65 | def testBody(self): 66 | modes.append("run_" + mode_name()) 67 | 68 | e = ExampleTest() 69 | e.setUp() 70 | e.testBody() 71 | 72 | self.assertEqual(modes[0:2], ["setup_eager", "run_eager"]) 73 | self.assertEqual(modes[2:], ["setup_graph", "run_graph"]) 74 | 75 | if __name__ == "__main__": 76 | tf.test.main() 77 | -------------------------------------------------------------------------------- /docs/api_docs/python/tfds/features/text/Tokenizer.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | # tfds.features.text.Tokenizer 14 | 15 | ## Class `Tokenizer` 16 | 17 | 18 | 19 | 20 | 21 | Defined in [`core/features/text/text_encoder.py`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/core/features/text/text_encoder.py). 22 | 23 | Splits a string into tokens, and joins them back. 24 | 25 |
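A minimal round-trip sketch (token values shown are illustrative; see `__init__` below for the invertibility caveat):

``` python
import tensorflow_datasets as tfds

tok = tfds.features.text.Tokenizer(alphanum_only=False)
tokens = tok.tokenize("Hello, world!")  # ["Hello", ", ", "world", "!"]
# Invertible when alphanum_only=False, since no characters are dropped.
assert tok.join(tokens) == "Hello, world!"
```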

### `__init__`

26 | 27 | ``` python 28 | __init__( 29 | alphanum_only=True, 30 | reserved_tokens=None 31 | ) 32 | ``` 33 | 34 | Constructs a Tokenizer. 35 | 36 | Note that the Tokenizer is invertible if `alphanum_only=False`. 37 | i.e. `s == t.join(t.tokenize(s))`. 38 | 39 | #### Args: 40 | 41 | * `alphanum_only`: `bool`, if `True`, only parse out alphanumeric tokens 42 | (non-alphanumeric characters are dropped); 43 | otherwise, keep all characters (individual tokens will still be either 44 | all alphanumeric or all non-alphanumeric). 45 | * `reserved_tokens`: `list`, a list of strings that, if any are in `s`, 46 | will be preserved as whole tokens, even if they contain mixed 47 | alphanumeric/non-alphanumeric characters. 48 | 49 | 50 | 51 | ## Properties 52 | 53 |

### `alphanum_only`

54 | 55 | 56 | 57 |

### `reserved_tokens`

58 | 59 | 60 | 61 | 62 | 63 | ## Methods 64 | 65 |

### `join`

66 | 67 | ``` python 68 | join(tokens) 69 | ``` 70 | 71 | Joins tokens into a string. 72 | 73 |

### `load_from_file`

74 | 75 | ``` python 76 | @classmethod 77 | load_from_file( 78 | cls, 79 | filename_prefix 80 | ) 81 | ``` 82 | 83 | 84 | 85 |

### `save_to_file`

86 | 87 | ``` python 88 | save_to_file(filename_prefix) 89 | ``` 90 | 91 | 92 | 93 |

### `tokenize`

94 | 95 | ``` python 96 | tokenize(s) 97 | ``` 98 | 99 | Splits a string into tokens. 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/lazy_imports.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Lazy imports for heavy dependencies.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import importlib 23 | 24 | from tensorflow_datasets.core.utils import py_utils as utils 25 | 26 | 27 | def _try_import(module_name): 28 | """Try importing a module, with an informative error message on failure.""" 29 | try: 30 | mod = importlib.import_module(module_name) 31 | return mod 32 | except ImportError: 33 | err_msg = ("Tried importing %s but failed. See setup.py extras_require. " 34 | "The dataset you are trying to use may have additional " 35 | "dependencies.") 36 | utils.reraise(err_msg % module_name) 37 | 38 | 39 | class LazyImporter(object): 40 | """Lazy importer for heavy dependencies. 41 | 42 | Some datasets require heavy dependencies for data generation. To allow for 43 | the default installation to remain lean, those heavy dependencies are 44 | lazily imported here. 45 | """ 46 | 47 | @utils.classproperty 48 | @classmethod 49 | def pydub(cls): 50 | return _try_import("pydub") 51 | 52 | @utils.classproperty 53 | @classmethod 54 | def matplotlib(cls): 55 | return _try_import("matplotlib") 56 | 57 | @utils.classproperty 58 | @classmethod 59 | def pyplot(cls): 60 | return _try_import("matplotlib.pyplot") 61 | 62 | @utils.classproperty 63 | @classmethod 64 | def scipy(cls): 65 | return _try_import("scipy") 66 | 67 | @utils.classproperty 68 | @classmethod 69 | def scipy_io(cls): 70 | return _try_import("scipy.io") 71 | 72 | @utils.classproperty 73 | @classmethod 74 | def os(cls): 75 | """For testing purposes only.""" 76 | return _try_import("os") 77 | 78 | @utils.classproperty 79 | @classmethod 80 | def test_foo(cls): 81 | """For testing purposes only.""" 82 | return _try_import("test_foo") 83 | 84 | 85 | lazy_imports = LazyImporter  # pylint: disable=invalid-name 86 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/naming.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Utilities for file names.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | import re 24 | 25 | _first_cap_re = re.compile("(.)([A-Z][a-z0-9]+)") 26 | _all_cap_re = re.compile("([a-z0-9])([A-Z])") 27 | 28 | 29 | def camelcase_to_snakecase(name): 30 | """Convert camel-case string to snake-case.""" 31 | s1 = _first_cap_re.sub(r"\1_\2", name) 32 | return _all_cap_re.sub(r"\1_\2", s1).lower() 33 | 34 | 35 | def filename_prefix_for_name(name): 36 | if os.path.basename(name) != name: 37 | raise ValueError("Should be a dataset name, not a path: %s" % name) 38 | return camelcase_to_snakecase(name) 39 | 40 | 41 | def filename_prefix_for_split(name, split): 42 | if os.path.basename(name) != name: 43 | raise ValueError("Should be a dataset name, not a path: %s" % name) 44 | return "%s-%s" % (filename_prefix_for_name(name), split) 45 | 46 | 47 | def sharded_filenames(filename_prefix, num_shards): 48 | """Sharded filenames given prefix and number of shards.""" 49 | shard_suffix = "%05d-of-%05d" 50 | return [ 51 | "%s-%s" % (filename_prefix, shard_suffix % (i, num_shards)) 52 | for i in range(num_shards) 53 | ] 54 | 55 | 56 | def filepattern_for_dataset_split(dataset_name, split, data_dir, 57 | filetype_suffix=None): 58 | prefix = filename_prefix_for_split(dataset_name, split) 59 | if filetype_suffix: 60 | prefix += ".%s" % filetype_suffix 61 | filepath = os.path.join(data_dir, prefix) 62 | return "%s*" % filepath 63 | 64 | 65 | def filepaths_for_dataset_split(dataset_name, split, num_shards, data_dir, 66 | filetype_suffix=None): 67 | prefix = filename_prefix_for_split(dataset_name, split) 68 | if filetype_suffix: 69 | prefix += ".%s" % filetype_suffix 70 | filenames = sharded_filenames(prefix, num_shards) 71 | filepaths = [os.path.join(data_dir, fname) for fname in filenames] 72 | return filepaths 73 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/audio_feature_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Tests for tensorflow_datasets.core.features.audio_feature.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import array 23 | import tempfile 24 | 25 | import numpy as np 26 | import pydub 27 | import tensorflow as tf 28 | 29 | from tensorflow_datasets.core import features 30 | from tensorflow_datasets.core import test_utils 31 | 32 | tf.compat.v1.enable_eager_execution() 33 | 34 | 35 | class AudioFeatureTest(test_utils.FeatureExpectationsTestCase): 36 | 37 | @property 38 | def expectations(self): 39 | 40 | np_audio = np.random.randint(-2**10, 2**10, size=(10,), dtype=np.int64) 41 | audio = pydub.AudioSegment.empty().set_sample_width(2) 42 | # See documentation for _spawn usage: 43 | # https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples 44 | audio = audio._spawn(array.array(audio.array_type, np_audio)) 45 | _, tmp_file = tempfile.mkstemp() 46 | audio.export(tmp_file, format="wav") 47 | 48 | return [ 49 | # Numpy array 50 | test_utils.FeatureExpectation( 51 | name="audio_np", 52 | feature=features.Audio(), 53 | shape=(None,), 54 | dtype=tf.int64, 55 | tests=[ 56 | test_utils.FeatureExpectationItem( 57 | value=np_audio, 58 | expected=np_audio, 59 | ), 60 | ], 61 | ), 62 | # WAV file 63 | test_utils.FeatureExpectation( 64 | name="audio_np", 65 | feature=features.Audio(file_format="wav"), 66 | shape=(None,), 67 | dtype=tf.int64, 68 | tests=[ 69 | test_utils.FeatureExpectationItem( 70 | value=tmp_file, 71 | expected=np_audio, 72 | ), 73 | ], 74 | ), 75 | ] 76 | 77 | 78 | if __name__ == "__main__": 79 | tf.test.main() 80 | -------------------------------------------------------------------------------- /tensorflow_datasets/core/features/bounding_boxes.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2018 The TensorFlow Datasets Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Bounding boxes feature.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import collections 23 | 24 | import tensorflow as tf 25 | 26 | from tensorflow_datasets.core.features import feature 27 | 28 | 29 | BBox = collections.namedtuple('BBox', 'ymin, xmin, ymax, xmax') 30 | 31 | 32 | class BBoxFeature(feature.Tensor): 33 | """`FeatureConnector` for a normalized bounding box. 34 | 35 | Note: If you have multiple bounding boxes, you may want to wrap the feature 36 | inside a `tfds.feature.SequenceDict`. 37 | 38 | Input: 39 | * `tfds.features.BBox` tuple. 
40 | 41 | Output: 42 | bbox: tf.Tensor of type tf.float32 and shape [4,] which contains the 43 | normalized coordinates of the bounding box [ymin, xmin, ymax, xmax] 44 | 45 | Example: 46 | * In the DatasetInfo object: 47 | features=features.FeatureDict({ 48 | 'bbox': features.BBox(shape=(None, 64, 64, 3)), 49 | }) 50 | 51 | * During generation: 52 | yield { 53 | 'input': tfds.feature.BBox(ymin=0.3, xmin=0.8, ymax=0.5, xmax=1.0), 54 | } 55 | """ 56 | 57 | def __init__(self): 58 | super(BBoxFeature, self).__init__(shape=(4,), dtype=tf.float32) 59 | 60 | def encode_example(self, bbox): 61 | """See base class for details.""" 62 | # Validate the coordinates 63 | for coordinate in bbox: 64 | if not isinstance(coordinate, float): 65 | raise ValueError( 66 | 'BBox coordinates should be float. Got {}.'.format(bbox)) 67 | if not 0.0 <= coordinate <= 1.0: 68 | raise ValueError( 69 | 'BBox coordinates should be between 0 and 1. Got {}.'.format(bbox)) 70 | if bbox.xmax < bbox.xmin or bbox.ymax < bbox.ymin: 71 | raise ValueError( 72 | 'BBox coordinates should have min <= max. Got {}.'.format(bbox)) 73 | 74 | return super(BBoxFeature, self).encode_example( 75 | [bbox.ymin, bbox.xmin, bbox.ymax, bbox.xmax] 76 | ) 77 | --------------------------------------------------------------------------------