├── .bazelrc ├── .bazelversion ├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug-performance-report.md │ └── feature-request.md ├── boring-cyborg.yml ├── pull_request_template.md ├── release-template.yml └── workflows │ ├── backport.yml │ ├── ci_test.yml │ ├── make_wheel_Linux_x86.sh │ ├── release-drafter.yml │ └── release.yml ├── .gitignore ├── .vscode └── extensions.json ├── .yapf ├── BUILD ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── STYLE_GUIDE.md ├── VERSION.txt ├── WORKSPACE ├── build.sh ├── build_deps ├── build_pip_pkg.sh ├── tf_dependency │ ├── BUILD │ ├── BUILD.tpl │ ├── build_defs.bzl.tpl │ └── tf_configure.bzl └── toolchains │ └── gpu │ ├── BUILD │ ├── crosstool │ ├── BUILD │ ├── BUILD.tpl │ ├── CROSSTOOL.tpl │ ├── cc_toolchain_config.bzl.tpl │ ├── clang │ │ └── bin │ │ │ └── crosstool_wrapper_driver_is_not_gcc.tpl │ └── windows │ │ └── msvc_wrapper_for_nvcc.py.tpl │ ├── cub.BUILD │ ├── cuda │ ├── BUILD │ ├── BUILD.tpl │ ├── BUILD.windows.tpl │ ├── build_defs.bzl.tpl │ └── cuda_config.h.tpl │ ├── cuda_configure.bzl │ └── find_cuda_config.py ├── configure ├── configure.py ├── deepray ├── BUILD ├── __init__.py ├── activations │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── hardshrink.py │ ├── lisht.py │ ├── mish.py │ ├── rrelu.py │ ├── snake.py │ ├── softshrink.py │ ├── sparsemax.py │ ├── swish.py │ ├── tanhshrink.py │ └── tests │ │ ├── __init__.py │ │ ├── activations_test.py │ │ ├── hardshrink_test.py │ │ ├── lisht_test.py │ │ ├── mish_test.py │ │ ├── rrelu_test.py │ │ ├── run_all_test.py │ │ ├── snake_test.py │ │ ├── softshrink_test.py │ │ ├── sparsemax_test.py │ │ ├── swish_test.py │ │ └── tanhshrink_test.py ├── callbacks │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── average_model_checkpoint.py │ ├── callbacks.py │ ├── tests │ │ ├── __init__.py │ │ ├── avg_model_checkpoint_test.py │ │ ├── run_all_test.py │ │ ├── time_stopping_test.py │ │ └── tqdm_progress_bar_test.py │ ├── time_stopping.py │ └── tqdm_progress_bar.py ├── conftest.py ├── copts.bzl ├── core │ ├── BUILD │ ├── __init__.py │ ├── base_trainer.py │ ├── base_trainer_test.py │ ├── common │ │ ├── __init__.py │ │ ├── distribution_utils.py │ │ ├── distribution_utils_test.py │ │ ├── flags.py │ │ ├── registry_imports.py │ │ └── streamz_counters.py │ ├── compile_utils.py │ ├── dllogger_class.py │ ├── export_saved_model.py │ ├── export_saved_model_test.py │ ├── modeling_tf_utils.py │ ├── module.py │ ├── platform │ │ ├── BUILD │ │ ├── build_config.default.bzl │ │ ├── build_config_root.bzl │ │ ├── build_config_root.default.bzl │ │ └── rules_cc.bzl │ ├── runner.py │ ├── standard_runner.py │ ├── standard_runner_test.py │ └── utils │ │ ├── __init__.py │ │ ├── clip.py │ │ ├── common.py │ │ ├── common_test.py │ │ ├── epoch_helper.py │ │ ├── loop_fns.py │ │ ├── misc │ │ ├── __init__.py │ │ ├── callstack_sampler.py │ │ ├── distribution_utils.py │ │ ├── distribution_utils_test.py │ │ ├── keras_utils.py │ │ ├── model_helpers.py │ │ ├── model_helpers_test.py │ │ └── tpu_lib.py │ │ ├── summary_manager.py │ │ ├── summary_manager_interface.py │ │ ├── tpu_summaries.py │ │ └── tpu_summaries_test.py ├── custom_ops │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── correlation_cost │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── cc │ │ │ ├── kernels │ │ │ │ ├── correlation_cost_op.cc │ │ │ │ ├── correlation_cost_op.h │ │ │ │ └── correlation_cost_op_gpu.cu.cc │ │ │ └── ops │ │ │ │ └── correlation_cost_op.cc │ │ └── python │ │ │ ├── 
__init__.py │ │ │ ├── optical_flow.py │ │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── optical_flow_test.py │ │ │ └── run_all_test.py │ ├── distributed_embeddings │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cc │ │ │ ├── kernels │ │ │ │ ├── embedding_lookup.h │ │ │ │ ├── embedding_lookup_kernels.cc │ │ │ │ └── embedding_lookup_kernels.cu.cc │ │ │ └── ops │ │ │ │ └── embedding_lookup_ops.cc │ │ └── python │ │ │ ├── __init__.py │ │ │ ├── layers │ │ │ ├── __init__.py │ │ │ ├── dist_model_parallel.py │ │ │ └── embedding.py │ │ │ ├── ops │ │ │ ├── __init__.py │ │ │ └── embedding_lookup_ops.py │ │ │ └── tests │ │ │ ├── dist_model_parallel_test.py │ │ │ ├── embedding_lookup_ops_test.py │ │ │ ├── embedding_test.py │ │ │ ├── integer_lookup_test.py │ │ │ └── run_all_test.py │ ├── ffm_ops │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── cc │ │ │ ├── kernels │ │ │ │ ├── ffm_kernels.cc │ │ │ │ ├── ffm_kernels.cu.cc │ │ │ │ └── ffm_kernels.h │ │ │ └── ops │ │ │ │ └── ffm_ops.cc │ │ └── python │ │ │ ├── __init__.py │ │ │ ├── ffm_ops.py │ │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── feature_cross_test.py │ │ │ ├── ffm_ops_test.py │ │ │ └── run_all_test.py │ ├── multiplex_1 │ │ ├── BUILD │ │ ├── README.md │ │ ├── multiplex_1_kernel.cc │ │ ├── multiplex_1_op.cc │ │ ├── multiplex_1_op.py │ │ └── multiplex_1_test.py │ ├── multiplex_2 │ │ ├── BUILD │ │ ├── README.md │ │ ├── multiplex_2_kernel.cc │ │ ├── multiplex_2_kernel.cu.cc │ │ ├── multiplex_2_kernel.h │ │ ├── multiplex_2_op.cc │ │ ├── multiplex_2_op.py │ │ └── multiplex_2_test.py │ ├── multiplex_3 │ │ ├── BUILD │ │ ├── README.md │ │ ├── multiplex_3_kernel.cc │ │ ├── multiplex_3_op.cc │ │ ├── multiplex_3_op.py │ │ └── multiplex_3_test.py │ ├── multiplex_4 │ │ ├── BUILD │ │ ├── README.md │ │ ├── model_using_multiplex.py │ │ ├── multiplex_2_save.py │ │ ├── multiplex_4_kernel.cc │ │ ├── multiplex_4_load_use.py │ │ ├── multiplex_4_op.cc │ │ ├── multiplex_4_op.py │ │ └── multiplex_4_test.py │ ├── parquet_dataset │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cc │ │ │ ├── kernels │ │ │ │ ├── arrow_util.cc │ │ │ │ ├── arrow_util.h │ │ │ │ ├── eigen.h │ │ │ │ ├── parquet_batch_reader.cc │ │ │ │ ├── parquet_batch_reader.h │ │ │ │ ├── parquet_dataset_ops.cc │ │ │ │ ├── parquet_dataset_ops.h │ │ │ │ └── parquet_pybind.cc │ │ │ └── ops │ │ │ │ └── parquet_ops.cc │ │ ├── python │ │ │ ├── __init__.py │ │ │ ├── dataframe.py │ │ │ ├── parquet_dataset_ops.py │ │ │ ├── parquet_pybind.py │ │ │ └── tests │ │ │ │ ├── parquet_dataset_ops_test.py │ │ │ │ └── run_all_test.py │ │ └── read_parquet_deepray.py │ ├── seq2seq │ │ ├── BUILD │ │ └── cc │ │ │ ├── kernels │ │ │ ├── beam_search_ops.cc │ │ │ ├── beam_search_ops.h │ │ │ └── beam_search_ops_gpu.cu.cc │ │ │ └── ops │ │ │ └── beam_search_ops.cc │ ├── simple_hash_table │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── simple_hash_table.py │ │ ├── simple_hash_table_kernel.cc │ │ ├── simple_hash_table_op.cc │ │ ├── simple_hash_table_op.py │ │ └── simple_hash_table_test.py │ ├── sleep │ │ ├── BUILD │ │ ├── README.md │ │ ├── sleep_bin.py │ │ ├── sleep_kernel.cc │ │ ├── sleep_op.cc │ │ ├── sleep_op.py │ │ └── sleep_test.py │ ├── text │ │ ├── BUILD │ │ └── cc │ │ │ ├── kernels │ │ │ ├── parse_time_kernel.cc │ │ │ └── skip_gram_kernels.cc │ │ │ └── ops │ │ │ ├── parse_time_op.cc │ │ │ └── skip_gram_ops.cc │ ├── training_ops │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── cc │ │ │ ├── kernels │ │ │ │ ├── training_ops.cc │ │ │ │ ├── training_ops.h │ │ │ │ └── training_ops_gpu.cu.cc │ │ │ └── ops │ │ │ │ └── 
training_ops.cc │ │ └── python │ │ │ ├── __init__.py │ │ │ └── training_ops.py │ ├── unique_ops │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── cc │ │ │ ├── kernels │ │ │ │ ├── random.cc │ │ │ │ ├── random.h │ │ │ │ ├── random_test.cc │ │ │ │ ├── task_runner.h │ │ │ │ ├── unique_ali_op.cc │ │ │ │ ├── unique_ali_op_gpu.cu.cc │ │ │ │ └── unique_ali_op_util.h │ │ │ └── ops │ │ │ │ └── unique_ops.cc │ │ └── python │ │ │ ├── __init__.py │ │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── run_all_test.py │ │ │ └── unique_op_test.py │ │ │ └── unique_ops.py │ └── zero_out │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── cc │ │ ├── kernels │ │ │ └── zero_out_kernels.cc │ │ └── ops │ │ │ └── zero_out_ops.cc │ │ └── python │ │ ├── __init__.py │ │ ├── ops │ │ ├── __init__.py │ │ └── zero_out_ops.py │ │ └── tests │ │ ├── __init__.py │ │ ├── run_all_test.py │ │ └── zero_out_ops_test.py ├── datasets │ ├── BUILD │ ├── __init__.py │ ├── adult_census_income │ │ ├── __init__.py │ │ ├── adult_census_income.py │ │ ├── adult_census_income_test.py │ │ └── feature_map.csv │ ├── ali-ccp │ │ ├── README.md │ │ ├── __init__.py │ │ ├── ali_ccp.py │ │ ├── ali_ccp_test.py │ │ └── preprocess_ali_ccp.py │ ├── ali_display_ad_click │ │ ├── README.md │ │ ├── __init__.py │ │ ├── ali_display_ad_click.py │ │ ├── ali_display_ad_click_test.py │ │ ├── feature_map.csv │ │ └── processing │ │ │ ├── prep_1_backbone.py │ │ │ ├── prep_2_bahavior.py │ │ │ ├── prep_3_merge.py │ │ │ ├── prep_4_sort.py │ │ │ ├── run.sh │ │ │ ├── stats.py │ │ │ └── to_tfrecord.py │ ├── amazon_books_2014 │ │ ├── README.md │ │ ├── __init__.py │ │ ├── amazon_books_2014.py │ │ ├── amazon_books_2014_test.py │ │ ├── defaults.py │ │ ├── download_amazon_books_2014.sh │ │ ├── feature_map.csv │ │ └── preprocessing │ │ │ ├── __init__.py │ │ │ ├── io.py │ │ │ ├── ops.py │ │ │ ├── parquet_to_tfrecord.py │ │ │ └── sim_preprocessing.py │ ├── avazu │ │ ├── README.md │ │ ├── __init__.py │ │ ├── avazu.py │ │ ├── avazu_test.py │ │ ├── feature_map.csv │ │ └── processing.py │ ├── bookscorpus │ │ └── __init__.py │ ├── cifar │ │ ├── __init__.py │ │ ├── cifar.py │ │ └── cifar_test.py │ ├── creditcardfraud │ │ ├── __init__.py │ │ ├── creditcardfraud.py │ │ └── creditcardfraud_test.py │ ├── criteo │ │ ├── README.md │ │ ├── __init__.py │ │ ├── criteo.py │ │ ├── criteo_test.py │ │ ├── criteo_tsv_reader.py │ │ ├── criteo_tsv_reader_test.py │ │ ├── docker │ │ │ ├── Dockerfile_preprocessing │ │ │ └── requirements_preprocessing.txt │ │ ├── feature_map.csv │ │ ├── feature_map_small.csv │ │ ├── feature_map_xlarge.csv │ │ ├── preproc │ │ │ ├── DGX-2_config.sh │ │ │ ├── DGX-A100_config.sh │ │ │ ├── NVT_shuffle_spark.py │ │ │ ├── gpu │ │ │ │ ├── get_gpu_resources.sh │ │ │ │ └── spark-defaults.conf │ │ │ ├── parquet_to_binary.py │ │ │ ├── prepare_dataset.sh │ │ │ ├── preproc_NVTabular.py │ │ │ ├── run_NVTabular.sh │ │ │ ├── run_spark.sh │ │ │ ├── run_spark_cpu.sh │ │ │ ├── run_spark_gpu_DGX-2.sh │ │ │ ├── run_spark_gpu_DGX-A100.sh │ │ │ ├── spark_data_utils.py │ │ │ ├── split_dataset.py │ │ │ └── verify_criteo_downloaded.sh │ │ └── run.sh │ ├── csv_pipeline.py │ ├── datapipeline.py │ ├── dataset_factory.py │ ├── downloader │ │ ├── Downloader.py │ │ ├── GooglePretrainedWeightDownloader.py │ │ ├── NVIDIAPretrainedWeightDownloader.py │ │ ├── TextSharding.py │ │ ├── bertPrep.py │ │ ├── bookscorpus │ │ │ ├── BooksDownloader.py │ │ │ ├── BookscorpusTextFormatting.py │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── download_files.py │ │ │ ├── download_list.py │ │ │ ├── epub2txt.py │ │ │ ├── 
make_sentlines.py │ │ │ ├── requirements.txt │ │ │ ├── tokenize_sentlines.py │ │ │ └── url_list.jsonl │ │ ├── classifier_data_lib.py │ │ ├── create_datasets_from_start.sh │ │ ├── create_finetuning_data.py │ │ ├── create_pretraining_data.py │ │ ├── download.sh │ │ ├── glue │ │ │ ├── GLUEDownloader.py │ │ │ └── __init__.py │ │ ├── process.sh │ │ ├── pubmed │ │ │ ├── PubMedDownloader.py │ │ │ ├── PubMedTextFormatting.py │ │ │ └── __init__.py │ │ ├── squad │ │ │ ├── SquadDownloader.py │ │ │ └── __init__.py │ │ ├── tokenization.py │ │ └── wikicorpus │ │ │ ├── WikiDownloader.py │ │ │ ├── WikicorpusTextFormatting.py │ │ │ └── __init__.py │ ├── enwik8 │ │ └── __init__.py │ ├── fashion_mnist │ │ ├── __init__.py │ │ ├── fashion_mnist.py │ │ └── fashion_mnist_test.py │ ├── glue │ │ └── __init__.py │ ├── imagenet-1k │ │ ├── __init__.py │ │ └── imagenet_to_gcs.py │ ├── imdb │ │ ├── __init__.py │ │ ├── imdb.py │ │ └── imdb_test.py │ ├── kafka_dataset.py │ ├── mnist │ │ ├── __init__.py │ │ ├── mnist.py │ │ └── mnist_test.py │ ├── movielens │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── data_pipeline.py │ │ ├── data_preprocessing.py │ │ ├── movielens.csv │ │ ├── movielens.py │ │ ├── movielens_100k_ratings.py │ │ ├── movielens_100k_ratings_test.py │ │ ├── movielens_1m_ratings.py │ │ ├── movielens_1m_ratings_test.py │ │ ├── popen_helper.py │ │ ├── process.py │ │ ├── producer.py │ │ └── stat_utils.py │ ├── openwebtext │ │ ├── README.md │ │ ├── __init__.py │ │ ├── build_openwebtext_pretraining_dataset.py │ │ ├── build_pretraining_dataset.py │ │ ├── openwebtext.py │ │ ├── openwebtext_test.py │ │ ├── run.sh │ │ ├── tokenization.py │ │ ├── util │ │ │ ├── __init__.py │ │ │ ├── training_utils.py │ │ │ └── utils.py │ │ └── vocab.txt │ ├── parquet_pipeline │ │ ├── __init__.py │ │ ├── ali_parquet_dataset.py │ │ ├── ali_parquet_dataset_test.py │ │ └── parquet_pipeline_test.py │ ├── pubmed │ │ └── __init__.py │ ├── squad │ │ ├── __init__.py │ │ ├── squad.csv │ │ ├── squad.py │ │ ├── squad_lib.py │ │ ├── squad_lib_sp.py │ │ ├── squad_test.py │ │ ├── v1.1 │ │ │ ├── evaluate-v1.1.py │ │ │ └── squad_v1.1_meta_data │ │ └── v2.0 │ │ │ ├── evaluate-v2.0.py │ │ │ └── squad_v2.0_meta_data │ ├── taobao │ │ ├── README.md │ │ ├── __init__.py │ │ ├── csv_price_int32_to_parquet.py │ │ └── csv_price_string_to_parquet.py │ ├── tfrecord_pipeline │ │ ├── __init__.py │ │ ├── tfrecord_pipeline.py │ │ └── tfrecord_pipeline_test.py │ ├── tokenization.py │ ├── toxic_comment_classification_challenge │ │ ├── __init__.py │ │ ├── toxic_comment_classification_challenge.py │ │ └── toxic_comment_classification_challenge_test.py │ ├── wikicorpus_en │ │ ├── README.md │ │ ├── processing │ │ │ ├── BookscorpusTextFormatting.py │ │ │ ├── Downloader.py │ │ │ ├── TextSharding.py │ │ │ ├── WikiDownloader.py │ │ │ ├── WikicorpusTextFormatting.py │ │ │ ├── __init__.py │ │ │ ├── build_pretraining_dataset.py │ │ │ ├── create_datasets_from_start.sh │ │ │ ├── dataPrep.py │ │ │ ├── file_utils.py │ │ │ ├── run.sh │ │ │ ├── tokenization.py │ │ │ ├── tokenization_utils.py │ │ │ ├── utils.py │ │ │ ├── vocab │ │ │ │ └── vocab.txt │ │ │ └── wikiextractor │ │ │ │ ├── .gitignore │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── WikiExtractor.py │ │ │ │ ├── categories.filter │ │ │ │ ├── cirrus-extract.py │ │ │ │ └── extract.sh │ │ ├── wikicorpus_en.py │ │ └── wikicorpus_en_test.py │ ├── wikitext103 │ │ ├── __init__.py │ │ └── wikitext103.py │ └── wikitext2 │ │ └── __init__.py ├── deepray.bzl ├── design_patterns.py ├── layers │ ├── BUILD │ ├── README.md │ ├── 
__init__.py │ ├── adaptive_pooling.py │ ├── attention.py │ ├── bucketize.py │ ├── crf.py │ ├── dcn.py │ ├── dense_einsum.py │ ├── dot_interaction.py │ ├── dynamic_embedding.py │ ├── embedding.py │ ├── esn.py │ ├── feature_cross.py │ ├── flash_attention.py │ ├── masked_softmax.py │ ├── max_unpooling_2d.py │ ├── max_unpooling_2d_v2.py │ ├── maxout.py │ ├── mlp.py │ ├── netvlad.py │ ├── networks │ │ ├── README.md │ │ ├── __init__.py │ │ ├── albert_encoder.py │ │ ├── albert_encoder_test.py │ │ ├── albert_transformer_encoder.py │ │ ├── albert_transformer_encoder_test.py │ │ ├── bert_classifier.py │ │ ├── bert_classifier_test.py │ │ ├── bert_dense_encoder_test.py │ │ ├── bert_encoder.py │ │ ├── bert_encoder_test.py │ │ ├── bert_pretrainer.py │ │ ├── bert_pretrainer_test.py │ │ ├── bert_span_labeler.py │ │ ├── bert_span_labeler_test.py │ │ ├── classification.py │ │ ├── classification_test.py │ │ ├── encoder_scaffold.py │ │ ├── encoder_scaffold_test.py │ │ ├── fnet.py │ │ ├── fnet_test.py │ │ ├── funnel_transformer.py │ │ ├── funnel_transformer_test.py │ │ ├── masked_lm.py │ │ ├── masked_lm_test.py │ │ ├── mobile_bert_encoder.py │ │ ├── mobile_bert_encoder_test.py │ │ ├── packed_sequence_embedding.py │ │ ├── packed_sequence_embedding_test.py │ │ ├── span_labeling.py │ │ ├── span_labeling_test.py │ │ ├── sparse_mixer.py │ │ ├── sparse_mixer_test.py │ │ ├── transformer_encoder.py │ │ ├── transformer_encoder_test.py │ │ ├── xlnet_base.py │ │ └── xlnet_base_test.py │ ├── nlp │ │ ├── __init__.py │ │ ├── bert_modeling.py │ │ ├── bert_models.py │ │ └── transformer │ │ │ ├── __init__.py │ │ │ ├── beam_search_v1.py │ │ │ ├── beam_search_v1_test.py │ │ │ ├── model_params.py │ │ │ ├── model_utils.py │ │ │ └── model_utils_test.py │ ├── noisy_dense.py │ ├── normalizations.py │ ├── on_device_embedding.py │ ├── poincare.py │ ├── polynomial.py │ ├── pooling.py │ ├── position_embedding.py │ ├── relative_attention.py │ ├── rnn │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── esn_cell.py │ │ ├── layer_norm_lstm_cell.py │ │ ├── layer_norm_simple_rnn_cell.py │ │ ├── nas_cell.py │ │ ├── peephole_lstm_cell.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ ├── esn_cell_test.py │ │ │ ├── layer_norm_lstm_cell_test.py │ │ │ ├── layer_norm_simple_rnn_cell_test.py │ │ │ ├── nas_cell_test.py │ │ │ ├── peephole_lstm_cell_test.py │ │ │ └── run_all_test.py │ ├── self_attention_mask.py │ ├── snake.py │ ├── sparsemax.py │ ├── spatial_pyramid_pooling.py │ ├── spectral_normalization.py │ ├── stochastic_depth.py │ ├── tests_bak │ │ ├── __init__.py │ │ ├── adaptive_pooling_test.py │ │ ├── attention_test.py │ │ ├── crf_test.py │ │ ├── dcn_test.py │ │ ├── dense_einsum_test.py │ │ ├── dnn_test.py │ │ ├── dot_interaction_test.py │ │ ├── embedding_test.py │ │ ├── esn_test.py │ │ ├── feature_cross_test.py │ │ ├── masked_softmax_test.py │ │ ├── max_unpooling_2d_test.py │ │ ├── max_unpooling_2d_v2_test.py │ │ ├── maxout_test.py │ │ ├── mlp_test.py │ │ ├── netvlad_test.py │ │ ├── noisy_dense_test.py │ │ ├── normalizations_test.py │ │ ├── poincare_test.py │ │ ├── polynomial_test.py │ │ ├── position_embedding_test.py │ │ ├── relative_attention_test.py │ │ ├── run_all_test.py │ │ ├── snake_test.py │ │ ├── sparsemax_test.py │ │ ├── spatial_pyramid_pooling_test.py │ │ ├── spectral_normalization_test.py │ │ ├── stochastic_depth_test.py │ │ ├── tlu_test.py │ │ ├── transformer_scaffold_test.py │ │ ├── transformer_test.py │ │ ├── transformer_xl_test.py │ │ └── wrappers_test.py │ ├── tf_utils.py │ ├── tlu.py │ ├── transformer.py │ ├── 
transformer_scaffold.py │ ├── transformer_xl.py │ └── wrappers.py ├── losses │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── contrastive.py │ ├── focal_loss.py │ ├── giou_loss.py │ ├── kappa_loss.py │ ├── lifted.py │ ├── metric_learning.py │ ├── npairs.py │ ├── quantiles.py │ ├── sparsemax_loss.py │ ├── tests │ │ ├── __init__.py │ │ ├── contrastive_test.py │ │ ├── focal_loss_test.py │ │ ├── giou_loss_test.py │ │ ├── kappa_loss_test.py │ │ ├── lifted_test.py │ │ ├── metric_test.py │ │ ├── npairs_test.py │ │ ├── quantiles_test.py │ │ ├── run_all_test.py │ │ ├── sparsemax_loss_test.py │ │ └── triplet_test.py │ └── triplet.py ├── metrics │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── cohens_kappa.py │ ├── f_scores.py │ ├── geometric_mean.py │ ├── hamming.py │ ├── harmonic_mean.py │ ├── matthews_correlation_coefficient.py │ ├── multilabel_confusion_matrix.py │ ├── r_square.py │ ├── streaming_correlations.py │ ├── tests │ │ ├── __init__.py │ │ ├── cohens_kappa_test.py │ │ ├── f_scores_test.py │ │ ├── geometric_mean_test.py │ │ ├── hamming_test.py │ │ ├── harmonic_mean_test.py │ │ ├── matthews_correlation_coefficient_test.py │ │ ├── metrics_test.py │ │ ├── multilabel_confusion_matrix_test.py │ │ ├── r_square_test.py │ │ ├── run_all_test.py │ │ └── streaming_correlations_test.py │ └── utils.py ├── models │ ├── BUILD │ ├── __init__.py │ ├── deep_cross_net.py │ ├── framework.py │ ├── generative │ │ ├── __init__.py │ │ └── conditional_gan.py │ ├── ncf_common.py │ ├── ncf_model.py │ ├── ncf_test.py │ ├── neumf_model.py │ ├── rec │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── cgc_v3.py │ │ ├── deepfm.py │ │ ├── dien_model.py │ │ ├── din_model.py │ │ ├── dual_channels_deep_model.py │ │ ├── flen.py │ │ ├── flend.py │ │ ├── sequential_recommender_model.py │ │ ├── sim_model.py │ │ ├── tfra_demo.py │ │ └── tower_new_tfra.py │ ├── stable_diffusion │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __internal__ │ │ │ ├── __init__.py │ │ │ └── layers │ │ │ │ ├── __init__.py │ │ │ │ ├── attention_block.py │ │ │ │ ├── padded_conv2d.py │ │ │ │ └── resnet_block.py │ │ ├── clip_tokenizer.py │ │ ├── constants.py │ │ ├── decoder.py │ │ ├── diffusion_model.py │ │ ├── image_encoder.py │ │ ├── noise_scheduler.py │ │ ├── stable_diffusion.py │ │ ├── stable_diffusion_test.py │ │ └── text_encoder.py │ └── word2vec.py ├── ops │ ├── __init__.py │ ├── beam_search.py │ ├── beam_search_test.py │ ├── decoding_module.py │ ├── decoding_module_test.py │ ├── sampling_module.py │ ├── segment_extractor.py │ └── segment_extractor_test.py ├── optimizers │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── adabelief.py │ ├── adam.py │ ├── average_wrapper.py │ ├── cocob.py │ ├── conditional_gradient.py │ ├── constants.py │ ├── cyclical_learning_rate.py │ ├── lamb.py │ ├── lazy_adam.py │ ├── lookahead.py │ ├── moving_average.py │ ├── multi_optimizer.py │ ├── novograd.py │ ├── optimization.py │ ├── proximal_adagrad.py │ ├── rectified_adam.py │ ├── stochastic_weight_averaging.py │ ├── tests │ │ ├── __init__.py │ │ ├── adabelief_test.py │ │ ├── adam_test.py │ │ ├── cocob_test.py │ │ ├── conditional_gradient_test.py │ │ ├── cyclical_learning_rate_test.py │ │ ├── lamb_test.py │ │ ├── lazy_adam_test.py │ │ ├── lookahead_test.py │ │ ├── moving_average_test.py │ │ ├── multi_optimizer_test.py │ │ ├── novograd_test.py │ │ ├── proximal_adagrad_test.py │ │ ├── rectified_adam_test.py │ │ ├── run_all_test.py │ │ ├── standard_test.py │ │ ├── stochastic_weight_averaging_test.py │ │ ├── weight_decay_optimizers_test.py │ │ └── yogi_test.py │ ├── 
utils.py │ ├── warmup.py │ ├── weight_decay_optimizers.py │ └── yogi.py ├── options.py ├── register.py ├── seq2seq │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── attention_wrapper.py │ ├── basic_decoder.py │ ├── beam_search_decoder.py │ ├── decoder.py │ ├── loss.py │ ├── sampler.py │ └── tests │ │ ├── __init__.py │ │ ├── attention_wrapper_test.py │ │ ├── basic_decoder_test.py │ │ ├── beam_search_decoder_test.py │ │ ├── beam_search_ops_test.py │ │ ├── decoder_test.py │ │ ├── loss_test.py │ │ └── run_all_test.py ├── tensorflow.bzl ├── testing │ ├── BUILD │ ├── __init__.py │ ├── serialization.py │ └── tests │ │ ├── __init__.py │ │ ├── run_all_test.py │ │ └── serialization_test.py ├── tests │ ├── __init__.py │ ├── register_test.py │ └── run_all_test.py ├── text │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── crf.py │ ├── crf_wrapper.py │ ├── parse_time_op.py │ ├── skip_gram_ops.py │ └── tests │ │ ├── __init__.py │ │ ├── crf_test.py │ │ ├── crf_wrapper_test.py │ │ ├── parse_time_op_test.py │ │ ├── run_all_test.py │ │ └── skip_gram_ops_test.py ├── utils │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── accelerator │ │ ├── __init__.py │ │ ├── tpu.py │ │ └── tpu_test.py │ ├── benchmark.py │ ├── data │ │ ├── __init__.py │ │ ├── feature_map.py │ │ ├── feature_map_test.py │ │ ├── file_io.py │ │ ├── file_io_test.py │ │ └── input_meta.py │ ├── dllogger_class.py │ ├── ensure_tf_install.py │ ├── export │ │ ├── __init__.py │ │ ├── export.py │ │ └── export_test.py │ ├── flags │ │ ├── README.md │ │ ├── __init__.py │ │ ├── _base.py │ │ ├── _benchmark.py │ │ ├── _conventions.py │ │ ├── _data.py │ │ ├── _device.py │ │ ├── _distribution.py │ │ ├── _misc.py │ │ ├── _performance.py │ │ ├── common_flags.py │ │ ├── core.py │ │ ├── flags_test.py │ │ └── guidelines.md │ ├── gpu_affinity.py │ ├── horovod_utils.py │ ├── keras_utils.py │ ├── logs │ │ ├── __init__.py │ │ ├── cloud_lib.py │ │ ├── cloud_lib_test.py │ │ ├── guidelines.md │ │ ├── hooks.py │ │ ├── hooks_helper.py │ │ ├── hooks_helper_test.py │ │ ├── hooks_test.py │ │ ├── logger.py │ │ ├── logger_test.py │ │ ├── metric_hook.py │ │ ├── metric_hook_test.py │ │ ├── mlperf_helper.py │ │ └── summary_manager.py │ ├── misc │ │ ├── __init__.py │ │ ├── keras_utils.py │ │ ├── model_helpers.py │ │ └── model_helpers_test.py │ ├── resource_loader.py │ ├── test_utils.py │ ├── tests │ │ ├── __init__.py │ │ ├── keras_utils_test.py │ │ ├── run_all_test.py │ │ └── test_utils_test.py │ ├── tfrecord_inspecter.py │ └── types.py ├── version.py ├── workspace0.bzl ├── workspace2.bzl └── workspace3.bzl ├── docker.sh ├── docs ├── README.md ├── overview.md └── tutorials │ ├── README.md │ ├── _template.ipynb │ ├── _toc.yaml │ ├── average_optimizers_callback.ipynb │ ├── image_ops.ipynb │ ├── layers_normalizations.ipynb │ ├── layers_weightnormalization.ipynb │ ├── losses_triplet.ipynb │ ├── networks_seq2seq_nmt.ipynb │ ├── optimizers_conditionalgradient.ipynb │ ├── optimizers_cyclicallearningrate.ipynb │ ├── optimizers_lazyadam.ipynb │ ├── time_stopping.ipynb │ └── tqdm_progress_bar.ipynb ├── modelzoo ├── CV │ ├── Classify_images_of_clothing │ │ ├── run_horovod.sh │ │ └── train.py │ ├── GAN │ │ ├── conditional_gan_mnist.py │ │ └── train.py │ ├── SwinTransformers │ │ ├── model.py │ │ ├── run_horovod.sh │ │ └── train.py │ ├── __init__.py │ ├── knowledge_distillation │ │ └── knowledge_distillation.py │ └── mnist │ │ ├── __init__.py │ │ ├── run_early.sh │ │ ├── run_horovod.sh │ │ ├── train.py │ │ └── train_earlystop.py ├── LanguageModeling │ ├── BERT │ │ ├── Bert_result.md │ │ ├── 
README.md │ │ ├── logs │ │ │ ├── squad_train_benchmark_base_fp16_gpu4_bs8.log │ │ │ └── tf_bert_finetuning_squad_base_fp16_gbs48.230222025408.log │ │ ├── optimization.py │ │ ├── run_squad.py │ │ ├── run_squad_predict.py │ │ └── scripts │ │ │ ├── benchmark_pretraining_lamb_phase2.sh │ │ │ ├── configs │ │ │ ├── configurations.yml │ │ │ ├── pretrain_config.sh │ │ │ └── squad_config.sh │ │ │ ├── data_download.sh │ │ │ ├── finetune_inference_benchmark.sh │ │ │ ├── finetune_train_benchmark.sh │ │ │ ├── gen_squad_evel.sh │ │ │ ├── run_inference_benchmark.sh │ │ │ ├── run_inference_benchmark_seq128.sh │ │ │ ├── run_pretraining_adam.sh │ │ │ ├── run_pretraining_lamb.sh │ │ │ ├── run_pretraining_lamb_phase1.sh │ │ │ ├── run_pretraining_lamb_phase2.sh │ │ │ ├── run_squad.sh │ │ │ └── run_squad_inference.sh │ ├── Classify-text-with-BERT │ │ └── classify_text_with_bert.py │ ├── Multi-label-classification-with-BERT │ │ ├── README.md │ │ ├── run_horovod.sh │ │ └── trainer.py │ └── text_generation_with_miniature_gpt │ │ └── text_generation_with_miniature_gpt.py ├── Recommendation │ ├── CreditCardFraudDetection │ │ ├── run_horovod.sh │ │ └── train.py │ ├── NCF │ │ ├── run_ncf.py │ │ └── run_ncf.sh │ ├── SIM │ │ ├── defaults.py │ │ ├── feature_spec.py │ │ ├── logs │ │ │ ├── tf_training_amazon_books_2014_dien_fp16_gbs32768.230321132836.log │ │ │ ├── tf_training_amazon_books_2014_din_fp16_gbs32768.230321132123.log │ │ │ └── tf_training_amazon_books_2014_sim_fp16_gbs32768.230321133429.log │ │ ├── main.py │ │ ├── run_dien.py │ │ ├── run_din.py │ │ ├── run_horovod.sh │ │ └── run_sim.py │ ├── TFRA │ │ ├── demo.py │ │ ├── demo_tfra.py │ │ └── run_horovod.sh │ ├── WideDeep │ │ ├── model.py │ │ ├── run_horovod.sh │ │ └── train.py │ ├── __init__.py │ ├── avazu-ctr-prediction │ │ ├── BUILD │ │ ├── README.md │ │ ├── __init__.py │ │ ├── ccpm.py │ │ ├── ccpm_diamond.py │ │ ├── feature_map.csv │ │ ├── run_horovod.sh │ │ └── train.py │ ├── criteo_ctr │ │ ├── Frozen-Graph-TensorFlow │ │ │ ├── README.md │ │ │ └── TensorFlow_v2 │ │ │ │ ├── README.md │ │ │ │ ├── example_1.py │ │ │ │ ├── example_2.py │ │ │ │ └── utils.py │ │ ├── dcn_v2.py │ │ ├── feature_map_small.csv │ │ ├── frozen.py │ │ ├── infer.py │ │ ├── optimize_for_inference.py │ │ ├── run_horovod.sh │ │ ├── run_optimize.sh │ │ └── train.py │ └── keras_horovod_dis │ │ ├── demo_tfra.py │ │ ├── keras_horovod_distributed_demo.py │ │ ├── run_horovod.sh │ │ └── start_train.sh ├── __init__.py └── horovod_test │ ├── run.sh │ └── tensorflow2_synthetic_benchmark.py ├── pytest.ini ├── recommendation ├── README.md ├── __init__.py ├── constants.py ├── create_ncf_data.py ├── data_pipeline.py ├── data_preprocessing.py ├── data_test.py ├── movielens.py ├── movielens_dataset.py ├── ncf_common.py ├── ncf_input_pipeline.py ├── ncf_keras_main.py ├── ncf_test.py ├── neumf_model.py ├── popen_helper.py ├── ranking │ ├── README.md │ ├── __init__.py │ ├── common.py │ ├── configs │ │ ├── __init__.py │ │ ├── config.py │ │ ├── config_test.py │ │ └── yaml │ │ │ ├── dcn_v2_criteo_tpu.yaml │ │ │ └── dlrm_criteo_tpu.yaml │ ├── data │ │ ├── __init__.py │ │ ├── data_pipeline.py │ │ └── data_pipeline_test.py │ ├── preprocessing │ │ ├── README.md │ │ ├── criteo_preprocess.py │ │ ├── setup.py │ │ └── shard_rebalancer.py │ ├── task.py │ ├── task_test.py │ ├── train.py │ └── train_test.py ├── run.sh ├── stat_utils.py └── train.sh ├── requirements.txt ├── setup.py ├── third_party ├── BUILD ├── __init__.py ├── arrow │ ├── BUILD │ └── arrow.BUILD ├── aws-c-common.BUILD ├── aws-c-event-stream.BUILD ├── 
aws-checksums.BUILD ├── aws-sdk-cpp.BUILD ├── brotli.BUILD ├── cuckoohash_map.patch ├── cucollection │ ├── BUILD │ ├── cuco.BUILD │ └── cucollection.patch ├── cutlass.BUILD ├── double_conversion.BUILD ├── eigen3.BUILD ├── farmhash.BUILD ├── gpus │ └── find_cuda_config.py ├── hadoop │ ├── BUILD │ ├── LICENSE.txt │ └── hdfs.h ├── libcuckoo.BUILD ├── murmurhash.BUILD ├── openblas.BUILD ├── oss_c_sdk.BUILD ├── rapidjson.BUILD ├── sparsehash.BUILD ├── sparsehash_c11.BUILD ├── tbb.BUILD ├── thrift │ ├── BUILD │ ├── build_defs.bzl │ ├── extra │ │ ├── BUILD │ │ └── config.h │ └── thrift.BUILD └── xsimd.BUILD └── tools ├── build_base_container.sh ├── docker ├── base_container.Dockerfile ├── bashrc.bash ├── build_wheel.Dockerfile ├── pre-commit.Dockerfile └── sanity_check.Dockerfile ├── docs ├── BUILD ├── Readme.md └── build_docs.py ├── format.py ├── install_deps ├── buildifier.sh ├── clang-format.sh ├── doc_requirements.txt ├── install_bazelisk.sh ├── install_clang.sh ├── install_cmake.sh ├── install_openmpi.sh ├── install_python.sh ├── pytest.txt ├── tensorflow-cpu.txt ├── tensorflow.txt ├── typedapi.txt └── yapf.txt ├── pre-commit.sh ├── releases └── tf_auditwheel_patch.sh ├── run_build.sh ├── run_google_cloud_tests.sh ├── run_gpu_tests.sh ├── run_sanity_check.sh ├── testing └── parallel_gpu_execute.sh └── update_release_version.sh
--------------------------------------------------------------------------------
/.bazelversion:
--------------------------------------------------------------------------------
5.3.1
--------------------------------------------------------------------------------
/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
ARG IMAGE_TYPE=latest-cpu
FROM hailinfufu/deepray:$IMAGE_TYPE
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
.git
.github
*.Dockerfile
.coverage*
# C extensions
*.so
wheelhouse/
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-performance-report.md:
--------------------------------------------------------------------------------
---
name: Bug/Performance Issue
about: Use this template for reporting a bug or a performance issue.

---

**System information**
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04):
- TensorFlow version and how it was installed (source or binary):
- Deepray version and how it was installed (source or binary):
- Python version:
- Is GPU used? (yes/no):

**Describe the bug**

A clear and concise description of what the bug is.

**Code to reproduce the issue**

Provide a reproducible test case that is the bare minimum necessary to generate the problem.

**Other info / logs**

Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.md:
--------------------------------------------------------------------------------
---
name: Feature request
about: Use this template for raising a feature request

---

**Describe the feature and the current behavior/state.**

**Relevant information**
- Are you willing to contribute it (yes/no):
*If you wish to contribute, then read the requirements for new contributions in [`CONTRIBUTING.md`](https://github.com/tensorflow/deepray/blob/master/CONTRIBUTING.md#requirements-for-new-contributions-to-the-repository)*
- Are you willing to maintain it going forward? (yes/no):
- Is there a relevant academic paper? (if so, where):
- Does the relevant academic paper exceed 50 citations? (yes/no):
- Is there already an implementation in another framework? (if so, where):
- Was it part of tf.contrib? (if so, where):

**Which API type would this fall under (layer, metric, optimizer, etc.)?**

**Who will benefit from this feature?**

**Any other info.**
--------------------------------------------------------------------------------
/.github/boring-cyborg.yml:
--------------------------------------------------------------------------------
labelPRBasedOnFilePath:
  # Subpackages
  activations:
    - deepray/activations/**/*

  callbacks:
    - deepray/callbacks/**/*

  custom-ops:
    - deepray/custom_ops/**/*

  image:
    - deepray/image/**/*

  layers:
    - deepray/layers/**/*

  losses:
    - deepray/losses/**/*

  metrics:
    - deepray/metrics/**/*

  optimizers:
    - deepray/optimizers/**/*

  seq2seq:
    - deepray/seq2seq/**/*

  text:
    - deepray/text/**/*

  # Other labels
  build:
    - build_deps/**/*
    - tools/releases/**/*

  documentation:
    - docs/**/*

  tutorials:
    - docs/tutorials/**/*

  test-cases:
    - tools/testing/**/*

  style:
    - STYLE_GUIDE.md

  github:
    - .github/**/*
--------------------------------------------------------------------------------
/.github/release-template.yml:
--------------------------------------------------------------------------------
template: |
  ## Release Notes

  $CHANGES

  ## Thanks to our Contributors

  $CONTRIBUTORS
--------------------------------------------------------------------------------
/.github/workflows/backport.yml:
--------------------------------------------------------------------------------
name: Backport
on:
  pull_request:
    types:
      - closed
      - labeled

jobs:
  backport:
    runs-on: ubuntu-20.04
    name: Backport
    steps:
      - name: Backport Bot
        if: github.event.pull_request.merged && ( ( github.event.action == 'closed' && contains( join( github.event.pull_request.labels.*.name ), 'backport') ) || contains( github.event.label.name, 'backport' ) )
        uses: Gaurav0/backport@d69fd1d2469762a7b4007f671857e4f94deed0af # Version 1.0.24
        with:
          bot_username: bot-of-gabrieldemarmiesse
          bot_token: 1353d990cdb8b8ceb1b73d301dce83cc0da3db29
          bot_token_key: a1b2c3d47311f8e29e204f85a81b4df4a44e252c
          github_token: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/make_wheel_Linux_x86.sh:
--------------------------------------------------------------------------------
set -e -x

df -h
docker info

# Tests are run as part of the make_wheel target
DOCKER_BUILDKIT=1 docker build \
  -f tools/docker/build_wheel.Dockerfile \
  --output type=local,dest=wheelhouse \
  --build-arg PY_VERSION \
  --build-arg TF_VERSION \
  --build-arg NIGHTLY_FLAG \
  --build-arg NIGHTLY_TIME \
  --build-arg SKIP_CUSTOM_OP_TESTS \
  ./
--------------------------------------------------------------------------------
/.github/workflows/release-drafter.yml:
--------------------------------------------------------------------------------
name: release-drafter

on:
  push:
    branches:
      - main
      - r*

jobs:
  update_release_draft:
    runs-on: ubuntu-latest
    steps:
      - uses: release-drafter/release-drafter@74e7c423dafbb406c9c18b1638334f67a7c891c3 # Version 5.7.0
        with:
          config-name: release-template.yml
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
pip-wheel-metadata

# Jupyter Notebook
.ipynb_checkpoints

# IDE
.vscode/
.idea/
*.iml

# Build
/bazel-*
/artifacts
/.bazelrc.user
/.dp_configure.bazelrc

.coverage*
htmlcov

wheelhouse/
.vscode/sftp.json
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
{
  "recommendations": [
    "bazelbuild.vscode-bazel"
  ]
}
--------------------------------------------------------------------------------
/.yapf:
--------------------------------------------------------------------------------
[style]
based_on_style = google
indent_width = 2
dedent_closing_brackets = True
split_before_dot = True
column_limit = 120
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
sh_binary(
    name = "build_pip_pkg",
    srcs = ["build_deps/build_pip_pkg.sh"],
    data = [
        "LICENSE",
        "MANIFEST.in",
        "requirements.txt",
        "setup.py",
        "//deepray",
    ],
)
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
recursive-include deepray *.so
include docs/*
--------------------------------------------------------------------------------
/VERSION.txt:
--------------------------------------------------------------------------------
0.21.0
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -e

yes "" | bash ./configure || true

# bazel build build_pip_pkg \
#   --action_env=HTTP_PROXY=http://127.0.0.1:7890 \
#   --action_env=HTTPS_PROXY=http://127.0.0.1:7890

bazel build build_pip_pkg

rm -rf artifacts/

bazel-bin/build_pip_pkg artifacts

pip uninstall deepray -y

pip install artifacts/deepray-*.whl

# sphinx-autobuild docs/ docs/_build/html/ --host 10.0.74.1
--------------------------------------------------------------------------------
/build_deps/tf_dependency/BUILD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/build_deps/tf_dependency/BUILD
--------------------------------------------------------------------------------
/build_deps/tf_dependency/BUILD.tpl:
--------------------------------------------------------------------------------
package(default_visibility = ["//visibility:public"])

cc_library(
    name = "tf_header_lib",
    hdrs = [":tf_header_include"],
    includes = ["include"],
    visibility = ["//visibility:public"],
)

cc_library(
    name = "libtensorflow_framework",
    srcs = ["%{TF_SHARED_LIBRARY_NAME}"],
    visibility = ["//visibility:public"],
)

cc_library(
    name = "libtensorflow_cc",
    srcs = ["%{TF_SHARED_CC_LIBRARY_NAME}"],
    visibility = ["//visibility:public"],
)

%{TF_HEADER_GENRULE}
%{TF_SHARED_LIBRARY_GENRULE}
%{TF_SHARED_CC_LIBRARY_GENRULE}
--------------------------------------------------------------------------------
/build_deps/tf_dependency/build_defs.bzl.tpl:
--------------------------------------------------------------------------------
# Deepray Build Definitions inherited from TensorFlow Core

D_GLIBCXX_USE_CXX11_ABI = "%{tf_cx11_abi}"
CPLUSPLUS_VERSION = "%{tf_cplusplus_ver}"
--------------------------------------------------------------------------------
/build_deps/toolchains/gpu/BUILD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/build_deps/toolchains/gpu/BUILD
--------------------------------------------------------------------------------
/build_deps/toolchains/gpu/crosstool/BUILD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/build_deps/toolchains/gpu/crosstool/BUILD
--------------------------------------------------------------------------------
/build_deps/toolchains/gpu/cub.BUILD:
--------------------------------------------------------------------------------
# Description: CUB library, a set of primitives for GPU programming.

load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts", "if_cuda")

package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # BSD

filegroup(
    name = "cub_header_files",
    srcs = glob([
        "cub/**",
    ]),
)

cc_library(
    name = "cub",
    hdrs = if_cuda([":cub_header_files"]),
    include_prefix = "gpu",
    deps = [
        "@local_config_cuda//cuda:cuda_headers",
    ],
)
--------------------------------------------------------------------------------
/build_deps/toolchains/gpu/cuda/BUILD:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/build_deps/toolchains/gpu/cuda/BUILD
--------------------------------------------------------------------------------
/build_deps/toolchains/gpu/cuda/cuda_config.h.tpl:
--------------------------------------------------------------------------------
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef CUDA_CUDA_CONFIG_H_
#define CUDA_CUDA_CONFIG_H_

#define TF_CUDA_CAPABILITIES %{cuda_compute_capabilities}

#define TF_CUDA_VERSION "%{cuda_version}"
#define TF_CUDNN_VERSION "%{cudnn_version}"

#define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}"

#endif  // CUDA_CUDA_CONFIG_H_
--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -e
set -o pipefail

if [ -z "$PYTHON_BIN_PATH" ]; then
  PYTHON_BIN_PATH=$(which python3 || which python || true)
fi

# Set all env variables
CONFIGURE_DIR=$(dirname "$0")
"$PYTHON_BIN_PATH" "${CONFIGURE_DIR}/configure.py" "$@"

echo "Configuration finished"
--------------------------------------------------------------------------------
/deepray/activations/BUILD:
--------------------------------------------------------------------------------
licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])

py_library(
    name = "activations",
    srcs = glob(["*.py"]),
    data = [
        "//deepray:options.py",
        "//deepray/testing",
        "//deepray/utils",
    ],
)

py_test(
    name = "activations_test",
    size = "small",
    srcs = glob(["tests/*"]),
    main = "run_all_test.py",
    deps = [
        ":activations",
    ],
)
--------------------------------------------------------------------------------
/deepray/activations/README.md:
--------------------------------------------------------------------------------
# Deepray - Activations

## Contents
https://www.tensorflow.org/deepray/api_docs/python/dp/activations

## Contribution Guidelines
#### Standard API
In order to conform with the current API standard, all activations must:
* Be a `tf.function` unless it is a straightforward call to a custom op or likely to be retraced.
* Register as a Keras global object so it can be serialized properly: `@tf.keras.utils.register_keras_serializable(package='Deepray')`

A minimal sketch that follows these conventions is given at the end of this README.

#### Testing Requirements
* Simple unittests that demonstrate the activation is behaving as expected.
* To run your `tf.functions` in eager mode and graph mode in the tests,
  you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")`
  decorator. This will run the tests twice, once normally, and once
  with `tf.config.run_functions_eagerly(True)`.
* Add the activation name to [activations_test.py](https://github.com/tensorflow/deepray/tree/master/deepray/activations/tests/activations_test.py) to test serialization.

#### Documentation Requirements
* Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS)
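
#### Example
A minimal sketch of a conforming activation and a matching test. The name `my_activation` and its formula (Swish/SiLU, chosen only for concreteness) are illustrative and not part of the package; the `maybe_run_functions_eagerly` fixture is the one re-exported by the repository's root `conftest.py`.

```python
import numpy as np
import pytest
import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package="Deepray")
@tf.function
def my_activation(x: tf.Tensor) -> tf.Tensor:
  """Illustrative activation: x * sigmoid(x) (the Swish/SiLU formula)."""
  x = tf.convert_to_tensor(x)
  return x * tf.nn.sigmoid(x)


@pytest.mark.usefixtures("maybe_run_functions_eagerly")
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_my_activation(dtype):
  x = tf.constant([-1.0, 0.0, 1.0], dtype=dtype)
  # sigmoid(1) = 0.7310586, so my_activation(1.0) = 0.7310586, etc.
  expected = tf.constant([-0.26894143, 0.0, 0.7310586], dtype=dtype)
  np.testing.assert_allclose(my_activation(x), expected, rtol=1e-3)
```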
--------------------------------------------------------------------------------
/deepray/activations/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional activation functions."""

from deepray.activations.hardshrink import hardshrink
from deepray.activations.lisht import lisht
from deepray.activations.mish import mish
from deepray.activations.softshrink import softshrink
from deepray.activations.rrelu import rrelu
from deepray.activations.snake import snake
from deepray.activations.sparsemax import sparsemax
from deepray.activations.tanhshrink import tanhshrink
from deepray.activations.swish import simple_swish
from deepray.activations.swish import hard_swish
from deepray.activations.swish import identity
--------------------------------------------------------------------------------
/deepray/activations/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/activations/tests/__init__.py
--------------------------------------------------------------------------------
/deepray/activations/tests/lisht_test.py:
--------------------------------------------------------------------------------
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import pytest

import numpy as np
import tensorflow as tf
from deepray.activations import lisht
from deepray.utils import test_utils


@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_lisht(dtype):
  x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
  expected_result = tf.constant([1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552], dtype=dtype)
  test_utils.assert_allclose_according_to_type(lisht(x), expected_result)
--------------------------------------------------------------------------------
/deepray/activations/tests/mish_test.py:
--------------------------------------------------------------------------------
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import pytest

import numpy as np
import tensorflow as tf
from deepray.activations import mish
from deepray.utils import test_utils


@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_mish(dtype):
  x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
  expected_result = tf.constant([-0.2525015, -0.30340144, 0.0, 0.86509836, 1.943959], dtype=dtype)
  test_utils.assert_allclose_according_to_type(mish(x), expected_result)
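
The constants in the two tests above come directly from the definitions lisht(x) = x * tanh(x) and mish(x) = x * tanh(softplus(x)). A quick NumPy check of the expected vectors (a standalone sketch, not part of the test suite):

```python
import numpy as np

x = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
softplus = np.log1p(np.exp(x))  # softplus(x) = ln(1 + e^x)

print(x * np.tanh(x))         # lisht -> [1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552]
print(x * np.tanh(softplus))  # mish  -> [-0.2525015, -0.3034014, 0.0, 0.8650984, 1.9439590]
```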
--------------------------------------------------------------------------------
/deepray/activations/tests/run_all_test.py:
--------------------------------------------------------------------------------
from pathlib import Path
import sys

import pytest

if __name__ == "__main__":
  dirname = Path(__file__).absolute().parent
  sys.exit(pytest.main([str(dirname)]))
--------------------------------------------------------------------------------
/deepray/activations/tests/snake_test.py:
--------------------------------------------------------------------------------
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import pytest

import numpy as np
from deepray.activations import snake
from deepray.utils import test_utils


@pytest.mark.usefixtures("maybe_run_functions_eagerly")
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_activation(dtype):
  x = dtype(np.random.rand(2, 5))
  a = dtype(np.random.randn())
  expected_result = x + np.power(np.sin(a * x), 2) / a
  test_utils.assert_allclose_according_to_type(snake(x, a), expected_result)
--------------------------------------------------------------------------------
/deepray/activations/tests/tanhshrink_test.py:
--------------------------------------------------------------------------------
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import pytest

import numpy as np
import tensorflow as tf
from deepray.activations import tanhshrink
from deepray.utils import test_utils


@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_tanh(dtype):
  x = tf.constant([-1.0, 0.0, 1.0], dtype=dtype)
  expected_result = tf.constant([-0.23840582, 0.0, 0.238405825], dtype=dtype)
  test_utils.assert_allclose_according_to_type(tanhshrink(x), expected_result)
--------------------------------------------------------------------------------
/deepray/callbacks/BUILD:
--------------------------------------------------------------------------------
licenses(["notice"])  # Apache 2.0

package(default_visibility = ["//visibility:public"])

py_library(
    name = "callbacks",
    srcs = glob(["*.py"]),
    deps = [
        "//deepray/optimizers",
        "//deepray/testing",
        "//deepray/utils",
    ],
)

py_test(
    name = "callbacks_test",
    size = "small",
    srcs = glob(["tests/*"]),
    main = "tests/run_all_test.py",
    deps = [
        ":callbacks",
    ],
)
--------------------------------------------------------------------------------
/deepray/callbacks/README.md:
--------------------------------------------------------------------------------
# Deepray - Callbacks

## Contents
https://www.tensorflow.org/deepray/api_docs/python/dp/callbacks

## Contribution Guidelines
#### Standard API
In order to conform with the current API standard, all callbacks must:
* Inherit from `tf.keras.callbacks.Callback`.
* Register as a Keras global object so it can be serialized properly: `@tf.keras.utils.register_keras_serializable(package='Deepray')`

A minimal sketch that follows these conventions is given at the end of this README.

#### Testing Requirements
* Simple unittests that demonstrate the callback is behaving as expected.
* To run your `tf.functions` in eager mode and graph mode in the tests,
  you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")`
  decorator. This will run the tests twice, once normally, and once
  with `tf.config.run_functions_eagerly(True)`.

#### Documentation Requirements
* Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS)
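
#### Example
A minimal sketch of a conforming callback. The name `LogLearningRate` and its behavior are illustrative and not part of the package:

```python
import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package="Deepray")
class LogLearningRate(tf.keras.callbacks.Callback):
  """Illustrative callback: prints the optimizer's learning rate each epoch."""

  def __init__(self, prefix: str = "lr"):
    super().__init__()
    self.prefix = prefix

  def on_epoch_end(self, epoch, logs=None):
    lr = tf.keras.backend.get_value(self.model.optimizer.learning_rate)
    print(f"{self.prefix} after epoch {epoch}: {lr:.6g}")

  def get_config(self):
    # Required for round-trip serialization of registered Keras objects.
    return {"prefix": self.prefix}
```

It is used like any built-in callback: `model.fit(x, y, callbacks=[LogLearningRate()])`.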
14 | # ============================================================================== 15 | """Additional callbacks that conform to Keras API.""" 16 | 17 | from deepray.callbacks.average_model_checkpoint import AverageModelCheckpoint 18 | from deepray.callbacks.time_stopping import TimeStopping 19 | from deepray.callbacks.tqdm_progress_bar import TQDMProgressBar 20 | from deepray.callbacks.callbacks import HvdCallbackList 21 | -------------------------------------------------------------------------------- /deepray/callbacks/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/callbacks/tests/__init__.py -------------------------------------------------------------------------------- /deepray/callbacks/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | 5 | import deepray as dp 6 | 7 | from deepray.utils.test_utils import ( # noqa: F401 8 | maybe_run_functions_eagerly, 9 | only_run_functions_eagerly, 10 | run_custom_and_py_ops, 11 | run_with_mixed_precision_policy, 12 | pytest_make_parametrize_id, 13 | data_format, 14 | set_seeds, 15 | pytest_addoption, 16 | set_global_variables, 17 | pytest_configure, 18 | device, 19 | pytest_generate_tests, 20 | pytest_collection_modifyitems, 21 | ) 22 | 23 | # fixtures present in this file will be available 24 | # when running tests and can be referenced with strings 25 | # https://docs.pytest.org/en/latest/fixture.html#conftest-py-sharing-fixture-functions 26 | 27 | 28 | @pytest.fixture(autouse=True) 29 | def add_doctest_namespace(doctest_namespace): 30 | doctest_namespace["np"] = np 31 | doctest_namespace["tf"] = tf 32 | doctest_namespace["dp"] = dp 33 | -------------------------------------------------------------------------------- /deepray/core/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "core", 7 | srcs = glob([ 8 | "*.py", 9 | "**/*.py", 10 | ]), 11 | deps = [ 12 | "//deepray/testing", 13 | "//deepray/utils", 14 | ], 15 | ) 16 | 17 | # py_test( 18 | # name = "core_test", 19 | # size = "small", 20 | # srcs = glob(["tests/*"]), 21 | # main = "tests/run_all_test.py", 22 | # deps = [ 23 | # ":core", 24 | # ], 25 | # ) 26 | -------------------------------------------------------------------------------- /deepray/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/core/__init__.py -------------------------------------------------------------------------------- /deepray/core/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deepray/core/common/registry_imports.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """All necessary imports for registration.""" 15 | # pylint: disable=unused-import 16 | from official import vision 17 | from official.nlp import tasks 18 | from official.nlp.configs import experiment_configs 19 | from official.utils.testing import mock_task 20 | -------------------------------------------------------------------------------- /deepray/core/common/streamz_counters.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Global streamz counters.""" 15 | 16 | from tensorflow.python.eager import monitoring 17 | 18 | progressive_policy_creation_counter = monitoring.Counter( 19 | "/tensorflow/training/fast_training/progressive_policy_creation", 20 | "Counter for the number of ProgressivePolicy creations." 21 | ) 22 | 23 | stack_vars_to_vars_call_counter = monitoring.Counter( 24 | "/tensorflow/training/fast_training/tf_vars_to_vars", "Counter for the number of low-level stacking API calls." 
25 | ) 26 | -------------------------------------------------------------------------------- /deepray/core/platform/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/core/platform/BUILD -------------------------------------------------------------------------------- /deepray/core/platform/build_config_root.default.bzl: -------------------------------------------------------------------------------- 1 | """TODO(jakeharmon): Write module docstring.""" 2 | 3 | # unused in TSL 4 | def tf_additional_plugin_deps(): 5 | return select({ 6 | str(Label("//deepray/tsl:with_xla_support")): [ 7 | str(Label("//deepray/compiler/jit")), 8 | ], 9 | "//conditions:default": [], 10 | }) 11 | 12 | def if_dynamic_kernels(extra_deps, otherwise = []): 13 | return select({ 14 | str(Label("//deepray:dynamic_loaded_kernels")): extra_deps, 15 | "//conditions:default": otherwise, 16 | }) 17 | -------------------------------------------------------------------------------- /deepray/core/platform/rules_cc.bzl: -------------------------------------------------------------------------------- 1 | """Provides an indirection layer to bazel cc_rules""" 2 | 3 | load( 4 | "//deepray/tsl/platform/default:rules_cc.bzl", 5 | _cc_binary = "cc_binary", 6 | _cc_import = "cc_import", 7 | _cc_library = "cc_library", 8 | _cc_shared_library = "cc_shared_library", 9 | _cc_test = "cc_test", 10 | ) 11 | 12 | cc_binary = _cc_binary 13 | cc_import = _cc_import 14 | cc_library = _cc_library 15 | cc_shared_library = _cc_shared_library 16 | cc_test = _cc_test 17 | -------------------------------------------------------------------------------- /deepray/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The Orbit Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Defines exported symbols for the `orbit.utils` package.""" 15 | 16 | from deepray.core.utils.common import create_global_step 17 | from deepray.core.utils.common import get_value 18 | from deepray.core.utils.common import make_distributed_dataset 19 | 20 | from deepray.core.utils.epoch_helper import EpochHelper 21 | 22 | from deepray.core.utils.loop_fns import create_loop_fn 23 | from deepray.core.utils.loop_fns import create_tf_while_loop_fn 24 | from deepray.core.utils.loop_fns import LoopFnWithSummaries 25 | 26 | from deepray.core.utils.summary_manager import SummaryManager 27 | from deepray.core.utils.summary_manager_interface import SummaryManagerInterface 28 | 29 | from deepray.core.utils.tpu_summaries import OptionalSummariesFunction 30 | -------------------------------------------------------------------------------- /deepray/core/utils/clip.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | # https://github.com/bytedance/LargeBatchCTR/blob/main/clip.py 5 | def cow_clip(w, g, ratio=1, ids=None, cnts=None, min_w=0.03, const=False): 6 | if isinstance(g, tf.IndexedSlices): 7 | # FIXME: This part is not tested 8 | values = tf.convert_to_tensor(g.values) 9 | clipnorm = tf.norm(tf.gather(w, g.indices), axis=-1) 10 | else: 11 | values = g 12 | if const: 13 | clipnorm = tf.constant([min_w] * g.shape[0]) 14 | else: 15 | clipnorm = tf.norm(w, axis=-1) 16 | # bound weight norm by min_w 17 | clipnorm = tf.maximum(clipnorm, min_w) 18 | # scale by cnting 19 | cnts = tf.tensor_scatter_nd_update( 20 | tf.ones([clipnorm.shape[0]], dtype=tf.int32), 21 | tf.expand_dims(ids, -1), 22 | cnts, 23 | ) 24 | clipnorm = clipnorm * tf.cast(cnts, tf.float32) 25 | 26 | clip_t = ratio * clipnorm 27 | l2sum_row = tf.reduce_sum(values * values, axis=-1) 28 | pred = l2sum_row > 0 29 | l2sum_row_safe = tf.where(pred, l2sum_row, tf.ones_like(l2sum_row)) 30 | l2norm_row = tf.sqrt(l2sum_row_safe) 31 | intermediate = values * tf.expand_dims(clip_t, -1) 32 | g_clip = intermediate / tf.expand_dims(tf.maximum(l2norm_row, clip_t), -1) 33 | 34 | if isinstance(g, tf.IndexedSlices): 35 | return tf.IndexedSlices(g_clip, g.indices, g.dense_shape) 36 | else: 37 | return g_clip 38 | -------------------------------------------------------------------------------- /deepray/core/utils/common_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The Orbit Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | """Tests for orbit.utils.common.""" 15 | 16 | from deepray.core.utils import common 17 | 18 | import tensorflow as tf 19 | 20 | 21 | class UtilsTest(tf.test.TestCase): 22 | 23 | def test_create_global_step(self): 24 | step = common.create_global_step() 25 | self.assertEqual(step.name, "global_step:0") 26 | self.assertEqual(step.dtype, tf.int64) 27 | self.assertEqual(step, 0) 28 | step.assign_add(1) 29 | self.assertEqual(step, 1) 30 | 31 | 32 | if __name__ == "__main__": 33 | tf.test.main() 34 | -------------------------------------------------------------------------------- /deepray/core/utils/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/core/utils/misc/__init__.py -------------------------------------------------------------------------------- /deepray/core/utils/misc/tpu_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Initializes TPU system for TF 2.0.""" 16 | 17 | import tensorflow as tf 18 | 19 | 20 | def tpu_initialize(tpu_address): 21 | """Initializes TPU for TF 2.0 training. 22 | 23 | Args: 24 | tpu_address: string, bns address of master TPU worker. 25 | 26 | Returns: 27 | A TPUClusterResolver. 
28 | """ 29 | cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=tpu_address) 30 | if tpu_address not in ('', 'local'): 31 | tf.config.experimental_connect_to_cluster(cluster_resolver) 32 | tf.tpu.experimental.initialize_tpu_system(cluster_resolver) 33 | return cluster_resolver 34 | -------------------------------------------------------------------------------- /deepray/custom_ops/BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | py_library( 4 | name = "custom_ops", 5 | srcs = glob(["**/*.py"]), 6 | deps = [ 7 | "//deepray/custom_ops/correlation_cost", 8 | "//deepray/custom_ops/ffm_ops", 9 | "//deepray/custom_ops/multiplex_1:multiplex_1_op", 10 | "//deepray/custom_ops/multiplex_2:multiplex_2_op", 11 | "//deepray/custom_ops/multiplex_3:multiplex_3_op", 12 | "//deepray/custom_ops/multiplex_4:multiplex_4_op", 13 | "//deepray/custom_ops/parquet_dataset", 14 | "//deepray/custom_ops/simple_hash_table", 15 | "//deepray/custom_ops/sleep:sleep_op", 16 | "//deepray/custom_ops/training_ops", 17 | "//deepray/custom_ops/unique_ops", 18 | "//deepray/custom_ops/zero_out:zero_out_ops", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /deepray/custom_ops/README.md: -------------------------------------------------------------------------------- 1 | # Deepray - Custom Ops 2 | 3 | ## Contents 4 | | Sub-Package | Description | 5 | |:----------------------- |:-----------------------------| 6 | | Image | Ops for image manipulation | 7 | | Seq2seq | Ops for seq2seq encoder-decoder framework | 8 | | Text | Ops for text processing | 9 | | Layers | Ops for model layers | 10 | -------------------------------------------------------------------------------- /deepray/custom_ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/correlation_cost/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | custom_op_library( 8 | name = "_correlation_cost_ops.so", 9 | srcs = [ 10 | "cc/kernels/correlation_cost_op.cc", 11 | "cc/kernels/correlation_cost_op.h", 12 | "cc/ops/correlation_cost_op.cc", 13 | ], 14 | cuda_srcs = [ 15 | "cc/kernels/correlation_cost_op.h", 16 | "cc/kernels/correlation_cost_op_gpu.cu.cc", 17 | ], 18 | ) 19 | 20 | py_library( 21 | name = "correlation_cost", 22 | srcs = glob( 23 | [ 24 | "python/*.py", 25 | "*.py", 26 | ], 27 | ), 28 | data = [ 29 | ":_correlation_cost_ops.so", 30 | ], 31 | deps = [ 32 | "//deepray/utils", 33 | ], 34 | ) 35 | 36 | py_test( 37 | name = "correlation_cost_test", 38 | size = "small", 39 | srcs = glob(["python/tests/*"]), 40 | main = "python/tests/run_all_test.py", 41 | deps = [ 42 | ":correlation_cost", 43 | ], 44 | ) 45 | -------------------------------------------------------------------------------- /deepray/custom_ops/correlation_cost/__init__.py: -------------------------------------------------------------------------------- 1 | from .python.optical_flow import _correlation_cost, _correlation_cost_grad, CorrelationCost 2 | 
-------------------------------------------------------------------------------- /deepray/custom_ops/correlation_cost/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/correlation_cost/python/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/correlation_cost/python/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/correlation_cost/python/tests/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/correlation_cost/python/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/custom_ops/distributed_embeddings/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | custom_op_library( 8 | name = "_distributed_embeddings_ops.so", 9 | srcs = [ 10 | "cc/kernels/embedding_lookup.h", 11 | "cc/kernels/embedding_lookup_kernels.cc", 12 | "cc/ops/embedding_lookup_ops.cc", 13 | ], 14 | cuda_srcs = [ 15 | "cc/kernels/embedding_lookup_kernels.cu.cc", 16 | ], 17 | deps = [ 18 | "@cuCollections//:cuco_hash_table", 19 | ], 20 | ) 21 | 22 | py_library( 23 | name = "distributed_embeddings_ops", 24 | srcs = glob( 25 | [ 26 | "python/*.py", 27 | "*.py", 28 | ], 29 | ), 30 | data = [ 31 | ":_distributed_embeddings_ops.so", 32 | "//deepray:options.py", 33 | ], 34 | deps = [ 35 | "//deepray/utils", 36 | ], 37 | ) 38 | 39 | py_test( 40 | name = "distributed_embeddings_ops_test", 41 | size = "small", 42 | srcs = glob(["python/tests/*"]), 43 | main = "python/tests/run_all_test.py", 44 | deps = [ 45 | ":distributed_embeddings_ops", 46 | ], 47 | ) 48 | -------------------------------------------------------------------------------- /deepray/custom_ops/distributed_embeddings/python/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /deepray/custom_ops/distributed_embeddings/python/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /deepray/custom_ops/distributed_embeddings/python/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 2 | # SPDX-License-Identifier: Apache-2.0 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | -------------------------------------------------------------------------------- /deepray/custom_ops/distributed_embeddings/python/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/custom_ops/ffm_ops/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | load("@local_config_tf//:build_defs.bzl", "CPLUSPLUS_VERSION") 3 | 4 | licenses(["notice"]) # Apache 2.0 5 | 6 | custom_op_library( 7 | name = "_ffm_ops.so", 8 | srcs = [ 9 | "cc/kernels/ffm_kernels.cc", 10 | "cc/kernels/ffm_kernels.h", 11 | "cc/ops/ffm_ops.cc", 12 | ], 13 | copts = [CPLUSPLUS_VERSION], 14 | cuda_srcs = [ 15 | "cc/kernels/ffm_kernels.h", 16 | "cc/kernels/ffm_kernels.cu.cc", 17 | ], 18 | ) 19 | 20 | py_library( 21 | name = "ffm_ops", 22 | srcs = glob( 23 | [ 24 | "python/*.py", 25 | "python/**/*.py", 26 | "*.py", 27 | ], 28 | ), 29 | data = [ 30 | ":_ffm_ops.so", 31 | ], 32 | visibility = ["//visibility:public"], 33 | deps = [ 34 | "//deepray/utils", 35 | ], 36 | ) 37 | 38 | py_test( 39 | name = "ffm_ops_test", 40 | size = "small", 41 | srcs = glob(["python/tests/*"]), 42 | main = "python/tests/run_all_test.py", 43 | deps = [ 44 | ":ffm_ops", 45 | ], 46 | ) 47 | -------------------------------------------------------------------------------- /deepray/custom_ops/ffm_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from __future__ import absolute_import 16 | 17 | from .python.ffm_ops import ffm 18 | -------------------------------------------------------------------------------- /deepray/custom_ops/ffm_ops/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/ffm_ops/python/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/ffm_ops/python/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/ffm_ops/python/tests/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/ffm_ops/python/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/custom_ops/multiplex_1/BUILD: -------------------------------------------------------------------------------- 1 | # Build multiplex_1 custom ops example, which is similar to np.where 2 | load("//deepray:deepray.bzl", "custom_op_library") 3 | 4 | licenses(["notice"]) 5 | 6 | custom_op_library( 7 | name = "multiplex_1_kernel.so", 8 | srcs = [ 9 | "multiplex_1_kernel.cc", 10 | "multiplex_1_op.cc", 11 | ], 12 | ) 13 | 14 | py_library( 15 | name = "multiplex_1_op", 16 | srcs = ["multiplex_1_op.py"], 17 | data = [":multiplex_1_kernel.so"], 18 | srcs_version = "PY3", 19 | visibility = ["//visibility:public"], 20 | ) 21 | 22 | py_test( 23 | name = "multiplex_1_test", 24 | size = "medium", 25 | srcs = ["multiplex_1_test.py"], 26 | python_version = "PY3", 27 | srcs_version = "PY3", 28 | tags = [ 29 | "no_mac", # TODO(b/216321151): Re-enable this test. 30 | ], 31 | deps = [ 32 | ":multiplex_1_op", 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /deepray/custom_ops/multiplex_2/BUILD: -------------------------------------------------------------------------------- 1 | # Build multiplex_2 custom ops example, which is similar to np.where. 2 | # This example supports GPU (and CPU), in contrast to multiplex_1 which 3 | # only supports CPU. 4 | 5 | load("//deepray:deepray.bzl", "custom_op_library") 6 | 7 | licenses(["notice"]) 8 | 9 | custom_op_library( 10 | name = "multiplex_2_kernel.so", 11 | srcs = [ 12 | "multiplex_2_kernel.cc", 13 | "multiplex_2_kernel.h", 14 | "multiplex_2_op.cc", 15 | ], 16 | cuda_srcs = [ 17 | "multiplex_2_kernel.h", 18 | "multiplex_2_kernel.cu.cc", 19 | ], 20 | ) 21 | 22 | py_library( 23 | name = "multiplex_2_op", 24 | srcs = ["multiplex_2_op.py"], 25 | data = ["multiplex_2_kernel.so"], 26 | srcs_version = "PY3", 27 | visibility = ["//visibility:public"], 28 | ) 29 | 30 | py_test( 31 | name = "multiplex_2_test", 32 | size = "medium", 33 | srcs = ["multiplex_2_test.py"], 34 | python_version = "PY3", 35 | srcs_version = "PY3", 36 | tags = [ 37 | "no_mac", # TODO(b/216321151): Re-enable this test.
38 | ], 39 | deps = [ 40 | ":multiplex_2_op", 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /deepray/custom_ops/multiplex_3/BUILD: -------------------------------------------------------------------------------- 1 | # Build multiplex_3 custom ops example, which is similar to np.where. 2 | # This example shows how a Python wrapper can use "dispatch 3 | # for custom object types" to choose either an old C++ op (that supports only 4 | # dense tensors) for backwards compatibility or a new C++ op for new 5 | # functionality (that supports sparse tensors). 6 | 7 | load("//deepray:deepray.bzl", "custom_op_library") 8 | 9 | licenses(["notice"]) 10 | 11 | custom_op_library( 12 | name = "multiplex_3_kernel.so", 13 | srcs = [ 14 | "multiplex_3_kernel.cc", 15 | "multiplex_3_op.cc", 16 | ], 17 | ) 18 | 19 | py_library( 20 | name = "multiplex_3_op", 21 | srcs = ["multiplex_3_op.py"], 22 | data = [":multiplex_3_kernel.so"], 23 | srcs_version = "PY3", 24 | visibility = ["//visibility:public"], 25 | deps = [ 26 | "//deepray/custom_ops/multiplex_2:multiplex_2_op", 27 | ], 28 | ) 29 | 30 | py_test( 31 | name = "multiplex_3_test", 32 | size = "small", 33 | srcs = ["multiplex_3_test.py"], 34 | python_version = "PY3", 35 | srcs_version = "PY3", 36 | tags = [ 37 | "no_mac", # TODO(b/216321151): Re-enable this test. 38 | ], 39 | deps = [ 40 | ":multiplex_3_op", 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /deepray/custom_ops/parquet_dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from deepray.custom_ops.parquet_dataset.python import parquet_dataset_ops 2 | from deepray.custom_ops.parquet_dataset.python import dataframe 3 | -------------------------------------------------------------------------------- /deepray/custom_ops/parquet_dataset/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/parquet_dataset/python/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/parquet_dataset/python/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/custom_ops/seq2seq/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | custom_op_library( 8 | name = "_beam_search_ops.so", 9 | srcs = [ 10 | "cc/kernels/beam_search_ops.cc", 11 | "cc/kernels/beam_search_ops.h", 12 | "cc/ops/beam_search_ops.cc", 13 | ], 14 | cuda_srcs = [ 15 | "cc/kernels/beam_search_ops.h", 16 | "cc/kernels/beam_search_ops_gpu.cu.cc", 17 | ], 18 | ) 19 | -------------------------------------------------------------------------------- /deepray/custom_ops/simple_hash_table/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/simple_hash_table/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/simple_hash_table/simple_hash_table_op.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Load the simple_hash_table_op kernel.""" 16 | 17 | import tensorflow as tf 18 | from tensorflow.python.platform import resource_loader 19 | 20 | gen_simple_hash_table_op = tf.load_op_library(resource_loader.get_path_to_datafile("simple_hash_table_kernel.so")) 21 | -------------------------------------------------------------------------------- /deepray/custom_ops/sleep/BUILD: -------------------------------------------------------------------------------- 1 | # Asynchronous (non-blocking) op example using AsyncOpKernel 2 | 3 | load("//deepray:deepray.bzl", "custom_op_library") 4 | 5 | licenses(["notice"]) 6 | 7 | custom_op_library( 8 | name = "sleep_kernel.so", 9 | srcs = [ 10 | "sleep_kernel.cc", 11 | "sleep_op.cc", 12 | ], 13 | deps = [ 14 | "@com_google_absl//absl/container:flat_hash_map", 15 | ], 16 | ) 17 | 18 | py_library( 19 | name = "sleep_op", 20 | srcs = ["sleep_op.py"], 21 | data = ["sleep_kernel.so"], 22 | srcs_version = "PY3", 23 | visibility = ["//visibility:public"], 24 | ) 25 | 26 | py_library( 27 | name = "sleep_bin", 28 | srcs = ["sleep_bin.py"], 29 | srcs_version = "PY3", 30 | deps = [ 31 | ":sleep_op", 32 | "@absl_py//absl:app", 33 | ], 34 | ) 35 | 36 | py_test( 37 | name = "sleep_test", 38 | size = "medium", # This test blocks using sleep, 39 | timeout = "short", # but it still runs quickly. 40 | srcs = ["sleep_test.py"], 41 | python_version = "PY3", 42 | srcs_version = "PY3", 43 | tags = [ 44 | "no_mac", # TODO(b/216321151): Re-enable this test.
45 | ], 46 | deps = [ 47 | ":sleep_op", 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /deepray/custom_ops/text/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | custom_op_library( 8 | name = "_skip_gram_ops.so", 9 | srcs = [ 10 | "cc/kernels/skip_gram_kernels.cc", 11 | "cc/ops/skip_gram_ops.cc", 12 | ], 13 | ) 14 | 15 | custom_op_library( 16 | name = "_parse_time_op.so", 17 | srcs = select({ 18 | "//deepray:windows": [], 19 | "//conditions:default": [ 20 | "cc/kernels/parse_time_kernel.cc", 21 | "cc/ops/parse_time_op.cc", 22 | ], 23 | }), 24 | ) 25 | -------------------------------------------------------------------------------- /deepray/custom_ops/training_ops/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | custom_op_library( 6 | name = "_training_ops.so", 7 | srcs = [ 8 | "cc/kernels/training_ops.cc", 9 | "cc/kernels/training_ops.h", 10 | "cc/ops/training_ops.cc", 11 | ], 12 | cuda_srcs = [ 13 | "cc/kernels/training_ops.h", 14 | "cc/kernels/training_ops_gpu.cu.cc", 15 | ], 16 | ) 17 | 18 | py_library( 19 | name = "training_ops", 20 | srcs = glob( 21 | [ 22 | "python/*.py", 23 | "python/**/*.py", 24 | "*.py", 25 | ], 26 | ), 27 | data = [ 28 | ":_training_ops.so", 29 | ], 30 | visibility = ["//visibility:public"], 31 | ) 32 | -------------------------------------------------------------------------------- /deepray/custom_ops/training_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | 17 | from .python.training_ops import gen_training_ops 18 | -------------------------------------------------------------------------------- /deepray/custom_ops/training_ops/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/training_ops/python/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/training_ops/python/training_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The Deepray Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Load the training_ops kernel.""" 16 | 17 | import tensorflow as tf 18 | from tensorflow.python.platform import resource_loader 19 | 20 | gen_training_ops = tf.load_op_library(resource_loader.get_path_to_datafile("../_training_ops.so")) 21 | -------------------------------------------------------------------------------- /deepray/custom_ops/unique_ops/__init__.py: -------------------------------------------------------------------------------- 1 | from deepray.custom_ops.unique_ops.python.unique_ops import gen_array_ops 2 | -------------------------------------------------------------------------------- /deepray/custom_ops/unique_ops/cc/kernels/random_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | #include "tensorflow/core/lib/random/random.h" 17 | 18 | #include <set> 19 | 20 | #include "tensorflow/core/platform/test.h" 21 | #include "tensorflow/core/platform/types.h" 22 | 23 | namespace tensorflow { 24 | namespace random { 25 | namespace { 26 | 27 | TEST(New64Test, SanityCheck) { 28 | std::set<uint64> values; 29 | for (int i = 0; i < 1000000; i++) { 30 | uint64 x = New64(); 31 | EXPECT_TRUE(values.insert(x).second) << "duplicate " << x; 32 | } 33 | } 34 | 35 | } // namespace 36 | } // namespace random 37 | } // namespace tensorflow 38 | -------------------------------------------------------------------------------- /deepray/custom_ops/unique_ops/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/unique_ops/python/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/unique_ops/python/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/unique_ops/python/tests/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/unique_ops/python/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main(['-s', str(dirname)])) 9 | # sys.exit(pytest.main([str(dirname)])) 10 | -------------------------------------------------------------------------------- /deepray/custom_ops/unique_ops/python/unique_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Sonnet Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================ 15 | """Use array ops in python.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | import tensorflow as tf 21 | from tensorflow.python.platform import resource_loader 22 | 23 | gen_array_ops = tf.load_op_library(resource_loader.get_path_to_datafile("../_unique_ops.so")) 24 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/BUILD: -------------------------------------------------------------------------------- 1 | load("//deepray:deepray.bzl", "custom_op_library") 2 | 3 | licenses(["notice"]) # Apache 2.0 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | custom_op_library( 8 | name = "_zero_out_ops.so", 9 | srcs = [ 10 | "cc/kernels/zero_out_kernels.cc", 11 | "cc/ops/zero_out_ops.cc", 12 | ], 13 | ) 14 | 15 | py_library( 16 | name = "zero_out_ops", 17 | srcs = glob( 18 | [ 19 | "python/*.py", 20 | "python/**/*.py", 21 | "*.py", 22 | ], 23 | ), 24 | data = [ 25 | ":_zero_out_ops.so", 26 | "//deepray:options.py", 27 | ], 28 | deps = [ 29 | "//deepray/utils", 30 | ], 31 | ) 32 | 33 | py_test( 34 | name = "zero_out_ops_test", 35 | size = "small", 36 | srcs = glob(["python/tests/*"]), 37 | main = "python/tests/run_all_test.py", 38 | deps = [ 39 | ":zero_out_ops", 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """TensorFlow custom op example.""" 16 | 17 | from __future__ import absolute_import 18 | 19 | from deepray.custom_ops.zero_out.python.ops.zero_out_ops import zero_out 20 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/cc/ops/zero_out_ops.cc: -------------------------------------------------------------------------------- 1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | ==============================================================================*/ 15 | 16 | #include "tensorflow/core/framework/op.h" 17 | #include "tensorflow/core/framework/shape_inference.h" 18 | 19 | using namespace tensorflow; 20 | 21 | REGISTER_OP("ZeroOut") 22 | .Input("to_zero: int32") 23 | .Output("zeroed: int32") 24 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 25 | c->set_output(0, c->input(0)); 26 | return Status::OK(); 27 | }); 28 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/zero_out/python/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/python/ops/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/python/ops/zero_out_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Sonnet Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Use zero_out ops in python.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from deepray.utils.resource_loader import LazySO 22 | 23 | _zero_out_ops_so = LazySO("custom_ops/zero_out/_zero_out_ops.so") 24 | zero_out = _zero_out_ops_so.ops.zero_out 25 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/python/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/custom_ops/zero_out/python/tests/__init__.py -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/python/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/custom_ops/zero_out/python/tests/zero_out_ops_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 The Sonnet Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================ 15 | """Tests for zero_out ops.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import numpy as np 21 | 22 | from tensorflow.python.platform import test 23 | from deepray.custom_ops.zero_out import zero_out 24 | 25 | 26 | class ZeroOutTest(test.TestCase): 27 | 28 | def testZeroOut(self): 29 | with self.test_session(): 30 | self.assertAllClose(zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]])) 31 | 32 | 33 | if __name__ == '__main__': 34 | test.main() 35 | -------------------------------------------------------------------------------- /deepray/datasets/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | filegroup( 6 | name = "feature_map", 7 | srcs = glob([ 8 | "**/feature_map.csv", 9 | ]), 10 | ) 11 | 12 | py_library( 13 | name = "datapipeline", 14 | srcs = ["datapipeline.py"], 15 | deps = [ 16 | "//deepray", 17 | "//deepray/utils", 18 | ], 19 | ) 20 | 21 | py_library( 22 | name = "datasets", 23 | srcs = glob([ 24 | "*.py", 25 | "**/*.py", 26 | ]), 27 | data = [ 28 | ":feature_map", 29 | ], 30 | deps = [ 31 | "//deepray/testing", 32 | "//deepray/utils", 33 | ], 34 | ) 35 | 36 | # py_test( 37 | # name = "datasets_test", 38 | # size = "small", 39 | # srcs = glob(["tests/*"]), 40 | # main = "tests/run_all_test.py", 41 | # deps = [ 42 | # ":datasets", 43 | # ], 44 | # ) 45 | -------------------------------------------------------------------------------- /deepray/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/10/15 7:16 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | -------------------------------------------------------------------------------- /deepray/datasets/adult_census_income/__init__.py: -------------------------------------------------------------------------------- 1 | from .adult_census_income import Adult_census_income 2 | -------------------------------------------------------------------------------- /deepray/datasets/adult_census_income/feature_map.csv: -------------------------------------------------------------------------------- 1 | name,voc_size,dim,length,dtype,ftype 2 | workclass,9,1,1,int32,Categorical 3 | education,16,1,1,int32,Categorical 4 | marital.status,7,1,1,int32,Categorical 5 | occupation,15,1,1,int32,Categorical 6 | relationship,6,1,1,int32,Categorical 7 | race,5,1,1,int32,Categorical 8 | sex,2,1,1,int32,Categorical 9 | native.country,42,1,1,int32,Categorical 10 | income,,1,1,int32,Label 11 | age,,1,1,float32,Numerical 12 | fnlwgt,,1,1,float32,Numerical 13 | education.num,,1,1,float32,Numerical 14 | capital.gain,,1,1,float32,Numerical 15 | 
capital.loss,,1,1,float32,Numerical 16 | hours.per.week,,1,1,float32,Numerical -------------------------------------------------------------------------------- /deepray/datasets/ali-ccp/README.md: -------------------------------------------------------------------------------- 1 | https://tianchi.aliyun.com/dataset/408 2 | 3 | 4 | https://github.com/datawhalechina/torch-rechub/blob/main/examples/ranking/data/ali-ccp/preprocess_ali_ccp.py -------------------------------------------------------------------------------- /deepray/datasets/ali-ccp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/ali-ccp/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/ali-ccp/ali_ccp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/ali-ccp/ali_ccp.py -------------------------------------------------------------------------------- /deepray/datasets/ali-ccp/ali_ccp_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/ali-ccp/ali_ccp_test.py -------------------------------------------------------------------------------- /deepray/datasets/ali_display_ad_click/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/ali_display_ad_click/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/ali_display_ad_click/feature_map.csv: -------------------------------------------------------------------------------- 1 | name,dtype,voc_size,dimension,length,log,embedding,default,norm 2 | label,int32,,,1,,,-1, 3 | ts,int64,,,1,,,-1, 4 | item_price,float32,,,1,True,,-1.0,32.0 5 | pid,int32,2,4,1,,,0, 6 | ad,int32,846811,36,1,,,0, 7 | ad_campaign,int32,423436,36,1,,,0, 8 | ad_customer,int32,255875,24,1,,,0, 9 | item_category,int32,6769,12,1,,,0, 10 | item_brand,int32,99815,24,1,,,0, 11 | user,int32,1141729,36,1,,,0, 12 | user_cms_seg,int32,98,4,1,,,0, 13 | user_cms_group,int32,14,4,1,,,0, 14 | user_gender,int32,2,4,1,,,0, 15 | user_age,int32,8,4,1,,,0, 16 | user_pvalue,int32,4,4,1,,,0, 17 | user_shopping,int32,4,4,1,,,0, 18 | user_occupation,int32,3,4,1,,,0, 19 | user_city,int32,5,4,1,,,0, 20 | user_pv_category_list,int32,6769,12,-1,,,0, 21 | user_cart_category_list,int32,6769,12,-1,,,0, 22 | user_fav_category_list,int32,6769,12,-1,,,0, 23 | user_buy_category_list,int32,6769,12,-1,,,0, 24 | user_pv_brand_list,int32,99815,24,-1,,,0, 25 | user_cart_brand_list,int32,99815,24,-1,,,0, 26 | user_fav_brand_list,int32,99815,24,-1,,,0, 27 | user_buy_brand_list,int32,99815,24,-1,,,0, 28 | -------------------------------------------------------------------------------- /deepray/datasets/amazon_books_2014/__init__.py: -------------------------------------------------------------------------------- 1 | from .amazon_books_2014 import AmazonBooks2014 2 | -------------------------------------------------------------------------------- /deepray/datasets/amazon_books_2014/defaults.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | REMAINDER_FILENAME = 'remainder.tfrecord' 16 | 17 | USER_FEATURES_CHANNEL = 'user_features' 18 | TARGET_ITEM_FEATURES_CHANNEL = 'target_item_features' 19 | POSITIVE_HISTORY_CHANNEL = 'positive_history' 20 | NEGATIVE_HISTORY_CHANNEL = 'negative_history' 21 | LABEL_CHANNEL = 'label' 22 | 23 | TRAIN_MAPPING = "train" 24 | TEST_MAPPING = "test" 25 | 26 | FILES_SELECTOR = "files" 27 | 28 | DTYPE_SELECTOR = "dtype" 29 | CARDINALITY_SELECTOR = "cardinality" 30 | DIMENSIONS_SELECTOR = 'dimensions' 31 | -------------------------------------------------------------------------------- /deepray/datasets/amazon_books_2014/feature_map.csv: -------------------------------------------------------------------------------- 1 | name,dtype,ftype,dim,length,voc_size 2 | label,bool,Label,1,1, 3 | item_feat_0_neg,int64,,1,100,1209081 4 | item_feat_1_neg,int64,,1,100,2330 5 | item_feat_0_pos,int64,,1,100,1209081 6 | item_feat_1_pos,int64,,1,100,2330 7 | item_feat_0_trgt,int64,,1,1,1209081 8 | item_feat_1_trgt,int64,,1,1,2330 9 | user_feat_0,int64,,1,1,105925 -------------------------------------------------------------------------------- /deepray/datasets/amazon_books_2014/preprocessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/amazon_books_2014/preprocessing/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/avazu/README.md: -------------------------------------------------------------------------------- 1 | https://www.kaggle.com/c/avazu-ctr-prediction/data 2 | 3 | https://github.com/aimetrics/jarvis/blob/master/tools/dataset/avazu.py -------------------------------------------------------------------------------- /deepray/datasets/avazu/__init__.py: -------------------------------------------------------------------------------- 1 | from .avazu import Avazu 2 | -------------------------------------------------------------------------------- /deepray/datasets/avazu/feature_map.csv: -------------------------------------------------------------------------------- 1 | name,voc_size,dim,length,dtype,ftype,hash_size 2 | hour,24,11,1,int32,Categorical, 3 | id,45006427,11,1,int32,Categorical, 4 | C1,7,11,1,int32,Categorical, 5 | banner_pos,7,11,1,int32,Categorical, 6 | site_id,4842,11,1,int32,Categorical, 7 | site_domain,7912,11,1,int32,Categorical, 8 | site_category,26,11,1,int32,Categorical, 9 | app_id,9136,11,1,int32,Categorical, 10 | app_domain,580,11,1,int32,Categorical, 11 | app_category,36,11,1,int32,Categorical, 12 | device_id,2895973,11,1,int32,Categorical, 13 | device_ip,7338655,11,1,int32,Categorical, 14 | device_model,8303,11,1,int32,Categorical, 15 | 
device_type,5,11,1,int32,Categorical, 16 | device_conn_type,4,11,1,int32,Categorical, 17 | C14,2673,11,1,int32,Categorical, 18 | C15,8,11,1,int32,Categorical, 19 | C16,9,11,1,int32,Categorical, 20 | C17,441,11,1,int32,Categorical, 21 | C18,4,11,1,int32,Categorical, 22 | C19,69,11,1,int32,Categorical, 23 | C20,172,11,1,int32,Categorical, 24 | C21,62,11,1,int32,Categorical, 25 | click,1,1,1,int32,Label, -------------------------------------------------------------------------------- /deepray/datasets/bookscorpus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/bookscorpus/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/cifar/__init__.py: -------------------------------------------------------------------------------- 1 | from .cifar import CIFAR10, CIFAR100 2 | -------------------------------------------------------------------------------- /deepray/datasets/cifar/cifar_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags 9 | 10 | from .cifar import CIFAR100, CIFAR10 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 15 | 16 | 17 | def runner(argv=None): 18 | if len(argv) <= 1: 19 | argv = [ 20 | sys.argv[0], 21 | "--batch_size=16", 22 | "-epochs=1", 23 | "--train_data=cifar100", 24 | # f"--feature_map={dir_path}/feature_map.csv", 25 | "--label=clicked", 26 | ] 27 | if argv: 28 | FLAGS(argv, known_only=True) 29 | if FLAGS.train_data == "cifar100": 30 | data_pipe = CIFAR100() 31 | else: 32 | data_pipe = CIFAR10() 33 | 34 | # create data pipeline of train & test dataset 35 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 36 | num_examples = 0 37 | for x in train_dataset: 38 | num_examples += FLAGS.batch_size 39 | 40 | print(x) 41 | print(num_examples) 42 | 43 | 44 | if __name__ == "__main__": 45 | app.run(runner) 46 | -------------------------------------------------------------------------------- /deepray/datasets/creditcardfraud/__init__.py: -------------------------------------------------------------------------------- 1 | from .creditcardfraud import CreditCardFraud 2 | -------------------------------------------------------------------------------- /deepray/datasets/creditcardfraud/creditcardfraud_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags 9 | 10 | from .creditcardfraud import CreditCardFraud 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 15 | 16 | 17 | def runner(argv=None): 18 | if len(argv) <= 1: 19 | argv = [ 20 | sys.argv[0], 21 | "--batch_size=10", 22 | "-epochs=1", 23 | "--train_data=movielens/1m-ratings", 24 | # f"--feature_map={dir_path}/feature_map.csv", 25 | "--label=clicked", 26 | ] 27 | if argv: 28 | FLAGS(argv, known_only=True) 29 | 30 | data_pipe = CreditCardFraud() 31 | # create data pipeline of train & test dataset 32 | train_dataset = data_pipe(FLAGS.train_data,
FLAGS.batch_size, is_training=True) 33 | num_examples = 0 34 | for x, y in train_dataset: 35 | num_examples += FLAGS.batch_size 36 | 37 | print(x) 38 | print(num_examples) 39 | 40 | 41 | if __name__ == "__main__": 42 | app.run(runner) 43 | -------------------------------------------------------------------------------- /deepray/datasets/criteo/__init__.py: -------------------------------------------------------------------------------- 1 | from .criteo import Criteo 2 | from .criteo_tsv_reader import CriteoTsvReader -------------------------------------------------------------------------------- /deepray/datasets/criteo/criteo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Criteo dataset.""" 16 | 17 | import sys 18 | 19 | from absl import flags 20 | 21 | from deepray.datasets.datapipeline import DataPipeLine 22 | 23 | FLAGS = flags.FLAGS 24 | FLAGS([ 25 | sys.argv[0], 26 | "--num_train_examples=11932672", 27 | ]) 28 | 29 | 30 | class Criteo(DataPipeLine): 31 | 32 | def build_dataset(self, input_file_pattern, batch_size, is_training=True, prebatch_size=0, *args, **kwargs): 33 | pass 34 | -------------------------------------------------------------------------------- /deepray/datasets/criteo/docker/requirements_preprocessing.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | joblib 4 | tqdm 5 | -------------------------------------------------------------------------------- /deepray/datasets/criteo/feature_map.csv: -------------------------------------------------------------------------------- 1 | name,composition_factor 2 | _c14,"16,16" -------------------------------------------------------------------------------- /deepray/datasets/criteo/preproc/gpu/get_gpu_resources.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | ADDRS=`nvidia-smi --query-gpu=index --format=csv,noheader | sed -e ':a' -e 'N' -e'$!ba' -e 's/\n/","/g'` 4 | echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]} 5 | -------------------------------------------------------------------------------- /deepray/datasets/criteo/preproc/run_spark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | ######################################################################### 18 | # File Name: run_spark.sh 19 | 20 | 21 | echo "Input mode option: $1" 22 | if [ "$1" = "CPU" ] 23 | then 24 | echo "Run with CPU."; 25 | shift 26 | ./run_spark_cpu.sh ${@} 27 | elif [ "$1" = "GPU" ] 28 | then 29 | echo "Run with GPU."; 30 | shift 31 | if [ "$DGX_VERSION" = "DGX-2" ] 32 | then 33 | ./run_spark_gpu_DGX-2.sh ${@} 34 | else 35 | ./run_spark_gpu_DGX-A100.sh ${@} 36 | fi 37 | else 38 | echo "Please choose mode (CPU/GPU)."; 39 | fi 40 | -------------------------------------------------------------------------------- /deepray/datasets/criteo/preproc/verify_criteo_downloaded.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | set -e 18 | set -x 19 | 20 | download_dir=${1:-'/data/dlrm/criteo'} 21 | 22 | cd ${download_dir} 23 | for i in $(seq 0 23); do 24 | filename=day_${i} 25 | if [ -f $filename ]; then 26 | echo "$filename exists, OK" 27 | else 28 | echo "$filename does not exist. Please follow the instructions at: http://labs.criteo.com/2013/12/download-terabyte-click-logs/ to download it" 29 | exit 1 30 | fi 31 | done 32 | cd - 33 | 34 | echo "Criteo data verified" 35 | -------------------------------------------------------------------------------- /deepray/datasets/csv_pipeline.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from deepray.datasets.datapipeline import DataPipeLine 3 | from absl import flags 4 | 5 | FLAGS = flags.FLAGS 6 | 7 | 8 | class CSVPipeLine(DataPipeLine): 9 | 10 | def build_dataset(self, csv_path): 11 | dataset = tf.data.experimental.make_csv_dataset( 12 | csv_path, 13 | record_defaults=list(self.feature_map["dtype"]), 14 | column_names=list(self.feature_map["name"]), 15 | batch_size=FLAGS.batch_size, 16 | label_name=FLAGS.label, 17 | field_delim=",", 18 | header=True, 19 | ) 20 | return dataset 21 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/NVIDIAPretrainedWeightDownloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License.
4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | import os 15 | 16 | 17 | class NVIDIAPretrainedWeightDownloader: 18 | 19 | def __init__(self, save_path): 20 | self.save_path = save_path + '/nvidia_pretrained_weights' 21 | 22 | if not os.path.exists(self.save_path): 23 | os.makedirs(self.save_path) 24 | 25 | pass 26 | 27 | def download(self): 28 | assert False, 'NVIDIAPretrainedWeightDownloader not implemented yet.' 29 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/BooksDownloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | import subprocess 15 | 16 | 17 | class BooksDownloader: 18 | 19 | def __init__(self, save_path): 20 | self.save_path = save_path 21 | pass 22 | 23 | def download(self): 24 | bookscorpus_download_command = 'pwd && ls && python3 bookscorpus/download_files.py --list bookscorpus/url_list.jsonl --out' 25 | bookscorpus_download_command += ' ' + self.save_path + '/bookscorpus' 26 | bookscorpus_download_command += ' --trash-bad-count' 27 | bookscorpus_download_process = subprocess.run(bookscorpus_download_command, shell=True, check=True) 28 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/BookscorpusTextFormatting.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | import glob 15 | 16 | 17 | class BookscorpusTextFormatting: 18 | 19 | def __init__(self, books_path, output_filename, recursive=False): 20 | self.books_path = books_path 21 | self.recursive = recursive 22 | self.output_filename = output_filename 23 | 24 | # This puts one book per line 25 | def merge(self): 26 | with open(self.output_filename, mode='w', newline='\n') as ofile: 27 | for filename in glob.glob(self.books_path + '/' + '*.txt', recursive=True): 28 | with open(filename, mode='r', encoding='utf-8-sig', newline='\n') as file: 29 | for line in file: 30 | if line.strip() != '': 31 | ofile.write(line.strip() + ' ') 32 | ofile.write("\n\n") 33 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Sosuke Kobayashi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/downloader/bookscorpus/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/make_sentlines.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from glob import glob 4 | 5 | from blingfire import text_to_sentences 6 | 7 | file_dir = sys.argv[1] 8 | 9 | 10 | def convert_into_sentences(lines): 11 | stack = [] 12 | sent_L = [] 13 | n_sent = 0 14 | for chunk in lines: 15 | if not chunk.strip(): 16 | if stack: 17 | sents = text_to_sentences(" ".join(stack).strip().replace('\n', ' ')).split('\n') 18 | sent_L.extend(sents) 19 | n_sent += len(sents) 20 | sent_L.append('\n') 21 | stack = [] 22 | continue 23 | stack.append(chunk.strip()) 24 | 25 | if stack: 26 | sents = text_to_sentences(" ".join(stack).strip().replace('\n', ' ')).split('\n') 27 | sent_L.extend(sents) 28 | n_sent += len(sents) 29 | return sent_L, n_sent 30 | 31 | 32 | file_list = list(sorted(glob(os.path.join(file_dir, '*.txt')))) 33 | 34 | for i, file_path in enumerate(file_list): 35 | sents, n_sent = convert_into_sentences(open(file_path).readlines()) 36 | print('\n'.join(sents)) 37 | print('\n\n\n\n') 38 | sys.stderr.write('{}/{}\t{}\t{}\n'.format(i, len(file_list), n_sent, file_path)) 39 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4>=4.6.3 2 | html2text>=2018.1.9 3 | blingfire>=0.0.9 4 | progressbar>=2.5 5 | lxml>=4.3.2 -------------------------------------------------------------------------------- /deepray/datasets/downloader/bookscorpus/tokenize_sentlines.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from blingfire import text_to_words 3 | 4 | for l in sys.stdin: 5 | if l.strip(): 6 | print(text_to_words(l.strip())) 7 | else: 8 | print('') 9 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/download.sh: -------------------------------------------------------------------------------- 1 | python3 bertPrep.py --action download --dataset bookscorpus 2 | # python3 bertPrep.py --action download --dataset pubmed_baseline 3 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/glue/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/downloader/glue/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/downloader/process.sh: -------------------------------------------------------------------------------- 1 | DATASET="wikicorpus_en" 2 | BERT_PREP_WORKING_DIR="/workspaces/deepray/deepray/datasets/openwebtext" 3 | 4 | # Properly format the text files 5 | python3 bertPrep.py --action text_formatting --dataset wikicorpus_en 6 | 7 | # Shard the text files 8 | python3 bertPrep.py --action sharding 
--dataset $DATASET 9 | 10 | # Create TFRecord files Phase 1 11 | python3 bertPrep.py --action create_tfrecord_files --dataset ${DATASET} --max_seq_length 128 \ 12 | --max_predictions_per_seq 20 --vocab_file ${BERT_PREP_WORKING_DIR}/vocab.txt 13 | 14 | 15 | # Create TFRecord files Phase 2 16 | python3 bertPrep.py --action create_tfrecord_files --dataset ${DATASET} --max_seq_length 512 \ 17 | --max_predictions_per_seq 80 --vocab_file ${BERT_PREP_WORKING_DIR}/vocab.txt 18 | -------------------------------------------------------------------------------- /deepray/datasets/downloader/pubmed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/downloader/pubmed/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/downloader/squad/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/downloader/squad/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/downloader/wikicorpus/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/downloader/wikicorpus/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/enwik8/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/enwik8/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/fashion_mnist/__init__.py: -------------------------------------------------------------------------------- 1 | from .fashion_mnist import FashionMNIST 2 | -------------------------------------------------------------------------------- /deepray/datasets/fashion_mnist/fashion_mnist_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags 9 | 10 | from .fashion_mnist import FashionMNIST 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 15 | 16 | 17 | def runner(argv=None): 18 | if len(argv) <= 1: 19 | argv = [ 20 | sys.argv[0], 21 | "--batch_size=16", 22 | "-epochs=1", 23 | "--train_data=movielens/1m-ratings", 24 | # f"--feature_map={dir_path}/feature_map.csv", 25 | "--label=clicked", 26 | ] 27 | if argv: 28 | FLAGS(argv, known_only=True) 29 | 30 | data_pipe = FashionMNIST() 31 | # create data pipeline of train & test dataset 32 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 33 | num_examples = 0 34 | for x in train_dataset: 35 | num_examples += FLAGS.batch_size 36 | 37 | print(x) 38 | print(num_examples) 39 | 40 | 41 | if __name__ == "__main__": 42 | app.run(runner) 43 | -------------------------------------------------------------------------------- /deepray/datasets/glue/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/glue/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/imagenet-1k/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/imagenet-1k/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/imdb/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import IMDB 2 | -------------------------------------------------------------------------------- /deepray/datasets/imdb/imdb_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags 9 | 10 | from .imdb import IMDB 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 15 | 16 | 17 | def runner(argv=None): 18 | if len(argv) <= 1: 19 | argv = [ 20 | sys.argv[0], 21 | "--batch_size=16", 22 | "-epochs=1", 23 | "--train_data=movielens/1m-ratings", 24 | # f"--feature_map={dir_path}/feature_map.csv", 25 | "--label=clicked", 26 | ] 27 | if argv: 28 | FLAGS(argv, known_only=True) 29 | 30 | data_pipe = IMDB() 31 | # create data pipeline of train & test dataset 32 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 33 | num_examples = 0 34 | for x in train_dataset: 35 | num_examples += FLAGS.batch_size 36 | 37 | print(x) 38 | print(num_examples) 39 | 40 | 41 | if __name__ == "__main__": 42 | app.run(runner) 43 | -------------------------------------------------------------------------------- /deepray/datasets/mnist/__init__.py: -------------------------------------------------------------------------------- 1 | from .mnist import Mnist 2 | -------------------------------------------------------------------------------- /deepray/datasets/mnist/mnist_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags 9 | 10 | from .mnist import Mnist 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 15 | 16 | 17 | def runner(argv=None): 18 | if len(argv) <= 1: 19 | argv = [ 20 | sys.argv[0], 21 | "--batch_size=16", 22 | "-epochs=1", 23 | "--train_data=movielens/1m-ratings", 24 | # f"--feature_map={dir_path}/feature_map.csv", 25 | "--label=clicked", 26 | ] 27 | if argv: 28 | FLAGS(argv, known_only=True) 29 | 30 | data_pipe = Mnist() 31 | # create data pipeline of train & test dataset 32 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 33 | num_examples = 0 34 | for x in train_dataset: 35 | num_examples += FLAGS.batch_size 36 | 37 | print(x) 38 | print(num_examples) 39 | 40 | 41 | if __name__ == "__main__": 42 | app.run(runner) 43 | -------------------------------------------------------------------------------- /deepray/datasets/movielens/__init__.py:
-------------------------------------------------------------------------------- 1 | from deepray.datasets.movielens.movielens import Movielens 2 | from deepray.datasets.movielens.movielens_100k_ratings import Movielens100kRating 3 | from deepray.datasets.movielens.movielens_1m_ratings import Movielens1MRating 4 | from deepray.datasets.movielens.producer import Produce 5 | -------------------------------------------------------------------------------- /deepray/datasets/movielens/movielens.csv: -------------------------------------------------------------------------------- 1 | name,dtype,ftype,dim,length,voc_size,lr,optimizer,storage_type,composition_factor,ev_filter 2 | user_rating,int64,Label,1,1 3 | user_id,int64,Categorical,32,1 4 | movie_id,int64,Categorical,32,1 -------------------------------------------------------------------------------- /deepray/datasets/movielens/movielens_100k_ratings_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags, logging 9 | 10 | from deepray.datasets.movielens.movielens_100k_ratings import Movielens100kRating 11 | 12 | FLAGS = flags.FLAGS 13 | logging.set_verbosity(logging.INFO) 14 | 15 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 16 | 17 | 18 | def runner(argv=None): 19 | if len(argv) <= 1: 20 | argv = [ 21 | sys.argv[0], 22 | # "--batch_size=16", 23 | "-epochs=1", 24 | "--train_data=movielens/100k-ratings", 25 | # f"--feature_map={dir_path}/feature_map.csv", 26 | # "--label=clicked", 27 | ] 28 | if argv: 29 | FLAGS(argv, known_only=True) 30 | 31 | data_pipe = Movielens100kRating() 32 | # create data pipeline of train & test dataset 33 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 34 | num_examples = 0 35 | for x in train_dataset: 36 | num_examples += FLAGS.batch_size 37 | 38 | print(x) 39 | print(num_examples) 40 | 41 | 42 | if __name__ == "__main__": 43 | app.run(runner) 44 | -------------------------------------------------------------------------------- /deepray/datasets/movielens/movielens_1m_ratings_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags, logging 9 | 10 | from deepray.datasets.movielens.movielens_1m_ratings import Movielens1MRating 11 | 12 | FLAGS = flags.FLAGS 13 | logging.set_verbosity(logging.INFO) 14 | 15 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 16 | 17 | 18 | def runner(argv=None): 19 | if len(argv) <= 1: 20 | argv = [ 21 | sys.argv[0], 22 | "--batch_size=16", 23 | "-epochs=1", 24 | "--train_data=movielens/1m-ratings", 25 | # f"--feature_map={dir_path}/feature_map.csv", 26 | "--label=clicked", 27 | ] 28 | if argv: 29 | FLAGS(argv, known_only=True) 30 | 31 | data_pipe = Movielens1MRating() 32 | # create data pipeline of train & test dataset 33 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 34 | num_examples = 0 35 | for x in train_dataset: 36 | num_examples += FLAGS.batch_size 37 | 38 | print(x) 39 | print(num_examples) 40 | 41 | 42 | if __name__ == "__main__": 43 | app.run(runner) 44 | --------------------------------------------------------------------------------
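A note on the dataset test drivers above: they all follow the same pattern, parsing absl flags, instantiating a `DataPipeLine` subclass, and iterating the `tf.data.Dataset` it returns. Below is a minimal sketch of a pipeline written against that contract; `ToyPipeline` and its synthetic data are illustrative only, and the sketch assumes `DataPipeLine.__call__` delegates to `build_dataset` as the tests above suggest.

```python
import tensorflow as tf

from deepray.datasets.datapipeline import DataPipeLine


class ToyPipeline(DataPipeLine):
  """Hypothetical pipeline; not part of the repository."""

  def build_dataset(self, input_file_pattern, batch_size, is_training=True, *args, **kwargs):
    # Real pipelines read the shards matching input_file_pattern
    # (TFRecord/CSV/Parquet); an in-memory range keeps the sketch self-contained.
    dataset = tf.data.Dataset.range(1000).map(
        lambda i: ({"feature": i}, tf.cast(i % 2, tf.int32)))
    if is_training:
      dataset = dataset.shuffle(buffer_size=1000)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
```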
/deepray/datasets/openwebtext/README.md: -------------------------------------------------------------------------------- 1 | 2 | #### Setup 3 | 1. Place a vocabulary file in `$DATA_DIR/vocab.txt`. Our ELECTRA models all used the exact same vocabulary as English uncased BERT, which you can download [here](https://storage.googleapis.com/electra-data/vocab.txt). 4 | 2. Download the [OpenWebText](https://skylion007.github.io/OpenWebTextCorpus/) corpus (12G) and extract it (i.e., run `tar xf openwebtext.tar.xz`). Place it in `$DATA_DIR/openwebtext`. 5 | 3. Run `python3 build_openwebtext_pretraining_dataset.py --data-dir $DATA_DIR --num-processes 5`. It pre-processes/tokenizes the data and outputs examples as [tfrecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) files under `$DATA_DIR/pretrain_tfrecords`. The tfrecords require roughly 30G of disk space. 6 | -------------------------------------------------------------------------------- /deepray/datasets/openwebtext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/openwebtext/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/openwebtext/openwebtext_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import json 6 | import os 7 | import sys 8 | from datetime import datetime 9 | 10 | import tensorflow as tf 11 | from absl import app, flags 12 | 13 | from .openwebtext import Openwebtext 14 | 15 | FLAGS = flags.FLAGS 16 | 17 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 18 | 19 | 20 | def runner(argv=None): 21 | if len(argv) <= 1: 22 | dir_path = os.path.dirname(os.path.realpath(__file__)) 23 | argv = [ 24 | sys.argv[0], 25 | "--batch_size=10240", 26 | "-epochs=1", 27 | f"--train_data=/workspaces/dataset/openwebtext/pretrain_tfrecords/*", 28 | # "--label=clicked", 29 | ] 30 | if argv: 31 | FLAGS(argv, known_only=True) 32 | 33 | data_pipe = Openwebtext(max_seq_length=128) 34 | # create data pipeline of train & test dataset 35 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 36 | num_examples = 0 37 | for x in train_dataset: 38 | num_examples += FLAGS.batch_size 39 | if num_examples % 100 == 0: 40 | print(num_examples) 41 | 42 | print(x) 43 | print(num_examples) 44 | 45 | 46 | if __name__ == "__main__": 47 | app.run(runner) 48 | -------------------------------------------------------------------------------- /deepray/datasets/openwebtext/run.sh: -------------------------------------------------------------------------------- 1 | python build_openwebtext_pretraining_dataset.py --data-dir /workspaces/dataset/openwebtext -------------------------------------------------------------------------------- /deepray/datasets/openwebtext/util/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. -------------------------------------------------------------------------------- /deepray/datasets/openwebtext/util/training_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Utilities for training the models.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import datetime 22 | import re 23 | 24 | 25 | def secs_to_str(secs): 26 | s = str(datetime.timedelta(seconds=int(round(secs)))) 27 | s = re.sub("^0:", "", s) 28 | s = re.sub("^0", "", s) 29 | s = re.sub("^0:", "", s) 30 | s = re.sub("^0", "", s) 31 | return s 32 | -------------------------------------------------------------------------------- /deepray/datasets/parquet_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/10/15 7:16 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | -------------------------------------------------------------------------------- /deepray/datasets/pubmed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/pubmed/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/squad/__init__.py: -------------------------------------------------------------------------------- 1 | from .squad import Squad 2 | -------------------------------------------------------------------------------- /deepray/datasets/squad/squad.csv: -------------------------------------------------------------------------------- 1 | name,dtype,ftype,dim,length,voc_size,lr,optimizer,storage_type,composition_factor,ev_filter 2 | unique_ids,int64,Label,1,1 3 | input_ids,int64,Categorical,32,1 4 | input_mask,int64,Categorical,32,1 5 | segment_ids,int64, 6 | start_positions,int64, 7 | end_positions,int64, 8 | is_impossible,int64, -------------------------------------------------------------------------------- /deepray/datasets/squad/v1.1/squad_v1.1_meta_data: -------------------------------------------------------------------------------- 1 | { 2 | "task_type": "bert_squad", 3 | "train_data_size": 88641, 4 | "max_seq_length": 384, 5 | "max_query_length": 64, 6 | "doc_stride": 128, 7 |
"version_2_with_negative": false 8 | } 9 | -------------------------------------------------------------------------------- /deepray/datasets/squad/v2.0/squad_v2.0_meta_data: -------------------------------------------------------------------------------- 1 | { 2 | "task_type": "bert_squad", 3 | "train_data_size": 131944, 4 | "max_seq_length": 384, 5 | "max_query_length": 64, 6 | "doc_stride": 128, 7 | "version_2_with_negative": true 8 | } 9 | -------------------------------------------------------------------------------- /deepray/datasets/taobao/README.md: -------------------------------------------------------------------------------- 1 | Ali-CCP: Alibaba Click and Conversion Prediction: https://tianchi.aliyun.com/dataset/408 -------------------------------------------------------------------------------- /deepray/datasets/taobao/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/taobao/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/tfrecord_pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .tfrecord_pipeline import TFRecordPipeline 2 | -------------------------------------------------------------------------------- /deepray/datasets/toxic_comment_classification_challenge/__init__.py: -------------------------------------------------------------------------------- 1 | from .toxic_comment_classification_challenge import ToxicCommentClassificationChallenge 2 | -------------------------------------------------------------------------------- /deepray/datasets/toxic_comment_classification_challenge/toxic_comment_classification_challenge_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # @Time : 2021/8/10 2:50 PM 3 | # @Author : Hailin.Fu 4 | # @license : Copyright(C), 5 | import sys 6 | from datetime import datetime 7 | 8 | from absl import app, flags 9 | 10 | from .toxic_comment_classification_challenge import ToxicCommentClassificationChallenge 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | TIME_STAMP = datetime.now().strftime("%Y%m%d-%H%M%S") 15 | 16 | 17 | def runner(argv=None): 18 | if len(argv) <= 1: 19 | argv = [ 20 | sys.argv[0], 21 | "--batch_size=1", 22 | "-epochs=1", 23 | # "--train_data=cifar100", 24 | # f"--feature_map={dir_path}/feature_map.csv", 25 | # "--label=clicked", 26 | ] 27 | if argv: 28 | FLAGS(argv, known_only=True) 29 | 30 | data_pipe = ToxicCommentClassificationChallenge() 31 | # create data pipeline of train & test dataset 32 | train_dataset = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 33 | num_examples = 0 34 | for current_step, batch in enumerate(train_dataset): 35 | num_examples += FLAGS.batch_size 36 | if num_examples % 100 == 0: 37 | print(num_examples) 38 | 39 | print(batch) 40 | print(num_examples) 41 | 42 | 43 | if __name__ == "__main__": 44 | app.run(runner) 45 | -------------------------------------------------------------------------------- /deepray/datasets/wikicorpus_en/README.md: -------------------------------------------------------------------------------- 1 | ## Quick Start Guide 2 | 3 | 4 | 5 | Required data is downloaded into the `data/` directory by default. 6 | 7 | 1. Download and preprocess the dataset.
8 | 9 | This repository provides scripts to download, verify, and extract the following datasets: 10 | 11 | - Wikipedia (pre-training) 12 | 13 | To download, verify, and extract the datasets, and create the shards in `tfrecord` format, run: 14 | ``` 15 | export DATA_PREP_WORKING_DIR=/workspaces/dataset/wikicorpus_en/data 16 | bash create_datasets_from_start.sh wiki_only 17 | ``` 18 | 19 | The processing scripts are collected from https://github.com/NVIDIA/DeepLearningExamples/blob/master/TensorFlow2/LanguageModeling/ELECTRA/README.md#quick-start-guide -------------------------------------------------------------------------------- /deepray/datasets/wikicorpus_en/processing/Downloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 13 | 14 | from WikiDownloader import WikiDownloader 15 | 16 | 17 | class Downloader: 18 | 19 | def __init__(self, dataset_name, save_path): 20 | self.dataset_name = dataset_name 21 | self.save_path = save_path 22 | 23 | def download(self): 24 | 25 | if self.dataset_name == 'wikicorpus_en': 26 | self.download_wikicorpus('en') 27 | 28 | elif self.dataset_name == 'wikicorpus_zh': 29 | self.download_wikicorpus('zh') 30 | 31 | else: 32 | print(self.dataset_name) 33 | assert False, 'Unknown dataset_name provided to downloader' 34 | 35 | def download_wikicorpus(self, language): 36 | downloader = WikiDownloader(language, self.save_path) 37 | downloader.download() 38 | -------------------------------------------------------------------------------- /deepray/datasets/wikicorpus_en/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/wikicorpus_en/processing/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/wikicorpus_en/processing/run.sh: -------------------------------------------------------------------------------- 1 | DATASET="wikicorpus_en" 2 | # Properly format the text files 3 | python3 ${BERT_PREP_WORKING_DIR}/bertPrep.py --action text_formatting --dataset wikicorpus_en 4 | 5 | # Shard the text files 6 | python3 ${BERT_PREP_WORKING_DIR}/bertPrep.py --action sharding --dataset $DATASET 7 | 8 | # Create TFRecord files Phase 1 9 | python3 ${BERT_PREP_WORKING_DIR}/bertPrep.py --action create_tfrecord_files --dataset ${DATASET} --max_seq_length 128 \ 10 | --max_predictions_per_seq 20 --vocab_file ${BERT_PREP_WORKING_DIR}/download/google_pretrained_weights/uncased_L-24_H-1024_A-16/vocab.txt 11 | 12 | 13 | # Create TFRecord files Phase 2 14 | python3 ${BERT_PREP_WORKING_DIR}/bertPrep.py --action create_tfrecord_files --dataset ${DATASET} --max_seq_length 512 \ 15 | --max_predictions_per_seq 80 --vocab_file
${BERT_PREP_WORKING_DIR}/download/google_pretrained_weights/uncased_L-24_H-1024_A-16/vocab.txt 16 | -------------------------------------------------------------------------------- /deepray/datasets/wikicorpus_en/processing/wikiextractor/extract.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # NOTES 4 | # 5 | # - Must expand templates to avoid a large loss of content. 6 | # - Text will not (redundantly) contain the title string. 7 | # - Keep sections. Section title will be marked by "Section::::". 8 | # - Keep lists. List bullets will be marked by "BULLET::::". 9 | # - Keep tables. They're mostly garbage but can be removed later (remove "^!*"). 10 | # - Remove disambiguation pages. Right now there is no use for them. 11 | 12 | INPUT=$1 13 | PROCESSES=$2 14 | TEMPLATES=$3 15 | OUTPUT=$4 16 | 17 | python WikiExtractor.py $INPUT \ 18 | --json \ 19 | --processes $PROCESSES \ 20 | --templates $TEMPLATES \ 21 | --output $OUTPUT \ 22 | --bytes 1M \ 23 | --compress \ 24 | --links \ 25 | --sections \ 26 | --lists \ 27 | --keep_tables \ 28 | --min_text_length 0 \ 29 | --filter_disambig_pages 30 | -------------------------------------------------------------------------------- /deepray/datasets/wikitext103/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/wikitext103/__init__.py -------------------------------------------------------------------------------- /deepray/datasets/wikitext2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/datasets/wikitext2/__init__.py -------------------------------------------------------------------------------- /deepray/design_patterns.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | class SingletonType(type): 5 | _instance_lock = threading.Lock() 6 | 7 | def __call__(cls, *args, **kwargs): 8 | if not hasattr(cls, "_instance"): 9 | with SingletonType._instance_lock: 10 | if not hasattr(cls, "_instance"): 11 | cls._instance = super().__call__(*args, **kwargs) 12 | return cls._instance 13 | -------------------------------------------------------------------------------- /deepray/layers/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = ["//visibility:public"], 3 | licenses = ["notice"], # Apache 2.0 4 | ) 5 | 6 | py_library( 7 | name = "layers", 8 | srcs = glob([ 9 | "*.py", 10 | "**/*.py", 11 | ]), 12 | deps = [ 13 | "//deepray/activations", 14 | "//deepray/layers/rnn", 15 | "//deepray/testing", 16 | "//deepray/text", 17 | "//deepray/utils", 18 | ], 19 | ) 20 | 21 | # py_test( 22 | # name = "layers_test", 23 | # size = "large", 24 | # srcs = glob(["tests/*"]), 25 | # main = "tests/run_all_test.py", 26 | # deps = [ 27 | # ":layers", 28 | # ], 29 | # ) 30 | -------------------------------------------------------------------------------- /deepray/layers/README.md: -------------------------------------------------------------------------------- 1 | # Deepray - Layers 2 | 3 | ## Components 4 | https://www.tensorflow.org/deepray/api_docs/python/dp/layers 5 | 6 | ## Contribution Guidelines 7 | #### Standard API 8 | In order to conform with the current API standard, all layers 
9 | must: 10 | * Inherit from either `keras.layers.Layer` or its subclasses. 11 | * Register as a keras global object so it can be serialized properly: `@tf.keras.utils.register_keras_serializable(package='Deepray')` 12 | 13 | #### Testing Requirements 14 | * Simple unittests that demonstrate the layer is behaving as expected. 15 | * To run your `tf.functions` in eager mode and graph mode in the tests, 16 | you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")` 17 | decorator. This will run the tests twice, once normally, and once 18 | with `tf.config.run_functions_eagerly(True)`. 19 | * Run `layer_test` on the layer. 20 | 21 | #### Documentation Requirements 22 | * Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS) 23 | -------------------------------------------------------------------------------- /deepray/layers/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Networks package definition.""" 16 | from deepray.layers.networks.transformer_encoder import TransformerEncoder 17 | from .span_labeling import SpanLabeling -------------------------------------------------------------------------------- /deepray/layers/nlp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/layers/nlp/__init__.py -------------------------------------------------------------------------------- /deepray/layers/nlp/transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/layers/nlp/transformer/__init__.py -------------------------------------------------------------------------------- /deepray/layers/rnn/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "rnn", 7 | srcs = glob(["*.py"]), 8 | deps = [ 9 | "//deepray/testing", 10 | "//deepray/utils", 11 | ], 12 | ) 13 | 14 | py_test( 15 | name = "rnn_test", 16 | size = "small", 17 | srcs = glob(["tests/*"]), 18 | main = "tests/run_all_test.py", 19 | deps = [ 20 | ":rnn", 21 | ], 22 | ) 23 | -------------------------------------------------------------------------------- /deepray/layers/rnn/README.md: -------------------------------------------------------------------------------- 1 | # Deepray - RNN 2 | 3 | ## Components 4 | https://www.tensorflow.org/deepray/api_docs/python/dp/rnn 5 | 6 | ## Contribution Guidelines 7 | #### Prerequisites 8 | * For any cell 
based on a research paper, the original paper has to be well recognized. 9 | The criterion here is >= 100 citations based on Google Scholar. If the contributor feels 10 | this requirement needs to be overruled, please specify the detailed justification in the 11 | PR. 12 | 13 | #### Standard API 14 | In order to conform with the current API standard, all cells must: 15 | * Inherit from either `keras.layers.AbstractRNNCell` or `keras.layers.Layer` with 16 | required properties. 17 | * Register as a keras global object so it can be serialized properly: `@tf.keras.utils.register_keras_serializable(package="Deepray")` 18 | 19 | #### Testing Requirements 20 | * To run your `tf.functions` in eager mode and graph mode in the tests, 21 | you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")` 22 | decorator. This will run the tests twice, once normally, and once 23 | with `tf.config.run_functions_eagerly(True)`. 24 | 25 | #### Documentation Requirements 26 | * Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS) 27 | -------------------------------------------------------------------------------- /deepray/layers/rnn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | # ============================================================================== 15 | """Additional RNN cells that conform to Keras API.""" 16 | 17 | from deepray.layers.rnn.nas_cell import NASCell 18 | from deepray.layers.rnn.layer_norm_lstm_cell import LayerNormLSTMCell 19 | from deepray.layers.rnn.layer_norm_simple_rnn_cell import LayerNormSimpleRNNCell 20 | from deepray.layers.rnn.esn_cell import ESNCell 21 | from deepray.layers.rnn.peephole_lstm_cell import PeepholeLSTMCell 22 | -------------------------------------------------------------------------------- /deepray/layers/rnn/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/layers/rnn/tests/__init__.py -------------------------------------------------------------------------------- /deepray/layers/rnn/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/layers/tests_bak/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/layers/tests_bak/__init__.py -------------------------------------------------------------------------------- /deepray/layers/tests_bak/dnn_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from typing import List 3 | from ..mlp import MLP 4 | 5 | 6 | def test_DNN(): 7 | # Test MLP initialization 8 | dnn = MLP(hidden_units=[8, 16], use_bn=True) 9 | assert len(dnn.kernel) == 2 10 | assert isinstance(dnn.bn, tf.keras.layers.BatchNormalization) 11 | assert dnn._fn == dnn.apply_kernel_bn 12 | 13 | # Test the MLP call method 14 | x = tf.ones((4, 4)) 15 | y = dnn(x) 16 | assert y.shape == (4, 16) 17 | 18 | # Test the MLP apply_kernel method 19 | x = tf.ones((4, 4)) 20 | y = dnn.apply_kernel(0, x) 21 | assert y.shape == (4, 8) 22 | 23 | # Test the MLP apply_kernel_bn method 24 | x = tf.ones((4, 4)) 25 | y = dnn.apply_kernel_bn(0, x) 26 | assert y.shape == (4, 8) 27 | -------------------------------------------------------------------------------- /deepray/layers/tests_bak/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/losses/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "losses", 7 | srcs = glob(["*.py"]), 8 | deps = [ 9 | "//deepray/activations", 10 | "//deepray/testing", 11 | "//deepray/utils", 12 | ], 13 | ) 14 | 15 | py_test( 16 | name = "losses_test", 17 | size = "small", 18 | srcs = glob(["tests/*"]), 19 | main = "tests/run_all_test.py", 20 | deps = [ 21 | ":losses", 22 | ], 23 | ) 24 | --------------------------------------------------------------------------------
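The layers and RNN contribution guidelines above both require inheriting from the appropriate Keras base class and registering the new object for serialization. A minimal sketch of that boilerplate, using a made-up `ScaledDense` layer rather than any real Deepray component:

```python
import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package="Deepray")
class ScaledDense(tf.keras.layers.Layer):
  """Hypothetical layer: a Dense projection with a learnable output scale."""

  def __init__(self, units, **kwargs):
    super().__init__(**kwargs)
    self.units = units
    self.dense = tf.keras.layers.Dense(units)

  def build(self, input_shape):
    # One scalar weight multiplying the projection output.
    self.scale = self.add_weight(name="scale", shape=(), initializer="ones")
    super().build(input_shape)

  def call(self, inputs):
    return self.scale * self.dense(inputs)

  def get_config(self):
    # Serializing constructor arguments is what makes the registration useful.
    config = super().get_config()
    config.update({"units": self.units})
    return config
```

With the decorator applied, a saved model containing the layer can be reloaded with `tf.keras.models.load_model` without passing `custom_objects`.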
/deepray/losses/README.md: -------------------------------------------------------------------------------- 1 | # Deepray - Losses 2 | 3 | ## Components 4 | https://www.tensorflow.org/deepray/api_docs/python/dp/losses 5 | 6 | ## Contribution Guidelines 7 | #### Standard API 8 | In order to conform with the current API standard, all losses 9 | must: 10 | * Inherit from `keras.losses.Loss`. 11 | * Register as a keras global object so it can be serialized properly: `@tf.keras.utils.register_keras_serializable(package='Deepray')` 12 | 13 | #### Testing Requirements 14 | * Simple unittests that demonstrate the loss is behaving as expected on 15 | some set of known inputs and outputs. 16 | * To run your `tf.functions` in eager mode and graph mode in the tests, 17 | you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")` 18 | decorator. This will run the tests twice, once normally, and once 19 | with `tf.config.run_functions_eagerly(True)`. 20 | 21 | #### Documentation Requirements 22 | * Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS) 23 | 24 | -------------------------------------------------------------------------------- /deepray/losses/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/losses/tests/__init__.py -------------------------------------------------------------------------------- /deepray/losses/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/metrics/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "metrics", 7 | srcs = glob(["*.py"]), 8 | deps = [ 9 | "//deepray/testing", 10 | "//deepray/utils", 11 | ], 12 | ) 13 | 14 | py_test( 15 | name = "metrics_test", 16 | size = "small", 17 | srcs = glob(["tests/*"]), 18 | main = "tests/run_all_test.py", 19 | deps = [ 20 | ":metrics", 21 | ], 22 | ) 23 | -------------------------------------------------------------------------------- /deepray/metrics/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/metrics/tests/__init__.py -------------------------------------------------------------------------------- /deepray/metrics/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/models/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "models", 7 | srcs = glob([ 8 | "*.py", 9 | "**/*.py", 
10 | ]), 11 | deps = [ 12 | "//deepray/testing", 13 | "//deepray/utils", 14 | ], 15 | ) 16 | 17 | # py_test( 18 | # name = "models_test", 19 | # size = "small", 20 | # srcs = glob(["tests/*"]), 21 | # main = "tests/run_all_test.py", 22 | # deps = [ 23 | # ":models", 24 | # ], 25 | # ) 26 | -------------------------------------------------------------------------------- /deepray/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/models/__init__.py -------------------------------------------------------------------------------- /deepray/models/framework.py: -------------------------------------------------------------------------------- 1 | """Defines the base task abstraction.""" 2 | import abc 3 | import functools 4 | from typing import Optional 5 | 6 | from absl import logging 7 | import tensorflow as tf 8 | 9 | 10 | class FrameWork(tf.keras.Model, metaclass=abc.ABCMeta): 11 | 12 | @abc.abstractmethod 13 | def build_network(self, flags=None, features=None): 14 | """ 15 | Build the model network; must be defined in each subclass. 16 | """ 17 | raise NotImplementedError("build_network: not implemented!") 18 | 19 | def build_features(self): 20 | pass 21 | -------------------------------------------------------------------------------- /deepray/models/generative/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/models/generative/__init__.py -------------------------------------------------------------------------------- /deepray/models/rec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/models/rec/__init__.py -------------------------------------------------------------------------------- /deepray/models/stable_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The KerasCV Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .clip_tokenizer import SimpleTokenizer 16 | from .decoder import Decoder 17 | from .diffusion_model import DiffusionModel 18 | from .diffusion_model import DiffusionModelV2 19 | from .image_encoder import ImageEncoder 20 | from .noise_scheduler import NoiseScheduler 21 | from .stable_diffusion import StableDiffusion 22 | from .stable_diffusion import StableDiffusionV2 23 | from .text_encoder import TextEncoder 24 | from .text_encoder import TextEncoderV2 25 | -------------------------------------------------------------------------------- /deepray/models/stable_diffusion/__internal__/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The KerasCV Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deepray/models/stable_diffusion/__internal__/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The KerasCV Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deepray/models/stable_diffusion/__internal__/layers/padded_conv2d.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The KerasCV Authors 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from tensorflow import keras 16 | 17 | 18 | class PaddedConv2D(keras.layers.Layer): 19 | 20 | def __init__(self, filters, kernel_size, padding=0, strides=1, **kwargs): 21 | super().__init__(**kwargs) 22 | self.padding2d = keras.layers.ZeroPadding2D(padding) 23 | self.conv2d = keras.layers.Conv2D(filters, kernel_size, strides=strides) 24 | 25 | def call(self, inputs): 26 | x = self.padding2d(inputs) 27 | return self.conv2d(x) 28 | -------------------------------------------------------------------------------- /deepray/models/word2vec.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers 3 | 4 | 5 | class Word2Vec(tf.keras.Model): 6 | 7 | def __init__(self, vocab_size, embedding_dim, num_ns=4):  # num_ns: negative samples per pair; was referenced below but never defined 8 | super(Word2Vec, self).__init__() 9 | self.target_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=1, name="w2v_embedding") 10 | self.context_embedding = layers.Embedding(vocab_size, embedding_dim, input_length=num_ns + 1) 11 | 12 | def call(self, pair): 13 | target, context = pair 14 | # target: (batch, dummy?) # The dummy axis doesn't exist in TF2.7+ 15 | # context: (batch, context) 16 | if len(target.shape) == 2: 17 | target = tf.squeeze(target, axis=1) 18 | # target: (batch,) 19 | word_emb = self.target_embedding(target) 20 | # word_emb: (batch, embed) 21 | context_emb = self.context_embedding(context) 22 | # context_emb: (batch, context, embed) 23 | dots = tf.einsum('be,bce->bc', word_emb, context_emb) 24 | # dots: (batch, context) 25 | return dots 26 | -------------------------------------------------------------------------------- /deepray/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | """Ops package definition.""" 15 | from official.nlp.modeling.ops.beam_search import sequence_beam_search 16 | from official.nlp.modeling.ops.beam_search import SequenceBeamSearch 17 | from official.nlp.modeling.ops.sampling_module import SamplingModule 18 | from official.nlp.modeling.ops.segment_extractor import get_next_sentence_labels 19 | from official.nlp.modeling.ops.segment_extractor import get_sentence_order_labels 20 | -------------------------------------------------------------------------------- /deepray/optimizers/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "optimizers", 7 | srcs = glob(["*.py"]), 8 | deps = [ 9 | "//deepray/testing", 10 | "//deepray/utils", 11 | ], 12 | ) 13 | 14 | py_test( 15 | name = "optimizers_test", 16 | size = "medium", 17 | srcs = glob(["tests/*"]), 18 | main = "tests/run_all_test.py", 19 | deps = [ 20 | ":optimizers", 21 | "//deepray/custom_ops/training_ops", 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /deepray/optimizers/README.md: -------------------------------------------------------------------------------- 1 | # Deepray - Optimizers 2 | 3 | ## Components 4 | https://www.tensorflow.org/deepray/api_docs/python/dp/optimizers 5 | 6 | ## Contribution Guidelines 7 | #### Standard API 8 | In order to conform with the current API standard, all optimizers 9 | must: 10 | * Inherit from either `keras.optimizer_v2.OptimizerV2` or its subclasses. 11 | * Register as a keras global object so it can be serialized properly: `@tf.keras.utils.register_keras_serializable(package='Deepray')` 12 | 13 | #### Testing Requirements 14 | * To run your `tf.functions` in eager mode and graph mode in the tests, 15 | you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")` 16 | decorator. This will run the tests twice, once normally, and once 17 | with `tf.config.run_functions_eagerly(True)`. 18 | 19 | #### Documentation Requirements 20 | * Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS) 21 | -------------------------------------------------------------------------------- /deepray/optimizers/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import tensorflow as tf 16 | 17 | if ( 18 | hasattr(tf.keras.optimizers, "experimental") and 19 | tf.keras.optimizers.Optimizer.__module__ == tf.keras.optimizers.experimental.Optimizer.__module__ 20 | ): 21 | # If the default optimizer points to new Keras optimizer, deepray optimizers 22 | # should use the legacy path. 
23 | KerasLegacyOptimizer = tf.keras.optimizers.legacy.Optimizer 24 | else: 25 | KerasLegacyOptimizer = tf.keras.optimizers.Optimizer 26 | -------------------------------------------------------------------------------- /deepray/optimizers/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/optimizers/tests/__init__.py -------------------------------------------------------------------------------- /deepray/optimizers/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/seq2seq/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "seq2seq", 7 | srcs = glob(["*.py"]), 8 | data = [ 9 | "//deepray:options.py", 10 | "//deepray/custom_ops/seq2seq:_beam_search_ops.so", 11 | ], 12 | deps = [ 13 | "//deepray/testing", 14 | "//deepray/utils", 15 | ], 16 | ) 17 | 18 | py_test( 19 | name = "seq2seq_test", 20 | size = "medium", 21 | srcs = glob(["tests/*"]), 22 | main = "tests/run_all_test.py", 23 | deps = [ 24 | ":seq2seq", 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /deepray/seq2seq/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/seq2seq/tests/__init__.py -------------------------------------------------------------------------------- /deepray/seq2seq/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/testing/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "testing", 7 | srcs = glob(["*.py"]), 8 | ) 9 | 10 | py_test( 11 | name = "serialization_test", 12 | size = "small", 13 | srcs = glob(["tests/*"]), 14 | main = "tests/run_all_test.py", 15 | deps = [ 16 | ":testing", 17 | ], 18 | ) 19 | -------------------------------------------------------------------------------- /deepray/testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/testing/__init__.py -------------------------------------------------------------------------------- /deepray/testing/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/testing/tests/__init__.py 
-------------------------------------------------------------------------------- /deepray/testing/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/tests/__init__.py -------------------------------------------------------------------------------- /deepray/tests/register_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | import tensorflow as tf 5 | from deepray.register import register_all, _get_all_shared_objects 6 | from deepray.utils import resource_loader 7 | 8 | 9 | def test_multiple_register(): 10 | if resource_loader.SKIP_CUSTOM_OPS: 11 | pytest.skip("Skipping the test because it loads custom ops " 12 | "while --skip-custom-ops is set.") 13 | register_all() 14 | register_all() 15 | 16 | 17 | def test_get_all_shared_objects(): 18 | if resource_loader.SKIP_CUSTOM_OPS: 19 | pytest.skip("Skipping the test because it loads custom ops " 20 | "while --skip-custom-ops is set.") 21 | all_shared_objects = _get_all_shared_objects() 22 | assert len(all_shared_objects) >= 4 23 | 24 | for file in all_shared_objects: 25 | tf.load_op_library(file) 26 | 27 | 28 | if __name__ == "__main__": 29 | sys.exit(pytest.main([__file__])) 30 | -------------------------------------------------------------------------------- /deepray/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/text/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | # TODO: Once TF exports symbols in a DLL we can enable parse_time_op for Windows 6 | # https://github.com/tensorflow/deepray/issues/782 7 | py_library( 8 | name = "text", 9 | srcs = glob(["*.py"]), 10 | data = select({ 11 | "//deepray:windows": [ 12 | "//deepray/custom_ops/text:_skip_gram_ops.so", 13 | "//deepray/testing", 14 | "//deepray/utils", 15 | ], 16 | "//conditions:default": [ 17 | "//deepray/custom_ops/text:_parse_time_op.so", 18 | "//deepray/custom_ops/text:_skip_gram_ops.so", 19 | "//deepray/testing", 20 | "//deepray/utils", 21 | ], 22 | }), 23 | ) 24 | 25 | py_test( 26 | name = "text_test", 27 | size = "small", 28 | srcs = glob(["tests/*"]), 29 | main = "tests/run_all_test.py", 30 | deps = [ 31 | ":text", 32 | "//deepray/layers", 33 | ], 34 | ) 35 | -------------------------------------------------------------------------------- /deepray/text/README.md: -------------------------------------------------------------------------------- 1 | # Deepray - Text 2 | 3 | ## Components 4 | https://www.tensorflow.org/deepray/api_docs/python/dp/text 5 | 6 | ## Contribution
Guidelines 7 | #### Standard API 8 | In order to conform with the current API standard, all text ops 9 | must: 10 | * Be impossible to implement in one of the other API 11 | standards (Layers, Losses, etc.). 12 | * Be related to text processing. 13 | 14 | #### Testing Requirements 15 | * Simple unittests that demonstrate the text op is behaving as 16 | expected. 17 | * To run your `tf.functions` in eager mode and graph mode in the tests, 18 | you can use the `@pytest.mark.usefixtures("maybe_run_functions_eagerly")` 19 | decorator. This will run the tests twice, once normally, and once 20 | with `tf.config.run_functions_eagerly(True)`. 21 | 22 | #### Documentation Requirements 23 | * Update the [CODEOWNERS file](https://github.com/deepray-AI/deepray/blob/main/.github/CODEOWNERS) 24 | -------------------------------------------------------------------------------- /deepray/text/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/text/tests/__init__.py -------------------------------------------------------------------------------- /deepray/text/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/utils/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "utils", 7 | srcs = glob([ 8 | "*.py", 9 | "**/*.py", 10 | ]), 11 | data = [ 12 | "//deepray:conftest.py", 13 | "//deepray:options.py", 14 | ], 15 | ) 16 | 17 | py_test( 18 | name = "keras_utils_test", 19 | size = "small", 20 | srcs = glob(["tests/*"]), 21 | main = "tests/run_all_test.py", 22 | deps = [ 23 | ":utils", 24 | ], 25 | ) 26 | -------------------------------------------------------------------------------- /deepray/utils/README.md: -------------------------------------------------------------------------------- 1 | # Deepray Utils 2 | -------------------------------------------------------------------------------- /deepray/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/utils/__init__.py -------------------------------------------------------------------------------- /deepray/utils/accelerator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/utils/accelerator/__init__.py -------------------------------------------------------------------------------- /deepray/utils/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/utils/data/__init__.py -------------------------------------------------------------------------------- /deepray/utils/export/__init__.py: -------------------------------------------------------------------------------- 1 | from .export 
import SavedModel, TFTRTModel, export_to_savedmodel, export_to_checkpoint, optimize_for_inference 2 | -------------------------------------------------------------------------------- /deepray/utils/flags/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deepray/utils/flags/_data.py: -------------------------------------------------------------------------------- 1 | """Flags which will be nearly universal across models.""" 2 | 3 | from absl import flags 4 | from deepray.utils.flags import core as flags_core 5 | from deepray.utils.flags._conventions import help_wrap 6 | 7 | 8 | def define_data_download_flags( 9 | dataset=False, 10 | data_dir=False, 11 | download_if_missing=False, 12 | ): 13 | """Add flags specifying data download and usage arguments.""" 14 | key_flags = [] 15 | if dataset: 16 | flags.DEFINE_string( 17 | 'dataset', default=None, help=flags_core.help_wrap('The name of the dataset, e.g. ImageNet, etc.') 18 | ) 19 | key_flags.append("dataset") 20 | if data_dir: 21 | flags.DEFINE_string( 22 | name="data_dir", 23 | default="/tmp/movielens-data/", 24 | help=flags_core.help_wrap("Directory to download and extract data.") 25 | ) 26 | key_flags.append("data_dir") 27 | if download_if_missing: 28 | flags.DEFINE_boolean( 29 | name="download_if_missing", 30 | default=True, 31 | help=flags_core.help_wrap("Download data to data_dir if it is not already present.") 32 | ) 33 | key_flags.append("download_if_missing") 34 | return key_flags 35 | -------------------------------------------------------------------------------- /deepray/utils/horovod_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. 2 | # Licensed under the Apache License, Version 2.0 (the "License"); 3 | # you may not use this file except in compliance with the License. 4 | # You may obtain a copy of the License at 5 | # 6 | # http://www.apache.org/licenses/LICENSE-2.0 7 | # 8 | # Unless required by applicable law or agreed to in writing, software 9 | # distributed under the License is distributed on an "AS IS" BASIS, 10 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | # See the License for the specific language governing permissions and 12 | # limitations under the License. 
13 | 14 | import horovod.tensorflow.keras as hvd 15 | from absl import logging, flags 16 | 17 | FLAGS = flags.FLAGS 18 | 19 | 20 | def get_rank(): 21 | try: 22 | return hvd.rank() 23 | except ValueError:  # Horovod has not been initialized. 24 | return 0 25 | 26 | 27 | def get_world_size(): 28 | try: 29 | return hvd.size() 30 | except ValueError:  # Horovod has not been initialized. 31 | return 1 32 | 33 | 34 | def is_main_process(): 35 | return not FLAGS.use_horovod or get_rank() == 0 36 | -------------------------------------------------------------------------------- /deepray/utils/logs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/utils/logs/__init__.py -------------------------------------------------------------------------------- /deepray/utils/misc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /deepray/utils/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/deepray/utils/tests/__init__.py -------------------------------------------------------------------------------- /deepray/utils/tests/run_all_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | import pytest 5 | 6 | if __name__ == "__main__": 7 | dirname = Path(__file__).absolute().parent 8 | sys.exit(pytest.main([str(dirname)])) 9 | -------------------------------------------------------------------------------- /deepray/utils/tfrecord_inspecter.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | file = "/workspaces/dataset/avazu/raw/train_train.tfrecord" 4 | 5 | raw_dataset = tf.data.TFRecordDataset(file) 6 | 7 | for raw_record in raw_dataset.take(1): 8 | example = tf.train.Example() 9 | example.ParseFromString(raw_record.numpy()) 10 | print(example) 11 | -------------------------------------------------------------------------------- /deepray/workspace0.bzl: -------------------------------------------------------------------------------- 1 | """TensorFlow workspace initialization. Consult the WORKSPACE on how to use it.""" 2 | 3 | load("@rules_foreign_cc//foreign_cc:repositories.bzl", "rules_foreign_cc_dependencies") 4 | load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") 5 | load("@rules_compressor//tensorflow:workspace2.bzl", rules_compressor_deps = "tf_workspace2") 6 | load("@com_github_nelhage_rules_boost//:boost/boost.bzl", "boost_deps") 7 | 8 | def workspace(): 9 | # If a target is bound twice, the later one wins, so we have to do tf bindings 10 | # at the end of the WORKSPACE file.
11 | # This sets up some common toolchains for building targets. For more details, please see 12 | # https://bazelbuild.github.io/rules_foreign_cc/0.10.1/flatten.html#rules_foreign_cc_dependencies 13 | rules_foreign_cc_dependencies() 14 | rules_pkg_dependencies() 15 | rules_compressor_deps() 16 | boost_deps() 17 | 18 | # Alias so it can be loaded without assigning to a different symbol to prevent 19 | # shadowing previous loads and trigger a buildifier warning. 20 | dp_workspace0 = workspace 21 | -------------------------------------------------------------------------------- /docker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x -e 4 | 5 | PY_VERSION=${1:-"3.8"} 6 | TF_VERSION=${2:-"2.9.1"} 7 | CUDA_VERSION=${3:-"11.6.2"} 8 | OS_VERSION=${4:-"20.04"} 9 | 10 | docker pull hailinfufu/deepray-release:latest-py${PY_VERSION}-tf${TF_VERSION}-cu${CUDA_VERSION}-ubuntu${OS_VERSION} 11 | 12 | docker run --gpus all -it \ 13 | --rm=true \ 14 | --name="deepray_dev" \ 15 | -w /workspaces \ 16 | --volume=dev-build:/workspaces \ 17 | --shm-size=1g \ 18 | --device /dev/fuse \ 19 | --network host \ 20 | --privileged \ 21 | hailinfufu/deepray-release:latest-py${PY_VERSION}-tf${TF_VERSION}-cu${CUDA_VERSION}-ubuntu${OS_VERSION} /bin/bash 22 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # [tensorflow.org/deepray](https://tensorflow.org/deepray) 2 | 3 | This directory contains the source for [tensorflow.org/deepray](https://tensorflow.org/deepray). 4 | 5 | It comprises two main components: 6 | 7 | ## 1. Narrative Docs 8 | 9 | Any markdown or notebook files in this directory will be published to tensorflow.org/deepray. 10 | 11 | `tutorials/_toc.yaml` controls the left-nav on the tutorials tab. Make sure to keep that file up to date. 12 | Notify the tensorflow/docs team if you need to make major changes. 13 | 14 | The preferred formatting for TensorFlow notebooks is to use the [tensorflow/docs](https://github.com/tensorflow/docs) [`nbfmt` tool](https://github.com/tensorflow/docs/tree/master/tools/tensorflow_docs/tools). If modifying a tutorial gives you 15 | an unreadable diff use the following commands to re-apply the standard formatting: 16 | 17 | ``` 18 | pip install git+https://github.com/tensorflow/docs 19 | python -m tensorflow_docs.tools.nbfmt {path to notebook file or directory} 20 | ``` 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/tutorials/README.md: -------------------------------------------------------------------------------- 1 | # Deepray Tutorials 2 | 3 | Deepray welcomes and highly encourages tutorial contributions. 4 | 5 | 6 | ## How To Contribute 7 | 8 | Deepray tutorials are created using [Google Colab](https://colab.research.google.com/) 9 | and the Jupyter notebooks are saved to this directory in the repository. To do 10 | this, follow the steps below: 11 | 12 | 1. Create a new branch on your fork of Deepray 13 | 2. Go to [Google Colab](https://colab.research.google.com/) and start a new 14 | notebook using the Deepray example template: 15 | [docs/tutorials/_template.ipynb](_template.ipynb) 16 | 3. Edit the links for the "View source on GitHub" and "Run in Google Colab" 17 | URL boxes so that they match the name of your new example notebook 18 | 4. 
Follow the guidelines of the template 19 | 5. "Save a copy in GitHub" and select your new branch. The notebook should be 20 | named `subpackage_submodule` 21 | 6. Submit the branch as a PR on the Deepray GitHub 22 | -------------------------------------------------------------------------------- /docs/tutorials/_toc.yaml: -------------------------------------------------------------------------------- 1 | toc: 2 | - title: Overview 3 | path: /deepray/overview 4 | - heading: Tutorials 5 | - title: Triplet loss 6 | path: /deepray/tutorials/losses_triplet 7 | - title: Image Ops 8 | path: /deepray/tutorials/image_ops 9 | - title: Normalization layers 10 | path: /deepray/tutorials/layers_normalizations 11 | - title: Weight normalization layer 12 | path: /deepray/tutorials/layers_weightnormalization 13 | - title: LazyAdam optimizer 14 | path: /deepray/tutorials/optimizers_lazyadam 15 | - title: ConditionalGradient Optimizer 16 | path: /deepray/tutorials/optimizers_conditionalgradient 17 | - title: CyclicalLearningRate Schedule 18 | path: /deepray/tutorials/optimizers_cyclicallearningrate 19 | - title: TQDM Progress Bar 20 | path: /deepray/tutorials/tqdm_progress_bar 21 | - title: Seq2Seq for Translation 22 | path: /deepray/tutorials/networks_seq2seq_nmt 23 | - title: Moving Average Optimizer Checkpoint 24 | path: /deepray/tutorials/average_optimizers_callback 25 | - title: Time Stopping Callback 26 | path: /deepray/tutorials/time_stopping 27 | -------------------------------------------------------------------------------- /modelzoo/CV/Classify_images_of_clothing/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from absl import app, flags 7 | 8 | from deepray.core.base_trainer import Trainer 9 | from deepray.core.common import distribution_utils 10 | from deepray.datasets.fashion_mnist import FashionMNIST 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | 15 | def main(_): 16 | _strategy = distribution_utils.get_distribution_strategy() 17 | data_pipe = FashionMNIST() 18 | with distribution_utils.get_strategy_scope(_strategy): 19 | model = tf.keras.Sequential( 20 | [ 21 | tf.keras.layers.Flatten(input_shape=(28, 28)), 22 | tf.keras.layers.Dense(128, activation='relu'), 23 | tf.keras.layers.Dense(10) 24 | ] 25 | ) 26 | 27 | trainer = Trainer( 28 | model=model, 29 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 30 | metrics=['accuracy'], 31 | ) 32 | 33 | train_input_fn = data_pipe(FLAGS.train_data, FLAGS.batch_size, is_training=True) 34 | trainer.fit(train_input=train_input_fn,) 35 | 36 | 37 | if __name__ == "__main__": 38 | flags.mark_flag_as_required("model_dir") 39 | app.run(main) 40 | -------------------------------------------------------------------------------- /modelzoo/CV/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/modelzoo/CV/__init__.py -------------------------------------------------------------------------------- /modelzoo/CV/mnist/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/modelzoo/CV/mnist/__init__.py -------------------------------------------------------------------------------- 
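The tutorials workflow above pairs with `docs/tutorials/_toc.yaml`: every notebook named `subpackage_submodule` needs a matching nav entry in that file. A sketch of the addition for a hypothetical `optimizers_lamb` tutorial (title and path invented for illustration):

```yaml
- title: LAMB Optimizer
  path: /deepray/tutorials/optimizers_lamb
```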
/modelzoo/LanguageModeling/BERT/scripts/data_download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | 18 | to_download=${1:-"all"} 19 | pretrained_to_download=${2:-"wiki_only"} # By default, we don't download the BooksCorpus dataset due to recent issues with the host server 20 | 21 | docker run --runtime=nvidia -v $PWD:/workspace/bert_tf2 \ 22 | --rm --shm-size=1g --ulimit memlock=-1 \ 23 | --ulimit stack=67108864 --ipc=host -t -i \ 24 | bert_tf2 bash -c "bash data/create_datasets_from_start.sh ${to_download} ${pretrained_to_download}" 25 | -------------------------------------------------------------------------------- /modelzoo/LanguageModeling/Multi-label-classification-with-BERT/README.md: -------------------------------------------------------------------------------- 1 | This demo is forked from https://github.com/abyanjan/Toxic-Comment-Classification to show how to calculate tf.metrics.AUC(multi_label=True) -------------------------------------------------------------------------------- /modelzoo/Recommendation/WideDeep/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/modelzoo/Recommendation/WideDeep/model.py -------------------------------------------------------------------------------- /modelzoo/Recommendation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/modelzoo/Recommendation/__init__.py -------------------------------------------------------------------------------- /modelzoo/Recommendation/avazu-ctr-prediction/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | py_library( 6 | name = "flen", 7 | srcs = glob( 8 | [ 9 | "*.py", 10 | "**/*.py", 11 | ], 12 | ), 13 | # deps = [ 14 | # "//deepray/custom_ops/parquet_dataset", 15 | # "//deepray/datasets:datapipeline", 16 | # ], 17 | ) 18 | 19 | py_binary( 20 | name = "ali_parquet_dataset_test", 21 | srcs = ["ali_parquet_dataset_test.py"], 22 | deps = [ 23 | ":ali_parquet_dataset", 24 | ], 25 | ) 26 | 27 | sh_binary( 28 | name = "run_horovod", 29 | srcs = ["run_horovod.sh"], 30 | data = [ 31 | ":flen", 32 | ], 33 | ) 34 | -------------------------------------------------------------------------------- /modelzoo/Recommendation/avazu-ctr-prediction/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/modelzoo/Recommendation/avazu-ctr-prediction/__init__.py -------------------------------------------------------------------------------- /modelzoo/Recommendation/avazu-ctr-prediction/feature_map.csv: -------------------------------------------------------------------------------- 1 | code,name,voc_size,dim,length,dtype,ftype,hash_size 2 | 1,hour,24,11,1,int32,Categorical, 3 | 2,id,45006427,11,1,int32,Categorical, 4 | 3,C1,7,11,1,int32,Categorical, 5 | 4,banner_pos,7,11,1,int32,Categorical, 6 | 5,site_id,4842,11,1,int32,Categorical, 7 | 6,site_domain,7912,11,1,int32,Categorical, 8 | 7,site_category,26,11,1,int32,Categorical, 9 | 8,app_id,9136,11,1,int32,Categorical, 10 | 9,app_domain,580,11,1,int32,Categorical, 11 | 10,app_category,36,11,1,int32,Categorical, 12 | 11,device_id,2895973,11,1,int32,Categorical, 13 | 12,device_ip,7338655,11,1,int32,Categorical, 14 | 13,device_model,8303,11,1,int32,Categorical, 15 | 14,device_type,5,11,1,int32,Categorical, 16 | 15,device_conn_type,4,11,1,int32,Categorical, 17 | 16,C14,2673,11,1,int32,Categorical, 18 | 17,C15,8,11,1,int32,Categorical, 19 | 18,C16,9,11,1,int32,Categorical, 20 | 19,C17,441,11,1,int32,Categorical, 21 | 20,C18,4,11,1,int32,Categorical, 22 | 21,C19,69,11,1,int32,Categorical, 23 | 22,C20,172,11,1,int32,Categorical, 24 | 23,C21,62,11,1,int32,Categorical, 25 | 24,click,1,1,1,int32,Label, -------------------------------------------------------------------------------- /modelzoo/Recommendation/criteo_ctr/Frozen-Graph-TensorFlow/README.md: -------------------------------------------------------------------------------- 1 | # Frozen Graph TensorFlow 2 | 3 | Lei Mao 4 | 5 | ## Introduction 6 | 7 | This repository has the examples of saving, loading, and running inference for frozen graph in TensorFlow 1.x and 2.x. 8 | 9 | ## Files 10 | 11 | ``` 12 | . 
13 | ├── LICENSE.md 14 | ├── README.md 15 | ├── TensorFlow_v1 16 | │   ├── cifar.py 17 | │   ├── cnn.py 18 | │   ├── inspect_signature.py 19 | │   ├── main.py 20 | │   ├── README.md 21 | │   ├── test_pb.py 22 | │   └── utils.py 23 | └── TensorFlow_v2 24 | ├── example_1.py 25 | ├── example_2.py 26 | ├── README.md 27 | └── utils.py 28 | ``` 29 | 30 | ## Blogs 31 | 32 | * [Save, Load and Inference From TensorFlow Frozen Graph](https://leimao.github.io/blog/Save-Load-Inference-From-TF-Frozen-Graph/) 33 | * [Save, Load and Inference From TensorFlow 2.x Frozen Graph](https://leimao.github.io/blog/Save-Load-Inference-From-TF2-Frozen-Graph/) 34 | 35 | ## Examples 36 | 37 | * [TensorFlow 1.x](https://github.com/leimao/Frozen_Graph_TensorFlow/tree/master/TensorFlow_v1) 38 | * [TensorFlow 2.x](https://github.com/leimao/Frozen_Graph_TensorFlow/tree/master/TensorFlow_v2) -------------------------------------------------------------------------------- /modelzoo/Recommendation/criteo_ctr/Frozen-Graph-TensorFlow/TensorFlow_v2/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow import keras 3 | import numpy as np 4 | 5 | 6 | def get_fashion_mnist_data(): 7 | 8 | fashion_mnist = keras.datasets.fashion_mnist 9 | (train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data() 10 | class_names = [ 11 | "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot" 12 | ] 13 | train_images = train_images.astype(np.float32) / 255.0 14 | test_images = test_images.astype(np.float32) / 255.0 15 | 16 | return (train_images, train_labels), (test_images, test_labels) 17 | 18 | 19 | def wrap_frozen_graph(graph_def, inputs, outputs, print_graph=False): 20 | 21 | def _imports_graph_def(): 22 | tf.compat.v1.import_graph_def(graph_def, name="") 23 | 24 | wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, []) 25 | import_graph = wrapped_import.graph 26 | 27 | if print_graph: 28 | print("-" * 50) 29 | print("Frozen model layers: ") 30 | layers = [op.name for op in import_graph.get_operations()] 31 | for layer in layers: 32 | print(layer) 33 | print("-" * 50) 34 | 35 | return wrapped_import.prune( 36 | tf.nest.map_structure(import_graph.as_graph_element, inputs), 37 | tf.nest.map_structure(import_graph.as_graph_element, outputs) 38 | ) 39 | -------------------------------------------------------------------------------- /modelzoo/Recommendation/criteo_ctr/optimize_for_inference.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import tempfile 4 | 5 | import tensorflow as tf 6 | from absl import app, flags, logging 7 | 8 | from dcn_v2 import Ranking 9 | from deepray.datasets.criteo import CriteoTsvReader 10 | from deepray.utils.export import export_to_savedmodel 11 | 12 | FLAGS = flags.FLAGS 13 | 14 | 15 | def main(_): 16 | model = Ranking(interaction="cross", training=False) 17 | data_pipe = CriteoTsvReader(use_synthetic_data=True) 18 | 19 | # Why do we prefer to use only one example to rebuild the model?
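# (Most likely: a single batch is enough to trace the model and create all of
# its variables; nothing is trained here, so feeding more data before export
# would not change the resulting SavedModel.)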
20 | # 21 | train_dataset = data_pipe(FLAGS.train_data, batch_size=1, is_training=True) 22 | for x, y in train_dataset.take(1): 23 | preds = model(x) 24 | 25 | tmp_path = tempfile.mkdtemp(dir='/tmp/') 26 | 27 | src = os.path.join(FLAGS.model_dir, "export_main") 28 | 29 | export_to_savedmodel(model, savedmodel_dir=tmp_path) 30 | 31 | file = os.path.join(src, "saved_model.pb") 32 | if tf.io.gfile.exists(file): 33 | tf.io.gfile.remove(file) 34 | logging.info(f"Replacing {file} with the optimized saved_model.pb") 35 | tf.io.gfile.copy(os.path.join(tmp_path + "_main", "saved_model.pb"), file, overwrite=True) 36 | else: 37 | raise FileNotFoundError(f"{file} does not exist.") 38 | 39 | 40 | if __name__ == "__main__": 41 | app.run(main) 42 | -------------------------------------------------------------------------------- /modelzoo/Recommendation/keras_horovod_dis/start_train.sh: -------------------------------------------------------------------------------- 1 | # gpu_num=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) 2 | # horovodrun -np $gpu_num python keras_horovod_distributed_demo.py 3 | 4 | horovodrun -np 4 python -m examples.Recommendation.keras_horovod_dis.keras_horovod_distributed_demo 5 | -------------------------------------------------------------------------------- /modelzoo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/modelzoo/__init__.py -------------------------------------------------------------------------------- /modelzoo/horovod_test/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ============================================================================== 17 | set -eu 18 | set -o pipefail 19 | 20 | num_gpu=${1:-"4"} 21 | profile=${2:-"false"} 22 | 23 | if [ $num_gpu -gt 1 ]; then 24 | hvd_command="horovodrun -np $num_gpu " 25 | else 26 | hvd_command="" 27 | fi 28 | 29 | if [ "$profile" = "true" ]; then 30 | nsys_command="--timeline-filename $RESULTS_DIR/timeline.json --timeline-mark-cycles" 31 | echo "profile activated" 32 | else 33 | nsys_command="" 34 | fi 35 | 36 | set -x 37 | $hvd_command $nsys_command python tensorflow2_synthetic_benchmark.py 38 | set +x 39 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -ra 3 | doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL DONT_ACCEPT_BLANKLINE 4 | -------------------------------------------------------------------------------- /recommendation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors.
All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recommendation/ranking/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /recommendation/ranking/configs/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /recommendation/ranking/configs/yaml/dcn_v2_criteo_tpu.yaml: -------------------------------------------------------------------------------- 1 | runtime: 2 | distribution_strategy: 'tpu' 3 | task: 4 | model: 5 | bottom_mlp: [512, 256, 64] 6 | embedding_dim: 64 7 | num_dense_features: 13 8 | top_mlp: [1024, 1024, 512, 256, 1] 9 | interaction: 'cross' 10 | vocab_sizes: [39884406, 39043, 17289, 7420, 20263, 3, 7120, 1543, 63, 38532951, 2953546, 403346, 11 | 10, 2208, 11938, 155, 4, 976, 14, 39979771, 25641295, 39664984, 585935, 12972, 12 | 108, 36] 13 | train_data: 14 | global_batch_size: 16384 15 | input_path: path_to_training_data_dir/* 16 | is_training: true 17 | num_shards_per_host: 4 18 | sharding: true 19 | validation_data: 20 | global_batch_size: 16384 21 | input_path: path_to_eval_data_dir/* 22 | is_training: false 23 | sharding: false 24 | trainer: 25 | checkpoint_interval: 85352 26 | eval_tf_function: true 27 | eval_tf_while_loop: false 28 | max_to_keep: 5 29 | train_steps: 256054 30 | train_tf_function: true 31 | train_tf_while_loop: true 32 | use_orbit: true 33 | validation_interval: 85352 34 | validation_steps: 5440 35 | validation_summary_subdir: 'validation' 36 | -------------------------------------------------------------------------------- /recommendation/ranking/configs/yaml/dlrm_criteo_tpu.yaml: -------------------------------------------------------------------------------- 1 | runtime: 2 | distribution_strategy: 'tpu' 3 | task: 4 | model: 5 | bottom_mlp: [512, 256, 64] 6 | embedding_dim: 64 7 | num_dense_features: 13 8 | top_mlp: [1024, 1024, 512, 256, 1] 9 | interaction: 'dot' 10 | vocab_sizes: [39884406, 39043, 17289, 7420, 20263, 3, 7120, 1543, 63, 38532951, 2953546, 403346, 11 | 10, 2208, 11938, 155, 4, 976, 14, 39979771, 25641295, 39664984, 585935, 12972, 12 | 108, 36] 13 | train_data: 14 | global_batch_size: 16384 15 | input_path: path_to_training_data_dir/* 16 | is_training: true 17 | num_shards_per_host: 4 18 | sharding: true 19 | validation_data: 20 | global_batch_size: 16384 21 | input_path: path_to_eval_data_dir/* 22 | is_training: false 23 | sharding: false 24 | trainer: 25 | checkpoint_interval: 85352 26 | eval_tf_function: true 27 | eval_tf_while_loop: false 28 | max_to_keep: 5 29 | train_steps: 256054 30 | train_tf_function: true 31 | train_tf_while_loop: true 32 | use_orbit: true 33 | validation_interval: 85352 34 | validation_steps: 5440 35 | validation_summary_subdir: 'validation' 36 | -------------------------------------------------------------------------------- /recommendation/ranking/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /recommendation/ranking/preprocessing/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Setup configuration for Criteo dataset preprocessing. 15 | 16 | This is used while running TensorFlow Transform on Cloud Dataflow. 17 | """ 18 | 19 | import setuptools 20 | 21 | version = "0.1.0" 22 | 23 | if __name__ == "__main__": 24 | setuptools.setup( 25 | name="criteo_preprocessing", 26 | version=version, 27 | install_requires=["tensorflow-transform"], 28 | packages=setuptools.find_packages(), 29 | ) 30 | -------------------------------------------------------------------------------- /recommendation/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | python -m recommendation.ncf_keras_main \ 5 | --use_synthetic_data \ 6 | --num_gpus=1 \ 7 | --train_dataset_path=/tmp/movielens-data/training_cycle_0/*.tfrecords \ 8 | --input_meta_data_path=/tmp/movielens-data/meta_data \ 9 | --eval_dataset_path=/tmp/movielens-data/eval_data/*.tfrecords 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | typeguard>=2.7,<3.0.0 2 | packaging 3 | pandas 4 | pyarrow 5 | nvidia-ml-py 6 | dllogger@git+https://github.com/NVIDIA/dllogger#egg=dllogger 7 | pudb 8 | tabulate 9 | tensorflow_hub 10 | sentencepiece 11 | boto3 12 | fastavro -------------------------------------------------------------------------------- /third_party/BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # Apache 2.0 2 | -------------------------------------------------------------------------------- /third_party/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/third_party/__init__.py -------------------------------------------------------------------------------- /third_party/arrow/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/third_party/arrow/BUILD -------------------------------------------------------------------------------- /third_party/aws-c-common.BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # AWS C Common 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_library( 11 | name = "aws-c-common", 12 | srcs = glob([ 13 | "include/aws/common/*.h", 14 | 
"include/aws/common/private/*.h", 15 | "source/*.c", 16 | ]) + select({ 17 | "@bazel_tools//src/conditions:windows": glob([ 18 | "source/windows/*.c", 19 | ]), 20 | "//conditions:default": glob([ 21 | "source/posix/*.c", 22 | ]), 23 | }), 24 | hdrs = [ 25 | "include/aws/common/config.h", 26 | ], 27 | defines = [], 28 | includes = [ 29 | "include", 30 | ], 31 | textual_hdrs = glob([ 32 | "include/**/*.inl", 33 | ]), 34 | deps = [], 35 | ) 36 | 37 | genrule( 38 | name = "config_h", 39 | srcs = [ 40 | "include/aws/common/config.h.in", 41 | ], 42 | outs = [ 43 | "include/aws/common/config.h", 44 | ], 45 | cmd = "sed 's/cmakedefine/undef/g' $< > $@", 46 | ) 47 | -------------------------------------------------------------------------------- /third_party/aws-c-event-stream.BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # AWS C Event Stream 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_library( 11 | name = "aws-c-event-stream", 12 | srcs = glob([ 13 | "include/**/*.h", 14 | "source/**/*.c", 15 | ]), 16 | hdrs = [ 17 | ], 18 | defines = [], 19 | includes = [ 20 | "include", 21 | ], 22 | deps = [ 23 | "@aws-c-common", 24 | "@aws-checksums", 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /third_party/aws-checksums.BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # AWS CheckSums 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | licenses(["notice"]) # Apache 2.0 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_library( 11 | name = "aws-checksums", 12 | srcs = glob([ 13 | "include/aws/checksums/*.h", 14 | "include/aws/checksums/private/*.h", 15 | "source/*.c", 16 | ]) + [ 17 | "crc_hw.c", 18 | ], 19 | hdrs = [], 20 | defines = [], 21 | includes = [ 22 | "include", 23 | ], 24 | deps = [], 25 | ) 26 | 27 | genrule( 28 | name = "crc_hw_c", 29 | outs = ["crc_hw.c"], 30 | cmd = "\n".join([ 31 | "cat <<'EOF' >$@", 32 | "#include ", 33 | "#include ", 34 | "int aws_checksums_do_cpu_id(int32_t *cpuid) {", 35 | " return 0;", 36 | "}", 37 | "uint32_t aws_checksums_crc32c_hw(const uint8_t *input, int length, uint32_t previousCrc32) {", 38 | " return aws_checksums_crc32c_sw(input, length, previousCrc32);", 39 | "}", 40 | "EOF", 41 | ]), 42 | ) 43 | -------------------------------------------------------------------------------- /third_party/brotli.BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Brotli library 3 | 4 | package(default_visibility = ["//visibility:public"]) 5 | 6 | licenses(["notice"]) # MIT license 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_library( 11 | name = "brotli", 12 | srcs = glob([ 13 | "c/common/*.c", 14 | "c/common/*.h", 15 | "c/dec/*.c", 16 | "c/dec/*.h", 17 | "c/enc/*.c", 18 | "c/enc/*.h", 19 | "c/include/brotli/*.h", 20 | ]), 21 | hdrs = [], 22 | defines = [], 23 | includes = [ 24 | "c/dec", 25 | "c/include", 26 | ], 27 | linkopts = [], 28 | visibility = ["//visibility:public"], 29 | ) 30 | -------------------------------------------------------------------------------- /third_party/cucollection/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/third_party/cucollection/BUILD 
-------------------------------------------------------------------------------- /third_party/cucollection/cuco.BUILD: -------------------------------------------------------------------------------- 1 | load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | licenses(["notice"]) 6 | 7 | 8 | exports_files(["LICENSE"]) 9 | 10 | cc_library( 11 | name = "cuco_hash_table", 12 | hdrs = glob(["include/**"]), 13 | includes = [ 14 | "include", 15 | ], 16 | deps = [ 17 | "@local_config_cuda//cuda:cuda_headers", 18 | ], 19 | visibility = ["//visibility:public"], 20 | ) 21 | -------------------------------------------------------------------------------- /third_party/cutlass.BUILD: -------------------------------------------------------------------------------- 1 | load("@local_config_cuda//cuda:build_defs.bzl", "cuda_header_library", "if_cuda") 2 | 3 | licenses(["notice"]) # BSD 3-Clause 4 | 5 | package(default_visibility = ["//visibility:public"]) 6 | 7 | cuda_header_library( 8 | name = "cutlass", 9 | hdrs = if_cuda(glob([ 10 | "include/cutlass/**", 11 | ])), 12 | includes = if_cuda([ 13 | "include", 14 | ]), 15 | strip_include_prefix = "include", 16 | visibility = ["//visibility:public"], 17 | ) 18 | -------------------------------------------------------------------------------- /third_party/double_conversion.BUILD: -------------------------------------------------------------------------------- 1 | # vim: ft=bzl 2 | load("@//deepray:copts.bzl", "DEFAULT_CPP_COPTS") 3 | 4 | licenses(["notice"]) 5 | 6 | exports_files(["LICENSE"]) 7 | 8 | cc_library( 9 | name = "double-conversion", 10 | srcs = glob(["double-conversion/*.cc"]), 11 | hdrs = glob(["double-conversion/*.h"]), 12 | copts = DEFAULT_CPP_COPTS, 13 | includes = ["."], 14 | visibility = ["//visibility:public"], 15 | ) 16 | -------------------------------------------------------------------------------- /third_party/eigen3.BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | filegroup( 6 | name = "all_srcs", 7 | srcs = glob(["**"]), 8 | ) 9 | 10 | cmake( 11 | name = "eigen3", 12 | build_args = [ 13 | "--verbose", 14 | "-j `nproc`", 15 | ], 16 | # These options help CMake to find prebuilt OpenBLAS, which will be copied into 17 | # $EXT_BUILD_DEPS/openblas by the cmake_external script 18 | cache_entries = { 19 | "BLAS_VENDOR": "OpenBLAS", 20 | "BLAS_LIBRARIES": "$EXT_BUILD_DEPS/openblas/lib/libopenblas.a", 21 | }, 22 | lib_source = ":all_srcs", 23 | linkopts = [ 24 | "-pthread", 25 | ], 26 | out_headers_only = True, 27 | out_include_dir = "include/eigen3", 28 | # Dependency on other cmake_external rule; can also depend on cc_import, cc_library rules 29 | deps = [ 30 | "@com_github_OpenMathLib_OpenBLAS//:openblas", 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /third_party/farmhash.BUILD: -------------------------------------------------------------------------------- 1 | licenses(["notice"]) # MIT 2 | 3 | exports_files(["COPYING"]) 4 | 5 | config_setting( 6 | name = "windows", 7 | values = { 8 | "cpu": "x64_windows", 9 | }, 10 | ) 11 | 12 | cc_library( 13 | name = "farmhash", 14 | srcs = ["src/farmhash.cc"], 15 | hdrs = ["src/farmhash.h"], 16 | # Disable __builtin_expect support on Windows 17 | copts = select({ 18 | ":windows": 
["/DFARMHASH_OPTIONAL_BUILTIN_EXPECT"], 19 | "//conditions:default": [], 20 | }), 21 | includes = ["src/."], 22 | strip_include_prefix = "src", 23 | visibility = ["//visibility:public"], 24 | ) 25 | -------------------------------------------------------------------------------- /third_party/hadoop/BUILD: -------------------------------------------------------------------------------- 1 | package( 2 | default_visibility = ["//visibility:public"], 3 | licenses = ["notice"], # Apache 2.0 4 | ) 5 | 6 | exports_files(["LICENSE.txt"]) 7 | 8 | cc_library( 9 | name = "hdfs", 10 | hdrs = ["hdfs.h"], 11 | includes = ["."], 12 | ) 13 | -------------------------------------------------------------------------------- /third_party/libcuckoo.BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # Boost Software License, Version 1.0. 4 | 5 | cc_library( 6 | name = "libcuckoo", 7 | hdrs = glob(["libcuckoo/*.hh"]), 8 | includes = ["."], 9 | copts = [ 10 | "-std=c++11", 11 | ], 12 | visibility = ["//visibility:public"], 13 | ) -------------------------------------------------------------------------------- /third_party/murmurhash.BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # # BSD 3-Clause 4 | 5 | genrule( 6 | name = "build_murmurhash", 7 | srcs = glob(["**"]) + [ 8 | "@local_config_cc//:toolchain", 9 | ], 10 | outs = [ 11 | "libmurmurhashh.a", 12 | ], 13 | cmd = """ 14 | set -e 15 | WORK_DIR=$$PWD 16 | DEST_DIR=$$PWD/$(@D) 17 | export CXXFLAGS="-msse4.2 -msse4.1 -mavx -mavx2 -mfma -mfpmath=both -frecord-gcc-switches -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++17 -fPIC -DNDEBUG -D_GLIBCXX_USE_CXX11_ABI=0 -D__STDC_FORMAT_MACROS -fno-canonical-system-headers -Wno-builtin-macro-redefined -D__DATE__=redacted -D__TIMESTAMP__=redacted -D__TIME__=redacted" 18 | pushd external/murmurhash 19 | g++ $$CXXFLAGS -c src/MurmurHash1.h src/MurmurHash2.h src/MurmurHash3.h src/MurmurHash1.cpp src/MurmurHash2.cpp src/MurmurHash3.cpp 20 | ar -crv libmurmurhash.a MurmurHash1.o MurmurHash2.o MurmurHash3.o 21 | cp libmurmurhash.a $$DEST_DIR/libmurmurhashh.a 22 | popd 23 | """, 24 | ) 25 | 26 | cc_library( 27 | name = "murmurhash", 28 | hdrs = [ 29 | "src/MurmurHash1.h", 30 | "src/MurmurHash2.h", 31 | "src/MurmurHash3.h" 32 | ], 33 | srcs = [ 34 | "libmurmurhashh.a", 35 | ], 36 | visibility = ["//visibility:public"], 37 | linkstatic = 1, 38 | ) -------------------------------------------------------------------------------- /third_party/openblas.BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | filegroup( 6 | name = "all_srcs", 7 | srcs = glob(["**"]), 8 | ) 9 | 10 | cmake( 11 | name = "openblas", 12 | build_args = [ 13 | "--verbose", 14 | "-j `nproc`", 15 | ], 16 | cache_entries = { 17 | "BUILD_TESTING": "OFF", 18 | }, 19 | lib_source = ":all_srcs", 20 | out_lib_dir = select({ 21 | "@platforms//os:linux": "lib", 22 | "//conditions:default": "lib", 23 | }), 24 | out_static_libs = select({ 25 | "@platforms//os:macos": ["libopenblas.a"], 26 | "@platforms//os:linux": ["libopenblas.a"], 27 | "@platforms//os:windows": ["openblas.lib"], 28 | }), 29 | ) 30 | 
-------------------------------------------------------------------------------- /third_party/rapidjson.BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # MIT/JSON license 4 | 5 | cc_library( 6 | name = "rapidjson", 7 | srcs = glob([ 8 | "include/**/*.h", 9 | ]), 10 | copts = [], 11 | includes = [ 12 | "include", 13 | ], 14 | ) 15 | -------------------------------------------------------------------------------- /third_party/sparsehash.BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # BSD 3-Clause 4 | 5 | cc_library( 6 | name = "sparsehash", 7 | hdrs = glob([ 8 | "sparsehash/**", 9 | ]), 10 | includes = ["."], 11 | visibility = ["//visibility:public"], 12 | ) -------------------------------------------------------------------------------- /third_party/sparsehash_c11.BUILD: -------------------------------------------------------------------------------- 1 | package(default_visibility = ["//visibility:public"]) 2 | 3 | licenses(["notice"]) # BSD 3-Clause 4 | 5 | cc_library( 6 | name = "dense_hash_map", 7 | hdrs = glob([ 8 | "sparsehash/**", 9 | ]), 10 | includes = ["."], 11 | visibility = ["//visibility:public"], 12 | ) 13 | 14 | -------------------------------------------------------------------------------- /third_party/thrift/BUILD: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepray-AI/deepray/83ffd09e121f3df4530014a04d4ce8b2a23c27a2/third_party/thrift/BUILD -------------------------------------------------------------------------------- /third_party/thrift/extra/BUILD: -------------------------------------------------------------------------------- 1 | # Copyright 2021 curoky(cccuroky@gmail.com). 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | load("@rules_cc//cc:defs.bzl", "cc_library") 16 | 17 | cc_library( 18 | name = "config", 19 | hdrs = ["config.h"], 20 | include_prefix = "thrift", 21 | visibility = ["//visibility:public"], 22 | ) 23 | -------------------------------------------------------------------------------- /third_party/xsimd.BUILD: -------------------------------------------------------------------------------- 1 | load("@rules_foreign_cc//foreign_cc:defs.bzl", "cmake") 2 | 3 | package(default_visibility = ["//visibility:public"]) 4 | 5 | licenses(["notice"]) # BSD 3-Clause 6 | 7 | exports_files(["LICENSE"]) 8 | 9 | filegroup( 10 | name = "all_srcs", 11 | srcs = glob(["**"]), 12 | ) 13 | 14 | cmake( 15 | name = "xsimd", 16 | build_args = [ 17 | "-j `nproc`", 18 | ], 19 | lib_source = ":all_srcs", 20 | out_headers_only = True, 21 | ) 22 | -------------------------------------------------------------------------------- /tools/build_base_container.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x -e 4 | 5 | PY_VERSION=${1:-"3.8"} 6 | TF_VERSION=${2:-"2.9.1"} 7 | CUDA_VERSION=${3:-"11.6.2"} 8 | 9 | docker build \ 10 | -f tools/docker/base_container.Dockerfile \ 11 | --network=host \ 12 | --build-arg CUDA_VERSION=${CUDA_VERSION} \ 13 | --build-arg TF_VERSION=${TF_VERSION} \ 14 | --build-arg TF_PACKAGE=tensorflow-gpu \ 15 | --build-arg PY_VERSION=${PY_VERSION} \ 16 | --target base_container \ 17 | -t hailinfufu/deepray-release:latest-py${PY_VERSION}-tf${TF_VERSION}-cu${CUDA_VERSION}-ubuntu20.04 ./ 18 | -------------------------------------------------------------------------------- /tools/docker/bashrc.bash: -------------------------------------------------------------------------------- 1 | # Set breakpoint() in Python to call pudb 2 | export PYTHONBREAKPOINT=pudb.set_trace 3 | 4 | export CUDA_HOME="/usr/local/cuda-11.6" 5 | export PATH="$CUDA_HOME/bin:$PATH" 6 | export LD_LIBRARY_PATH="$CUDA_HOME/lib64:$LD_LIBRARY_PATH" 7 | export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH 8 | 9 | # Enable jemalloc to optimize memory usage 10 | export LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libjemalloc.so" -------------------------------------------------------------------------------- /tools/docker/pre-commit.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | COPY tools/install_deps /install_deps 4 | RUN pip install -r /install_deps/yapf.txt 5 | 6 | COPY tools/install_deps/buildifier.sh ./buildifier.sh 7 | RUN bash buildifier.sh 8 | 9 | COPY tools/install_deps/clang-format.sh ./clang-format.sh 10 | RUN bash clang-format.sh 11 | 12 | WORKDIR /deepray 13 | 14 | 15 | CMD ["python", "tools/format.py"] 16 | -------------------------------------------------------------------------------- /tools/docs/BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Doc generator 3 | 4 | licenses(["notice"]) # Apache 2.0 5 | 6 | exports_files(["LICENSE"]) 7 | 8 | package( 9 | default_visibility = ["//deepray:__subpackages__"], 10 | ) 11 | 12 | py_binary( 13 | name = "build_docs", 14 | srcs = ["build_docs.py"], 15 | deps = [ 16 | "//deepray", 17 | ], 18 | ) 19 | -------------------------------------------------------------------------------- /tools/docs/Readme.md: -------------------------------------------------------------------------------- 1 | ## 1. 
Generated API docs 2 | 3 | [tensorflow.org/deepray/api_docs/python/dp](https://tensorflow.org/deepray/api_docs/python/dp) 4 | 5 | `build_docs.py` controls this docs generation. To test-run it: 6 | 7 | ```bash 8 | # Install dependencies: 9 | pip install -r tools/install_deps/doc_requirements.txt 10 | 11 | # Build tool: 12 | bazel build //tools/docs:build_docs 13 | 14 | # Generate API doc: 15 | # Use the current branch 16 | bazel-bin/tools/docs/build_docs --git_branch=$(git rev-parse --abbrev-ref HEAD) 17 | # or specify a branch explicitly 18 | bazel-bin/tools/docs/build_docs --git_branch=master --output_dir=/tmp/dp_api 19 | ``` 20 | -------------------------------------------------------------------------------- /tools/format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from subprocess import check_call, CalledProcessError 3 | 4 | 5 | def check_bash_call(string): 6 | check_call(["bash", "-c", string]) 7 | 8 | 9 | def _run_format_and_yapf(): 10 | files_changed = False 11 | 12 | try: 13 | check_bash_call("find . -name '*.py' -print0 | xargs -0 yapf --style=./.yapf -dr") 14 | except CalledProcessError: 15 | check_bash_call("find . -name '*.py' -print0 | xargs -0 yapf --style=./.yapf -ir") 16 | files_changed = True 17 | 18 | if files_changed: 19 | print("Some files have changed.") 20 | print("Please use 'find . -name '*.py' -print0 | xargs -0 yapf --style=./.yapf -ir' before commit.") 21 | else: 22 | print("No formatting needed.") 23 | 24 | if files_changed: 25 | exit(1) 26 | 27 | 28 | def run_format_and_yapf(): 29 | try: 30 | _run_format_and_yapf() 31 | except CalledProcessError as error: 32 | print("Yapf check returned exit code", error.returncode) 33 | exit(error.returncode) 34 | 35 | 36 | if __name__ == "__main__": 37 | run_format_and_yapf() 38 | -------------------------------------------------------------------------------- /tools/install_deps/buildifier.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | set -x -e 17 | 18 | wget -O /usr/local/bin/buildifier https://github.com/bazelbuild/buildtools/releases/download/v6.1.2/buildifier-linux-amd64 19 | chmod +x /usr/local/bin/buildifier 20 | -------------------------------------------------------------------------------- /tools/install_deps/clang-format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | 17 | wget --progress=dot:mega -O /usr/local/bin/clang-format-9 https://github.com/DoozyX/clang-format-lint-action/raw/master/clang-format/clang-format9 18 | chmod +x /usr/local/bin/clang-format-9 19 | ln -s /usr/local/bin/clang-format-9 /usr/local/bin/clang-format 20 | -------------------------------------------------------------------------------- /tools/install_deps/doc_requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/tensorflow/docs@99113f26039f6c042df7f2898e05019dbcdf3675 2 | pyyaml 3 | -------------------------------------------------------------------------------- /tools/install_deps/install_bazelisk.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2023 The Deepray Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | set -x -e 17 | 18 | # Downloads bazelisk to ${output_dir} as `bazel`. 19 | date 20 | 21 | output_dir=${1:-"/usr/local/bin"} 22 | 23 | mkdir -p "${output_dir}" 24 | wget --progress=dot:mega -O ${output_dir}/bazel https://github.com/bazelbuild/bazelisk/releases/latest/download/bazelisk-linux-$([ $(uname -m) = "aarch64" ] && echo "arm64" || echo "amd64") 25 | 26 | chmod u+x "${output_dir}/bazel" 27 | 28 | if [[ ! ":$PATH:" =~ :${output_dir}/?: ]]; then 29 | PATH="${output_dir}:$PATH" 30 | fi 31 | 32 | which bazel 33 | date 34 | -------------------------------------------------------------------------------- /tools/install_deps/install_clang.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2023 The Deepray Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | set -x -e 17 | 18 | CLANG_VERSION=${1:-"16"} 19 | 20 | apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ 21 | lsb-release wget software-properties-common gnupg 22 | 23 | wget https://apt.llvm.org/llvm.sh \ 24 | --progress=dot:mega -O /tmp/llvm-install.sh && 25 | chmod u+x /tmp/llvm-install.sh && 26 | /tmp/llvm-install.sh ${CLANG_VERSION} && 27 | /usr/bin/clang-${CLANG_VERSION} --version 28 | -------------------------------------------------------------------------------- /tools/install_deps/install_cmake.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2023 The Deepray Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | set -x -e 17 | 18 | CMAKE_VERSION=${1:-"3.23.2"} 19 | 20 | apt autoremove cmake -y 21 | wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.sh \ 22 | --progress=dot:mega -O /tmp/cmake-install.sh && 23 | chmod u+x /tmp/cmake-install.sh && 24 | mkdir /usr/bin/cmake && 25 | /tmp/cmake-install.sh --skip-license --prefix=/usr && 26 | rm /tmp/cmake-install.sh && 27 | cmake --version 28 | -------------------------------------------------------------------------------- /tools/install_deps/install_python.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright 2023 The Deepray Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | set -x -e 17 | 18 | PY_VERSION=${1:-"3.8"} 19 | 20 | apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \ 21 | python${PY_VERSION} \ 22 | python${PY_VERSION}-dev \ 23 | python${PY_VERSION}-distutils && 24 | apt-get clean && rm -rf /var/lib/apt/lists/* 25 | 26 | ln -s /usr/bin/python${PY_VERSION} /usr/bin/python 27 | 28 | curl -O https://bootstrap.pypa.io/get-pip.py && 29 | python get-pip.py && 30 | rm get-pip.py 31 | 32 | pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple -------------------------------------------------------------------------------- /tools/install_deps/pytest.txt: -------------------------------------------------------------------------------- 1 | pytest~=6.2.5 2 | pytest-xdist~=1.31 3 | pytest-extra-durations~=0.1.3 4 | scikit-learn~=1.2.2 5 | scikit-image~=0.20.0 6 | Pillow~=9.4.0 7 | tqdm>=4.36.1 -------------------------------------------------------------------------------- /tools/install_deps/tensorflow-cpu.txt: -------------------------------------------------------------------------------- 1 | tensorflow-cpu~=2.9.1 -------------------------------------------------------------------------------- /tools/install_deps/tensorflow.txt: -------------------------------------------------------------------------------- 1 | tensorflow~=2.9.3 -------------------------------------------------------------------------------- /tools/install_deps/typedapi.txt: -------------------------------------------------------------------------------- 1 | typedapi~=0.2.0 2 | -------------------------------------------------------------------------------- /tools/install_deps/yapf.txt: -------------------------------------------------------------------------------- 1 | yapf == 0.32.0 -------------------------------------------------------------------------------- /tools/pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # usage: bash tools/pre-commit.sh 3 | 4 | 5 | set -e 6 | 7 | if [ -z "${DEEPRAY_DEV_CONTAINER}" ]; then 8 | export DOCKER_BUILDKIT=1 9 | docker build -t deepray_formatting -f tools/docker/pre-commit.Dockerfile . 10 | 11 | export MSYS_NO_PATHCONV=1 12 | docker run --rm -t -v "$(pwd -P):/deepray" deepray_formatting 13 | else 14 | python tools/format.py 15 | fi 16 | -------------------------------------------------------------------------------- /tools/releases/tf_auditwheel_patch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2019 The TensorFlow Authors. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ============================================================================== 16 | 17 | set -e -x 18 | 19 | SITE_PKG_LOCATION=$(python -c "import site; print(site.getsitepackages()[0])") 20 | TF_SHARED_LIBRARY_NAME=$(grep -r TF_SHARED_LIBRARY_NAME .bazelrc | awk -F= '{print$2}') 21 | POLICY_JSON="${SITE_PKG_LOCATION}/auditwheel/policy/manylinux-policy.json" 22 | sed -i "s/libresolv.so.2\"/libresolv.so.2\", $TF_SHARED_LIBRARY_NAME/g" $POLICY_JSON 23 | -------------------------------------------------------------------------------- /tools/run_build.sh: -------------------------------------------------------------------------------- 1 | # usage: bash tools/run_build.sh 2 | # by default uses docker buildkit. 3 | # to disable it: 4 | # DOCKER_BUILDKIT=0 bash tools/run_build.sh 5 | set -e 6 | 7 | export DOCKER_BUILDKIT=1 8 | docker build -f tools/docker/sanity_check.Dockerfile --target=${1} ./ 9 | -------------------------------------------------------------------------------- /tools/run_google_cloud_tests.sh: -------------------------------------------------------------------------------- 1 | set -x -e 2 | 3 | bash tools/run_gpu_tests.sh 4 | -------------------------------------------------------------------------------- /tools/run_gpu_tests.sh: -------------------------------------------------------------------------------- 1 | # usage: bash tools/run_gpu_tests.sh 2 | 3 | set -x -e 4 | 5 | export DOCKER_BUILDKIT=1 6 | docker build \ 7 | -f tools/docker/build_wheel.Dockerfile \ 8 | --target dp_gpu_tests \ 9 | --build-arg TF_VERSION=2.9.3 \ 10 | --build-arg PY_VERSION=3.9 \ 11 | -t dp_gpu_tests ./ 12 | docker run --rm -t --gpus=all dp_gpu_tests 13 | -------------------------------------------------------------------------------- /tools/run_sanity_check.sh: -------------------------------------------------------------------------------- 1 | # usage: bash tools/run_sanity_check.sh 2 | 3 | set -e 4 | 5 | export DOCKER_BUILDKIT=1 6 | docker build -f tools/docker/sanity_check.Dockerfile ./ 7 | --------------------------------------------------------------------------------